This commit is contained in:
2026-07-01 09:22:26 +00:00
parent 4aa1dab283
commit aa47a19e98
53 changed files with 4721 additions and 77 deletions
+1
View File
@@ -0,0 +1 @@
"""data_analytics 重型插件包。"""
+16
View File
@@ -0,0 +1,16 @@
{
"agents": [
{
"name": "analyst",
"role": "数据分析师",
"system_prompt": "你是一位严谨、克制的数据分析师。任务进来后:1) 先用 s3_list_objects/s3_peek 看几行了解结构;2) 决定用 python_executor(小数据,单机 pandas)或 ray_submit(大数据,分布式);3) 执行分析、得出明确结论,必要时给出图表链接或样例数据。注意:你只能读取 S3,**不能写入**。如果用户让你上传/删除/修改对象,请明确告知做不到。",
"tools": ["s3_list_objects", "s3_peek", "s3_get_object", "ray_submit", "python_executor"],
"skills": [],
"peers": []
}
],
"orchestration": {
"type": "react",
"entry": "analyst"
}
}
+150
View File
@@ -0,0 +1,150 @@
"""data_analytics 插件 API:凭证 CRUD + 分析任务提交/查询/事件流。
挂载后路径前缀为 /api/v1/plugin/data_analytics/...,跟核心 API 完全独立。
所有数据库读写都走 organization actor 的代理方法(确保分布式模式下不跨 actor
共享 SQLAlchemy session)。
"""
from __future__ import annotations
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from kilostar.utils.access import Accessor, TokenData
from kilostar.utils.ray_hook import ray_actor_hook
# Router that collects every data_analytics endpoint declared in this module.
router = APIRouter(tags=["data_analytics"])
# ─── Schemas ────────────────────────────────────────────────────────────────
class CredentialCreate(BaseModel):
    """Request body for ``POST /credentials``.

    Length limits mirror the ``s3_credential`` table columns
    (``display_name`` 100, ``endpoint_url`` 255, ``region`` 50) so that
    oversized values are rejected at the API boundary instead of being
    stored.
    """

    # Human-readable label; must be non-empty, like the two key fields below.
    display_name: str = Field(..., min_length=1, max_length=100)
    # Custom endpoint for self-hosted S3-compatible stores; None means default.
    endpoint_url: Optional[str] = Field(None, max_length=255)
    region: str = Field("us-east-1", max_length=50)
    access_key: str = Field(..., min_length=1)
    secret_key: str = Field(..., min_length=1)
class JobCreate(BaseModel):
    """Request body for ``POST /jobs``."""

    # Identifier of the stored credential the job should run with.
    cred_id: str
    # Free-text description of the analysis to perform; 1 to 2000 characters.
    description: str = Field(..., min_length=1, max_length=2000)
# ─── Helpers ────────────────────────────────────────────────────────────────
def _get_org():
    """Return the handle of the data_analytics organization actor.

    Returns:
        The ``org_data_analytics`` attribute of the hook object returned by
        ``ray_actor_hook("org_data_analytics")``.

    Raises:
        HTTPException: 503 when the handle cannot be obtained for any reason.
    """
    try:
        return ray_actor_hook("org_data_analytics").org_data_analytics
    except Exception as e:
        # Broad catch is deliberate: this is the API boundary and any lookup
        # failure means "plugin not ready". Chain the cause so the original
        # error stays visible in server logs.
        raise HTTPException(503, f"data_analytics 插件未就绪:{e}") from e
# ─── Credentials ────────────────────────────────────────────────────────────
@router.get("/credentials")
async def list_credentials(
    token_data: TokenData = Depends(Accessor.get_current_user),
):
    """List the credentials of the authenticated user.

    Returns:
        ``{"credentials": [...]}`` with whatever the organization's
        ``cred_list`` proxy returns for the caller's username.
    """
    organization = _get_org()
    return {"credentials": await organization.cred_list.remote(token_data.username)}
@router.post("/credentials")
async def create_credential(
    payload: CredentialCreate,
    token_data: TokenData = Depends(Accessor.get_current_user),
):
    """Store a new credential for the authenticated user.

    Returns:
        The value returned by the organization's ``cred_create`` proxy.
    """
    organization = _get_org()
    # Keyword arguments forwarded verbatim to the organization proxy.
    fields = {
        "user_id": token_data.username,
        "display_name": payload.display_name,
        "access_key": payload.access_key,
        "secret_key": payload.secret_key,
        "endpoint_url": payload.endpoint_url,
        "region": payload.region,
    }
    return await organization.cred_create.remote(**fields)
@router.delete("/credentials/{cred_id}")
async def delete_credential(
    cred_id: str,
    token_data: TokenData = Depends(Accessor.get_current_user),
):
    """Delete one of the authenticated user's credentials.

    Raises:
        HTTPException: 404 when the organization reports nothing was deleted.
    """
    organization = _get_org()
    deleted = await organization.cred_delete.remote(cred_id, token_data.username)
    if deleted:
        return {"status": "ok"}
    raise HTTPException(404, "凭证不存在或不属于当前用户")
# ─── Jobs ───────────────────────────────────────────────────────────────────
@router.post("/jobs")
async def create_job(
    payload: JobCreate,
    token_data: TokenData = Depends(Accessor.get_current_user),
):
    """Create an analysis job for the authenticated user.

    Returns:
        The value returned by the organization's ``job_create`` proxy.

    Raises:
        HTTPException: 400 when ``job_create`` rejects the request with a
            ``ValueError``.
    """
    org = _get_org()
    try:
        return await org.job_create.remote(
            user_id=token_data.username,
            cred_id=payload.cred_id,
            description=payload.description,
        )
    except ValueError as e:
        # Errors raised inside a Ray actor arrive wrapped: the wrapper is an
        # instance of the original type, but its str() is the remote
        # traceback. Prefer the wrapped ``cause`` so the client only sees the
        # validation message; fall back to the exception itself otherwise.
        cause = getattr(e, "cause", None)
        detail = str(cause if isinstance(cause, BaseException) else e)
        raise HTTPException(400, detail) from e
@router.get("/jobs")
async def list_jobs(
    token_data: TokenData = Depends(Accessor.get_current_user),
):
    """List the analysis jobs of the authenticated user.

    Returns:
        ``{"jobs": [...]}`` with whatever the organization's ``job_list``
        proxy returns for the caller's username.
    """
    organization = _get_org()
    return {"jobs": await organization.job_list.remote(token_data.username)}
@router.get("/jobs/{job_id}")
async def get_job(
    job_id: str,
    token_data: TokenData = Depends(Accessor.get_current_user),
):
    """Return a single job of the authenticated user.

    Raises:
        HTTPException: 404 when the organization returns ``None`` for the job.
    """
    organization = _get_org()
    job = await organization.job_get.remote(job_id, token_data.username)
    if job is not None:
        return job
    raise HTTPException(404, "任务不存在")
@router.get("/jobs/{job_id}/stream")
async def stream_job(
    job_id: str,
    token_data: TokenData = Depends(Accessor.get_current_user),
):
    """Forward the organization event stream of a job as Server-Sent Events.

    Raises:
        HTTPException: 404 when the job is not returned for this user;
            409 when the job has no ``org_task_id`` yet.
    """
    import json
    import re

    org = _get_org()
    row = await org.job_get.remote(job_id, token_data.username)
    if row is None:
        raise HTTPException(404, "任务不存在")
    org_task_id = row.get("org_task_id")
    if not org_task_id:
        raise HTTPException(409, "任务尚未投递到 organization")

    async def _generate():
        async for event in await org.stream.remote(org_task_id):
            payload = event if isinstance(event, str) else json.dumps(event, ensure_ascii=False)
            # An SSE event ends at the first blank line, so a payload that
            # contains line breaks must be sent as one ``data:`` line per
            # payload line; otherwise the frame is cut short and the rest is
            # dropped by the client. Single-line payloads are emitted exactly
            # as before.
            lines = re.split(r"\r\n|\r|\n", payload)
            yield "".join(f"data: {line}\n" for line in lines) + "\n"

    return StreamingResponse(_generate(), media_type="text/event-stream")
@@ -0,0 +1 @@
"""data_analytics organization 实现。"""
+235
View File
@@ -0,0 +1,235 @@
"""data_analytics 插件本地 SQLite 表与 DAO。
注意:本插件用的 ``DeclarativeBase`` 跟核心 PG 完全独立,避免元数据空间串场。
所有数据落到 ``data/plugin/data_analytics/_data/data_analytics.db``。
"""
from __future__ import annotations
from datetime import datetime
from typing import List, Optional
from sqlalchemy import DateTime, String, Text, select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from kilostar.utils.crypto import decrypt_dict_secrets, encrypt_dict_secrets
class Base(DeclarativeBase):
    """Metadata namespace private to the data_analytics plugin, isolated from the core PG models."""

    pass
class S3Credential(Base):
    """ORM model for one stored S3 credential (table ``s3_credential``)."""

    __tablename__ = "s3_credential"

    # Primary key of the credential.
    cred_id: Mapped[str] = mapped_column(String(64), primary_key=True)
    # Owner of the credential; indexed because listings filter on it.
    user_id: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    display_name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Optional custom endpoint; NULL when none was supplied.
    endpoint_url: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    region: Mapped[str] = mapped_column(String(50), default="us-east-1")
    # Both key columns hold the output of encrypt_dict_secrets as written by
    # CredentialDAO.upsert, not the plaintext keys.
    access_key: Mapped[str] = mapped_column(String(255), nullable=False)
    secret_key: Mapped[str] = mapped_column(String(255), nullable=False)
    # Naive timestamps taken from datetime.utcnow.
    created_at: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, nullable=False
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False
    )
class AnalysisJob(Base):
    """ORM model for one analysis job record (table ``analysis_job``)."""

    __tablename__ = "analysis_job"

    # Primary key of the job.
    job_id: Mapped[str] = mapped_column(String(64), primary_key=True)
    # Owner of the job; indexed because listings filter on it.
    user_id: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    # Credential the job was created with; plain column, no foreign key.
    cred_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
    description: Mapped[str] = mapped_column(Text, nullable=False)
    # Starts as "pending"; later values are written through JobDAO.update.
    status: Mapped[str] = mapped_column(String(20), default="pending", index=True)
    # Task id on the organization side; NULL until JobDAO.update stores one.
    org_task_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
    # Free-text outcome; NULL until JobDAO.update stores one.
    result: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    # Naive timestamps taken from datetime.utcnow; created_at is indexed
    # because listings order by it.
    created_at: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, nullable=False, index=True
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False
    )
class CredentialDAO:
    """DAO for S3 credentials: secrets are encrypted on write and decrypted on read."""

    SENSITIVE_KEYS = ("access_key", "secret_key")

    def __init__(self, sm: async_sessionmaker[AsyncSession]):
        self._sm = sm

    @staticmethod
    def _row_to_dict(row: S3Credential, *, include_secrets: bool) -> dict:
        """Convert an ORM row into a plain dict.

        With ``include_secrets=True`` the whole dict is passed through
        ``decrypt_dict_secrets`` (used by tools that hand the keys to boto3).
        Otherwise ``secret_key`` is dropped and ``access_key`` is replaced by
        a masked form of its decrypted value.
        """
        created = row.created_at.isoformat() if row.created_at else None
        updated = row.updated_at.isoformat() if row.updated_at else None
        data = {
            "cred_id": row.cred_id,
            "user_id": row.user_id,
            "display_name": row.display_name,
            "endpoint_url": row.endpoint_url,
            "region": row.region,
            "access_key": row.access_key,
            "secret_key": row.secret_key,
            "created_at": created,
            "updated_at": updated,
        }
        if include_secrets:
            return decrypt_dict_secrets(data)

        plain = decrypt_dict_secrets({"access_key": data["access_key"]}).get("access_key", "")
        if len(plain) > 6:
            # Keep the first four and last two characters, hide the rest.
            data["access_key"] = plain[:4] + "***" + plain[-2:]
        else:
            data["access_key"] = "***"
        data.pop("secret_key", None)
        return data

    async def list_by_user(self, user_id: str) -> List[dict]:
        """Return every credential of ``user_id`` in masked form."""
        async with self._sm() as session:
            query = select(S3Credential).where(S3Credential.user_id == user_id)
            result = await session.execute(query)
            records = result.scalars().all()
            return [self._row_to_dict(rec, include_secrets=False) for rec in records]

    async def get(self, cred_id: str, *, include_secrets: bool = False) -> Optional[dict]:
        """Return the credential with ``cred_id`` as a dict, or ``None`` if absent."""
        async with self._sm() as session:
            query = select(S3Credential).where(S3Credential.cred_id == cred_id)
            result = await session.execute(query)
            record = result.scalar_one_or_none()
            if record is not None:
                return self._row_to_dict(record, include_secrets=include_secrets)
            return None

    async def upsert(
        self,
        cred_id: str,
        user_id: str,
        display_name: str,
        access_key: str,
        secret_key: str,
        endpoint_url: Optional[str] = None,
        region: str = "us-east-1",
    ) -> dict:
        """Insert the credential, or overwrite the row that already has ``cred_id``.

        On overwrite every field except ``cred_id`` and ``user_id`` is
        replaced. Returns the stored row in masked form.
        """
        encrypted = encrypt_dict_secrets(
            {"access_key": access_key, "secret_key": secret_key}
        )
        async with self._sm() as session:
            query = select(S3Credential).where(S3Credential.cred_id == cred_id)
            record = (await session.execute(query)).scalar_one_or_none()
            if record is None:
                record = S3Credential(
                    cred_id=cred_id,
                    user_id=user_id,
                    display_name=display_name,
                    endpoint_url=endpoint_url,
                    region=region,
                    access_key=encrypted["access_key"],
                    secret_key=encrypted["secret_key"],
                )
            else:
                record.display_name = display_name
                record.endpoint_url = endpoint_url
                record.region = region
                record.access_key = encrypted["access_key"]
                record.secret_key = encrypted["secret_key"]
            # Same persistence tail for both the insert and the overwrite path.
            session.add(record)
            await session.commit()
            await session.refresh(record)
            return self._row_to_dict(record, include_secrets=False)

    async def delete(self, cred_id: str, user_id: str) -> bool:
        """Delete the credential if it belongs to ``user_id``; return whether a row was removed."""
        async with self._sm() as session:
            query = select(S3Credential).where(
                S3Credential.cred_id == cred_id, S3Credential.user_id == user_id
            )
            record = (await session.execute(query)).scalar_one_or_none()
            if record is not None:
                await session.delete(record)
                await session.commit()
                return True
            return False
class JobDAO:
    """DAO for analysis job records."""

    def __init__(self, sm: async_sessionmaker[AsyncSession]):
        self._sm = sm

    @staticmethod
    def _row_to_dict(row: AnalysisJob) -> dict:
        """Convert an ORM row into a plain dict with ISO-formatted timestamps."""
        created = row.created_at.isoformat() if row.created_at else None
        updated = row.updated_at.isoformat() if row.updated_at else None
        return {
            "job_id": row.job_id,
            "user_id": row.user_id,
            "cred_id": row.cred_id,
            "description": row.description,
            "status": row.status,
            "org_task_id": row.org_task_id,
            "result": row.result,
            "created_at": created,
            "updated_at": updated,
        }

    async def create(
        self,
        job_id: str,
        user_id: str,
        description: str,
        cred_id: Optional[str] = None,
    ) -> dict:
        """Insert a new job row and return it as a dict."""
        record = AnalysisJob(
            job_id=job_id,
            user_id=user_id,
            description=description,
            cred_id=cred_id,
        )
        async with self._sm() as session:
            session.add(record)
            await session.commit()
            await session.refresh(record)
            return self._row_to_dict(record)

    async def update(
        self,
        job_id: str,
        *,
        status: Optional[str] = None,
        result: Optional[str] = None,
        org_task_id: Optional[str] = None,
    ) -> Optional[dict]:
        """Overwrite the given fields of a job; ``None`` arguments leave the field as is.

        Returns the updated row as a dict, or ``None`` when no row has ``job_id``.
        """
        changes = (
            ("status", status),
            ("result", result),
            ("org_task_id", org_task_id),
        )
        async with self._sm() as session:
            query = select(AnalysisJob).where(AnalysisJob.job_id == job_id)
            record = (await session.execute(query)).scalar_one_or_none()
            if record is None:
                return None
            for column, value in changes:
                if value is not None:
                    setattr(record, column, value)
            session.add(record)
            await session.commit()
            await session.refresh(record)
            return self._row_to_dict(record)

    async def list_by_user(self, user_id: str, limit: int = 50) -> List[dict]:
        """Return up to ``limit`` jobs of ``user_id``, newest first."""
        async with self._sm() as session:
            query = select(AnalysisJob).where(AnalysisJob.user_id == user_id)
            query = query.order_by(AnalysisJob.created_at.desc()).limit(limit)
            records = (await session.execute(query)).scalars().all()
            return [self._row_to_dict(rec) for rec in records]

    async def get(self, job_id: str) -> Optional[dict]:
        """Return the job with ``job_id`` as a dict, or ``None`` if absent."""
        async with self._sm() as session:
            query = select(AnalysisJob).where(AnalysisJob.job_id == job_id)
            record = (await session.execute(query)).scalar_one_or_none()
            if record:
                return self._row_to_dict(record)
            return None
@@ -0,0 +1,135 @@
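"""data_analytics organization: manages this plugin's SQLite metadata and injects the credential ctx.
Credentials are handed to the tools through the ``S3_CREDS_VAR`` ContextVar so that they do not pollute the agent tool signature
(the tools the agent sees take no cred parameter, so the model cannot pass one by mistake).
The API layer reaches the DAOs across actors through the ``cred_*`` / ``job_*`` proxy methods exposed by this class,
which ensures that actors never share a SQLAlchemy session directly in distributed mode.
"""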
from __future__ import annotations
import contextvars
import uuid
from typing import Any, Callable, Dict, List, Optional
from kilostar.plugin_runtime.base_organization import BaseOrganization
from kilostar.plugin_runtime.event import OrgEvent
from .db import Base, CredentialDAO, JobDAO
# S3 credentials (plaintext) of the task currently being processed; tools read
# them with ``S3_CREDS_VAR.get()``. The value is None when no credential is set.
S3_CREDS_VAR: contextvars.ContextVar[Optional[Dict[str, Any]]] = contextvars.ContextVar(
    "data_analytics_s3_creds", default=None
)
class DataAnalyticsOrganization(BaseOrganization):
    """Data-analysis organization that works against S3.

    Besides the agent entry point (``react``), the class exposes ``cred_*``
    and ``job_*`` proxy methods that the API layer calls to reach the
    plugin's DAOs.
    """

    # Task states that are treated as final and written back to SQLite.
    _TERMINAL_STATUSES = ("completed", "failed")

    async def setup(self) -> None:
        """Initialise the local database and create the shared DAO instances."""
        await super().setup()
        await self.init_local_db([Base])
        # DAO instances shared by tools and by the API proxy methods.
        self.cred_dao = CredentialDAO(self._session_maker)
        self.job_dao = JobDAO(self._session_maker)

    async def on_first_install(self) -> None:
        """Log a hint that credentials still have to be configured."""
        self.logger.info(
            "data_analytics installed; configure S3 credentials in dashboard."
        )

    async def react(
        self,
        task_description: str,
        ctx: Dict[str, Any],
        emit: Callable[[OrgEvent], Any],
    ) -> Any:
        """Run the task with the credential named in ``ctx["cred_id"]`` exposed via ``S3_CREDS_VAR``.

        When a credential is resolved, its display name is also stored in
        ``ctx["s3_cred_display"]``. Without a ``cred_id`` (or before the DAO
        exists) the variable is set to ``None`` for the duration of the call.

        Raises:
            RuntimeError: when ``cred_id`` is given but no such credential exists.
        """
        cred = None
        cred_id = ctx.get("cred_id")
        if cred_id and getattr(self, "cred_dao", None) is not None:
            cred = await self.cred_dao.get(cred_id, include_secrets=True)
            if cred is None:
                raise RuntimeError(f"S3 凭证 {cred_id} 不存在")
            ctx["s3_cred_display"] = cred.get("display_name")

        token = S3_CREDS_VAR.set(cred)
        try:
            return await super().react(task_description, ctx, emit)
        finally:
            # Restore the previous value so plaintext keys do not stay in the
            # context once this task has finished or failed.
            S3_CREDS_VAR.reset(token)

    # ─── Credential proxies (called by the API layer) ──────────────
    async def cred_list(self, user_id: str) -> List[dict]:
        """Return the credentials of ``user_id`` as returned by the DAO."""
        return await self.cred_dao.list_by_user(user_id)

    async def cred_create(
        self,
        user_id: str,
        display_name: str,
        access_key: str,
        secret_key: str,
        endpoint_url: Optional[str] = None,
        region: str = "us-east-1",
    ) -> dict:
        """Store a new credential under a freshly generated id and return the DAO's result."""
        cred_id = uuid.uuid4().hex
        return await self.cred_dao.upsert(
            cred_id=cred_id,
            user_id=user_id,
            display_name=display_name,
            access_key=access_key,
            secret_key=secret_key,
            endpoint_url=endpoint_url,
            region=region,
        )

    async def cred_delete(self, cred_id: str, user_id: str) -> bool:
        """Delete the credential if it belongs to ``user_id``; return the DAO's result."""
        return await self.cred_dao.delete(cred_id, user_id)

    # ─── Job proxies ───────────────────────────────────────────────
    async def job_create(
        self, user_id: str, cred_id: str, description: str
    ) -> dict:
        """Record a job, submit it as an organization task and return its ids.

        Returns:
            ``{"job_id": ..., "task_id": ..., "status": "running"}``.

        Raises:
            ValueError: when the credential is missing or owned by another user.
            Exception: whatever ``submit`` raises; the job row is marked
                ``failed`` before the error propagates.
        """
        # Verify the credential belongs to the caller.
        cred = await self.cred_dao.get(cred_id, include_secrets=False)
        if cred is None or cred.get("user_id") != user_id:
            raise ValueError("凭证不存在或不属于当前用户")

        job_id = uuid.uuid4().hex
        await self.job_dao.create(
            job_id=job_id,
            user_id=user_id,
            description=description,
            cred_id=cred_id,
        )
        # Submit the organization task; its id is stored on the job so the
        # frontend can follow the event stream.
        try:
            task_id = await self.submit(
                description, {"user_id": user_id, "cred_id": cred_id, "job_id": job_id}
            )
        except Exception as exc:
            # Without this the row would stay "pending" forever with no
            # org_task_id to resolve it.
            await self.job_dao.update(job_id, status="failed", result=str(exc))
            raise
        await self.job_dao.update(job_id, status="running", org_task_id=task_id)
        return {"job_id": job_id, "task_id": task_id, "status": "running"}

    async def _sync_job_with_org(self, row: dict) -> dict:
        """Attach the live organization task state to ``row`` and persist a terminal state.

        Adds ``task_status`` / ``task_result`` / ``task_error`` when the
        organization knows the task. When the task has reached a terminal
        state that SQLite does not reflect yet, the job row is updated so the
        outcome can still be queried after a restart. ``row`` is modified in
        place and returned.
        """
        org_task_id = row.get("org_task_id")
        if not org_task_id:
            return row
        ts = await self.status(org_task_id)
        if ts is None:
            return row

        task_status = ts.get("status")
        row["task_status"] = task_status
        row["task_result"] = ts.get("result")
        row["task_error"] = ts.get("error")

        if task_status in self._TERMINAL_STATUSES and row.get("status") != task_status:
            outcome = ts.get("result") if task_status == "completed" else ts.get("error")
            result_text = str(outcome) if outcome is not None else None
            await self.job_dao.update(
                row["job_id"],
                status=task_status,
                result=result_text,
            )
            row["status"] = task_status
            # JobDAO.update leaves ``result`` untouched for None, so mirror
            # that here to keep the returned row equal to the stored one.
            if result_text is not None:
                row["result"] = result_text
        return row

    async def job_list(self, user_id: str) -> List[dict]:
        """Return the jobs of ``user_id``, refreshing those not yet in a terminal state.

        Rows that are still pending/running are synced with the organization
        so that a polled list reflects completion without the job having to
        be fetched individually first.
        """
        rows = await self.job_dao.list_by_user(user_id)
        for row in rows:
            if row.get("status") not in self._TERMINAL_STATUSES:
                await self._sync_job_with_org(row)
        return rows

    async def job_get(self, job_id: str, user_id: str) -> Optional[dict]:
        """Return the job if it exists and belongs to ``user_id``, else ``None``.

        The returned dict carries the latest organization task state when one
        is available.
        """
        row = await self.job_dao.get(job_id)
        if row is None or row.get("user_id") != user_id:
            return None
        return await self._sync_job_with_org(row)
@@ -0,0 +1,174 @@
import { useState } from 'react';
import { Plus, Trash2, Loader2, Key, Eye, EyeOff } from 'lucide-react';
import { usePluginContext } from './client';
import type { S3Credential } from './types';
// Path prefix shared by every request this panel sends.
const API_BASE = '/api/v1/plugin/data_analytics';
/** Props of CredentialPanel. */
interface Props {
  // Credentials to render in the list.
  credentials: S3Credential[];
  // When true a spinner is shown instead of the list.
  loading: boolean;
  // Invoked after a credential was created or deleted successfully.
  onChanged: () => void;
}
/**
 * Card that lists the given S3 credentials, with a collapsible form to add a
 * credential and a per-row button to delete one.
 */
export function CredentialPanel({ credentials, loading, onChanged }: Props) {
  const { client } = usePluginContext();
  const [showForm, setShowForm] = useState(false);
  const [showSecret, setShowSecret] = useState(false);
  const [busy, setBusy] = useState(false);
  const [error, setError] = useState('');
  const [form, setForm] = useState({
    display_name: '',
    endpoint_url: '',
    region: 'us-east-1',
    access_key: '',
    secret_key: '',
  });
  // Restore the form to its defaults, clear the error and hide the secret again.
  const reset = () => {
    setForm({ display_name: '', endpoint_url: '', region: 'us-east-1', access_key: '', secret_key: '' });
    setError('');
    setShowSecret(false);
  };
  // Validate the three required fields, POST the credential, then reset and
  // close the form and notify the parent. On failure the `detail` field of the
  // error response (or a fallback message) is shown inside the form.
  const submit = async () => {
    if (!form.display_name.trim() || !form.access_key.trim() || !form.secret_key.trim()) {
      setError('显示名 / Access Key / Secret Key 必填');
      return;
    }
    setBusy(true);
    setError('');
    try {
      await client.post(`${API_BASE}/credentials`, {
        display_name: form.display_name.trim(),
        // An empty endpoint is sent as null; an empty region falls back to us-east-1.
        endpoint_url: form.endpoint_url.trim() || null,
        region: form.region.trim() || 'us-east-1',
        access_key: form.access_key,
        secret_key: form.secret_key,
      });
      reset();
      setShowForm(false);
      onChanged();
    } catch (e: unknown) {
      const msg = (e as { response?: { data?: { detail?: string } } }).response?.data?.detail;
      setError(msg || '保存失败');
    } finally {
      setBusy(false);
    }
  };
  // Ask for confirmation, DELETE the credential and notify the parent.
  // A failed request is only logged to the console.
  const remove = async (cred_id: string) => {
    if (!confirm('确定删除该凭证?删除后该凭证下的任务将无法继续运行。')) return;
    try {
      await client.delete(`${API_BASE}/credentials/${cred_id}`);
      onChanged();
    } catch (e) {
      console.error(e);
    }
  };
  // Layout: header with the toggle button, the optional form, then either a
  // spinner, an empty-state box or the credential rows.
  return (
    <div className="bg-bg-card rounded-2xl border border-border-primary p-5">
      <div className="flex items-center justify-between mb-4">
        <div>
          <h3 className="font-semibold text-text-primary flex items-center gap-2">
            <Key size={16} className="text-accent" /> S3
          </h3>
          <p className="text-xs text-text-muted mt-0.5">访 SQLite</p>
        </div>
        <button
          className="px-3 py-1.5 text-xs rounded-lg bg-accent text-white hover:opacity-90 transition flex items-center gap-1.5"
          onClick={() => { setShowForm((s) => !s); setError(''); }}
        >
          <Plus size={14} /> {showForm ? '取消' : '新增'}
        </button>
      </div>
      {showForm && (
        <div className="space-y-3 mb-4 p-4 bg-bg-secondary rounded-xl border border-border-secondary">
          <input
            className="w-full px-3 py-2 text-sm rounded-lg bg-bg-base border border-border-primary focus:outline-none focus:border-accent"
            placeholder="显示名(如 prod-aws"
            value={form.display_name}
            onChange={(e) => setForm({ ...form, display_name: e.target.value })}
          />
          <input
            className="w-full px-3 py-2 text-sm rounded-lg bg-bg-base border border-border-primary focus:outline-none focus:border-accent"
            placeholder="Endpoint URL(可选,自托管 S3 / MinIO 填写)"
            value={form.endpoint_url}
            onChange={(e) => setForm({ ...form, endpoint_url: e.target.value })}
          />
          <input
            className="w-full px-3 py-2 text-sm rounded-lg bg-bg-base border border-border-primary focus:outline-none focus:border-accent"
            placeholder="Region(默认 us-east-1"
            value={form.region}
            onChange={(e) => setForm({ ...form, region: e.target.value })}
          />
          <input
            className="w-full px-3 py-2 text-sm font-mono rounded-lg bg-bg-base border border-border-primary focus:outline-none focus:border-accent"
            placeholder="Access Key"
            value={form.access_key}
            onChange={(e) => setForm({ ...form, access_key: e.target.value })}
          />
          <div className="relative">
            <input
              type={showSecret ? 'text' : 'password'}
              className="w-full px-3 py-2 pr-10 text-sm font-mono rounded-lg bg-bg-base border border-border-primary focus:outline-none focus:border-accent"
              placeholder="Secret Key"
              value={form.secret_key}
              onChange={(e) => setForm({ ...form, secret_key: e.target.value })}
            />
            <button
              type="button"
              className="absolute right-2 top-1/2 -translate-y-1/2 text-text-muted hover:text-text-primary"
              onClick={() => setShowSecret((s) => !s)}
            >
              {showSecret ? <EyeOff size={14} /> : <Eye size={14} />}
            </button>
          </div>
          {error && <div className="text-xs text-danger">{error}</div>}
          <button
            className="w-full px-3 py-2 text-sm rounded-lg bg-accent text-white hover:opacity-90 disabled:opacity-50 transition flex items-center justify-center gap-2"
            onClick={submit}
            disabled={busy}
          >
            {busy && <Loader2 size={14} className="animate-spin" />}
          </button>
        </div>
      )}
      {loading ? (
        <div className="flex items-center justify-center py-8 text-text-muted">
          <Loader2 size={20} className="animate-spin" />
        </div>
      ) : credentials.length === 0 ? (
        <div className="text-sm text-text-muted text-center py-8 border border-dashed border-border-primary rounded-xl">
        </div>
      ) : (
        <div className="space-y-2">
          {credentials.map((c) => (
            <div
              key={c.cred_id}
              className="flex items-center justify-between p-3 bg-bg-secondary rounded-xl border border-border-secondary"
            >
              <div className="min-w-0 flex-1">
                <div className="text-sm font-medium text-text-primary truncate">{c.display_name}</div>
                <div className="text-[11px] text-text-muted font-mono mt-0.5">
                  {c.endpoint_url || 'aws-s3'} · {c.region} · {c.access_key}
                </div>
              </div>
              <button
                className="p-1.5 text-text-muted hover:text-danger transition"
                onClick={() => remove(c.cred_id)}
                title="删除"
              >
                <Trash2 size={14} />
              </button>
            </div>
          ))}
        </div>
      )}
    </div>
  );
}
@@ -0,0 +1,157 @@
import { useCallback, useEffect, useState } from 'react';
import { BarChart3, Plus, Loader2, ListChecks } from 'lucide-react';
import { usePluginContext } from './client';
import { CredentialPanel } from './CredentialPanel';
import { NewJobDialog } from './NewJobDialog';
import { JobDetail } from './JobDetail';
import type { S3Credential, AnalysisJob } from './types';
// Path prefix shared by every request the dashboard sends.
const API_BASE = '/api/v1/plugin/data_analytics';
/** Props of Dashboard. */
interface Props {
  // Plugin name; rendered in small print in the page header.
  pluginName: string;
}
/**
 * Plugin landing page: credential panel on top, job list below, plus the
 * dialogs for creating a job and for inspecting one.
 */
export function Dashboard({ pluginName }: Props) {
  const { client } = usePluginContext();
  const [credentials, setCredentials] = useState<S3Credential[]>([]);
  const [credLoading, setCredLoading] = useState(true);
  const [jobs, setJobs] = useState<AnalysisJob[]>([]);
  const [jobLoading, setJobLoading] = useState(true);
  const [showNewJob, setShowNewJob] = useState(false);
  const [openJobId, setOpenJobId] = useState<string | null>(null);
  const [error, setError] = useState('');
  // Fetch the credential list; a failure is surfaced in the shared error banner.
  const loadCredentials = useCallback(async () => {
    setCredLoading(true);
    try {
      const resp = await client.get<{ credentials: S3Credential[] }>(`${API_BASE}/credentials`);
      setCredentials(resp.data.credentials || []);
    } catch (e: unknown) {
      const msg = (e as { response?: { data?: { detail?: string } } }).response?.data?.detail;
      setError(msg || '加载凭证失败');
    } finally {
      setCredLoading(false);
    }
  }, [client]);
  // Fetch the job list; a failure is surfaced in the shared error banner.
  const loadJobs = useCallback(async () => {
    setJobLoading(true);
    try {
      const resp = await client.get<{ jobs: AnalysisJob[] }>(`${API_BASE}/jobs`);
      setJobs(resp.data.jobs || []);
    } catch (e: unknown) {
      const msg = (e as { response?: { data?: { detail?: string } } }).response?.data?.detail;
      setError(msg || '加载任务失败');
    } finally {
      setJobLoading(false);
    }
  }, [client]);
  // Initial load of both lists.
  useEffect(() => {
    loadCredentials();
    loadJobs();
  }, [loadCredentials, loadJobs]);
  // Poll the job list every 5 seconds so status changes become visible.
  useEffect(() => {
    const t = setInterval(loadJobs, 5000);
    return () => clearInterval(t);
  }, [loadJobs]);
  // The job spinner is only shown while the list is still empty, so the
  // periodic reloads do not replace an already rendered list.
  return (
    <div className="h-full overflow-y-auto p-6 bg-bg-base">
      <div className="max-w-5xl mx-auto space-y-6">
        <div className="flex items-center gap-3 pb-2">
          <div className="w-10 h-10 rounded-xl bg-accent-light text-accent flex items-center justify-center">
            <BarChart3 size={20} />
          </div>
          <div>
            <h2 className="text-lg font-bold text-text-primary"></h2>
            <p className="text-xs text-text-muted mt-0.5">
              S3 agent python_executor / ray_submit{' '}
              <span className="font-mono text-[10px] opacity-70">{pluginName}</span>
            </p>
          </div>
        </div>
        {error && (
          <div className="p-3 bg-danger-bg text-danger text-sm rounded-xl border border-danger/20">{error}</div>
        )}
        <CredentialPanel credentials={credentials} loading={credLoading} onChanged={loadCredentials} />
        <div className="bg-bg-card rounded-2xl border border-border-primary p-5">
          <div className="flex items-center justify-between mb-4">
            <div>
              <h3 className="font-semibold text-text-primary flex items-center gap-2">
                <ListChecks size={16} className="text-accent" />
              </h3>
              <p className="text-xs text-text-muted mt-0.5"> 5 </p>
            </div>
            <button
              className="px-3 py-1.5 text-xs rounded-lg bg-accent text-white hover:opacity-90 transition flex items-center gap-1.5 disabled:opacity-50"
              onClick={() => setShowNewJob(true)}
              disabled={credentials.length === 0}
              title={credentials.length === 0 ? '请先添加凭证' : ''}
            >
              <Plus size={14} />
            </button>
          </div>
          {jobLoading && jobs.length === 0 ? (
            <div className="flex items-center justify-center py-8 text-text-muted">
              <Loader2 size={20} className="animate-spin" />
            </div>
          ) : jobs.length === 0 ? (
            <div className="text-sm text-text-muted text-center py-8 border border-dashed border-border-primary rounded-xl">
            </div>
          ) : (
            <div className="space-y-2">
              {jobs.map((j) => (
                <button
                  key={j.job_id}
                  className="w-full text-left p-3 bg-bg-secondary rounded-xl border border-border-secondary hover:border-accent transition flex items-center justify-between gap-3"
                  onClick={() => setOpenJobId(j.job_id)}
                >
                  <div className="min-w-0 flex-1">
                    <div className="text-sm text-text-primary truncate">{j.description}</div>
                    <div className="text-[11px] text-text-muted mt-0.5 font-mono">
                      {j.job_id.slice(0, 8)} · {j.created_at?.slice(0, 19).replace('T', ' ')}
                    </div>
                  </div>
                  <StatusBadge status={j.status} />
                </button>
              ))}
            </div>
          )}
        </div>
      </div>
      {showNewJob && (
        <NewJobDialog
          credentials={credentials}
          onClose={() => setShowNewJob(false)}
          onCreated={loadJobs}
        />
      )}
      {openJobId && <JobDetail jobId={openJobId} onClose={() => setOpenJobId(null)} />}
    </div>
  );
}
/** Small coloured pill showing a job status; unknown statuses use the `pending` colours. */
function StatusBadge({ status }: { status: string }) {
  const palette: Record<string, string> = {
    pending: 'bg-bg-base text-text-muted border-border-primary',
    running: 'bg-warning-bg text-warning border-warning/20',
    completed: 'bg-success-bg text-success border-success/20',
    failed: 'bg-danger-bg text-danger border-danger/20',
  };
  const tone = palette[status] || palette.pending;
  const className = `text-[10px] font-medium px-2 py-1 rounded-lg border ${tone} shrink-0`;
  return <span className={className}>{status}</span>;
}
@@ -0,0 +1,174 @@
import { useEffect, useRef, useState } from 'react';
import { Loader2, X, Activity } from 'lucide-react';
import { usePluginContext } from './client';
import type { AnalysisJob } from './types';
// Path prefix shared by every request the detail dialog sends.
const API_BASE = '/api/v1/plugin/data_analytics';
/** Props of JobDetail. */
interface Props {
  // Id of the job to display and stream.
  jobId: string;
  // Invoked when the close button is clicked.
  onClose: () => void;
}
/** One entry of the event log. All fields are optional. */
interface StreamEvent {
  type?: string;
  ts?: number;
  payload?: unknown;
  // Set by JobDetail when a received `data:` value is not valid JSON.
  raw?: string;
}
/**
 * Modal showing one job: its status, description, error and result (refreshed
 * by polling) plus a live log of the events received from the stream endpoint.
 */
export function JobDetail({ jobId, onClose }: Props) {
  const { client, token, apiBase } = usePluginContext();
  const [job, setJob] = useState<AnalysisJob | null>(null);
  const [events, setEvents] = useState<StreamEvent[]>([]);
  const [loading, setLoading] = useState(true);
  const eventBoxRef = useRef<HTMLDivElement | null>(null);
  // Initial load plus background polling every 4 seconds.
  useEffect(() => {
    // Prevents state updates from a request that resolves after cleanup.
    let cancelled = false;
    const fetchJob = async () => {
      try {
        const resp = await client.get<AnalysisJob>(`${API_BASE}/jobs/${jobId}`);
        if (!cancelled) setJob(resp.data);
      } catch (e) {
        console.error('fetch job failed', e);
      } finally {
        if (!cancelled) setLoading(false);
      }
    };
    fetchJob();
    const t = setInterval(fetchJob, 4000);
    return () => {
      cancelled = true;
      clearInterval(t);
    };
  }, [client, jobId]);
  // SSE event stream (uses fetch + ReadableStream because EventSource does not support custom headers).
  useEffect(() => {
    const controller = new AbortController();
    const run = async () => {
      try {
        const url = `${apiBase || ''}${API_BASE}/jobs/${jobId}/stream`;
        const resp = await fetch(url, {
          headers: { Authorization: `Bearer ${token}` },
          signal: controller.signal,
        });
        if (!resp.body) return;
        const reader = resp.body.getReader();
        const decoder = new TextDecoder('utf-8');
        // Holds the trailing, not yet terminated part of the stream.
        let buf = '';
        // eslint-disable-next-line no-constant-condition
        while (true) {
          const { value, done } = await reader.read();
          if (done) break;
          buf += decoder.decode(value, { stream: true });
          // Events are separated by a blank line; the last piece may be
          // incomplete and is kept for the next chunk.
          const parts = buf.split('\n\n');
          buf = parts.pop() || '';
          for (const part of parts) {
            // Only the first `data:` line of an event is used.
            const line = part.split('\n').find((l) => l.startsWith('data:'));
            if (!line) continue;
            const payload = line.slice(5).trim();
            try {
              setEvents((prev) => [...prev, JSON.parse(payload)]);
            } catch {
              // Not JSON: keep the text as a raw entry.
              setEvents((prev) => [...prev, { raw: payload }]);
            }
          }
        }
      } catch (e) {
        // Aborting on cleanup is expected and not reported.
        if ((e as Error).name !== 'AbortError') console.error('SSE error', e);
      }
    };
    run();
    return () => controller.abort();
  }, [apiBase, jobId, token]);
  // Scroll the event box to the bottom whenever a new event arrives.
  useEffect(() => {
    if (eventBoxRef.current) {
      eventBoxRef.current.scrollTop = eventBoxRef.current.scrollHeight;
    }
  }, [events]);
  // Live task fields (task_status / task_result / task_error) take precedence
  // over the stored job status where both exist.
  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/40 p-4">
      <div className="w-full max-w-3xl max-h-[85vh] bg-bg-card rounded-2xl border border-border-primary shadow-xl flex flex-col">
        <div className="flex items-center justify-between p-4 border-b border-border-primary">
          <div className="min-w-0">
            <h3 className="font-semibold text-text-primary flex items-center gap-2">
              <Activity size={16} className="text-accent" />
            </h3>
            <span className="text-[11px] font-mono text-text-muted">{jobId}</span>
          </div>
          <button className="p-1 text-text-muted hover:text-text-primary" onClick={onClose}>
            <X size={16} />
          </button>
        </div>
        <div className="flex-1 min-h-0 overflow-y-auto p-5 space-y-4">
          {loading ? (
            <div className="flex items-center justify-center py-12 text-text-muted">
              <Loader2 size={20} className="animate-spin" />
            </div>
          ) : job ? (
            <>
              <div className="space-y-2">
                <Field label="状态" value={job.task_status || job.status} />
                <Field label="描述" value={job.description} />
                {job.task_error && <Field label="错误" value={job.task_error} danger />}
              </div>
              {job.task_result !== undefined && job.task_result !== null && (
                <div>
                  <div className="text-xs text-text-secondary mb-1.5"></div>
                  <pre className="text-xs font-mono whitespace-pre-wrap break-words bg-bg-secondary border border-border-secondary rounded-lg p-3 max-h-64 overflow-auto">
                    {typeof job.task_result === 'string'
                      ? job.task_result
                      : JSON.stringify(job.task_result, null, 2)}
                  </pre>
                </div>
              )}
              <div>
                <div className="text-xs text-text-secondary mb-1.5">SSE</div>
                <div
                  ref={eventBoxRef}
                  className="text-[11px] font-mono bg-bg-secondary border border-border-secondary rounded-lg p-3 max-h-72 overflow-auto space-y-1"
                >
                  {events.length === 0 ? (
                    <span className="text-text-muted">()</span>
                  ) : (
                    events.map((e, i) => (
                      <div key={i} className="text-text-secondary">
                        <span className="text-accent">{e.type || 'event'}</span>{' '}
                        {e.payload !== undefined ? (
                          <span>{JSON.stringify(e.payload)}</span>
                        ) : e.raw ? (
                          <span>{e.raw}</span>
                        ) : null}
                      </div>
                    ))
                  )}
                </div>
              </div>
            </>
          ) : (
            <div className="text-sm text-text-muted text-center py-8"></div>
          )}
        </div>
      </div>
    </div>
  );
}
/** Label/value row; `danger` renders the value in the danger colour. */
function Field({ label, value, danger }: { label: string; value: string; danger?: boolean }) {
  const tone = danger ? 'text-danger' : 'text-text-primary';
  const labelNode = <div className="text-xs text-text-muted w-16 shrink-0 pt-0.5">{label}</div>;
  const valueNode = <div className={`text-sm flex-1 break-words ${tone}`}>{value}</div>;
  return (
    <div className="flex gap-3">
      {labelNode}
      {valueNode}
    </div>
  );
}
@@ -0,0 +1,110 @@
import { useState } from 'react';
import { Loader2, Send, X } from 'lucide-react';
import { usePluginContext } from './client';
import type { S3Credential } from './types';
// Path prefix shared by every request the dialog sends.
const API_BASE = '/api/v1/plugin/data_analytics';
/** Props of NewJobDialog. */
interface Props {
  // Credentials offered in the selector; the first one is preselected.
  credentials: S3Credential[];
  // Invoked to close the dialog.
  onClose: () => void;
  // Invoked after a job was submitted successfully.
  onCreated: () => void;
}
/**
 * Modal dialog that submits a new analysis job.
 *
 * The user picks one of the given S3 credentials and describes the analysis;
 * on submit the dialog POSTs `{ cred_id, description }` to `${API_BASE}/jobs`,
 * then calls `onCreated()` followed by `onClose()`. Validation and request
 * failures are shown inline and leave the dialog open.
 */
export function NewJobDialog({ credentials, onClose, onCreated }: Props) {
  const { client } = usePluginContext();
  const [credId, setCredId] = useState(credentials[0]?.cred_id || '');
  const [description, setDescription] = useState('');
  const [busy, setBusy] = useState(false);
  const [error, setError] = useState('');

  // useState only seeds credId on the first render. If `credentials` arrives or
  // changes afterwards, the <select> still displays its first option while the
  // state would stay '' (or point at a removed credential). Derive the id that
  // is actually shown so that what the user sees is what gets submitted.
  const selectedCredId = credentials.some((c) => c.cred_id === credId)
    ? credId
    : credentials[0]?.cred_id || '';

  const submit = async () => {
    if (!selectedCredId) {
      setError('请选择 S3 凭证');
      return;
    }
    if (!description.trim()) {
      setError('请描述要做的分析');
      return;
    }
    setBusy(true);
    setError('');
    try {
      await client.post(`${API_BASE}/jobs`, {
        cred_id: selectedCredId,
        description: description.trim(),
      });
      onCreated();
      onClose();
    } catch (e: unknown) {
      // `detail` is not always a string (validation errors come back as a
      // list of objects); rendering a non-string as a React child would throw.
      const detail = (e as { response?: { data?: { detail?: unknown } } } | null | undefined)
        ?.response?.data?.detail;
      setError(typeof detail === 'string' && detail ? detail : '提交失败');
    } finally {
      setBusy(false);
    }
  };

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/40 p-4">
      <div className="w-full max-w-lg bg-bg-card rounded-2xl border border-border-primary shadow-xl">
        <div className="flex items-center justify-between p-4 border-b border-border-primary">
          <h3 className="font-semibold text-text-primary"></h3>
          <button className="p-1 text-text-muted hover:text-text-primary" onClick={onClose}>
            <X size={16} />
          </button>
        </div>
        <div className="p-5 space-y-4">
          <div>
            <label className="text-xs text-text-secondary block mb-1.5">S3 </label>
            {credentials.length === 0 ? (
              <div className="text-xs text-warning bg-warning-bg/50 border border-warning/20 rounded-lg p-2">
                S3
              </div>
            ) : (
              <select
                className="w-full px-3 py-2 text-sm rounded-lg bg-bg-base border border-border-primary focus:outline-none focus:border-accent"
                value={selectedCredId}
                onChange={(e) => setCredId(e.target.value)}
              >
                {credentials.map((c) => (
                  <option key={c.cred_id} value={c.cred_id}>
                    {c.display_name} · {c.region}
                  </option>
                ))}
              </select>
            )}
          </div>
          <div>
            <label className="text-xs text-text-secondary block mb-1.5"></label>
            <textarea
              className="w-full px-3 py-2 text-sm rounded-lg bg-bg-base border border-border-primary focus:outline-none focus:border-accent min-h-[120px] resize-y"
              placeholder="例如:分析 s3://my-bucket/sales/2026-q1/ 的销售趋势,输出按月汇总"
              value={description}
              onChange={(e) => setDescription(e.target.value)}
            />
            <p className="text-[11px] text-text-muted mt-1">
              Agent s3_peek/s3_list_objects python_executor ray_submit
            </p>
          </div>
          {error && <div className="text-xs text-danger">{error}</div>}
        </div>
        <div className="flex items-center justify-end gap-2 p-4 border-t border-border-primary">
          <button
            className="px-3 py-1.5 text-xs rounded-lg border border-border-primary text-text-secondary hover:text-text-primary"
            onClick={onClose}
          >
          </button>
          <button
            className="px-3 py-1.5 text-xs rounded-lg bg-accent text-white hover:opacity-90 disabled:opacity-50 transition flex items-center gap-1.5"
            onClick={submit}
            disabled={busy || credentials.length === 0}
          >
            {busy ? <Loader2 size={14} className="animate-spin" /> : <Send size={14} />}
          </button>
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,22 @@
import { readdirSync, readFileSync, writeFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
// After the Vite lib-mode build finishes, write dist/wc-manifest.json so the
// backend's /ui-manifest endpoint can read which files make up the component.
const distDir = 'dist';
// Name of the entry bundle; also the fallback when no .js file is found.
const entryName = 'plugin-element.js';

if (!existsSync(distDir)) {
  console.error('dist/ not found; run vite build first');
  process.exit(1);
}

const files = readdirSync(distDir);
// Prefer the known entry bundle. readdirSync gives no ordering guarantee, so
// "the first .js file" could be a split chunk rather than the entry if the
// build ever emits more than one script.
const js = files.includes(entryName)
  ? entryName
  : files.find((f) => f.endsWith('.js')) || entryName;
const css = files.filter((f) => f.endsWith('.css'));

const manifest = {
  tag: 'plugin-data-analytics',
  js,
  css,
};

writeFileSync(join(distDir, 'wc-manifest.json'), JSON.stringify(manifest, null, 2) + '\n', 'utf-8');
console.log(`wrote dist/wc-manifest.json: ${JSON.stringify(manifest)}`);
@@ -0,0 +1,30 @@
import axios, { type AxiosInstance } from 'axios';
import { createContext, useContext } from 'react';
/** Value shared with the plugin's React tree through PluginContext. */
export interface PluginContextValue {
  /** Axios instance used for the plugin's HTTP calls. */
  client: AxiosInstance;
  /** Bearer token string; may be empty. */
  token: string;
  /** Base URL for API requests; may be empty. */
  apiBase: string;
}
// Defaults to null so consumers can detect that no Provider is mounted above them.
export const PluginContext = createContext<PluginContextValue | null>(null);
/**
 * Read the current PluginContext value.
 *
 * @throws Error when no PluginContext.Provider supplies a value.
 */
export function usePluginContext(): PluginContextValue {
  const value = useContext(PluginContext);
  if (value) {
    return value;
  }
  throw new Error('PluginContext missing — Web Component not initialized');
}
/**
 * Create an axios instance for the plugin.
 *
 * @param token   Bearer token; when non-empty every request carries
 *                `Authorization: Bearer <token>`.
 * @param apiBase Base URL for requests; an empty string leaves `baseURL` unset.
 * @returns The configured axios instance.
 */
export function makeClient(token: string, apiBase: string): AxiosInstance {
  const instance = axios.create({
    baseURL: apiBase || undefined,
  });
  if (token) {
    instance.interceptors.request.use((cfg) => {
      // In axios 1.x the request config handed to interceptors carries an
      // AxiosHeaders instance; set the header through its API instead of
      // replacing the object and casting it to a plain record.
      cfg.headers.set('Authorization', `Bearer ${token}`);
      return cfg;
    });
  }
  return instance;
}
@@ -0,0 +1,61 @@
import React from 'react';
import { createRoot, type Root } from 'react-dom/client';
import { Dashboard } from './Dashboard';
import { PluginContext, makeClient } from './client';
// Import the built CSS as a string (Vite `?inline`) and inject it into the shadow root through a <style> element:
// this keeps shadow-DOM style isolation without needing an extra fetch for the stylesheet.
import css from './styles.css?inline';
const TAG = 'plugin-data-analytics';
/**
 * Custom element that hosts the plugin's React dashboard inside a shadow root.
 *
 * The `token` and `api-base` attributes are observed; whenever either changes
 * while the element is mounted, the React tree is re-rendered with a client
 * built from the new values.
 */
class DataAnalyticsElement extends HTMLElement {
  // React root; present only while the element is connected.
  private root?: Root;
  // Container inside the shadow root that React renders into; created once.
  private mount?: HTMLDivElement;

  static get observedAttributes() {
    return ['token', 'api-base'];
  }

  connectedCallback() {
    // connectedCallback runs again every time the element is re-inserted into
    // the document, and attachShadow() throws when the host already has a
    // shadow root. Build the shadow tree only once and reuse it afterwards.
    if (!this.mount) {
      const shadow = this.shadowRoot ?? this.attachShadow({ mode: 'open' });
      // Inject the inlined CSS string through a <style> element.
      const style = document.createElement('style');
      style.textContent = css;
      shadow.appendChild(style);
      const mount = document.createElement('div');
      mount.style.cssText = 'height:100%;width:100%';
      shadow.appendChild(mount);
      this.mount = mount;
    }
    // disconnectedCallback drops the root, so a fresh one is created on reconnect.
    if (!this.root) {
      this.root = createRoot(this.mount);
    }
    this.render();
  }

  attributeChangedCallback() {
    // May fire before the element is connected; nothing to update until a root exists.
    if (this.root) this.render();
  }

  disconnectedCallback() {
    this.root?.unmount();
    this.root = undefined;
  }

  /** Render the dashboard with a client built from the current attribute values. */
  private render() {
    const token = this.getAttribute('token') ?? '';
    const apiBase = this.getAttribute('api-base') ?? '';
    const client = makeClient(token, apiBase);
    this.root!.render(
      <React.StrictMode>
        <PluginContext.Provider value={{ client, token, apiBase }}>
          <Dashboard pluginName="data_analytics" />
        </PluginContext.Provider>
      </React.StrictMode>,
    );
  }
}
// Register the element only if the tag is not already defined, so evaluating
// this module more than once does not attempt a second define().
if (!customElements.get(TAG)) {
  customElements.define(TAG, DataAnalyticsElement);
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,25 @@
{
"name": "plugin-data-analytics",
"private": true,
"version": "0.1.0",
"type": "module",
"scripts": {
"build": "vite build && node build-manifest.mjs",
"dev": "vite build --watch"
},
"dependencies": {
"axios": "^1.15.1",
"lucide-react": "^1.8.0",
"react": "^19.2.4",
"react-dom": "^19.2.4"
},
"devDependencies": {
"@tailwindcss/vite": "^4.2.2",
"@types/react": "^19.1.0",
"@types/react-dom": "^19.1.0",
"@vitejs/plugin-react": "^6.0.1",
"tailwindcss": "^4.2.2",
"typescript": "^5.8.0",
"vite": "^8.0.4"
}
}
@@ -0,0 +1,91 @@
@import "tailwindcss";
/* :root does not match inside a shadow DOM, so the theme tokens for the Web
   Component itself are defined on :host. The colour names mirror the main
   frontend (frontend/src/index.css), so utility classes used by the components
   such as bg-bg-card / text-accent resolve to the matching var() when the
   plugin is built. */
@theme {
  --color-bg-primary: var(--bg-primary);
  --color-bg-secondary: var(--bg-secondary);
  --color-bg-tertiary: var(--bg-tertiary);
  --color-bg-card: var(--bg-card);
  --color-bg-sidebar: var(--bg-sidebar);
  --color-bg-input: var(--bg-input);
  --color-bg-hover: var(--bg-hover);
  --color-bg-active: var(--bg-active);
  --color-bg-base: var(--bg-base);
  --color-border-primary: var(--border-primary);
  --color-border-secondary: var(--border-secondary);
  --color-text-primary: var(--text-primary);
  --color-text-secondary: var(--text-secondary);
  --color-text-tertiary: var(--text-tertiary);
  --color-text-muted: var(--text-muted);
  --color-accent: var(--accent);
  --color-accent-hover: var(--accent-hover);
  --color-accent-light: var(--accent-light);
  --color-danger: var(--danger);
  --color-danger-bg: var(--danger-bg);
  --color-success: var(--success);
  --color-success-bg: var(--success-bg);
  --color-warning: var(--warning);
  --color-warning-bg: var(--warning-bg);
}
:host {
  /* Light-theme defaults — kept in sync with the main frontend. */
  --bg-primary: #f2f0ed;
  --bg-secondary: #eae8e4;
  --bg-tertiary: #e0ddd8;
  --bg-card: #faf9f7;
  --bg-sidebar: #eae8e4;
  --bg-input: #f2f0ed;
  --bg-hover: rgba(255, 255, 255, 0.4);
  --bg-active: rgba(156, 175, 136, 0.08);
  --bg-base: #f2f0ed;
  --border-primary: #e0ddd8;
  --border-secondary: #eae8e4;
  --text-primary: #3d3d3d;
  --text-secondary: #5a5a5a;
  --text-tertiary: #8c8680;
  --text-muted: #b5afa8;
  --accent: #9caf88;
  --accent-hover: #8a9e78;
  --accent-light: rgba(156, 175, 136, 0.12);
  --danger: #c4917a;
  --danger-bg: rgba(196, 145, 122, 0.08);
  --success: #7a8e6a;
  --success-bg: rgba(122, 142, 106, 0.08);
  --warning: #c4a882;
  --warning-bg: rgba(196, 168, 130, 0.08);
  /* Layout and typography of the host element itself. */
  display: block;
  height: 100%;
  font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
}
/* Dark-theme overrides: applied only while the host element carries
   data-theme="dark". The page embedding the component is responsible for
   setting that attribute. */
:host([data-theme="dark"]) {
  --bg-primary: #1c1b19;
  --bg-secondary: #232220;
  --bg-tertiary: #2d2b28;
  --bg-card: #252421;
  --bg-sidebar: #1e1d1b;
  --bg-input: #2d2b28;
  --bg-hover: rgba(255, 255, 255, 0.04);
  --bg-active: rgba(156, 175, 136, 0.1);
  --bg-base: #1c1b19;
  --border-primary: rgba(255, 255, 255, 0.06);
  --border-secondary: rgba(255, 255, 255, 0.03);
  --text-primary: #e8e6e3;
  --text-secondary: #c8c5c0;
  --text-tertiary: #a09c96;
  --text-muted: #7a7772;
  --accent: #a8bc94;
  --accent-hover: #b8caa6;
  --accent-light: rgba(156, 175, 136, 0.15);
  --danger: #d4a894;
  --danger-bg: rgba(196, 145, 122, 0.1);
  --success: #9caf88;
  --success-bg: rgba(156, 175, 136, 0.1);
  --warning: #c4a882;
  --warning-bg: rgba(196, 168, 130, 0.1);
}
@@ -0,0 +1,13 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "ESNext",
"moduleResolution": "bundler",
"jsx": "react-jsx",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true
},
"include": ["*.ts", "*.tsx"]
}
@@ -0,0 +1,25 @@
/**
 * An S3 credential record as consumed by the plugin frontend.
 * Note that no secret key field is part of this shape.
 */
export interface S3Credential {
  cred_id: string;
  user_id: string;
  display_name: string;
  // null when no custom endpoint is configured — presumably the default AWS endpoint; verify against the API.
  endpoint_url: string | null;
  region: string;
  access_key: string;
  // Timestamps arrive as strings (or null); exact format not visible here — TODO confirm.
  created_at: string | null;
  updated_at: string | null;
}
/** An analysis job record as consumed by the plugin frontend. */
export interface AnalysisJob {
  job_id: string;
  user_id: string;
  cred_id: string | null;
  description: string;
  status: string;
  org_task_id: string | null;
  result: string | null;
  created_at: string | null;
  updated_at: string | null;
  // Optional task_* fields are not present on every payload — presumably only
  // on job-detail responses; verify against the API.
  task_status?: string;
  task_result?: unknown;
  task_error?: string | null;
}
@@ -0,0 +1,25 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import tailwindcss from '@tailwindcss/vite';
// Library-mode build of the Web Component: one ES-module bundle plus one CSS file in dist/.
export default defineConfig({
  plugins: [react(), tailwindcss()],
  build: {
    lib: {
      entry: 'index.tsx',
      formats: ['es'],
      // Fixed (un-hashed) bundle name.
      fileName: () => 'plugin-element.js',
    },
    outDir: 'dist',
    emptyOutDir: true,
    // Emit a single CSS file instead of per-chunk CSS.
    cssCodeSplit: false,
    rollupOptions: {
      output: {
        assetFileNames: (info) => {
          // CSS gets a fixed name; every other asset goes under assets/ with a content hash.
          if (info.name && info.name.endsWith('.css')) return 'plugin-element.css';
          return 'assets/[name]-[hash][extname]';
        },
      },
    },
  },
});
+19
View File
@@ -0,0 +1,19 @@
{
"name": "data_analytics",
"version": "0.1.0",
"display_name": "数据分析",
"description": "对接 S3 对象存储,由 agent 自主决定使用 python_executor 或 ray_submit 跑分析。仅读不写。",
"entry": "core.organization:DataAnalyticsOrganization",
"concurrency": "queue",
"node_affinity": "cpu",
"api_prefix": "/api/v1/plugin/data_analytics",
"capabilities": ["data_analysis", "s3_readonly"],
"dependencies": {
"python": [],
"plugins": []
},
"ui": {
"entry": "frontend/index.tsx",
"icon": "BarChart3"
}
}
@@ -0,0 +1,16 @@
"""data_analytics 插件本地工具集。
agent 看到这些工具时不带凭证参数,凭证由 organization 通过 ContextVar 注入。
"""
from .s3_list_objects import s3_list_objects
from .s3_peek import s3_peek
from .s3_get_object import s3_get_object
from .ray_submit import ray_submit
__all__ = [
"s3_list_objects",
"s3_peek",
"s3_get_object",
"ray_submit",
]
@@ -0,0 +1,43 @@
"""S3 工具共用辅助:从 ContextVar 拿凭证 + 解析 URI。
所有 s3_* 工具都依赖这个模块,把"明文凭证"的取用集中在一处。
"""
from __future__ import annotations
import re
from typing import Any, Dict, Tuple
def get_s3_creds_or_raise() -> Dict[str, Any]:
    """Return the plaintext S3 credentials stored in ``S3_CREDS_VAR``.

    Returns:
        The credential mapping currently held by the context variable.

    Raises:
        RuntimeError: If the variable holds no (or an empty) value.
    """
    # Imported lazily to avoid a circular import with the organization module.
    from ..core.organization import S3_CREDS_VAR

    current = S3_CREDS_VAR.get()
    if current:
        return current
    raise RuntimeError(
        "未提供 S3 凭证:本任务上下文中没有 cred_id,请在创建 job 时选择凭证。"
    )
def parse_s3_uri(uri: str) -> Tuple[str, str]:
    """Split an ``s3://bucket/key`` URI into ``(bucket, key)``.

    Surrounding whitespace is ignored. The bucket is everything between the
    scheme and the first ``/``; the key is the non-empty remainder.

    Raises:
        ValueError: If the text is not of the form ``s3://bucket/key``.
    """
    text = uri.strip()
    # After strip() there is no trailing newline, so fullmatch is equivalent
    # to anchoring the pattern with ^...$.
    parsed = re.fullmatch(r"s3://([^/]+)/(.+)", text)
    if parsed is None:
        raise ValueError(f"非法 S3 URI{uri!r}(期待 s3://bucket/key 形式)")
    bucket, key = parsed.groups()
    return bucket, key
def make_session_kwargs(creds: Dict[str, Any]) -> Dict[str, Any]:
    """Build the keyword arguments for creating an S3 client from *creds*.

    Args:
        creds: Mapping with ``access_key`` and ``secret_key`` (required) and
            optionally ``region`` and ``endpoint_url``.

    Returns:
        A dict with ``aws_access_key_id``, ``aws_secret_access_key`` and
        ``region_name`` (``"us-east-1"`` when no region is given), plus
        ``endpoint_url`` only when a non-empty endpoint is configured.

    Raises:
        KeyError: If ``access_key`` or ``secret_key`` is missing.
    """
    client_kwargs: Dict[str, Any] = dict(
        aws_access_key_id=creds["access_key"],
        aws_secret_access_key=creds["secret_key"],
        region_name=creds.get("region") or "us-east-1",
    )
    custom_endpoint = creds.get("endpoint_url")
    if not custom_endpoint:
        return client_kwargs
    client_kwargs["endpoint_url"] = custom_endpoint
    return client_kwargs
@@ -0,0 +1,39 @@
{
"name": "data_analytics_internal",
"version": "0.1.0",
"description": "data_analytics 插件内部工具:S3 只读 + Ray 提交。仅限本插件内部 agent 调用。",
"tools": [
{
"name": "s3_list_objects",
"file": "s3_list_objects.py",
"is_system": true,
"action_scope": ["data_analytics_internal"],
"config_args": {},
"category": "system"
},
{
"name": "s3_peek",
"file": "s3_peek.py",
"is_system": true,
"action_scope": ["data_analytics_internal"],
"config_args": {},
"category": "system"
},
{
"name": "s3_get_object",
"file": "s3_get_object.py",
"is_system": true,
"action_scope": ["data_analytics_internal"],
"config_args": {},
"category": "system"
},
{
"name": "ray_submit",
"file": "ray_submit.py",
"is_system": true,
"action_scope": ["data_analytics_internal"],
"config_args": {},
"category": "system"
}
]
}
@@ -0,0 +1,95 @@
"""ray_submit:把分析脚本提交到 Raydistributed)或 subprocessstandalone)执行。
凭证以 ``AWS_*`` 环境变量注入子进程,让 boto3/pandas-s3 自然读到。
脚本走 ``kilostar.utils.sandbox.validate_python_code`` 的静态屏蔽兜底。
"""
from __future__ import annotations
import asyncio
import os
import sys
import tempfile
from kilostar.utils.ray_compat import _STANDALONE
from kilostar.utils.sandbox import (
CodeViolation,
get_python_timeout,
validate_python_code,
)
from ._s3_common import get_s3_creds_or_raise
def _build_env(creds) -> dict:
    """Return a copy of the current process environment with AWS variables set.

    Sets ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and
    ``AWS_DEFAULT_REGION`` (``"us-east-1"`` when *creds* has no region). When a
    non-empty ``endpoint_url`` is present, both ``AWS_ENDPOINT_URL_S3`` and
    ``AWS_ENDPOINT_URL`` are set to it.

    NOTE(review): every variable of the parent process is inherited by the
    returned environment, not only the AWS ones — confirm that is intended for
    the scripts this environment is handed to.
    """
    aws_vars = {
        "AWS_ACCESS_KEY_ID": creds["access_key"],
        "AWS_SECRET_ACCESS_KEY": creds["secret_key"],
        "AWS_DEFAULT_REGION": creds.get("region") or "us-east-1",
    }
    endpoint = creds["endpoint_url"] if creds.get("endpoint_url") else None
    if endpoint is not None:
        aws_vars["AWS_ENDPOINT_URL_S3"] = endpoint
        aws_vars["AWS_ENDPOINT_URL"] = endpoint
    return {**os.environ, **aws_vars}
async def ray_submit(script: str, timeout: int = 300) -> str:
    """Run a Python analysis script with the S3 credentials injected as env vars.

    In this version the script always runs in a local subprocess of the
    current interpreter, in both standalone and cluster mode; outside
    standalone mode the output is prefixed with a
    ``[mode: ray-cluster (subprocess)]`` tag. The script can ``import boto3``
    and read S3 directly (credentials come from the ``AWS_*`` environment
    variables) and may use pandas / polars / numpy when they are installed.
    **Read-only** -- do not attempt put/delete.

    Args:
        script: Python source code.
        timeout: Timeout in seconds (default 300); the value is passed through
            ``get_python_timeout`` before use.

    Returns:
        The script's stdout, followed by a ``[stderr]`` section and an
        ``[exit code: N]`` line when applicable. A ``[Sandbox] ...`` message is
        returned when validation rejects the script, and an ``[Error] ...``
        message when execution fails or times out.

    Raises:
        RuntimeError: If no S3 credentials are available for this task.
    """
    try:
        script = validate_python_code(script)
    except CodeViolation as e:
        return f"[Sandbox] {e}"

    creds = get_s3_creds_or_raise()
    env = _build_env(creds)
    timeout = get_python_timeout(timeout)

    # Both standalone and distributed mode use a subprocess in this first
    # version so that environment-variable passing stays under our control
    # (running via ray.remote would need a runtime_env to carry env vars --
    # comparable complexity, but the subprocess route is more transparent).
    tmp_file = None
    proc = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            f.write(script)
            tmp_file = f.name

        proc = await asyncio.create_subprocess_exec(
            sys.executable,
            tmp_file,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
        )
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        out = stdout.decode("utf-8", errors="replace")
        err = stderr.decode("utf-8", errors="replace")

        result = ""
        if out:
            result += out
        if err:
            result += f"\n[stderr]\n{err}"
        if proc.returncode != 0:
            result += f"\n[exit code: {proc.returncode}]"
        result = result.strip() or "(no output)"
        if not _STANDALONE:
            result = f"[mode: ray-cluster (subprocess)]\n{result}"
        return result
    except asyncio.TimeoutError:
        return f"[Error] ray_submit 执行超时({timeout}s"
    except Exception as e:
        return f"[Error] ray_submit 失败:{e}"
    finally:
        try:
            # wait_for() only cancels communicate(); the child itself keeps
            # running after a timeout or cancellation. Kill and reap it so it
            # cannot outlive the call (and its script file).
            if proc is not None and proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    # The child exited between the check and the kill.
                    pass
                await proc.wait()
        finally:
            if tmp_file and os.path.exists(tmp_file):
                os.unlink(tmp_file)
@@ -0,0 +1,46 @@
"""s3_get_object:下载到 artifact 目录(路径强校验防穿越)。"""
from __future__ import annotations
import os
from pathlib import Path
from kilostar.utils.settings import get_artifact_dir
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
async def s3_get_object(uri: str, save_as: str) -> str:
    """Download an S3 object into the artifact workspace and return its local path.

    ``save_as`` must be a relative path; the file is stored under the
    ``data_analytics_downloads`` folder of the artifact directory so that
    nothing can be written elsewhere. The downloaded file can then be read
    with pandas/polars from python_executor / ray_submit.

    Args:
        uri: Object location of the form ``s3://bucket/key``.
        save_as: Relative file name to save under (no ``..`` components and
            not an absolute path).

    Returns:
        The absolute local path of the saved file.

    Raises:
        ValueError: If ``uri`` is malformed, or ``save_as`` is absolute,
            contains ``..``, is empty, or resolves outside the download folder.
        RuntimeError: If no S3 credentials are available for this task.
    """
    from aiobotocore.session import get_session

    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)

    save_path = Path(save_as).as_posix()
    if save_path.startswith("/") or ".." in save_path.split("/"):
        raise ValueError(f"save_as 必须是相对、不含 .. 的路径,收到 {save_as!r}")

    base = get_artifact_dir() / "data_analytics_downloads"
    base.mkdir(parents=True, exist_ok=True)

    # The lexical check above does not cover every escape (drive-qualified
    # paths on Windows, symlinks inside the folder) and lets an empty name
    # through. Resolve both sides and require the target to sit strictly
    # below the download folder before touching the filesystem or S3.
    root = base.resolve()
    target = (root / save_path).resolve()
    if root not in target.parents:
        raise ValueError(f"save_as 必须是相对、不含 .. 的路径,收到 {save_as!r}")
    target.parent.mkdir(parents=True, exist_ok=True)

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        resp = await client.get_object(Bucket=bucket, Key=key)
        # NOTE(review): the whole object is buffered in memory before being
        # written; very large objects may need a chunked download.
        body = await resp["Body"].read()

    target.write_bytes(body)
    return str(target)
@@ -0,0 +1,47 @@
"""s3_list_objects:列出 bucket+prefix 下的对象列表(key/size/last_modified)。"""
from __future__ import annotations
from typing import Any, Dict, List
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs
async def s3_list_objects(
    bucket: str,
    prefix: str = "",
    limit: int = 50,
) -> List[Dict[str, Any]]:
    """List objects in an S3 bucket under a key prefix (at most ``limit`` entries).

    Args:
        bucket: S3 bucket name.
        prefix: Object key prefix; leave empty to list from the bucket root.
        limit: Maximum number of entries to return; clamped to 1-1000,
            default 50.

    Returns:
        A list of dicts, each with ``key``, ``size`` and ``last_modified``
        (ISO-format string, or None when the listing has no timestamp).
    """
    from aiobotocore.session import get_session

    def _summarize(entry: Dict[str, Any]) -> Dict[str, Any]:
        # Reduce one listing entry to the three fields exposed to the caller.
        modified = entry["LastModified"] if entry.get("LastModified") else None
        return {
            "key": entry.get("Key"),
            "size": entry.get("Size"),
            "last_modified": modified.isoformat() if modified else None,
        }

    creds = get_s3_creds_or_raise()
    limit = max(1, min(int(limit), 1000))

    collected: List[Dict[str, Any]] = []
    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        pages = client.get_paginator("list_objects_v2").paginate(
            Bucket=bucket, Prefix=prefix, PaginationConfig={"MaxItems": limit}
        )
        async for page in pages:
            for entry in page.get("Contents") or []:
                collected.append(_summarize(entry))
                # Stop as soon as the cap is reached, without reading further pages.
                if len(collected) >= limit:
                    return collected
    return collected
@@ -0,0 +1,35 @@
"""s3_peek:读取对象的头若干字节并尝试 UTF-8 解码(看几行用)。"""
from __future__ import annotations
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
def _decode_preview(body: bytes, truncated: bool) -> str:
    """Decode *body* as UTF-8 text, or describe it as binary when it is not."""
    import codecs

    decoder = codecs.getincrementaldecoder("utf-8")()
    try:
        # A byte-range cut can land in the middle of a multi-byte character.
        # With final=False the incomplete trailing sequence is held back
        # instead of raising; genuinely invalid bytes still raise.
        return decoder.decode(body, final=not truncated)
    except UnicodeDecodeError:
        return f"[binary, {len(body)} bytes; first 64 hex] {body[:64].hex()}"


async def s3_peek(uri: str, n_bytes: int = 4096) -> str:
    """Read the first ``n_bytes`` bytes of an S3 object and return them as UTF-8 text.

    Useful for previewing the first lines of text objects such as csv/json/log
    files. Content that is not valid UTF-8 is reported with a
    ``[binary, ...]`` placeholder instead. When the read was cut off at the
    byte limit, a multi-byte character split by the cut is dropped from the
    end of the preview rather than making the text look binary.

    Args:
        uri: Object location of the form ``s3://bucket/key``.
        n_bytes: Number of bytes to read; default 4096, clamped to 1..1 MiB.

    Returns:
        The decoded content fragment, or the binary placeholder description.

    Raises:
        ValueError: If ``uri`` is malformed.
        RuntimeError: If no S3 credentials are available for this task.
    """
    from aiobotocore.session import get_session

    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)
    n = max(1, min(int(n_bytes), 1024 * 1024))

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        # NOTE(review): a ranged GET on a zero-length object may be rejected by
        # the server; that case is not handled here — confirm desired behaviour.
        resp = await client.get_object(Bucket=bucket, Key=key, Range=f"bytes=0-{n-1}")
        body = await resp["Body"].read()

    # Receiving the full n bytes means the object was (most likely) cut off at
    # the limit; a shorter body is the complete object and is decoded strictly.
    return _decode_preview(body, truncated=len(body) >= n)