feat: 工具系统迁移 + 重型插件骨架 + 前端交互增强

- 工具系统从 kilostar/plugin/tool_plugin/ 迁移到 data/toolset/(manifest.json 声明式)
- 新增 plugin_runtime 模块:BaseOrganization / GlobalPluginManager / loader / tool_bridge
- 新增 org_task + org_task_event 表及 DAO(alembic 0009)
- 新增 /api/v1/plugin 路由(submit/status/stream/install/reload)
- 新增 data/plugin/example_dept 示例重型插件
- regulatory_node 支持聊天历史上下文注入
- send_file 改为 artifact 存盘 + SSE 推送下载链接
- 前端 WorkflowFileCard 组件 + ToolSettings README 渲染
- utils 整理:合并 access/role_check、standalone_proxy→ray_compat、删除废弃模块
- 项目结构文档移至 docs/STRUCTURE.md 并详细展开

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-17 05:20:00 +00:00
parent 9b73ae4db4
commit 6d658b4f4d
74 changed files with 2591 additions and 1308 deletions
+29
View File
@@ -0,0 +1,29 @@
"""KiloStar 重型插件(Organization)运行时。
每个重型插件 = 一个组织/部门:
- ``data/plugin/<org_name>/`` 目录约定
- 内部多个平级专家 agent,通过 ``consult`` 工具互通
- 单机模式下是普通对象,分布式模式下是 ray actor
- 对外两条边:cabinet tool(阻塞)+ 用户 API(射后不管)
"""
from kilostar.plugin_runtime.event import OrgEvent, OrgEventType, TaskState
from kilostar.plugin_runtime.manifest import OrgManifest, OrgDependencies
from kilostar.plugin_runtime.agents_config import (
AgentsConfig,
AgentDef,
AgentModelRef,
OrchestrationConfig,
)
__all__ = [
"OrgEvent",
"OrgEventType",
"TaskState",
"OrgManifest",
"OrgDependencies",
"AgentsConfig",
"AgentDef",
"AgentModelRef",
"OrchestrationConfig",
]
+50
View File
@@ -0,0 +1,50 @@
"""agents.json 的 pydantic 模型。"""
from __future__ import annotations
from typing import List, Literal, Optional
from pydantic import BaseModel, Field
class AgentModelRef(BaseModel):
    """Which provider and which model an agent runs on."""

    # Title of the provider entry the agent should use.
    provider_title: str
    # Identifier of the model to request from that provider.
    model_id: str
class AgentDef(BaseModel):
    """Definition of a single expert agent.

    ``peers`` lists the colleagues this agent may ``consult``; when it is empty
    the agent cannot start a consultation.

    ``tools`` / ``skills`` names are resolved in this order:
    1. tools declared in this organization's ``toolset/``
    2. the cabinet-wide tool whitelist (basic tools such as python_executor)
    """

    name: str
    role: str = ""
    system_prompt: str = ""
    model: AgentModelRef
    # Every list field gets its own fresh list per instance.
    tools: List[str] = Field(default_factory=list)
    skills: List[str] = Field(default_factory=list)
    peers: List[str] = Field(default_factory=list)
class OrchestrationConfig(BaseModel):
    """Orchestration strategy.

    The first version only supports ``react``; ``entry`` names the agent that
    receives an incoming task.
    """

    type: Literal["react"] = "react"
    entry: str
class AgentsConfig(BaseModel):
    """Top-level model: the agent definitions plus the orchestration config."""

    agents: List[AgentDef]
    orchestration: OrchestrationConfig

    def get(self, name: str) -> Optional[AgentDef]:
        """Return the first agent definition called ``name``, or ``None``."""
        matching = (agent for agent in self.agents if agent.name == name)
        return next(matching, None)
@@ -0,0 +1,442 @@
"""BaseOrganization:重型插件基类。
设计要点:
- 单机模式 = 普通 Python 对象,分布式 = ray actor(``@actor_class`` 装饰子类)
- 内置 ``asyncio.Queue`` 输入队列 + 任务表
- 对外两条通道:``dispatch`` (阻塞) / ``submit`` (射后不管),底层都汇集到 ``_run_task``
- 子类只需覆写 ``setup`` / ``react`` 两个钩子;零代码插件由 ``agents.json`` 声明驱动
"""
from __future__ import annotations
import asyncio
import json
import time
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional
from ulid import ULID
from kilostar.plugin_runtime.event import OrgEvent, TaskState
from kilostar.plugin_runtime.manifest import OrgManifest
from kilostar.plugin_runtime.agents_config import AgentsConfig, AgentDef
from kilostar.utils.logger import get_logger
from kilostar.utils.settings import get_artifact_dir
class BaseOrganization:
    """Base class for heavyweight plugins ("organizations").

    Lifecycle:
        ``__init__(manifest_dict, agents_dict, plugin_dir)`` -> ``setup()`` ->
        runs until ``shutdown()``.

    ``setup`` loads the organization's ``toolset/``, builds the agent instances
    (adding a ``consult`` tool for agents that declare peers) and starts the
    background worker coroutine that drains the input queue.
    """

    # Statuses after which a task produces no further events.
    _TERMINAL_STATUSES = ("completed", "failed")

    def __init__(
        self,
        manifest_dict: Dict[str, Any],
        agents_dict: Dict[str, Any],
        plugin_dir: str,
    ) -> None:
        """Validate the manifest / agents config and create empty runtime state.

        Args:
            manifest_dict: Raw ``manifest.json`` content.
            agents_dict: Raw ``agents.json`` content.
            plugin_dir: Directory of this plugin on disk.
        """
        self.manifest = OrgManifest.model_validate(manifest_dict)
        self.agents_config = AgentsConfig.model_validate(agents_dict)
        self.plugin_dir = plugin_dir
        self.name = self.manifest.name
        self.logger = get_logger(f"org.{self.name}")

        # Task queue and in-memory state tables.
        self._queue: asyncio.Queue = asyncio.Queue()
        self._tasks: Dict[str, TaskState] = {}
        # Only blocking ("cabinet") tasks get a future; dispatch() removes it.
        self._futures: Dict[str, asyncio.Future] = {}
        # task_id -> one queue per live stream() subscriber.
        self._streams: Dict[str, List[asyncio.Queue]] = {}

        # Background consumer coroutine.
        self._worker_task: Optional[asyncio.Task] = None
        self._stopped = False

        # Filled in by setup().
        self._tools_by_name: Dict[str, Callable] = {}
        self._agents: Dict[str, Any] = {}  # name -> agent built by AgentFactory

    # ─── Lifecycle ─────────────────────────────────────────────

    async def setup(self) -> None:
        """Load this organization's tools, build its agents, start the worker.

        Subclasses may override this to do extra work, but should
        ``await super().setup()``.
        """
        await self._load_local_tools()
        await self._build_agents()
        self._worker_task = asyncio.create_task(self._consume_queue())

    async def shutdown(self) -> None:
        """Stop the worker and release everyone waiting on unfinished tasks."""
        self._stopped = True
        worker = self._worker_task
        self._worker_task = None
        if worker is not None:
            worker.cancel()
            # Wait until the cancellation has really been processed. With
            # return_exceptions=True the worker's CancelledError is returned
            # as a value instead of being raised here.
            await asyncio.gather(worker, return_exceptions=True)

        # Whatever is still unfinished will never run: fail it so dispatch()
        # callers and stream() subscribers are not left waiting forever.
        for ts in list(self._tasks.values()):
            if ts.status in self._TERMINAL_STATUSES:
                continue
            ts.status = "failed"
            ts.error = ts.error or "organization shut down"
            await self._persist_task(ts)
            await self._finish(ts)

    # ─── Public channels ───────────────────────────────────────

    async def dispatch(
        self, task_description: str, ctx: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Blocking entry point: enqueue a task and wait until it finishes.

        Returns:
            ``{"task_id": ..., "status": ..., "result": ..., "error": ...}``
        """
        task_id = await self._enqueue(task_description, ctx or {}, source="cabinet")
        future = self._futures[task_id]
        try:
            return await future
        finally:
            self._futures.pop(task_id, None)

    async def submit(
        self, task_description: str, ctx: Optional[Dict[str, Any]] = None
    ) -> str:
        """Fire-and-forget entry point: enqueue a task and return its id.

        Progress is observed through ``status`` / ``stream``.
        """
        return await self._enqueue(task_description, ctx or {}, source="user")

    async def status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Return a snapshot of one task, or ``None`` for an unknown id."""
        ts = self._tasks.get(task_id)
        if ts is None:
            return None
        return {
            "task_id": ts.task_id,
            "status": ts.status,
            "description": ts.description,
            "source": ts.source,
            "result": ts.result,
            "error": ts.error,
            "events": [e.to_dict() for e in ts.events],
        }

    async def stream(self, task_id: str) -> AsyncGenerator[Dict[str, Any], None]:
        """Yield the task's events as dicts: history first, then live events.

        For a finished task the generator ends after the replay. For an unknown
        task id it yields nothing. Any number of subscribers may stream the
        same task concurrently; each gets every event.
        """
        ts = self._tasks.get(task_id)
        if ts is None:
            return

        # Subscribe and snapshot in one synchronous stretch (no await in
        # between): every event is then either in the snapshot or delivered
        # through the queue, never lost and never duplicated.
        sub_queue: asyncio.Queue = asyncio.Queue()
        self._streams.setdefault(task_id, []).append(sub_queue)
        history = list(ts.events)
        already_finished = ts.status in self._TERMINAL_STATUSES

        try:
            for ev in history:
                yield ev.to_dict()
            if already_finished:
                return
            while True:
                ev = await sub_queue.get()
                if ev is None:  # sentinel: the task is over
                    break
                yield ev.to_dict()
        finally:
            # Remove only our own subscription; other subscribers stay live.
            subscribers = self._streams.get(task_id)
            if subscribers is not None:
                if sub_queue in subscribers:
                    subscribers.remove(sub_queue)
                if not subscribers:
                    self._streams.pop(task_id, None)

    async def list_tasks(self) -> List[Dict[str, Any]]:
        """Return a short summary of every task known to this instance."""
        return [
            {
                "task_id": ts.task_id,
                "status": ts.status,
                "source": ts.source,
                "description": ts.description,
            }
            for ts in self._tasks.values()
        ]

    # ─── Subclass hooks ────────────────────────────────────────

    async def react(
        self,
        task_description: str,
        ctx: Dict[str, Any],
        emit: Callable[[OrgEvent], Any],
    ) -> Any:
        """Default implementation: hand the task to the entry agent for one run.

        Subclasses may override this for custom orchestration.

        Raises:
            RuntimeError: the configured entry agent was not built.
        """
        entry_name = self.agents_config.orchestration.entry
        entry_agent = self._agents.get(entry_name)
        if entry_agent is None:
            raise RuntimeError(f"entry agent {entry_name!r} not found in {self.name}")

        await emit(
            OrgEvent(
                task_id=ctx["task_id"],
                type="step",
                payload={"agent": entry_name, "phase": "start"},
            )
        )
        try:
            result = await entry_agent.run(user_prompt=task_description)
        except Exception as e:
            self.logger.exception(f"entry agent {entry_name} run failed: {e}")
            raise
        output = self._extract_output(result)
        await emit(
            OrgEvent(
                task_id=ctx["task_id"],
                type="step",
                payload={"agent": entry_name, "phase": "end"},
            )
        )
        return output

    # ─── Internals ─────────────────────────────────────────────

    @staticmethod
    def _extract_output(result: Any) -> Any:
        """Return ``result.output`` when present, else ``str(result)``.

        Only a missing / ``None`` output falls back to ``str(result)``; an
        empty-string output is a legitimate answer and is returned as is.
        """
        output = getattr(result, "output", None)
        return str(result) if output is None else output

    async def _publish(self, task_id: str, item: Any) -> None:
        """Hand ``item`` (an event, or ``None`` as end marker) to every subscriber."""
        for sub in list(self._streams.get(task_id, ())):
            await sub.put(item)

    async def _finish(self, ts: TaskState) -> None:
        """Close the task's live streams and wake a blocked ``dispatch`` caller."""
        await self._publish(ts.task_id, None)
        fut = self._futures.get(ts.task_id)
        if fut is not None and not fut.done():
            fut.set_result(
                {
                    "task_id": ts.task_id,
                    "status": ts.status,
                    "result": ts.result,
                    "error": ts.error,
                }
            )

    async def _enqueue(
        self,
        task_description: str,
        ctx: Dict[str, Any],
        source: str,
    ) -> str:
        """Register a new task, persist it and put it on the input queue.

        Raises:
            RuntimeError: the organization has been shut down, so the task
                could never be processed.
        """
        if self._stopped:
            raise RuntimeError(f"organization {self.name} is shut down")

        task_id = str(ULID())
        trace_id = ctx.get("trace_id") or task_id
        user_id = ctx.get("user_id", "")
        # Sandbox directory: <artifact dir>/<trace>/<org>/
        artifact_dir = str(get_artifact_dir() / trace_id / self.name)

        ts = TaskState(
            task_id=task_id,
            org_name=self.name,
            trace_id=trace_id,
            user_id=user_id,
            description=task_description,
            source=source,  # type: ignore[arg-type]
        )
        self._tasks[task_id] = ts
        if source == "cabinet":
            # Only dispatch() waits on (and later removes) a future; creating
            # one for fire-and-forget tasks would leak it.
            self._futures[task_id] = asyncio.get_running_loop().create_future()

        full_ctx = {
            **ctx,
            "trace_id": trace_id,
            "user_id": user_id,
            "task_id": task_id,
            "source": source,
            "artifact_dir": artifact_dir,
        }
        # Persist (best effort) before queueing, so the initial "pending" write
        # cannot race the worker's "running" write.
        await self._persist_task(ts)
        await self._queue.put((task_id, task_description, full_ctx))
        return task_id

    async def _consume_queue(self) -> None:
        """Worker loop: run queued tasks one at a time until stopped."""
        while not self._stopped:
            try:
                task_id, desc, ctx = await self._queue.get()
            except asyncio.CancelledError:
                break
            try:
                await self._run_task(task_id, desc, ctx)
            except Exception as e:
                self.logger.exception(f"task {task_id} crashed: {e}")

    async def _run_task(self, task_id: str, desc: str, ctx: Dict[str, Any]) -> None:
        """Run one task through ``react`` and record / broadcast the outcome."""
        ts = self._tasks[task_id]
        ts.status = "running"
        await self._persist_task(ts)

        async def _emit(ev: OrgEvent) -> None:
            ts.events.append(ev)
            await self._publish(task_id, ev)
            await self._persist_event(ts, ev)

        try:
            result = await self.react(desc, ctx, _emit)
            ts.status = "completed"
            ts.result = result
            await _emit(
                OrgEvent(task_id=task_id, type="done", payload={"result": result})
            )
        except asyncio.CancelledError:
            # Cancelled mid-run (e.g. by shutdown): do not leave the task
            # reported as "running".
            if ts.status != "completed":
                ts.status = "failed"
                ts.error = "cancelled"
            raise
        except Exception as e:
            self.logger.warning(f"task {task_id} failed: {e}")
            ts.status = "failed"
            ts.error = str(e)
            await _emit(
                OrgEvent(task_id=task_id, type="error", payload={"error": str(e)})
            )
        finally:
            await self._persist_task(ts)
            await self._finish(ts)

    # ─── Persistence ───────────────────────────────────────────

    async def _persist_task(self, ts: TaskState) -> None:
        """Write the task state to the database; a failure never blocks execution."""
        try:
            from kilostar.utils.ray_hook import ray_actor_hook

            pg = ray_actor_hook("postgres_database").postgres_database
            await pg.upsert_org_task.remote(
                task_id=ts.task_id,
                org_name=ts.org_name,
                trace_id=ts.trace_id,
                user_id=ts.user_id,
                status=ts.status,
                description=ts.description,
                source=ts.source,
                # Anything that is not a plain str / dict / list / None is
                # stored as its string form.
                result=ts.result if isinstance(ts.result, (str, dict, list, type(None))) else str(ts.result),
                error=ts.error,
            )
        except Exception:
            self.logger.debug("persist_task skipped (no DB / not ready)")

    async def _persist_event(self, ts: TaskState, ev: OrgEvent) -> None:
        """Append one event to the task's stored event log (best effort)."""
        try:
            from kilostar.utils.ray_hook import ray_actor_hook

            pg = ray_actor_hook("postgres_database").postgres_database
            await pg.append_org_task_event.remote(
                task_id=ts.task_id, event=ev.to_dict()
            )
        except Exception:
            self.logger.debug("persist_event skipped")

    # ─── Resource loading ──────────────────────────────────────

    async def _load_local_tools(self) -> None:
        """Load the tools declared in ``<plugin_dir>/toolset/manifest.json``.

        Each entry names a tool and, optionally, the file defining it (default
        ``<name>.py``); the module attribute with the tool's name is registered.
        The global tools are merged in afterwards as a fallback.
        """
        from pathlib import Path
        import importlib.util
        import sys

        toolset_dir = Path(self.plugin_dir) / "toolset"
        manifest_path = toolset_dir / "manifest.json"
        if manifest_path.exists():
            with open(manifest_path, "r", encoding="utf-8") as f:
                manifest = json.load(f)
            for tool_def in manifest.get("tools", []):
                tname = tool_def.get("name")
                if not tname:
                    continue
                tfile = tool_def.get("file") or f"{tname}.py"
                fpath = toolset_dir / tfile
                if not fpath.exists():
                    self.logger.warning(f"tool file not found: {fpath}")
                    continue
                module_name = f"data.plugin.{self.name}.toolset.{tname}"
                spec = importlib.util.spec_from_file_location(module_name, str(fpath))
                if spec is None or spec.loader is None:
                    continue
                mod = importlib.util.module_from_spec(spec)
                sys.modules[module_name] = mod
                try:
                    spec.loader.exec_module(mod)
                except BaseException:
                    # Do not leave a half-initialised module importable.
                    sys.modules.pop(module_name, None)
                    raise
                func = getattr(mod, tname, None)
                if callable(func):
                    self._tools_by_name[tname] = func
                else:
                    self.logger.warning(
                        f"tool {tname!r} not found as a callable in {fpath}"
                    )

        # Borrow the general-purpose tools from the global tool manager.
        await self._merge_global_tools()

    async def _merge_global_tools(self) -> None:
        """Merge the global snapshot's functions in, never overriding local tools."""
        try:
            from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot

            snapshot = await fetch_snapshot()
            for name, func in snapshot.all_funcs.items():
                self._tools_by_name.setdefault(name, func)
        except Exception:
            self.logger.debug("global tools not available; org runs with local only")

    async def _build_agents(self) -> None:
        """Instantiate one agent per entry of the agents config.

        Each agent receives:
        - the tools it declares (looked up in ``_tools_by_name``)
        - a ``consult`` tool when it has peers, for cross-agent collaboration

        An agent whose provider is missing or whose construction fails is
        skipped with a warning.
        """
        from kilostar.adapter.model_adapter.agent_factory import AgentFactory
        from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot

        snapshot = await fetch_snapshot()
        factory = AgentFactory()

        for adef in self.agents_config.agents:
            provider = snapshot.providers.get(adef.model.provider_title)
            if provider is None:
                self.logger.warning(
                    f"provider {adef.model.provider_title!r} not found; agent {adef.name} skipped"
                )
                continue

            unknown = [t for t in adef.tools if t not in self._tools_by_name]
            if unknown:
                # Say so instead of dropping them silently.
                self.logger.warning(
                    f"agent {adef.name}: unknown tools ignored: {unknown}"
                )
            tools = [
                self._tools_by_name[t]
                for t in adef.tools
                if t in self._tools_by_name
            ]
            consult_tool = self._make_consult_tool(adef)
            if consult_tool is not None:
                tools.append(consult_tool)

            try:
                agent = factory.create_agent(
                    provider=provider,
                    model_id=adef.model.model_id,
                    output_type=str,
                    system_prompt=adef.system_prompt or f"You are {adef.role}.",
                    deps_type=type(None),
                    agent_name=f"{self.name}.{adef.name}",
                    tools=tools,
                    toolsets=None,
                )
                self._agents[adef.name] = agent
            except Exception as e:
                self.logger.warning(f"build agent {adef.name} failed: {e}")

    def _make_consult_tool(self, adef: AgentDef):
        """Build the ``consult(peer, question)`` tool for one agent.

        Returns ``None`` when the agent has no peers. The tool awaits the peer
        agent's ``run`` directly and reports problems as ``[error] ...`` text
        instead of raising.
        """
        if not adef.peers:
            return None

        peers = list(adef.peers)
        org = self

        # NOTE(review): the docstring below is kept verbatim; it is presumably
        # read at runtime as the tool description given to the model.
        async def consult(peer: str, question: str) -> str:
            """向同事 agent 提问以获取专业意见。
            Args:
            peer: 同事 agent 名字
            question: 要问的问题
            """
            if peer not in peers:
                return f"[error] {peer} 不在你的协作列表中: {peers}"
            target = org._agents.get(peer)
            if target is None:
                return f"[error] 同事 agent {peer} 未启动"
            try:
                resp = await target.run(user_prompt=question)
                return org._extract_output(resp)
            except Exception as e:
                return f"[error] {peer} 失败: {e}"

        return consult
+63
View File
@@ -0,0 +1,63 @@
"""组织事件协议:组织 → 前端/PG 的统一推送格式。"""
from __future__ import annotations
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal
OrgEventType = Literal[
    "log",  # plain text log line
    "step",  # phase progress (agent switch, before/after a tool call)
    "artifact",  # an artifact was produced (reuses send_file's url protocol)
    "approval_request",  # user approval is required
    "done",  # task completed
    "error",  # task failed
]
@dataclass
class OrgEvent:
    """One event in the stream a task emits while it runs.

    Events travel as plain dicts: ``to_dict`` serializes, ``from_dict``
    rebuilds the event.
    """

    task_id: str
    type: OrgEventType
    payload: Dict[str, Any] = field(default_factory=dict)
    ts: float = field(default_factory=time.time)

    def to_dict(self) -> Dict[str, Any]:
        """Return the event as a dict with task_id / type / payload / ts keys."""
        serialized: Dict[str, Any] = {}
        for key in ("task_id", "type", "payload", "ts"):
            serialized[key] = getattr(self, key)
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "OrgEvent":
        """Rebuild an event; ``payload`` and ``ts`` are optional in ``data``."""
        fallback_ts = time.time()
        payload = data.get("payload", {})
        stamp = data.get("ts", fallback_ts)
        return cls(
            task_id=data["task_id"],
            type=data["type"],
            payload=payload,
            ts=stamp,
        )
@dataclass
class TaskState:
    """In-memory state of one task inside an organization.

    Per the original note, this state is not kept across restarts, while the
    event stream is persisted to the database.
    """

    task_id: str
    org_name: str
    trace_id: str
    user_id: str
    # A new task starts as "pending".
    status: Literal["pending", "running", "completed", "failed"] = "pending"
    description: str = ""
    # Which entry point created the task.
    source: Literal["cabinet", "user"] = "user"
    # Events emitted so far, in emission order.
    events: List[OrgEvent] = field(default_factory=list)
    result: Any = None
    error: str | None = None
+128
View File
@@ -0,0 +1,128 @@
"""目录扫描 + 装载流水线。
公开 ``discover_plugins(dir)`` 和 ``load_plugin(plugin_dir)`` 两个函数:
- discover:列出所有插件名(manifest 校验通过的)
- load:读 manifest + agents.json + 解析 entry class,返回可实例化的 ``(class, manifest, agents_dict, plugin_dir)``
"""
from __future__ import annotations
import importlib.util
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple, Type
from kilostar.plugin_runtime.manifest import OrgManifest
from kilostar.plugin_runtime.agents_config import AgentsConfig
from kilostar.utils.logger import get_logger
logger = get_logger("plugin_loader")
def discover_plugins(plugin_root: Path) -> List[Path]:
    """Scan the plugin root and return every valid plugin directory.

    A directory is valid when it contains a ``manifest.json`` that passes
    ``OrgManifest`` validation. Directories whose name starts with ``__`` and
    the ``skill`` directory (a skill repository, not an organization) are
    skipped. A missing or non-directory root yields an empty list.
    """
    if not (plugin_root.exists() and plugin_root.is_dir()):
        return []

    excluded_names = ("skill",)
    found: List[Path] = []
    for candidate in plugin_root.iterdir():
        if not candidate.is_dir() or candidate.name.startswith("__"):
            continue
        if candidate.name in excluded_names:
            continue

        manifest_file = candidate / "manifest.json"
        if not manifest_file.exists():
            continue

        try:
            with open(manifest_file, "r", encoding="utf-8") as fh:
                raw_manifest = json.load(fh)
            OrgManifest.model_validate(raw_manifest)
        except Exception as e:
            logger.warning(f"skip plugin {candidate.name}: invalid manifest ({e})")
        else:
            found.append(candidate)
    return found
def load_plugin(
    plugin_dir: Path,
) -> Tuple[Type[Any], Dict[str, Any], Dict[str, Any], str]:
    """Load one plugin and return (Class, manifest_dict, agents_dict, plugin_dir_str).

    - Reads and validates ``manifest.json`` and ``agents.json``.
    - With an empty ``manifest.entry`` the default ``BaseOrganization`` is used.
    - Otherwise the subclass is imported from an entry string of the form
      ``"core.organization:DataCleaningOrg"``.

    Raises:
        FileNotFoundError: ``manifest.json`` or ``agents.json`` is missing.
    """
    manifest_file = plugin_dir / "manifest.json"
    with open(manifest_file, "r", encoding="utf-8") as fh:
        manifest_dict = json.load(fh)
    manifest = OrgManifest.model_validate(manifest_dict)

    agents_file = plugin_dir / "agents.json"
    if not agents_file.exists():
        raise FileNotFoundError(f"plugin {manifest.name} missing agents.json")
    with open(agents_file, "r", encoding="utf-8") as fh:
        agents_dict = json.load(fh)
    # Validation only; the raw dict is what gets returned.
    AgentsConfig.model_validate(agents_dict)

    if not manifest.entry:
        from kilostar.plugin_runtime.base_organization import BaseOrganization

        org_cls = BaseOrganization
    else:
        org_cls = _import_entry_class(plugin_dir, manifest.entry, manifest.name)
    return org_cls, manifest_dict, agents_dict, str(plugin_dir)
def _import_entry_class(plugin_dir: Path, entry: str, plugin_name: str) -> Type[Any]:
    """Resolve an entry string such as ``core.organization:DataCleaningOrg``.

    The part left of ``:`` is the module path relative to the plugin root
    (``/`` and ``.`` both work as separators); the part right of it is the
    class name.

    Raises:
        ValueError: ``entry`` has no ``:<ClassName>`` part.
        FileNotFoundError: the module file does not exist.
        RuntimeError: no import spec / loader could be created for the file.
        AttributeError: the module does not define the requested name.
    """
    if ":" not in entry:
        raise ValueError(f"invalid entry {entry!r}: missing ':<ClassName>'")
    mod_path, class_name = entry.split(":", 1)

    rel = mod_path.replace(".", "/").lstrip("/")
    file_path = plugin_dir / f"{rel}.py"
    if not file_path.exists():
        raise FileNotFoundError(f"plugin {plugin_name} entry file not found: {file_path}")

    module_name = f"data.plugin.{plugin_name}.{mod_path.replace('/', '.')}"
    spec = importlib.util.spec_from_file_location(module_name, str(file_path))
    if spec is None or spec.loader is None:
        raise RuntimeError(f"cannot load module {module_name}")

    mod = importlib.util.module_from_spec(spec)
    previous = sys.modules.get(module_name)
    # Register before executing so the module can be found while it runs.
    sys.modules[module_name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Never leave a half-initialised module importable: put back whatever
        # was registered before (e.g. the last good version on a reload).
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise

    cls = getattr(mod, class_name, None)
    if cls is None:
        raise AttributeError(f"plugin {plugin_name}: {class_name} not found in {file_path}")
    return cls
async def install_dependencies(deps_python: List[str]) -> None:
    """Install an organization's declared Python dependencies with ``uv``.

    Runs ``uv pip install <deps...>`` as a subprocess. An empty list is a no-op.
    Per the original note, this first version installs straight into the main
    virtualenv and is meant to be replaced later.

    Raises:
        RuntimeError: ``uv`` exited with a non-zero return code.
    """
    if not deps_python:
        return

    import asyncio as _asyncio

    cmd = ["uv", "pip", "install", *deps_python]
    proc = await _asyncio.create_subprocess_exec(
        *cmd,
        stdout=_asyncio.subprocess.PIPE,
        stderr=_asyncio.subprocess.PIPE,
    )
    _stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # The tool's output is not guaranteed to be valid UTF-8; a strict
        # decode could raise UnicodeDecodeError and hide the real failure.
        detail = stderr.decode(errors="replace")
        raise RuntimeError(
            f"uv pip install failed (rc={proc.returncode}): {detail}"
        )
    logger.info(f"installed deps: {deps_python}")
+57
View File
@@ -0,0 +1,57 @@
"""manifest.json 的 pydantic 模型。"""
from __future__ import annotations
from typing import List, Literal, Optional
from pydantic import BaseModel, Field
class OrgDependencies(BaseModel):
    """Dependencies declared by an organization.

    The ``python`` list is handed to ``uv`` at install time; ``plugins`` is
    reserved for future plugin-to-plugin dependencies.
    """

    python: List[str] = Field(default_factory=list)
    plugins: List[str] = Field(default_factory=list)
class OrgUIRef(BaseModel):
    """Front-end dashboard entry.

    Per the original note this is a placeholder for now, to be wired up after
    the move to Tauri.
    """

    entry: Optional[str] = None
    icon: Optional[str] = None
class OrgManifest(BaseModel):
    """Charter file (``manifest.json``) of a heavyweight plugin.

    ``name`` is the directory name and also the actor registration name prefix
    (the actual actor name is ``org_<name>``).

    ``entry`` left empty means the default ``BaseOrganization`` implementation
    is used, so a purely declarative plugin can run; when set it looks like
    ``core.organization:DataCleaningOrg`` and points at a subclass.
    """

    name: str
    version: str = "0.1.0"
    display_name: str = ""
    description: str = ""

    # Entry point and concurrency
    entry: Optional[str] = None
    concurrency: Literal["queue", "parallel"] = "queue"
    node_affinity: Literal["cpu", "core", "gpu"] = "cpu"

    # Public surface
    api_prefix: Optional[str] = None
    capabilities: List[str] = Field(default_factory=list)

    # Resources
    dependencies: OrgDependencies = Field(default_factory=OrgDependencies)

    # UI
    ui: OrgUIRef = Field(default_factory=OrgUIRef)

    @property
    def actor_name(self) -> str:
        """Name under which the organization is registered: ``org_<name>``."""
        return f"org_{self.name}"
+137
View File
@@ -0,0 +1,137 @@
"""GlobalPluginManager:重型插件统一管理 actor。
职责:
- 启动期扫描 ``data/plugin/`` 下所有组织,依次 setup
- 运行期提供 install / uninstall / reload 三个热装接口
- 把每个组织注册为 cabinet tool + 挂 FastAPI router
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List, Optional
from kilostar.plugin_runtime.loader import (
discover_plugins,
install_dependencies,
load_plugin,
)
from kilostar.plugin_runtime.manifest import OrgManifest
from kilostar.plugin_runtime.tool_bridge import make_dispatch_tool
from kilostar.utils.logger import get_logger
from kilostar.utils.ray_compat import _STANDALONE, actor_class
from kilostar.utils.ray_hook import register_standalone
from kilostar.utils.settings import get_plugin_dir
logger = get_logger("plugin_manager")
@actor_class
class GlobalPluginManager:
    """Central manager for heavyweight plugins.

    A plain object in standalone mode, a ray actor in distributed mode.
    For every loaded organization it keeps the manifest and a handle to the
    running instance.
    """

    def __init__(self):
        # manifest name -> info dict (display_name, description, manifest,
        # handle, actor_name)
        self._orgs: Dict[str, Dict[str, Any]] = {}
        # "dispatch_to_<name>" -> tool callable
        self._dispatch_tools: Dict[str, Any] = {}

    async def bootstrap(self) -> None:
        """Scan the plugin directory once at startup and load every plugin.

        A plugin that fails to load is logged and skipped; the others still load.
        """
        plugin_root = get_plugin_dir()
        plugin_dirs = discover_plugins(plugin_root)
        for plugin_dir in plugin_dirs:
            try:
                await self._install_from_path(plugin_dir)
            except Exception as e:
                logger.error(f"bootstrap: failed to load plugin {plugin_dir.name}: {e}")

    # ─── Hot-load interface ────────────────────────────────────

    async def install(self, name: str) -> Dict[str, Any]:
        """Hot-load a plugin by directory name, replacing a loaded one.

        Raises:
            FileNotFoundError: ``name`` is not a plain directory name or the
                directory does not exist.
        """
        plugin_dir = self._plugin_dir_for(name)
        if not plugin_dir.exists():
            raise FileNotFoundError(f"plugin dir not found: {plugin_dir}")
        if name in self._orgs:
            await self.uninstall(name)
        await self._install_from_path(plugin_dir)
        return {"name": name, "status": "installed"}

    async def uninstall(self, name: str) -> Dict[str, Any]:
        """Unload a plugin: shut its instance down and drop its dispatch tool."""
        org_info = self._orgs.pop(name, None)
        if org_info is None:
            return {"name": name, "status": "not_found"}
        try:
            handle = org_info.get("handle")
            if handle is not None:
                shutdown = handle.shutdown
                # _install_from_path stores the plain instance, whose bound
                # method has no ``.remote``; calling ``.remote()`` on it raised
                # AttributeError and the worker was never stopped. Use
                # ``.remote`` only when the handle really is a proxy.
                await getattr(shutdown, "remote", shutdown)()
        except Exception as e:
            logger.warning(f"shutdown org_{name} failed: {e}")
        self._dispatch_tools.pop(f"dispatch_to_{name}", None)
        logger.info(f"uninstalled plugin: {name}")
        return {"name": name, "status": "uninstalled"}

    async def reload(self, name: str) -> Dict[str, Any]:
        """Hot-reload a plugin (uninstall, then install)."""
        await self.uninstall(name)
        return await self.install(name)

    # ─── Queries ───────────────────────────────────────────────

    def list_plugins(self) -> List[Dict[str, Any]]:
        """Return name / display name / description of every loaded plugin."""
        return [
            {
                "name": name,
                "display_name": info.get("display_name", name),
                "description": info.get("description", ""),
                "status": "running",
            }
            for name, info in self._orgs.items()
        ]

    def get_dispatch_tools(self) -> Dict[str, Any]:
        """Return a copy of the ``{tool_name: callable}`` dispatch-tool table."""
        return dict(self._dispatch_tools)

    # ─── Internals ─────────────────────────────────────────────

    def _plugin_dir_for(self, name: str) -> Path:
        """Map a plugin name to its directory, rejecting path-like names.

        The name may come from an API caller, so it must be a single path
        component; anything else (``..``, separators, empty) is refused.
        """
        if not name or name in (".", "..") or Path(name).name != name:
            raise FileNotFoundError(f"plugin dir not found: {name!r}")
        return get_plugin_dir() / name

    async def _install_from_path(self, plugin_dir: Path) -> None:
        """Load, set up and register the plugin found in ``plugin_dir``."""
        cls, manifest_dict, agents_dict, dir_str = load_plugin(plugin_dir)
        manifest = OrgManifest.model_validate(manifest_dict)
        name = manifest.name

        # The registry is keyed by manifest name, which need not equal the
        # directory name: stop an instance already running under this name so
        # its worker is not orphaned.
        if name in self._orgs:
            await self.uninstall(name)

        # Install declared dependencies.
        if manifest.dependencies.python:
            await install_dependencies(manifest.dependencies.python)

        # Instantiate the organization and start it.
        instance = cls(manifest_dict, agents_dict, dir_str)
        await instance.setup()

        # Register under the actor name. Per the original note, the first
        # version registers in-process in both standalone and distributed mode;
        # real distributed isolation is left for later.
        actor_name = manifest.actor_name
        register_standalone(actor_name, instance)

        # Build the dispatch tool.
        tool = make_dispatch_tool(name, manifest.display_name, manifest.description)
        self._dispatch_tools[f"dispatch_to_{name}"] = tool

        self._orgs[name] = {
            "display_name": manifest.display_name,
            "description": manifest.description,
            "manifest": manifest_dict,
            "handle": instance,
            "actor_name": actor_name,
        }
        logger.info(f"loaded plugin: {name} (actor={actor_name})")
+52
View File
@@ -0,0 +1,52 @@
"""把组织包装成 cabinet 可调用的高阶 tool。
每个组织 → 一个 ``dispatch_to_<org>(task_description)`` 工具。
ConsciousnessNode/ControlNode 通过这个工具向部门派单,等待部门完成。
"""
from __future__ import annotations
from typing import Callable, Dict
def _render_dispatch_result(org_name: str, result: Dict) -> str:
    """Turn a ``dispatch`` result dict into the text handed back to the caller."""
    if result.get("status") == "completed":
        value = result.get("result")
        # Only a missing result becomes ""; falsy results such as 0 or False
        # are real answers and must not be dropped.
        return "" if value is None else str(value)
    return f"[{org_name} 任务失败] {result.get('error') or 'unknown'}"


def make_dispatch_tool(org_name: str, display_name: str, description: str) -> Callable:
    """Build the ``dispatch_to_<org>`` tool for one organization.

    The tool takes a single natural-language task description, forwards it to
    the organization's ``dispatch`` and returns the result as text (or an error
    note when the task did not complete).

    Args:
        org_name: Organization name; the actor is looked up as ``org_<org_name>``.
        display_name: Human-readable name used in the default description.
        description: Tool description; a default is generated when empty.
    """
    tool_name = f"dispatch_to_{org_name}"
    desc_text = description or f"把任务派给{display_name or org_name}部门,由部门内部多 agent 协作完成。"

    async def _impl(task_description: str) -> str:
        from kilostar.utils.ray_hook import ray_actor_hook

        actor_name = f"org_{org_name}"
        actor = ray_actor_hook(actor_name)
        target = getattr(actor, actor_name)
        result = await target.dispatch.remote(task_description, {})
        return _render_dispatch_result(org_name, result)

    # Name and docstring are set per organization; the text below is kept verbatim.
    _impl.__name__ = tool_name
    _impl.__doc__ = (
        f"{desc_text}\n\n"
        "Args:\n"
        " task_description: 用自然语言描述要部门完成的任务。\n\n"
        "Returns:\n"
        " 部门交付的结果文本,失败时返回错误说明。\n"
    )
    return _impl
def collect_dispatch_tools(org_specs: Dict[str, Dict[str, str]]) -> Dict[str, Callable]:
    """Build dispatch tools in bulk.

    ``org_specs`` maps ``org_name`` to ``{"display_name": ..., "description": ...}``;
    the result maps ``dispatch_to_<org_name>`` to the generated tool.
    """
    tools: Dict[str, Callable] = {}
    for org_name, spec in org_specs.items():
        display_name = spec.get("display_name", "")
        description = spec.get("description", "")
        tools[f"dispatch_to_{org_name}"] = make_dispatch_tool(
            org_name, display_name, description
        )
    return tools