feat: 新增工具插件、系统日志、workflow配置及前端优化
1. 新增工具插件(edit_file, python_executor, search_file, shell_executor, write_file) 2. 新增系统事件日志模块和API 3. 新增workflow配置文件和详情API 4. 前端增加SSE、错误边界、设置引导等组件 5. 优化认证加密、速率限制、配置加载等工具模块 6. 删除废弃的cluster和health API 7. 补充单元测试和集成测试 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -23,8 +23,7 @@ from ray import serve
|
||||
|
||||
from .agent import agent_router
|
||||
from .auth import auth_router
|
||||
from .cluster import cluster_router
|
||||
from .health import health_router
|
||||
from .system import system_router
|
||||
from .platform.frontend import client_router
|
||||
from .platform.onebot import onebot_router
|
||||
from .provider import provider_router
|
||||
@@ -53,7 +52,13 @@ def _get_locale(request: Request) -> str | None:
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
_cors_origins_env = os.environ.get("KILOSTAR_CORS_ORIGINS", "*")
|
||||
_cors_origins_env = os.environ.get("KILOSTAR_CORS_ORIGINS", "")
|
||||
_is_dev = os.environ.get("KILOSTAR_ENV", "production").lower() in ("dev", "development")
|
||||
if not _cors_origins_env and _is_dev:
|
||||
_cors_origins_env = "*"
|
||||
elif not _cors_origins_env:
|
||||
_cors_origins_env = "http://localhost:8000"
|
||||
|
||||
_cors_origins = [o.strip() for o in _cors_origins_env.split(",") if o.strip()]
|
||||
_allow_credentials = "*" not in _cors_origins
|
||||
app.add_middleware(
|
||||
@@ -83,13 +88,12 @@ async def request_id_middleware(request: Request, call_next):
|
||||
response.headers["X-Request-Id"] = request_id
|
||||
return response
|
||||
|
||||
app.include_router(health_router) # 健康检查
|
||||
app.include_router(system_router) # 健康探针 + 系统信息
|
||||
app.include_router(client_router) # 客户端路径
|
||||
app.include_router(onebot_router) # OneBot v11 路径
|
||||
app.include_router(auth_router) # 用户路径
|
||||
app.include_router(provider_router) # 供应商路径
|
||||
app.include_router(resource_router) # 资源路径
|
||||
app.include_router(cluster_router) # 集群信息路径
|
||||
app.include_router(agent_router) # agent路径
|
||||
app.include_router(workflow_router) # workflow路径
|
||||
app.include_router(chat_router) # chat路径
|
||||
|
||||
@@ -116,7 +116,9 @@ async def load_agent(
|
||||
case _:
|
||||
pass
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"加载节点失败: {str(e)}")
|
||||
from kilostar.utils.logger import get_logger
|
||||
get_logger("agent_api").exception(f"加载节点失败: {e}")
|
||||
raise HTTPException(status_code=500, detail="加载节点失败,请查看服务端日志")
|
||||
return {"message": "创建成功"}
|
||||
|
||||
|
||||
|
||||
+30
-5
@@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi import Depends
|
||||
from pydantic import BaseModel
|
||||
from kilostar.utils.access import Accessor, TokenData
|
||||
@@ -21,6 +21,7 @@ from kilostar.utils.ray_hook import ray_actor_hook
|
||||
from kilostar.utils.check_user.role_check import RoleChecker
|
||||
from kilostar.core.postgres_database.model import UserAuthority
|
||||
from kilostar.utils.error import UserNotExistError
|
||||
from kilostar.utils.rate_limit import register_limiter, login_limiter
|
||||
|
||||
auth_router = APIRouter(prefix="/api/v1/auth", tags=["auth"])
|
||||
|
||||
@@ -33,8 +34,9 @@ class UserRegister(BaseModel):
|
||||
|
||||
|
||||
@auth_router.post("/register")
|
||||
async def create_user(user_register: UserRegister):
|
||||
async def create_user(user_register: UserRegister, request: Request):
|
||||
"""注册新用户:异步线程池里做 argon2 哈希,再交由 PostgresDatabase Actor 落库。"""
|
||||
register_limiter.check(request)
|
||||
postgres_database = ray_actor_hook("postgres_database").postgres_database
|
||||
hashed_password = await run_in_threadpool(
|
||||
Accessor.hash_password, user_register.password
|
||||
@@ -53,16 +55,39 @@ class UserLogin(BaseModel):
|
||||
|
||||
|
||||
@auth_router.post("/login")
|
||||
async def login_user(user_login: UserLogin):
|
||||
async def login_user(user_login: UserLogin, request: Request):
|
||||
"""用户登录:查询用户后在线程池中校验口令,校验成功则签发 JWT。"""
|
||||
login_limiter.check(request)
|
||||
postgres_database = ray_actor_hook("postgres_database").postgres_database
|
||||
user = await postgres_database.login_user.remote(user_login.user_name)
|
||||
if not user:
|
||||
raise UserNotExistError()
|
||||
token = await run_in_threadpool(
|
||||
tokens = await run_in_threadpool(
|
||||
Accessor.login_hashed_password, user, user_login.password
|
||||
)
|
||||
return {"message": "success", "token": token}
|
||||
return {
|
||||
"message": "success",
|
||||
"token": tokens["access_token"],
|
||||
"access_token": tokens["access_token"],
|
||||
"refresh_token": tokens["refresh_token"],
|
||||
}
|
||||
|
||||
|
||||
class RefreshTokenRequest(BaseModel):
    """Request body of ``POST /refresh``: carries the refresh token."""

    # Refresh token to be exchanged for a new token pair.
    refresh_token: str
|
||||
|
||||
|
||||
@auth_router.post("/refresh")
async def refresh_token(body: RefreshTokenRequest):
    """Exchange a refresh token for a new access/refresh token pair.

    The submitted token is handed to ``Accessor.refresh_access_token``; the
    mapping it returns is expected to expose ``access_token`` and
    ``refresh_token`` keys, which are copied into the response.
    """
    new_pair = Accessor.refresh_access_token(body.refresh_token)
    response = {"message": "success"}
    response["access_token"] = new_pair["access_token"]
    response["refresh_token"] = new_pair["refresh_token"]
    return response
|
||||
|
||||
|
||||
class ChangeAuthorityRequest(BaseModel):
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
# Copyright 2026 zhaoxi826
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
cluster_router = APIRouter(prefix="/api/v1/cluster", tags=["cluster"])
|
||||
|
||||
# Monitor websocket API temporarily removed
|
||||
@@ -1,54 +0,0 @@
|
||||
# Copyright 2026 zhaoxi826
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""健康检查端点:用于容器存活/就绪探针。"""
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from kilostar.utils.ray_hook import ray_actor_hook
|
||||
|
||||
health_router = APIRouter(tags=["health"])
|
||||
|
||||
|
||||
@health_router.get("/health/live", include_in_schema=True)
|
||||
async def liveness():
|
||||
"""存活探针:进程能响应即视为存活。"""
|
||||
return {"status": "alive"}
|
||||
|
||||
|
||||
@health_router.get("/health/ready", include_in_schema=True)
|
||||
async def readiness():
|
||||
"""就绪探针:检查关键依赖(Postgres / GSM Actor)是否可达。"""
|
||||
checks = {"postgres": False, "global_state_machine": False}
|
||||
|
||||
try:
|
||||
postgres_database = ray_actor_hook("postgres_database").postgres_database
|
||||
await postgres_database.ping.remote()
|
||||
checks["postgres"] = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
gsm = ray_actor_hook("global_state_machine").global_state_machine
|
||||
await gsm.get_skill_list.remote()
|
||||
checks["global_state_machine"] = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
all_ok = all(checks.values())
|
||||
return JSONResponse(
|
||||
status_code=200 if all_ok else 503,
|
||||
content={"status": "ready" if all_ok else "not_ready", "checks": checks},
|
||||
)
|
||||
@@ -49,12 +49,20 @@ onebot_router = APIRouter(prefix="/api/v1/adapter/onebot", tags=["onebot"])
|
||||
def _verify_token(token_from_header: Optional[str]) -> None:
|
||||
"""校验 OneBot 实现端在 ``Authorization`` 头里携带的 access_token。
|
||||
|
||||
若环境变量 ``ONEBOT_ACCESS_TOKEN`` 未设置则跳过校验。OneBot v11 规范要求
|
||||
格式为 ``Bearer <token>``,这里同时容忍只填 token 字符串本身的写法。
|
||||
若环境变量 ``ONEBOT_ACCESS_TOKEN`` 未设置,根据运行模式决策:
|
||||
- 开发模式(KILOSTAR_ENV=dev):跳过校验并记录警告
|
||||
- 生产模式:拒绝所有请求,强制要求配置 token
|
||||
"""
|
||||
expected = os.environ.get("ONEBOT_ACCESS_TOKEN")
|
||||
if not expected:
|
||||
return
|
||||
is_dev = os.environ.get("KILOSTAR_ENV", "production").lower() in ("dev", "development")
|
||||
if is_dev:
|
||||
logger.warning("[OneBot] ONEBOT_ACCESS_TOKEN 未设置,开发模式下跳过认证")
|
||||
return
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail="ONEBOT_ACCESS_TOKEN 未配置,拒绝未认证的 OneBot 连接",
|
||||
)
|
||||
if not token_from_header:
|
||||
raise HTTPException(status_code=401, detail="Missing access_token")
|
||||
raw = token_from_header.removeprefix("Bearer ").removeprefix("Token ").strip()
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
# Copyright 2026 zhaoxi826
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""系统级端点:健康探针 + 集群/系统信息。
|
||||
|
||||
健康探针路径刻意保持在根(``/health/live`` / ``/health/ready``),不加
|
||||
``/api/v1`` 前缀——这是 k8s liveness/readiness probe 的惯例配置,加前缀会
|
||||
让运维侧探针 URL 变复杂。系统信息类端点则走 ``/api/v1/system`` 前缀。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from kilostar.utils.ray_hook import ray_actor_hook
|
||||
from kilostar.utils.access import Accessor, TokenData
|
||||
from kilostar.utils.check_user.role_check import RoleChecker
|
||||
from kilostar.core.postgres_database.model import UserAuthority
|
||||
from kilostar.utils.config_loader import (
|
||||
get_workflow_config,
|
||||
save_workflow_config,
|
||||
WorkflowConfig,
|
||||
)
|
||||
|
||||
system_router = APIRouter(tags=["system"])
|
||||
|
||||
|
||||
@system_router.get("/health/live", include_in_schema=True)
async def liveness():
    """Liveness probe: being able to answer at all counts as alive."""
    return dict(status="alive")
|
||||
|
||||
|
||||
@system_router.get("/health/ready", include_in_schema=True)
async def readiness():
    """Readiness probe: report whether Postgres and the GSM actor respond.

    Each dependency is probed independently; any exception raised while
    probing marks that dependency as not ready instead of propagating.

    Returns:
        A ``JSONResponse`` with status 200 and ``"ready"`` when every check
        passed, otherwise status 503 and ``"not_ready"``. The per-dependency
        results are included under ``"checks"``.
    """

    async def _ping_postgres() -> None:
        actor = ray_actor_hook("postgres_database").postgres_database
        await actor.ping.remote()

    async def _ping_gsm() -> None:
        actor = ray_actor_hook("global_state_machine").global_state_machine
        # Used only as a reachability check; the returned list is discarded.
        await actor.get_skill_list.remote()

    probes = (
        ("postgres", _ping_postgres),
        ("global_state_machine", _ping_gsm),
    )
    checks = {name: False for name, _ in probes}
    for name, probe in probes:
        try:
            await probe()
        except Exception:
            # Best-effort probe: a failure simply leaves the flag False.
            continue
        checks[name] = True

    healthy = all(checks.values())
    return JSONResponse(
        status_code=200 if healthy else 503,
        content={"status": "ready" if healthy else "not_ready", "checks": checks},
    )
|
||||
|
||||
|
||||
@system_router.get("/config/workflow")
async def get_workflow_config_endpoint(
    _: TokenData = Depends(Accessor.get_current_user),
):
    """Return the current workflow configuration to an authenticated caller.

    The configuration object from ``get_workflow_config`` is serialised with
    ``model_dump`` and wrapped under the ``"config"`` key.
    """
    return {"config": get_workflow_config().model_dump()}
|
||||
|
||||
|
||||
@system_router.put("/config/workflow")
async def update_workflow_config_endpoint(
    update: WorkflowConfig,
    _: TokenData = Depends(RoleChecker(allowed_roles=UserAuthority.USER)),
):
    """Persist a new workflow configuration and echo it back.

    NOTE(review): the role gate is ``UserAuthority.USER``; confirm that
    ordinary users are really meant to overwrite this configuration.
    """
    save_workflow_config(update)
    payload = update.model_dump()
    return {"status": "ok", "config": payload}
|
||||
|
||||
|
||||
@system_router.get("/logs")
async def query_system_logs(
    trace_id: str | None = None,
    event_type: str | None = None,
    level: str | None = None,
    limit: int = 100,
    offset: int = 0,
    _: TokenData = Depends(Accessor.get_current_user),
):
    """Query persisted system event logs with optional filters and paging.

    Args:
        trace_id: When given, only events of this workflow trace are returned.
        event_type: When given, only events of this type are returned.
        level: When given, only events of this level are returned.
        limit: Page size; clamped to the range 0..1000 before querying.
        offset: Number of rows to skip; negative values are treated as 0.

    Returns:
        ``{"logs": [...], "count": n}`` where ``count`` is the number of rows
        in this page, not the total number of matching rows.

    NOTE(review): any authenticated user can read events of every trace;
    confirm whether this should be restricted by role or by trace owner.
    """
    # Keep paging values inside a range the database accepts: a negative
    # LIMIT/OFFSET is rejected by the DB and an unbounded LIMIT lets a single
    # request pull the whole table.
    max_limit = 1000
    limit = min(max(limit, 0), max_limit)
    offset = max(offset, 0)

    # Same actor lookup as the readiness probe in this module uses.
    postgres_database = ray_actor_hook("postgres_database").postgres_database
    logs = await postgres_database.query_event_logs.remote(
        trace_id=trace_id,
        event_type=event_type,
        level=level,
        limit=limit,
        offset=offset,
    )
    return {"logs": logs, "count": len(logs)}
|
||||
@@ -66,7 +66,23 @@ async def get_workflow_list(
|
||||
|
||||
|
||||
@workflow_router.get("/sse/{trace_id}")
|
||||
async def get_workflow_sse(trace_id: str, request: Request):
|
||||
async def get_workflow_sse(
|
||||
trace_id: str,
|
||||
request: Request,
|
||||
token_data: TokenData = Depends(Accessor.get_current_user),
|
||||
):
|
||||
"""SSE 事件流。
|
||||
|
||||
鉴权走标准 ``Authorization: Bearer`` 头(前端用 fetch-based SSE,
|
||||
token 不进 URL)。校验该 trace_id 属于当前用户。
|
||||
"""
|
||||
postgres_database = ray_actor_hook("postgres_database").postgres_database
|
||||
wf = await postgres_database.get_workflow.remote(trace_id)
|
||||
if not wf:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
if getattr(wf, "user_id", None) != token_data.user_id:
|
||||
raise HTTPException(status_code=403, detail="Forbidden")
|
||||
|
||||
global_workflow_manager = ray_actor_hook(
|
||||
"global_workflow_manager"
|
||||
).global_workflow_manager
|
||||
@@ -88,7 +104,18 @@ async def get_workflow_sse(trace_id: str, request: Request):
|
||||
|
||||
|
||||
@workflow_router.post("/reply/{trace_id}")
|
||||
async def post_workflow_reply(trace_id: str, request: Request):
|
||||
async def post_workflow_reply(
|
||||
trace_id: str,
|
||||
request: Request,
|
||||
token_data: TokenData = Depends(Accessor.get_current_user),
|
||||
):
|
||||
postgres_database = ray_actor_hook("postgres_database").postgres_database
|
||||
wf = await postgres_database.get_workflow.remote(trace_id)
|
||||
if not wf:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
if getattr(wf, "user_id", None) != token_data.user_id:
|
||||
raise HTTPException(status_code=403, detail="Forbidden")
|
||||
|
||||
data = await request.json()
|
||||
reply_msg = data.get("message", "")
|
||||
global_workflow_manager = ray_actor_hook(
|
||||
@@ -106,10 +133,24 @@ async def get_workflow_detail(
|
||||
wf = await postgres_database.get_workflow.remote(trace_id)
|
||||
if not wf:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
if getattr(wf, "user_id", None) != token_data.user_id:
|
||||
raise HTTPException(status_code=403, detail="Forbidden")
|
||||
|
||||
context = await postgres_database.get_workflow_context.remote(trace_id)
|
||||
|
||||
steps = context.work_link if context and hasattr(context, "work_link") else []
|
||||
work_link = (
|
||||
context.work_link if context and hasattr(context, "work_link") else []
|
||||
)
|
||||
workflow_log = (
|
||||
context.workflow_log if context and hasattr(context, "workflow_log") else []
|
||||
)
|
||||
workflow_pointer = (
|
||||
context.workflow_pointer
|
||||
if context and getattr(context, "workflow_pointer", None) is not None
|
||||
else 0
|
||||
)
|
||||
|
||||
steps = _merge_runtime_status(work_link, workflow_log)
|
||||
|
||||
return {
|
||||
"trace_id": trace_id,
|
||||
@@ -117,10 +158,49 @@ async def get_workflow_detail(
|
||||
"status": wf.status,
|
||||
"command": wf.command,
|
||||
"steps": steps,
|
||||
"current_step": workflow_pointer,
|
||||
"context_blackboard": context.blackboard if context else {},
|
||||
}
|
||||
|
||||
|
||||
def _merge_runtime_status(work_link: list, workflow_log: list) -> list:
|
||||
"""把运行期状态从 ``workflow_log`` 反推并 merge 到每个静态 step 上。
|
||||
|
||||
``work_link`` 是 step 的**静态定义**(名字 / node 类型 / action),不含运行期
|
||||
状态;运行期状态散落在 ``workflow_log`` 里——其结构为::
|
||||
|
||||
[{"<step_index>": [timestamp, status, message]}, ...]
|
||||
|
||||
同一 step 可能出现多条(working → completed),取**最后一条**的 status 作为
|
||||
该 step 当前状态。没有日志记录的 step 视为 ``pending``。
|
||||
|
||||
前端 ``WorkflowDiagram`` 依赖每个 step 的 ``status`` 字段着色,这个拼装让
|
||||
后端真正把运行期状态喂过去。
|
||||
"""
|
||||
# step_index -> 最新 status
|
||||
latest_status: dict[int, str] = {}
|
||||
for entry in workflow_log or []:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
for key, payload in entry.items():
|
||||
try:
|
||||
idx = int(key)
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
if isinstance(payload, (list, tuple)) and len(payload) >= 2:
|
||||
latest_status[idx] = payload[1]
|
||||
|
||||
merged = []
|
||||
for i, step in enumerate(work_link or []):
|
||||
step_copy = dict(step) if isinstance(step, dict) else {}
|
||||
# step 自带的 step 字段优先,否则用位置索引
|
||||
step_idx = step_copy.get("step")
|
||||
lookup_idx = (step_idx - 1) if isinstance(step_idx, int) else i
|
||||
step_copy["status"] = latest_status.get(lookup_idx, "pending")
|
||||
merged.append(step_copy)
|
||||
return merged
|
||||
|
||||
|
||||
@workflow_router.post("/{trace_id}/resume")
|
||||
async def resume_workflow(
|
||||
trace_id: str,
|
||||
@@ -151,9 +231,9 @@ async def resume_workflow(
|
||||
|
||||
from kilostar.core.work.workflow.workflow_engine import run_workflow_task
|
||||
|
||||
# workflow_data 在 resume 路径上不会被使用(hydrate 会走 resume 分支),
|
||||
# 这里给个空 dict 占位即可
|
||||
run_workflow_task.remote({}, trace_id)
|
||||
# resume_only=True:task 入口 hydrate 失败会 fail-fast,绝不 fall through
|
||||
# 到"全新模式空跑"。workflow_data 在 resume 路径上不会被使用,传空 dict 占位。
|
||||
run_workflow_task.remote({}, trace_id, resume_only=True)
|
||||
return {"trace_id": trace_id, "status": "resuming"}
|
||||
|
||||
|
||||
|
||||
@@ -203,6 +203,10 @@ class GlobalStateMachine:
|
||||
"""返回某个 scope 下的"系统 + 自定义工具组"toolset 列表(不含 MCP)。"""
|
||||
return self._global_tool_manager.get_toolsets_for_scope(scope)
|
||||
|
||||
def get_retrieval_toolsets_for_scope(self, scope: str) -> List[Any]:
    """Return only the retrieval toolsets for ``scope``.

    Intended for system nodes; generation tools are not included. The lookup
    is delegated to the global tool manager.
    """
    manager = self._global_tool_manager
    return manager.get_retrieval_toolsets_for_scope(scope)
|
||||
|
||||
# ─── MCP Server Registry ───────────────────────────────────
|
||||
|
||||
async def add_mcp_server(self, server_id: str, config: Dict[str, Any]) -> bool:
|
||||
|
||||
@@ -34,7 +34,9 @@ class GlobalToolManager:
|
||||
def __init__(self) -> None:
|
||||
self.tool_metadata = {}
|
||||
self._tool_funcs = defaultdict(dict)
|
||||
self._retrieval_tool_funcs = defaultdict(dict)
|
||||
self._system_toolsets = {}
|
||||
self._retrieval_toolsets = {}
|
||||
self._custom_toolsets = {}
|
||||
self._third_party_funcs = {}
|
||||
self.tool_mapper = defaultdict(dict)
|
||||
@@ -75,11 +77,14 @@ class GlobalToolManager:
|
||||
is_system = bool(tool_data_cls.model_fields.get("is_system").default)
|
||||
category_field = tool_data_cls.model_fields.get("category")
|
||||
category = (category_field.default if category_field else "other") or "other"
|
||||
toolset_field = tool_data_cls.model_fields.get("toolset")
|
||||
toolset_name = (toolset_field.default if toolset_field else "other") or "other"
|
||||
|
||||
self.tool_metadata[plugin_name] = {
|
||||
"name": plugin_name,
|
||||
"is_system": is_system,
|
||||
"category": category,
|
||||
"toolset": toolset_name,
|
||||
"action_scope": list(action_scopes),
|
||||
}
|
||||
|
||||
@@ -92,12 +97,15 @@ class GlobalToolManager:
|
||||
for scope in scopes:
|
||||
self._tool_funcs[scope][plugin_name] = tool_func
|
||||
self.tool_mapper[scope][plugin_name] = tool_data_cls
|
||||
if toolset_name == "retrieval":
|
||||
self._retrieval_tool_funcs[scope][plugin_name] = tool_func
|
||||
else:
|
||||
self._third_party_funcs[plugin_name] = tool_func
|
||||
for scope in scopes:
|
||||
self.tool_mapper[scope][plugin_name] = tool_data_cls
|
||||
|
||||
self._build_system_toolsets()
|
||||
self._build_retrieval_toolsets()
|
||||
|
||||
def _build_system_toolsets(self) -> None:
|
||||
FunctionToolset = self._import_function_toolset()
|
||||
@@ -114,6 +122,21 @@ class GlobalToolManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to build system toolset {scope}: {e}")
|
||||
|
||||
def _build_retrieval_toolsets(self) -> None:
|
||||
FunctionToolset = self._import_function_toolset()
|
||||
if FunctionToolset is None:
|
||||
return
|
||||
for scope, name_to_func in self._retrieval_tool_funcs.items():
|
||||
if not name_to_func:
|
||||
continue
|
||||
try:
|
||||
self._retrieval_toolsets[scope] = FunctionToolset(
|
||||
tools=list(name_to_func.values()),
|
||||
id=f"retrieval::{scope}",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to build retrieval toolset {scope}: {e}")
|
||||
|
||||
def rebuild_custom_toolsets(self, custom_defs: Dict[str, Dict[str, Any]]) -> None:
|
||||
"""根据 DB 中的自定义工具组定义重建 custom FunctionToolset。"""
|
||||
FunctionToolset = self._import_function_toolset()
|
||||
@@ -170,6 +193,15 @@ class GlobalToolManager:
|
||||
result.extend(self._custom_toolsets.values())
|
||||
return result
|
||||
|
||||
def get_retrieval_toolsets_for_scope(self, scope: str) -> List[Any]:
    """Return the retrieval toolsets that apply to ``scope``.

    Intended for system nodes. The ``"default"`` toolset comes first,
    followed by the toolset registered for ``scope`` itself; scopes without
    a built toolset contribute nothing.

    Args:
        scope: Scope name to look up.

    Returns:
        A list with zero, one or two toolsets, never containing the same
        toolset twice.
    """
    result: List[Any] = []
    # dict.fromkeys keeps the order but collapses the pair when the caller
    # asks for scope "default", which would otherwise yield that toolset twice.
    for name in dict.fromkeys(("default", scope)):
        toolset = self._retrieval_toolsets.get(name)
        if toolset is not None:
            result.append(toolset)
    return result
|
||||
|
||||
# ─── Metadata accessors ───
|
||||
|
||||
def is_third_party_tool(self, tool_name: str) -> bool:
|
||||
|
||||
@@ -33,6 +33,7 @@ from kilostar.core.postgres_database.model.system_node import SystemNodeConfigMo
|
||||
from kilostar.core.postgres_database.model.mcp_server import MCPServerModel
|
||||
from kilostar.core.postgres_database.model.tool_config import ToolConfigModel
|
||||
from kilostar.core.postgres_database.model.custom_toolset import CustomToolsetModel
|
||||
from kilostar.core.postgres_database.model.system_event_log import SystemEventLog
|
||||
|
||||
# 兼容旧代码的别名
|
||||
Provider = ProviderModel
|
||||
@@ -61,5 +62,6 @@ __all__ = [
|
||||
"MCPServerModel",
|
||||
"ToolConfigModel",
|
||||
"CustomToolsetModel",
|
||||
"SystemEventLog",
|
||||
"AgentType",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
import datetime

from sqlalchemy import String, DateTime, Integer, func, Text
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import JSONB
from .base import BaseDataModel


class SystemEventLog(BaseDataModel):
    """ORM model for one row of the ``system_event_log`` table.

    Each row is a single event tied to a workflow ``trace_id``. The
    ``trace_id``, ``event_type``, ``level`` and ``created_at`` columns are
    indexed.
    """

    __tablename__ = "system_event_log"

    # Auto-incrementing surrogate key.
    id: Mapped[int] = mapped_column(
        Integer, primary_key=True, autoincrement=True
    )
    # trace_id of the workflow the event belongs to.
    trace_id: Mapped[str] = mapped_column(
        String(64), index=True, comment="关联的工作流 trace_id"
    )
    # Event type, e.g. workflow_start / step_enter / step_complete /
    # step_error / workflow_complete / workflow_fail / system.
    event_type: Mapped[str] = mapped_column(
        String(50), index=True,
        comment="事件类型: workflow_start/step_enter/step_complete/step_error/workflow_complete/workflow_fail/system"
    )
    # Log level: info / warn / error.
    level: Mapped[str] = mapped_column(
        String(10), index=True, default="info",
        comment="日志级别: info/warn/error"
    )
    # Name of the node the event relates to, if any.
    node_name: Mapped[str | None] = mapped_column(
        String(100), nullable=True, comment="相关节点名称"
    )
    # Log message body.
    message: Mapped[str] = mapped_column(
        Text, comment="日志消息正文"
    )
    # Optional extra metadata (step_index, output, ...).
    extra_data: Mapped[dict | None] = mapped_column(
        JSONB, nullable=True, comment="附加元数据(step_index/output 等)"
    )
    # Filled in by the database via ``now()``. The column is a timezone-aware
    # DateTime, so the Python-side type is ``datetime.datetime``, not ``str``.
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), index=True
    )
|
||||
@@ -0,0 +1,72 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
from sqlalchemy import select, desc
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, AsyncSession
|
||||
|
||||
from kilostar.core.postgres_database.model.system_event_log import SystemEventLog
|
||||
from kilostar.core.postgres_database.database_exception import database_exception
|
||||
|
||||
|
||||
class SystemEventLogDatabase:
    """Data-access helper for ``SystemEventLog`` rows."""

    def __init__(self, async_session_maker: async_sessionmaker[AsyncSession]):
        """Store the session factory used to open a session per operation."""
        self.async_session_maker = async_session_maker

    @database_exception
    async def insert_event(
        self,
        trace_id: str,
        event_type: str,
        level: str,
        message: str,
        node_name: Optional[str] = None,
        metadata: Optional[dict] = None,
    ) -> None:
        """Insert one event row and commit it.

        Args:
            trace_id: Workflow trace the event belongs to.
            event_type: Event type label.
            level: Log level label.
            message: Log message body.
            node_name: Optional name of the related node.
            metadata: Optional extra data, stored in the ``extra_data`` column.
        """
        async with self.async_session_maker() as session:
            log = SystemEventLog(
                trace_id=trace_id,
                event_type=event_type,
                level=level,
                message=message,
                node_name=node_name,
                extra_data=metadata,
            )
            session.add(log)
            await session.commit()

    @database_exception
    async def query_events(
        self,
        trace_id: Optional[str] = None,
        event_type: Optional[str] = None,
        level: Optional[str] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> List[dict]:
        """Return event rows as plain dicts, newest first.

        Args:
            trace_id: Filter by trace id when truthy.
            event_type: Filter by event type when truthy.
            level: Filter by level when truthy.
            limit: Maximum number of rows to return.
            offset: Number of rows to skip.

        Returns:
            A list of dicts with the keys ``id``, ``trace_id``, ``event_type``,
            ``level``, ``node_name``, ``message``, ``metadata`` and
            ``created_at`` (stringified, or ``None`` when unset).
        """
        async with self.async_session_maker() as session:
            # ``id`` is a tie-breaker: rows sharing a ``created_at`` value
            # would otherwise have no defined order, so OFFSET/LIMIT pages
            # could repeat or skip rows between requests.
            stmt = select(SystemEventLog).order_by(
                desc(SystemEventLog.created_at), desc(SystemEventLog.id)
            )

            if trace_id:
                stmt = stmt.where(SystemEventLog.trace_id == trace_id)
            if event_type:
                stmt = stmt.where(SystemEventLog.event_type == event_type)
            if level:
                stmt = stmt.where(SystemEventLog.level == level)

            stmt = stmt.offset(offset).limit(limit)
            result = await session.execute(stmt)
            rows = result.scalars().all()

            return [
                {
                    "id": r.id,
                    "trace_id": r.trace_id,
                    "event_type": r.event_type,
                    "level": r.level,
                    "node_name": r.node_name,
                    "message": r.message,
                    "metadata": r.extra_data,
                    "created_at": str(r.created_at) if r.created_at else None,
                }
                for r in rows
            ]
|
||||
@@ -41,6 +41,7 @@ from kilostar.core.postgres_database.model.system_node import SystemNodeConfigMo
|
||||
from kilostar.core.postgres_database.model.mcp_server import MCPServerModel
|
||||
from kilostar.core.postgres_database.model.tool_config import ToolConfigModel
|
||||
from kilostar.core.postgres_database.model.custom_toolset import CustomToolsetModel
|
||||
from kilostar.core.postgres_database.model.system_event_log import SystemEventLog
|
||||
|
||||
from .module.individual import IndividualDatabase
|
||||
from .module.user import AuthDatabase
|
||||
@@ -51,6 +52,7 @@ from .module.chat_history import ChatHistoryDatabase
|
||||
from .module.mcp_server import MCPServerDatabase
|
||||
from .module.tool_config import ToolConfigDatabase
|
||||
from .module.custom_toolset import CustomToolsetDatabase
|
||||
from .module.system_event_log import SystemEventLogDatabase
|
||||
|
||||
|
||||
@ray.remote
|
||||
@@ -85,6 +87,7 @@ class PostgresDatabase:
|
||||
self._mcp_server_database = MCPServerDatabase(self.async_session_maker)
|
||||
self._tool_config_database = ToolConfigDatabase(self.async_session_maker)
|
||||
self._custom_toolset_database = CustomToolsetDatabase(self.async_session_maker)
|
||||
self._system_event_log_database = SystemEventLogDatabase(self.async_session_maker)
|
||||
|
||||
self.ready_event = asyncio.Event()
|
||||
|
||||
@@ -94,11 +97,10 @@ class PostgresDatabase:
|
||||
async with self.async_engine.begin() as conn:
|
||||
await conn.run_sync(BaseDataModel.metadata.create_all)
|
||||
print("✅ 数据库表创建/验证完成")
|
||||
self.ready_event.set()
|
||||
except Exception as e:
|
||||
print(f"❌ 数据库初始化失败: {e}")
|
||||
raise
|
||||
finally:
|
||||
self.ready_event.set()
|
||||
|
||||
async def ping(self) -> bool:
|
||||
"""轻量探活:等待 ready 后执行 ``SELECT 1``。"""
|
||||
@@ -376,3 +378,35 @@ class PostgresDatabase:
|
||||
"""删除一个自定义工具组。"""
|
||||
await self.ready_event.wait()
|
||||
return await self._custom_toolset_database.delete(toolset_id)
|
||||
|
||||
# System Event Log Methods
|
||||
async def insert_event_log(
    self,
    trace_id: str,
    event_type: str,
    level: str,
    message: str,
    node_name=None,
    metadata=None,
):
    """Wait until the database is ready, then store one system event.

    All arguments are forwarded unchanged, by keyword, to the event-log
    store's ``insert_event``; its result is returned.
    """
    await self.ready_event.wait()
    store = self._system_event_log_database
    event_fields = dict(
        trace_id=trace_id,
        event_type=event_type,
        level=level,
        message=message,
        node_name=node_name,
        metadata=metadata,
    )
    return await store.insert_event(**event_fields)
|
||||
|
||||
async def query_event_logs(
    self, trace_id=None, event_type=None, level=None, limit=100, offset=0
):
    """Wait until the database is ready, then query system events.

    The filters and paging values are forwarded unchanged, by keyword, to
    the event-log store's ``query_events``; its result is returned.
    """
    await self.ready_event.wait()
    store = self._system_event_log_database
    query = dict(
        trace_id=trace_id,
        event_type=event_type,
        level=level,
        limit=limit,
        offset=offset,
    )
    return await store.query_events(**query)
|
||||
|
||||
@@ -61,6 +61,7 @@ class WorkflowGraphState(BaseModel):
|
||||
# 已发过 put_pending 的 HumanApproval step index 列表;resume 后避免重复推送。
|
||||
# 用 list(不是 set)是为了 pydantic_graph 序列化 history 时 JSON 友好。
|
||||
approvals_notified: List[int] = Field(default_factory=list)
|
||||
jump_counts: Dict[str, int] = Field(default_factory=dict)
|
||||
|
||||
|
||||
# 业务侧执行入口:把 step + state 喂进去,拿到 (output_text, success_bool)
|
||||
@@ -277,8 +278,13 @@ async def _execute_step(
|
||||
)
|
||||
|
||||
try:
|
||||
output_text, success = await executor(step_data, state)
|
||||
except Exception as e: # 执行器抛异常 → 走失败分支
|
||||
step_timeout = step_data.get("timeout", 300)
|
||||
output_text, success = await asyncio.wait_for(
|
||||
executor(step_data, state), timeout=step_timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
output_text, success = f"步骤执行超时({step_data.get('timeout', 300)}s)", False
|
||||
except Exception as e:
|
||||
output_text, success = str(e), False
|
||||
|
||||
if success:
|
||||
@@ -311,6 +317,25 @@ async def _execute_step(
|
||||
logic_gate = step_data.get("logic_gate") or {}
|
||||
fail_target = logic_gate.get("if_fail")
|
||||
if fail_target and "jump_to_step_" in fail_target:
|
||||
from kilostar.utils.config_loader import get_workflow_config
|
||||
|
||||
max_attempts = get_workflow_config().retry.max_attempts
|
||||
jump_key = f"{state.current_step_index}->{fail_target}"
|
||||
state.jump_counts[jump_key] = state.jump_counts.get(jump_key, 0) + 1
|
||||
|
||||
if state.jump_counts[jump_key] > max_attempts:
|
||||
state.logs.append(
|
||||
{
|
||||
str(state.current_step_index): [
|
||||
str(datetime.datetime.now()),
|
||||
"failed",
|
||||
f"环重试次数超过上限 ({max_attempts}),终止工作流",
|
||||
]
|
||||
}
|
||||
)
|
||||
await _persist_context(ctx, status=WorkflowStatus.FAILED.value)
|
||||
return Finalize(status=WorkflowStatus.FAILED.value)
|
||||
|
||||
target_step = int(fail_target.split("_")[-1]) - 1
|
||||
state.current_step_index = target_step
|
||||
await _persist_context(ctx, status=WorkflowStatus.RUNNING.value)
|
||||
@@ -495,14 +520,21 @@ async def resume_workflow_graph(
|
||||
|
||||
|
||||
@ray.remote
|
||||
def run_workflow_task(workflow_data: dict, trace_id: str):
|
||||
def run_workflow_task(
|
||||
workflow_data: dict, trace_id: str, resume_only: bool = False
|
||||
):
|
||||
"""workflow 的 ray task 入口:一次性执行,跑完即销毁。
|
||||
|
||||
生产路径下持久化交给 ``PostgresStatePersistence`` —— 即便进程崩溃,再 fire
|
||||
一次相同 ``trace_id`` 的任务(或调 ``/workflow/{trace_id}/resume``)即可
|
||||
续跑。同时为了支持 fresh start,先尝试 ``hydrate``:
|
||||
续跑。入口先尝试 ``hydrate``:
|
||||
- hydrate 拿到内容 → 走 resume 路径
|
||||
- hydrate 没拿到 → 走全新路径
|
||||
- hydrate 没拿到 → 走全新路径(用传入的 ``workflow_data``)
|
||||
|
||||
``resume_only``:由 ``/resume`` API 显式置 True。此模式下 hydrate 失败
|
||||
(抛异常或没有持久化记录)必须 fail-fast,而不能 fall through 到全新路径——
|
||||
否则会拿着空 ``workflow_data`` 空跑一个 ``work_link=[]`` 的 workflow 并误判
|
||||
为 COMPLETED(静默 bug)。
|
||||
|
||||
ray task 是新进程,contextvars 不会从 caller 传过来,所以入口先 bind 一次
|
||||
``trace_id``,让节点内的日志自动带上它。
|
||||
@@ -511,6 +543,9 @@ def run_workflow_task(workflow_data: dict, trace_id: str):
|
||||
from kilostar.core.work.workflow.graph_persistence import (
|
||||
build_postgres_persistence,
|
||||
)
|
||||
from kilostar.utils.logger import get_logger
|
||||
|
||||
_logger = get_logger("workflow_task")
|
||||
|
||||
async def _entry() -> None:
|
||||
with trace_id_scope(trace_id):
|
||||
@@ -519,9 +554,20 @@ def run_workflow_task(workflow_data: dict, trace_id: str):
|
||||
recovered = False
|
||||
try:
|
||||
recovered = await persistence.hydrate()
|
||||
except Exception: # pragma: no cover - 防御
|
||||
except Exception as e:
|
||||
if resume_only:
|
||||
_logger.error(f"resume 失败:无法 hydrate 图持久化记录: {e}")
|
||||
raise
|
||||
recovered = False
|
||||
|
||||
if resume_only and not recovered:
|
||||
msg = (
|
||||
f"resume 失败:trace {trace_id} 没有可恢复的图持久化记录,"
|
||||
"拒绝以全新模式空跑"
|
||||
)
|
||||
_logger.error(msg)
|
||||
raise RuntimeError(msg)
|
||||
|
||||
if recovered:
|
||||
await resume_workflow_graph(trace_id, persistence=persistence)
|
||||
else:
|
||||
|
||||
@@ -18,7 +18,7 @@ from typing import List, Literal, Dict
|
||||
|
||||
|
||||
class ApprovalToolData(BaseToolData):
|
||||
"""``approval`` 工具的元数据:默认面向 control/consciousness 两类节点开放。"""
|
||||
"""``approval`` 工具的元数据:分配给所有系统节点和 skill_individual。"""
|
||||
|
||||
is_system: bool = True
|
||||
action_scope: List[
|
||||
@@ -29,7 +29,7 @@ class ApprovalToolData(BaseToolData):
|
||||
"growth_node",
|
||||
"",
|
||||
]
|
||||
] = ["control_node", "consciousness_node"]
|
||||
] = []
|
||||
config_args: Dict[str, str] = {}
|
||||
category: str = "system"
|
||||
|
||||
|
||||
@@ -34,3 +34,5 @@ class BaseToolData(BaseModel):
|
||||
config_args: Dict[str, str] = {}
|
||||
category: str = "other"
|
||||
"""工具分类:system(系统内置)、search(搜索)、mcp(MCP 服务器)、other(其他)"""
|
||||
toolset: str = "other"
|
||||
"""工具集:retrieval(检索)、generation(生成)、other(其他)。system_node 只能用 retrieval 集。"""
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
import os
|
||||
from typing import List, Literal, Dict
|
||||
|
||||
from kilostar.plugin.tool_plugin.base_tool import BaseToolData
|
||||
|
||||
|
||||
class EditFileToolData(BaseToolData):
    """Metadata record for the ``edit_file`` tool."""

    # Flags this tool as a system tool.
    is_system: bool = True
    # Node types the tool is scoped to; entries must be one of the literals
    # below. Defaults to an empty list.
    # NOTE(review): an empty list presumably means "no per-node restriction"
    # — confirm against the code that consumes action_scope.
    action_scope: List[
        Literal[
            "control_node",
            "consciousness_node",
            "regulatory_node",
            "growth_node",
            "",
        ]
    ] = []
    # No configuration arguments by default.
    config_args: Dict[str, str] = {}
    # Tool category; set to "system" for this tool.
    category: str = "system"
|
||||
|
||||
|
||||
async def edit_file(
    file_path: str,
    old_content: str,
    new_content: str,
) -> str:
    """Edit a file by replacing the first occurrence of a text fragment.

    Args:
        file_path: Path of the file to edit.
        old_content: Exact fragment to look for; must be non-empty.
        new_content: Text that replaces the first occurrence of the fragment.

    Returns:
        A human-readable result message. Failures are reported as a string
        starting with ``[Error]`` instead of being raised.
    """
    # An empty fragment is "contained" in every string, and
    # ``str.replace("", new, 1)`` would silently prepend ``new_content`` to
    # the file. Reject it up front instead of corrupting the file.
    if not old_content:
        return "[Error] old_content 不能为空"

    try:
        if not os.path.exists(file_path):
            return f"[Error] 文件不存在: {file_path}"

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        if old_content not in content:
            return "[Error] 未在文件中找到要替换的内容片段"

        # Only the first match is replaced; later occurrences are left as-is.
        new_file_content = content.replace(old_content, new_content, 1)

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(new_file_content)

        return f"已成功编辑文件: {file_path}"
    except Exception as e:
        # Tool boundary: report any I/O or decoding failure as text.
        return f"[Error] 编辑文件失败: {e}"
|
||||
@@ -33,7 +33,7 @@ class FileReaderToolData(BaseToolData):
|
||||
"growth_node",
|
||||
"",
|
||||
]
|
||||
] = ["control_node"]
|
||||
] = []
|
||||
config_args: Dict[str, str] = {}
|
||||
category: str = "system"
|
||||
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
import asyncio
|
||||
import sys
|
||||
import tempfile
|
||||
import os
|
||||
from typing import List, Literal, Dict
|
||||
|
||||
from kilostar.plugin.tool_plugin.base_tool import BaseToolData
|
||||
|
||||
|
||||
class PythonExecutorToolData(BaseToolData):
    """Metadata record for the ``python_executor`` tool."""

    # Flags this tool as a system tool.
    is_system: bool = True
    # Node types the tool is scoped to; entries must be one of the literals
    # below. Defaults to an empty list.
    # NOTE(review): an empty list presumably means "no per-node restriction"
    # — confirm against the code that consumes action_scope.
    action_scope: List[
        Literal[
            "control_node",
            "consciousness_node",
            "regulatory_node",
            "growth_node",
            "",
        ]
    ] = []
    # No configuration arguments by default.
    config_args: Dict[str, str] = {}
    # Tool category; set to "system" for this tool.
    category: str = "system"
|
||||
|
||||
|
||||
async def python_executor(code: str, timeout: int = 30) -> str:
    """Run a Python snippet in a child interpreter and return its output.

    The snippet is written to a temporary ``.py`` file and executed with the
    current interpreter (``sys.executable``).

    Args:
        code: Python source to execute.
        timeout: Maximum number of seconds to wait for the child, default 30.

    Returns:
        Stdout, followed by a ``[stderr]`` section and an ``[exit code: N]``
        line when applicable; ``(no output)`` if there is nothing to show.
        Failures and timeouts are reported as a string starting with
        ``[Error]`` instead of being raised.
    """
    tmp_file = None
    proc = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            # Record the name before writing so the file is still cleaned up
            # if the write itself fails.
            tmp_file = f.name
            f.write(code)

        proc = await asyncio.create_subprocess_exec(
            sys.executable,
            tmp_file,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(), timeout=timeout
        )
        output = stdout.decode("utf-8", errors="replace")
        err_output = stderr.decode("utf-8", errors="replace")
        result = ""
        if output:
            result += output
        if err_output:
            result += f"\n[stderr]\n{err_output}"
        if proc.returncode != 0:
            result += f"\n[exit code: {proc.returncode}]"
        return result.strip() or "(no output)"
    except asyncio.TimeoutError:
        return f"[Error] Python 代码执行超时({timeout}s)"
    except Exception as e:
        return f"[Error] 执行失败: {e}"
    finally:
        # A timed-out (or cancelled) child would otherwise keep running after
        # we return; kill and reap it before removing its script.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await asyncio.wait_for(proc.wait(), timeout=5)
            except (ProcessLookupError, asyncio.TimeoutError):
                pass
        if tmp_file and os.path.exists(tmp_file):
            os.unlink(tmp_file)
|
||||
@@ -0,0 +1,58 @@
|
||||
import asyncio
|
||||
from typing import List, Literal, Dict
|
||||
|
||||
from kilostar.plugin.tool_plugin.base_tool import BaseToolData
|
||||
|
||||
|
||||
class SearchFileToolData(BaseToolData):
    """Metadata record for the ``search_file`` tool."""

    # Flags this tool as a system tool.
    is_system: bool = True
    # Node types the tool is scoped to; entries must be one of the literals
    # below. Defaults to an empty list.
    # NOTE(review): an empty list presumably means "no per-node restriction"
    # — confirm against the code that consumes action_scope.
    action_scope: List[
        Literal[
            "control_node",
            "consciousness_node",
            "regulatory_node",
            "growth_node",
            "",
        ]
    ] = []
    # No configuration arguments by default.
    config_args: Dict[str, str] = {}
    # Tool category; set to "system" for this tool.
    category: str = "system"
|
||||
|
||||
|
||||
async def search_file(
    keyword: str,
    directory: str = ".",
    file_pattern: str = "*",
    max_results: int = 20,
) -> str:
    """Recursively search file contents under a directory using ``grep``.

    Args:
        keyword: Keyword or regular expression passed to ``grep``.
        directory: Root directory of the search, default current directory.
        file_pattern: File-name glob passed to ``--include``, e.g. ``"*.py"``.
        max_results: Maximum number of output lines returned.

    Returns:
        Matching ``path:line:content`` lines, a "not found" message when
        nothing matches, or a string starting with ``[Error]`` on failure.
    """
    proc = None
    try:
        limit = int(max_results)
        # Arguments are passed as an argv list, never through a shell, so
        # quotes or metacharacters in the inputs cannot run other commands.
        # "-e" keeps a keyword starting with "-" from being read as an
        # option; "--" does the same for the directory.
        proc = await asyncio.create_subprocess_exec(
            "grep",
            "-rn",
            f"--include={file_pattern}",
            "-m",
            str(limit),
            "-e",
            keyword,
            "--",
            directory,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.DEVNULL,
        )
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=30)
        lines = stdout.decode("utf-8", errors="replace").split("\n")
        # "-m" only caps matches per file; cap the overall line count here.
        output = "\n".join(lines[: max(limit, 0)]).strip()
        if not output:
            return f"未找到包含 '{keyword}' 的匹配项"
        return output
    except asyncio.TimeoutError:
        return "[Error] 搜索超时"
    except Exception as e:
        return f"[Error] 搜索失败: {e}"
    finally:
        # Do not leave a timed-out grep running in the background.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await asyncio.wait_for(proc.wait(), timeout=5)
            except (ProcessLookupError, asyncio.TimeoutError):
                pass
|
||||
@@ -0,0 +1,54 @@
|
||||
import asyncio
|
||||
from typing import List, Literal, Dict
|
||||
|
||||
from kilostar.plugin.tool_plugin.base_tool import BaseToolData
|
||||
|
||||
|
||||
class ShellExecutorToolData(BaseToolData):
    """Metadata record for the ``shell_executor`` tool."""

    # Flags this tool as a system tool.
    is_system: bool = True
    # Node types the tool is scoped to; entries must be one of the literals
    # below. Defaults to an empty list.
    # NOTE(review): an empty list presumably means "no per-node restriction"
    # — confirm against the code that consumes action_scope.
    action_scope: List[
        Literal[
            "control_node",
            "consciousness_node",
            "regulatory_node",
            "growth_node",
            "",
        ]
    ] = []
    # No configuration arguments by default.
    config_args: Dict[str, str] = {}
    # Tool category; set to "system" for this tool.
    category: str = "system"
|
||||
|
||||
|
||||
async def shell_executor(command: str, timeout: int = 30) -> str:
    """Run a shell command and return its combined output.

    NOTE(review): ``command`` is handed to the shell verbatim, so this tool
    executes arbitrary commands by design; access control must happen in the
    caller.

    Args:
        command: Shell command line to execute.
        timeout: Maximum number of seconds to wait, default 30.

    Returns:
        Stdout, followed by a ``[stderr]`` section and an ``[exit code: N]``
        line when applicable; ``(no output)`` if there is nothing to show.
        Failures and timeouts are reported as a string starting with
        ``[Error]`` instead of being raised.
    """
    proc = None
    try:
        proc = await asyncio.create_subprocess_shell(
            command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(), timeout=timeout
        )
        output = stdout.decode("utf-8", errors="replace")
        err_output = stderr.decode("utf-8", errors="replace")
        result = ""
        if output:
            result += output
        if err_output:
            result += f"\n[stderr]\n{err_output}"
        if proc.returncode != 0:
            result += f"\n[exit code: {proc.returncode}]"
        return result.strip() or "(no output)"
    except asyncio.TimeoutError:
        return f"[Error] 命令执行超时({timeout}s)"
    except Exception as e:
        return f"[Error] 执行失败: {e}"
    finally:
        # A timed-out (or cancelled) command would otherwise keep running on
        # the server after the error is returned; kill and reap it. The wait
        # is bounded so a lingering child of the shell cannot stall the tool.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await asyncio.wait_for(proc.wait(), timeout=5)
            except (ProcessLookupError, asyncio.TimeoutError):
                pass
|
||||
@@ -0,0 +1,42 @@
|
||||
import os
|
||||
from typing import List, Literal, Dict
|
||||
|
||||
from kilostar.plugin.tool_plugin.base_tool import BaseToolData
|
||||
|
||||
|
||||
class WriteFileToolData(BaseToolData):
    """Metadata record for the ``write_file`` tool."""

    # Flags this tool as a system tool.
    is_system: bool = True
    # Node types the tool is scoped to; entries must be one of the literals
    # below. Defaults to an empty list.
    # NOTE(review): an empty list presumably means "no per-node restriction"
    # — confirm against the code that consumes action_scope.
    action_scope: List[
        Literal[
            "control_node",
            "consciousness_node",
            "regulatory_node",
            "growth_node",
            "",
        ]
    ] = []
    # No configuration arguments by default.
    config_args: Dict[str, str] = {}
    # Tool category; set to "system" for this tool.
    category: str = "system"
|
||||
|
||||
|
||||
async def write_file(file_path: str, content: str) -> str:
    """Write text to a file, overwriting it and creating parent directories.

    Args:
        file_path: Destination path.
        content: Text to write (UTF-8 encoded on disk).

    Returns:
        A success message that includes the character count, or a string
        starting with ``[Error]`` if anything goes wrong.
    """
    try:
        parent = os.path.dirname(file_path)
        # A bare file name has no directory component to create.
        if parent != "":
            os.makedirs(parent, exist_ok=True)

        with open(file_path, mode="w", encoding="utf-8") as out:
            out.write(content)
    except Exception as err:
        return f"[Error] 写入文件失败: {err}"
    else:
        return f"已成功写入文件: {file_path}({len(content)} 字符)"
|
||||
@@ -28,7 +28,8 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
ALGORITHM = "HS256"
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 2
|
||||
REFRESH_TOKEN_EXPIRE_DAYS = 7
|
||||
_INSECURE_SECRETS = {"secret", "114514", "changethiskey12345"}
|
||||
|
||||
|
||||
@@ -84,9 +85,51 @@ class Accessor:
|
||||
expire = datetime.now(timezone.utc) + timedelta(
|
||||
minutes=ACCESS_TOKEN_EXPIRE_MINUTES
|
||||
)
|
||||
to_encode.update({"exp": int(expire.timestamp())})
|
||||
to_encode.update({"exp": int(expire.timestamp()), "type": "access"})
|
||||
return jwt.encode(to_encode, _get_secret_key(), algorithm=ALGORITHM)
|
||||
|
||||
@staticmethod
def _create_refresh_token(data: dict) -> str:
    """Sign a refresh token valid for ``REFRESH_TOKEN_EXPIRE_DAYS`` days.

    The input claims are copied, then ``exp`` (integer Unix timestamp) and
    ``type="refresh"`` are added before signing.
    """
    expires_at = datetime.now(timezone.utc) + timedelta(
        days=REFRESH_TOKEN_EXPIRE_DAYS
    )
    claims = data.copy()
    claims["exp"] = int(expires_at.timestamp())
    # The type claim is what verify_refresh_token checks for.
    claims["type"] = "refresh"
    return jwt.encode(claims, _get_secret_key(), algorithm=ALGORITHM)
|
||||
|
||||
@staticmethod
def verify_refresh_token(token: str) -> TokenData:
    """Validate a refresh token and return the identity it carries.

    Raises:
        HTTPException: 401 when the token is expired, malformed, fails
            ``TokenData`` validation, or its ``type`` claim is not
            ``"refresh"``.
    """
    try:
        claims = jwt.decode(token, _get_secret_key(), algorithms=[ALGORITHM])
        if claims.get("type") != "refresh":
            # e.g. an access token presented at the refresh endpoint
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="无效的 refresh token",
            )
        # Pass every claim except "type" on to TokenData.
        identity = {name: value for name, value in claims.items() if name != "type"}
        return TokenData(**identity)
    except jwt.ExpiredSignatureError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Refresh token 已过期,请重新登录",
        )
    except (jwt.InvalidTokenError, ValidationError):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="无效的 refresh token",
        )
|
||||
|
||||
@staticmethod
def refresh_access_token(refresh_token: str) -> dict:
    """Exchange a refresh token for a new access/refresh token pair.

    Returns:
        A dict with ``access_token`` and ``refresh_token`` keys.

    Raises:
        HTTPException: propagated from ``verify_refresh_token``.
    """
    identity = Accessor.verify_refresh_token(refresh_token)
    claims = dict(user_id=identity.user_id, username=identity.username)
    new_access = Accessor._create_access_token(claims)
    new_refresh = Accessor._create_refresh_token(claims)
    return {"access_token": new_access, "refresh_token": new_refresh}
|
||||
|
||||
@staticmethod
|
||||
def verify_password(plain_password: str, hashed_password: str) -> bool:
|
||||
"""校验明文口令是否匹配数据库中存储的哈希。"""
|
||||
@@ -105,8 +148,8 @@ class Accessor:
|
||||
return Accessor._decode_token(token)
|
||||
|
||||
@staticmethod
|
||||
def login_hashed_password(user: "User", password: str) -> str:
|
||||
"""完成登录核验:找不到用户或密码错误抛 401,否则签发新令牌。"""
|
||||
def login_hashed_password(user: "User", password: str) -> dict:
|
||||
"""完成登录核验:找不到用户或密码错误抛 401,否则签发 access + refresh 令牌对。"""
|
||||
if not user:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
@@ -118,13 +161,21 @@ class Accessor:
|
||||
detail="用户名或密码错误",
|
||||
)
|
||||
token_payload = {"user_id": str(user.user_id), "username": user.user_name}
|
||||
return Accessor._create_access_token(data=token_payload)
|
||||
return {
|
||||
"access_token": Accessor._create_access_token(data=token_payload),
|
||||
"refresh_token": Accessor._create_refresh_token(data=token_payload),
|
||||
}
|
||||
|
||||
@staticmethod
def hash_password(password: str) -> str:
    """Hash a plaintext password after enforcing the password policy.

    Raises:
        ValueError: if the password is empty, shorter than 8 characters, or
            lacks an uppercase letter, a lowercase letter, or a digit.
    """
    if not password:
        raise ValueError("密码不能为空")
    if len(password) < 8:
        raise ValueError("密码长度不能小于 8 位")

    # One pass collecting which character classes are present.
    seen_upper = seen_lower = seen_digit = False
    for ch in password:
        seen_upper = seen_upper or ch.isupper()
        seen_lower = seen_lower or ch.islower()
        seen_digit = seen_digit or ch.isdigit()

    if not seen_upper or not seen_lower or not seen_digit:
        raise ValueError("密码必须包含大写字母、小写字母和数字")
    return password_hasher.hash(password)
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
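"""Workflow configuration management: load, cache, and explicit reload.

Config file path: ``config/workflow.yaml`` (relative to the project root).
A module-level singleton holds the parsed configuration:
- it is loaded lazily on the first ``get_workflow_config()`` call
- ``reload_workflow_config()`` explicitly re-reads the file
- ``save_workflow_config()`` writes the file and then reloads the cache

Note: the file's modification time is not checked. Changes made to the file
by other means (or by another process) are only picked up after
``reload_workflow_config()`` is called in this process.
"""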
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
_CONFIG_DIR = Path(__file__).resolve().parent.parent.parent / "config"
|
||||
_WORKFLOW_YAML = _CONFIG_DIR / "workflow.yaml"
|
||||
|
||||
|
||||
class RetryConfig(BaseModel):
    """Retry settings section of the workflow configuration."""

    # Maximum number of attempts; validated to lie in [1, 100], default 5.
    max_attempts: int = Field(default=5, ge=1, le=100)
|
||||
|
||||
|
||||
class WorkflowConfig(BaseModel):
    """Top-level workflow configuration model."""

    # Retry settings; a default RetryConfig is created when the section is
    # omitted.
    retry: RetryConfig = Field(default_factory=RetryConfig)
|
||||
|
||||
|
||||
_current: WorkflowConfig | None = None
|
||||
|
||||
|
||||
def _load_from_disk() -> WorkflowConfig:
    """Read and validate the workflow YAML file.

    Returns:
        The validated configuration, or a default ``WorkflowConfig`` when the
        file does not exist. An empty file is also treated as all defaults.

    Raises:
        pydantic.ValidationError: if the file content does not match the
            model.
        yaml.YAMLError: if the file is not valid YAML.
    """
    # Open directly instead of checking exists() first: the file can vanish
    # between the check and the open.
    try:
        with open(_WORKFLOW_YAML, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except (FileNotFoundError, NotADirectoryError):
        return WorkflowConfig()
    return WorkflowConfig.model_validate(data)
|
||||
|
||||
|
||||
def get_workflow_config() -> WorkflowConfig:
    """Return the cached workflow configuration, loading it on first use."""
    global _current
    if _current is not None:
        return _current
    # First call in this process: populate the module-level cache.
    _current = _load_from_disk()
    return _current
|
||||
|
||||
|
||||
def reload_workflow_config() -> WorkflowConfig:
    """Re-read the configuration from disk, replace the cache, and return it."""
    global _current
    refreshed = _load_from_disk()
    _current = refreshed
    return refreshed
|
||||
|
||||
|
||||
def save_workflow_config(config: WorkflowConfig) -> None:
    """Persist ``config`` to the workflow YAML file and refresh the cache.

    The YAML is written to a temporary sibling file and then moved over the
    target with ``os.replace``. Writing in place would truncate the file
    first, so a concurrent ``_load_from_disk`` could read an empty file and
    silently fall back to defaults.

    Args:
        config: Configuration to write.
    """
    _WORKFLOW_YAML.parent.mkdir(parents=True, exist_ok=True)
    data = config.model_dump()
    # Per-process temp name so two writers never share a temp file.
    tmp_path = _WORKFLOW_YAML.with_name(
        f"{_WORKFLOW_YAML.name}.{os.getpid()}.tmp"
    )
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True)
        os.replace(tmp_path, _WORKFLOW_YAML)
    except BaseException:
        # Do not leave a stray temp file behind on failure.
        if tmp_path.exists():
            tmp_path.unlink()
        raise
    reload_workflow_config()
|
||||
@@ -125,6 +125,19 @@ async def get_all_toolsets_for_scope(scope: str) -> List[Any]:
|
||||
return toolsets
|
||||
|
||||
|
||||
async def get_retrieval_toolsets_for_scope(scope: str) -> List[Any]:
    """Return only the retrieval toolsets for ``scope``.

    The toolsets are fetched from the ``global_state_machine`` actor. Any
    failure is logged and results in an empty list rather than an exception.
    """
    collected: List[Any] = []
    try:
        state_machine = ray_actor_hook("global_state_machine").global_state_machine
        fetched = await state_machine.get_retrieval_toolsets_for_scope.remote(scope)
        if fetched:
            collected += fetched
    except Exception as e:
        logger.error(f"Failed to load retrieval toolsets ({scope}): {e}")
    return collected
|
||||
|
||||
|
||||
async def list_mcp_tools_for_configs(
|
||||
configs: Dict[str, Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import Dict, Tuple
|
||||
|
||||
from fastapi import HTTPException, Request
|
||||
|
||||
|
||||
class InMemoryRateLimiter:
    """In-memory sliding-window rate limiter keyed by client IP.

    Request timestamps are tracked per client key; once a key has
    ``max_requests`` timestamps inside the last ``window_seconds`` seconds,
    ``check`` raises HTTP 429. State lives in this process only, so it suits
    single-instance deployments; clustered deployments need a shared backend
    such as Redis.

    Args:
        max_requests: Requests allowed per key within one window.
        window_seconds: Window length in seconds.
        trust_forwarded_for: When True (default, the historical behavior) the
            first ``X-Forwarded-For`` entry is used as the client key.
            NOTE(review): that header is client-controlled unless a trusted
            reverse proxy overwrites it; set this to False when the service
            is reachable directly, otherwise the limit can be bypassed.
    """

    def __init__(
        self,
        max_requests: int = 5,
        window_seconds: int = 60,
        *,
        trust_forwarded_for: bool = True,
    ):
        self._max_requests = max_requests
        self._window_seconds = window_seconds
        self._trust_forwarded_for = trust_forwarded_for
        self._requests: Dict[str, list[float]] = defaultdict(list)
        # Monotonic time of the last sweep over all keys.
        self._last_purge = time.monotonic()

    def _get_client_ip(self, request: Request) -> str:
        """Return the key identifying the caller of ``request``."""
        if self._trust_forwarded_for:
            forwarded = request.headers.get("X-Forwarded-For")
            if forwarded:
                first_hop = forwarded.split(",")[0].strip()
                # An empty first entry (e.g. ", 10.0.0.1") must not collapse
                # unrelated callers into the "" key.
                if first_hop:
                    return first_hop
        return request.client.host if request.client else "unknown"

    def _cleanup(self, key: str, now: float) -> None:
        """Drop timestamps of ``key`` that have left the window."""
        cutoff = now - self._window_seconds
        recent = [t for t in self._requests.get(key, ()) if t > cutoff]
        if recent:
            self._requests[key] = recent
        else:
            # Remove the key entirely instead of keeping an empty list.
            self._requests.pop(key, None)

    def _purge_idle(self, now: float) -> None:
        """Remove keys with no request inside the window, once per window.

        Without this sweep a key that never comes back would stay in memory
        forever, so the table would grow with every distinct client seen.
        """
        if now - self._last_purge < self._window_seconds:
            return
        self._last_purge = now
        cutoff = now - self._window_seconds
        idle = [
            key
            for key, stamps in self._requests.items()
            if not stamps or stamps[-1] <= cutoff
        ]
        for key in idle:
            del self._requests[key]

    def check(self, request: Request) -> None:
        """Record one request, or raise 429 if the caller is over the limit.

        Raises:
            HTTPException: status 429 when the caller already has
                ``max_requests`` requests inside the current window.
        """
        # Monotonic clock: interval arithmetic must not be affected by
        # wall-clock adjustments.
        now = time.monotonic()
        self._purge_idle(now)
        key = self._get_client_ip(request)
        self._cleanup(key, now)
        if len(self._requests.get(key, ())) >= self._max_requests:
            raise HTTPException(
                status_code=429,
                detail="请求过于频繁,请稍后再试",
            )
        self._requests[key].append(now)
|
||||
|
||||
|
||||
# Module-level limiter instances shared by importers of this module.
# register_limiter: at most 5 requests per 60-second window per client key
# (presumably applied to the registration endpoint — confirm at the call site).
register_limiter = InMemoryRateLimiter(max_requests=5, window_seconds=60)
# login_limiter: at most 10 requests per 60-second window per client key
# (presumably applied to the login endpoint — confirm at the call site).
login_limiter = InMemoryRateLimiter(max_requests=10, window_seconds=60)
|
||||
@@ -11,6 +11,7 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import time
|
||||
import ray
|
||||
from functools import lru_cache
|
||||
|
||||
@@ -47,14 +48,57 @@ def clear_actor_cache():
|
||||
_get_cached_actor_handle.cache_clear()
|
||||
|
||||
|
||||
def ray_actor_hook(*actor_names: str):
|
||||
def wait_for_actor(
    actor_name: str, *, timeout: float = 10.0, interval: float = 0.5
):
    """Block until the named actor can be resolved and return its handle.

    Intended for start-up and ray-task entry points, where a dependency
    actor may not be registered yet. The handle lookup is retried every
    ``interval`` seconds until it succeeds or ``timeout`` seconds elapse.

    This function sleeps synchronously, so it blocks the calling thread.

    Args:
        actor_name: Registered name of the actor.
        timeout: Maximum seconds to wait; ``<= 0`` means a single attempt.
        interval: Seconds between attempts.

    Raises:
        TimeoutError: the actor was still unavailable at the deadline. The
            last lookup error is chained as ``__cause__``.
    """
    deadline = time.monotonic() + max(timeout, 0.0)
    while True:
        try:
            return _get_cached_actor_handle(actor_name)
        except Exception as e:  # ray.get_actor failures are usually ValueError
            last_err = e
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(
                    f"等待 actor {actor_name!r} 就绪超时({timeout}s):{last_err}"
                ) from last_err
        # Never sleep past the deadline: with interval > remaining time the
        # caller would otherwise block noticeably longer than ``timeout``.
        time.sleep(min(max(interval, 0.0), remaining))
|
||||
|
||||
|
||||
def ray_actor_hook(*actor_names: str, timeout: float = 0.0, interval: float = 0.5):
    """Resolve Ray actor handles by name and return them as an ``ActorList``.

    Example: ``actors = ray_actor_hook("postgres_database", "global_state_machine")``
    makes the handle available as ``actors.postgres_database``.

    Args:
        actor_names: Registered actor names to resolve.
        timeout: When ``> 0``, each actor is resolved through
            ``wait_for_actor`` with this timeout. The default ``0`` resolves
            immediately, so a missing actor raises right away.
        interval: Polling interval in seconds; only used when ``timeout > 0``.
    """
    should_wait = timeout > 0
    handles = ActorList()
    for name in actor_names:
        resolved = (
            wait_for_actor(name, timeout=timeout, interval=interval)
            if should_wait
            else _get_cached_actor_handle(name)
        )
        setattr(handles, name, resolved)
    return handles
|
||||
|
||||
Reference in New Issue
Block a user