feat: 新增工具插件、系统日志、workflow配置及前端优化

1. 新增工具插件(edit_file, python_executor, search_file, shell_executor, write_file)
2. 新增系统事件日志模块和API
3. 新增workflow配置文件和详情API
4. 前端增加SSE、错误边界、设置引导等组件
5. 优化认证加密、速率限制、配置加载等工具模块
6. 删除废弃的cluster和health API
7. 补充单元测试和集成测试

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-03 07:34:43 +00:00
parent f04fef916f
commit a53ffebe0e
57 changed files with 2804 additions and 271 deletions
+9 -5
View File
@@ -23,8 +23,7 @@ from ray import serve
from .agent import agent_router
from .auth import auth_router
from .cluster import cluster_router
from .health import health_router
from .system import system_router
from .platform.frontend import client_router
from .platform.onebot import onebot_router
from .provider import provider_router
@@ -53,7 +52,13 @@ def _get_locale(request: Request) -> str | None:
app = FastAPI()
_cors_origins_env = os.environ.get("KILOSTAR_CORS_ORIGINS", "*")
_cors_origins_env = os.environ.get("KILOSTAR_CORS_ORIGINS", "")
_is_dev = os.environ.get("KILOSTAR_ENV", "production").lower() in ("dev", "development")
if not _cors_origins_env and _is_dev:
_cors_origins_env = "*"
elif not _cors_origins_env:
_cors_origins_env = "http://localhost:8000"
_cors_origins = [o.strip() for o in _cors_origins_env.split(",") if o.strip()]
_allow_credentials = "*" not in _cors_origins
app.add_middleware(
@@ -83,13 +88,12 @@ async def request_id_middleware(request: Request, call_next):
response.headers["X-Request-Id"] = request_id
return response
app.include_router(health_router) # 健康检查
app.include_router(system_router) # 健康探针 + 系统信息
app.include_router(client_router) # 客户端路径
app.include_router(onebot_router) # OneBot v11 路径
app.include_router(auth_router) # 用户路径
app.include_router(provider_router) # 供应商路径
app.include_router(resource_router) # 资源路径
app.include_router(cluster_router) # 集群信息路径
app.include_router(agent_router) # agent路径
app.include_router(workflow_router) # workflow路径
app.include_router(chat_router) # chat路径
+3 -1
View File
@@ -116,7 +116,9 @@ async def load_agent(
case _:
pass
except Exception as e:
raise HTTPException(status_code=500, detail=f"加载节点失败: {str(e)}")
from kilostar.utils.logger import get_logger
get_logger("agent_api").exception(f"加载节点失败: {e}")
raise HTTPException(status_code=500, detail="加载节点失败,请查看服务端日志")
return {"message": "创建成功"}
+30 -5
View File
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from fastapi import APIRouter
from fastapi import APIRouter, Request
from fastapi import Depends
from pydantic import BaseModel
from kilostar.utils.access import Accessor, TokenData
@@ -21,6 +21,7 @@ from kilostar.utils.ray_hook import ray_actor_hook
from kilostar.utils.check_user.role_check import RoleChecker
from kilostar.core.postgres_database.model import UserAuthority
from kilostar.utils.error import UserNotExistError
from kilostar.utils.rate_limit import register_limiter, login_limiter
auth_router = APIRouter(prefix="/api/v1/auth", tags=["auth"])
@@ -33,8 +34,9 @@ class UserRegister(BaseModel):
@auth_router.post("/register")
async def create_user(user_register: UserRegister):
async def create_user(user_register: UserRegister, request: Request):
"""注册新用户:异步线程池里做 argon2 哈希,再交由 PostgresDatabase Actor 落库。"""
register_limiter.check(request)
postgres_database = ray_actor_hook("postgres_database").postgres_database
hashed_password = await run_in_threadpool(
Accessor.hash_password, user_register.password
@@ -53,16 +55,39 @@ class UserLogin(BaseModel):
@auth_router.post("/login")
async def login_user(user_login: UserLogin):
async def login_user(user_login: UserLogin, request: Request):
"""用户登录:查询用户后在线程池中校验口令,校验成功则签发 JWT。"""
login_limiter.check(request)
postgres_database = ray_actor_hook("postgres_database").postgres_database
user = await postgres_database.login_user.remote(user_login.user_name)
if not user:
raise UserNotExistError()
token = await run_in_threadpool(
tokens = await run_in_threadpool(
Accessor.login_hashed_password, user, user_login.password
)
return {"message": "success", "token": token}
return {
"message": "success",
"token": tokens["access_token"],
"access_token": tokens["access_token"],
"refresh_token": tokens["refresh_token"],
}
class RefreshTokenRequest(BaseModel):
"""``POST /refresh`` 入参:refresh token。"""
refresh_token: str
@auth_router.post("/refresh")
async def refresh_token(body: RefreshTokenRequest):
"""用 refresh token 换取新的 access token + refresh token 对。"""
tokens = Accessor.refresh_access_token(body.refresh_token)
return {
"message": "success",
"access_token": tokens["access_token"],
"refresh_token": tokens["refresh_token"],
}
class ChangeAuthorityRequest(BaseModel):
-19
View File
@@ -1,19 +0,0 @@
# Copyright 2026 zhaoxi826
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from fastapi import APIRouter
cluster_router = APIRouter(prefix="/api/v1/cluster", tags=["cluster"])
# Monitor websocket API temporarily removed
-54
View File
@@ -1,54 +0,0 @@
# Copyright 2026 zhaoxi826
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""健康检查端点:用于容器存活/就绪探针。"""
from fastapi import APIRouter
from fastapi.responses import JSONResponse
from kilostar.utils.ray_hook import ray_actor_hook
health_router = APIRouter(tags=["health"])
@health_router.get("/health/live", include_in_schema=True)
async def liveness():
"""存活探针:进程能响应即视为存活。"""
return {"status": "alive"}
@health_router.get("/health/ready", include_in_schema=True)
async def readiness():
"""就绪探针:检查关键依赖(Postgres / GSM Actor)是否可达。"""
checks = {"postgres": False, "global_state_machine": False}
try:
postgres_database = ray_actor_hook("postgres_database").postgres_database
await postgres_database.ping.remote()
checks["postgres"] = True
except Exception:
pass
try:
gsm = ray_actor_hook("global_state_machine").global_state_machine
await gsm.get_skill_list.remote()
checks["global_state_machine"] = True
except Exception:
pass
all_ok = all(checks.values())
return JSONResponse(
status_code=200 if all_ok else 503,
content={"status": "ready" if all_ok else "not_ready", "checks": checks},
)
+11 -3
View File
@@ -49,12 +49,20 @@ onebot_router = APIRouter(prefix="/api/v1/adapter/onebot", tags=["onebot"])
def _verify_token(token_from_header: Optional[str]) -> None:
"""校验 OneBot 实现端在 ``Authorization`` 头里携带的 access_token。
若环境变量 ``ONEBOT_ACCESS_TOKEN`` 未设置则跳过校验。OneBot v11 规范要求
格式为 ``Bearer <token>``,这里同时容忍只填 token 字符串本身的写法。
若环境变量 ``ONEBOT_ACCESS_TOKEN`` 未设置,根据运行模式决策:
- 开发模式(KILOSTAR_ENV=dev):跳过校验并记录警告
- 生产模式:拒绝所有请求,强制要求配置 token
"""
expected = os.environ.get("ONEBOT_ACCESS_TOKEN")
if not expected:
return
is_dev = os.environ.get("KILOSTAR_ENV", "production").lower() in ("dev", "development")
if is_dev:
logger.warning("[OneBot] ONEBOT_ACCESS_TOKEN 未设置,开发模式下跳过认证")
return
raise HTTPException(
status_code=401,
detail="ONEBOT_ACCESS_TOKEN 未配置,拒绝未认证的 OneBot 连接",
)
if not token_from_header:
raise HTTPException(status_code=401, detail="Missing access_token")
raw = token_from_header.removeprefix("Bearer ").removeprefix("Token ").strip()
+108
View File
@@ -0,0 +1,108 @@
# Copyright 2026 zhaoxi826
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""系统级端点:健康探针 + 集群/系统信息。
健康探针路径刻意保持在根(``/health/live`` / ``/health/ready``),不加
``/api/v1`` 前缀——这是 k8s liveness/readiness probe 的惯例配置,加前缀会
让运维侧探针 URL 变复杂。系统信息类端点则走 ``/api/v1/system`` 前缀。
"""
from __future__ import annotations
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from kilostar.utils.ray_hook import ray_actor_hook
from kilostar.utils.access import Accessor, TokenData
from kilostar.utils.check_user.role_check import RoleChecker
from kilostar.core.postgres_database.model import UserAuthority
from kilostar.utils.config_loader import (
get_workflow_config,
save_workflow_config,
WorkflowConfig,
)
system_router = APIRouter(tags=["system"])
@system_router.get("/health/live", include_in_schema=True)
async def liveness():
"""存活探针:进程能响应即视为存活。"""
return {"status": "alive"}
@system_router.get("/health/ready", include_in_schema=True)
async def readiness():
"""就绪探针:检查关键依赖(Postgres / GSM Actor)是否可达。"""
checks = {"postgres": False, "global_state_machine": False}
try:
postgres_database = ray_actor_hook("postgres_database").postgres_database
await postgres_database.ping.remote()
checks["postgres"] = True
except Exception:
pass
try:
gsm = ray_actor_hook("global_state_machine").global_state_machine
await gsm.get_skill_list.remote()
checks["global_state_machine"] = True
except Exception:
pass
all_ok = all(checks.values())
return JSONResponse(
status_code=200 if all_ok else 503,
content={"status": "ready" if all_ok else "not_ready", "checks": checks},
)
@system_router.get("/config/workflow")
async def get_workflow_config_endpoint(
_: TokenData = Depends(Accessor.get_current_user),
):
config = get_workflow_config()
return {"config": config.model_dump()}
@system_router.put("/config/workflow")
async def update_workflow_config_endpoint(
update: WorkflowConfig,
_: TokenData = Depends(RoleChecker(allowed_roles=UserAuthority.USER)),
):
save_workflow_config(update)
return {"status": "ok", "config": update.model_dump()}
@system_router.get("/logs")
async def query_system_logs(
trace_id: str | None = None,
event_type: str | None = None,
level: str | None = None,
limit: int = 100,
offset: int = 0,
_: TokenData = Depends(Accessor.get_current_user),
):
from kilostar.utils.ray_hook import ray_actor_hook
pg = await ray_actor_hook.get_actor("postgres_database")
logs = await pg.query_event_logs.remote(
trace_id=trace_id,
event_type=event_type,
level=level,
limit=limit,
offset=offset,
)
return {"logs": logs, "count": len(logs)}
+86 -6
View File
@@ -66,7 +66,23 @@ async def get_workflow_list(
@workflow_router.get("/sse/{trace_id}")
async def get_workflow_sse(trace_id: str, request: Request):
async def get_workflow_sse(
trace_id: str,
request: Request,
token_data: TokenData = Depends(Accessor.get_current_user),
):
"""SSE 事件流。
鉴权走标准 ``Authorization: Bearer`` 头(前端用 fetch-based SSE
token 不进 URL)。校验该 trace_id 属于当前用户。
"""
postgres_database = ray_actor_hook("postgres_database").postgres_database
wf = await postgres_database.get_workflow.remote(trace_id)
if not wf:
raise HTTPException(status_code=404, detail="Workflow not found")
if getattr(wf, "user_id", None) != token_data.user_id:
raise HTTPException(status_code=403, detail="Forbidden")
global_workflow_manager = ray_actor_hook(
"global_workflow_manager"
).global_workflow_manager
@@ -88,7 +104,18 @@ async def get_workflow_sse(trace_id: str, request: Request):
@workflow_router.post("/reply/{trace_id}")
async def post_workflow_reply(trace_id: str, request: Request):
async def post_workflow_reply(
trace_id: str,
request: Request,
token_data: TokenData = Depends(Accessor.get_current_user),
):
postgres_database = ray_actor_hook("postgres_database").postgres_database
wf = await postgres_database.get_workflow.remote(trace_id)
if not wf:
raise HTTPException(status_code=404, detail="Workflow not found")
if getattr(wf, "user_id", None) != token_data.user_id:
raise HTTPException(status_code=403, detail="Forbidden")
data = await request.json()
reply_msg = data.get("message", "")
global_workflow_manager = ray_actor_hook(
@@ -106,10 +133,24 @@ async def get_workflow_detail(
wf = await postgres_database.get_workflow.remote(trace_id)
if not wf:
raise HTTPException(status_code=404, detail="Workflow not found")
if getattr(wf, "user_id", None) != token_data.user_id:
raise HTTPException(status_code=403, detail="Forbidden")
context = await postgres_database.get_workflow_context.remote(trace_id)
steps = context.work_link if context and hasattr(context, "work_link") else []
work_link = (
context.work_link if context and hasattr(context, "work_link") else []
)
workflow_log = (
context.workflow_log if context and hasattr(context, "workflow_log") else []
)
workflow_pointer = (
context.workflow_pointer
if context and getattr(context, "workflow_pointer", None) is not None
else 0
)
steps = _merge_runtime_status(work_link, workflow_log)
return {
"trace_id": trace_id,
@@ -117,10 +158,49 @@ async def get_workflow_detail(
"status": wf.status,
"command": wf.command,
"steps": steps,
"current_step": workflow_pointer,
"context_blackboard": context.blackboard if context else {},
}
def _merge_runtime_status(work_link: list, workflow_log: list) -> list:
"""把运行期状态从 ``workflow_log`` 反推并 merge 到每个静态 step 上。
``work_link`` 是 step 的**静态定义**(名字 / node 类型 / action),不含运行期
状态;运行期状态散落在 ``workflow_log`` 里——其结构为::
[{"<step_index>": [timestamp, status, message]}, ...]
同一 step 可能出现多条(working → completed),取**最后一条**的 status 作为
该 step 当前状态。没有日志记录的 step 视为 ``pending``。
前端 ``WorkflowDiagram`` 依赖每个 step 的 ``status`` 字段着色,这个拼装让
后端真正把运行期状态喂过去。
"""
# step_index -> 最新 status
latest_status: dict[int, str] = {}
for entry in workflow_log or []:
if not isinstance(entry, dict):
continue
for key, payload in entry.items():
try:
idx = int(key)
except (ValueError, TypeError):
continue
if isinstance(payload, (list, tuple)) and len(payload) >= 2:
latest_status[idx] = payload[1]
merged = []
for i, step in enumerate(work_link or []):
step_copy = dict(step) if isinstance(step, dict) else {}
# step 自带的 step 字段优先,否则用位置索引
step_idx = step_copy.get("step")
lookup_idx = (step_idx - 1) if isinstance(step_idx, int) else i
step_copy["status"] = latest_status.get(lookup_idx, "pending")
merged.append(step_copy)
return merged
@workflow_router.post("/{trace_id}/resume")
async def resume_workflow(
trace_id: str,
@@ -151,9 +231,9 @@ async def resume_workflow(
from kilostar.core.work.workflow.workflow_engine import run_workflow_task
# workflow_data 在 resume 路径上不会被使用(hydrate 会走 resume 分支),
# 这里给个空 dict 占位即可
run_workflow_task.remote({}, trace_id)
# resume_only=Truetask 入口 hydrate 失败会 fail-fast,绝不 fall through
# 到"全新模式空跑"。workflow_data 在 resume 路径上不会被使用,传空 dict 占位
run_workflow_task.remote({}, trace_id, resume_only=True)
return {"trace_id": trace_id, "status": "resuming"}