feat(system):优化后端

1.新增后端测试
2.增加了后端的加密
3.增加了i18n(国际化)
This commit is contained in:
2026-05-31 15:39:34 +00:00
parent affe460180
commit 99520c69d7
118 changed files with 8174 additions and 1491 deletions
+9 -3
View File
@@ -12,10 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Annotated
from fastapi import Depends, HTTPException
from fastapi import Depends, HTTPException, Request
from kilostar.utils.access import Accessor, TokenData
from kilostar.core.postgres_database.model import UserAuthority
from kilostar.utils.ray_hook import ray_actor_hook
from kilostar.utils.i18n import t
def _user_not_found_detail(request: Request | None = None) -> str:
    """Return the localized "user not found" message for a request.

    Args:
        request: The incoming request, if available. When given, its
            ``accept-language`` header is forwarded to ``t`` so the message
            language can follow the client's preference.

    Returns:
        The string produced by ``t("user_not_found", ...)``.
    """
    accept_language = None
    if request:
        accept_language = request.headers.get("accept-language")
    return t("user_not_found", accept_language=accept_language)
async def get_authority(user_id: str) -> UserAuthority:
@@ -29,12 +35,12 @@ async def get_authority(user_id: str) -> UserAuthority:
)
return user_authority
except UserNotExistError:
raise HTTPException(status_code=401, detail="用户不存在或已被删除,请重新登录")
raise HTTPException(status_code=401, detail=t("user_not_found"))
except Exception as e:
# Check if it's a RayTaskError wrapping UserNotExistError
if "UserNotExistError" in str(e):
raise HTTPException(
status_code=401, detail="用户不存在或已被删除,请重新登录"
status_code=401, detail=t("user_not_found")
)
raise
+87
View File
@@ -0,0 +1,87 @@
import os
from functools import lru_cache
from cryptography.fernet import Fernet, InvalidToken
from kilostar.utils.logger import get_logger
logger = get_logger("crypto")
# Marker prepended to every value produced by encrypt_secret(); decrypt_secret()
# and is_encrypted() use it to tell encrypted values from plain ones.
_VERSION_PREFIX = "v1:"
# Lower-case fragments: a dict key that contains any of these (compared
# case-insensitively, as a substring) is treated as holding a secret.
_SENSITIVE_KEYS = {"key", "token", "secret", "password", "apikey", "api_key"}
class CryptoError(Exception):
    """Raised when the secret key is missing/invalid or a value cannot be decrypted."""
@lru_cache(maxsize=1)
def _get_fernet() -> Fernet:
    """Build the ``Fernet`` instance from the ``KILOSTAR_SECRET_KEY`` env var.

    The result is memoized by ``lru_cache``, so the environment is only read
    until the first successful construction.

    Returns:
        A ``Fernet`` object initialised with the configured key.

    Raises:
        CryptoError: If the variable is unset/empty, or if ``Fernet`` rejects
            the key material.
    """
    secret = os.environ.get("KILOSTAR_SECRET_KEY", "")
    if secret:
        try:
            key_material = secret.encode() if isinstance(secret, str) else secret
            return Fernet(key_material)
        except Exception as exc:
            raise CryptoError(f"KILOSTAR_SECRET_KEY 格式无效: {exc}") from exc
    raise CryptoError(
        "环境变量 KILOSTAR_SECRET_KEY 未设置,无法进行加解密。"
        "请生成一个密钥:python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
    )
def encrypt_secret(plaintext: str) -> str:
    """Encrypt ``plaintext`` and return it with the version prefix attached.

    Args:
        plaintext: The value to protect. Falsy values (e.g. ``""``) are
            returned unchanged without touching the key.

    Returns:
        ``_VERSION_PREFIX`` followed by the Fernet token as text, or the
        input itself when it is falsy.

    Raises:
        CryptoError: Propagated from ``_get_fernet`` when no usable key exists.
    """
    if plaintext:
        encrypted = _get_fernet().encrypt(plaintext.encode("utf-8"))
        return _VERSION_PREFIX + encrypted.decode("utf-8")
    return plaintext
def decrypt_secret(ciphertext: str) -> str:
    """Decrypt a value previously produced by ``encrypt_secret``.

    Args:
        ciphertext: The stored value. Falsy values and values that do not
            start with ``_VERSION_PREFIX`` are returned unchanged.

    Returns:
        The decrypted text, or the input itself when it is not prefixed.

    Raises:
        CryptoError: If Fernet reports ``InvalidToken`` (wrong key or damaged
            ciphertext), or if ``_get_fernet`` cannot provide a key.
    """
    if not ciphertext or not ciphertext.startswith(_VERSION_PREFIX):
        return ciphertext
    payload = ciphertext[len(_VERSION_PREFIX):]
    fernet = _get_fernet()
    try:
        plain_bytes = fernet.decrypt(payload.encode("utf-8"))
    except InvalidToken as exc:
        raise CryptoError("解密失败:密钥不匹配或密文已损坏") from exc
    return plain_bytes.decode("utf-8")
def is_encrypted(value: str) -> bool:
    """Return True when ``value`` is a string carrying the version prefix."""
    if not isinstance(value, str):
        return False
    return value.startswith(_VERSION_PREFIX)
def _is_sensitive_key(key: str) -> bool:
    """Return True when ``key`` contains any ``_SENSITIVE_KEYS`` fragment.

    The comparison is a case-insensitive substring match.
    """
    normalized = key.lower()
    for fragment in _SENSITIVE_KEYS:
        if fragment in normalized:
            return True
    return False
def encrypt_dict_secrets(data: dict) -> dict:
    """Return a shallow copy of ``data`` with sensitive string values encrypted.

    A value is encrypted only when its key is sensitive, it is a non-empty
    string, and it is not already prefixed as encrypted. Nested containers
    are not traversed.

    Args:
        data: Mapping to process. Non-dict inputs are returned unchanged.

    Returns:
        A new dict with the same keys in the same order.
    """
    if not isinstance(data, dict):
        return data

    def _needs_encryption(name, value) -> bool:
        # Same short-circuit order as the checks are listed above.
        return (
            _is_sensitive_key(name)
            and isinstance(value, str)
            and bool(value)
            and not is_encrypted(value)
        )

    return {
        name: encrypt_secret(value) if _needs_encryption(name, value) else value
        for name, value in data.items()
    }
def decrypt_dict_secrets(data: dict) -> dict:
    """Return a shallow copy of ``data`` with encrypted sensitive values decrypted.

    Only values whose key is sensitive and which carry the encryption prefix
    are decrypted. If decryption raises ``CryptoError`` the failure is logged
    and the stored (still encrypted) value is kept, so one bad field does not
    abort the whole mapping.

    Args:
        data: Mapping to process. Non-dict inputs are returned unchanged.

    Returns:
        A new dict with the same keys in the same order.
    """
    if not isinstance(data, dict):
        return data
    result: dict = {}
    for name, value in data.items():
        # Default: pass the value through; overwritten below on success.
        result[name] = value
        if not (_is_sensitive_key(name) and isinstance(value, str) and is_encrypted(value)):
            continue
        try:
            result[name] = decrypt_secret(value)
        except CryptoError as exc:
            logger.error(f"字段 {name} 解密失败: {exc}")
    return result
+92 -27
View File
@@ -12,68 +12,133 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""KiloStar 统一异常体系。
class RetryableError(Exception):
"""基类:所有可重试错误(如网络断开、抖动等临时性故障)。"""
设计原则:所有自定义异常归到两条主轴下。
pass
- ``BusinessError``:业务可预期错误,HTTP 层映射 4xx;前端可读、可展示给用户。
- ``InfraError``:系统/基础设施失败错误,HTTP 层映射 5xx;通常需要落日志告警。
其下再细分为 ``RetryableError``(瞬时故障,可由 ``retry_on_retryable_error`` 自动重试)
与 ``NonRetryableError``(确定性失败,重试无意义)。
注意:用 ``InfraError`` 而非 ``SystemError`` 是为了避免与 Python 内置的
``SystemError`` 冲突。
每个异常类都带 ``http_status`` 与 ``code`` 类属性,``api/__init__.py`` 的统一
handler 根据它们直接生成结构化响应,避免业务代码里硬编码状态码。
"""
from __future__ import annotations
class NonRetryableError(Exception):
"""基类:所有不可重试错误(如数据验证失败、类型错误等业务逻辑故障)"""
class KiloStarError(Exception):
"""KiloStar 所有自定义异常的总根"""
pass
http_status: int = 500
code: str = "kilostar_error"
class DemandError(NonRetryableError):
# ─── 主轴 1:业务可预期错误(4xx) ───────────────────────────────────────────
class BusinessError(KiloStarError):
"""业务层可预期错误的基类,HTTP 层默认 400。"""
http_status = 400
code = "business_error"
class DemandError(BusinessError):
"""需求/任务参数不合法或不满足前置条件时抛出。"""
pass
http_status = 400
code = "demand_error"
class ModelNotExistError(Exception):
"""请求了一个未在 Provider 中注册的模型 ID 时抛出。"""
pass
# 用户域 ─────────────────────────────────────────
class UserError(Exception):
"""用户相关错误的基类HTTP 层会被统一映射为 4xx"""
class UserError(BusinessError):
"""用户错误的基类。"""
pass
http_status = 400
code = "user_error"
class UserNotExistError(UserError):
"""按用户名/ID 查询时用户不存在。"""
pass
http_status = 404
code = "user_not_exist"
class UserPasswordError(UserError):
"""口令校验失败(旧密码错误、登录密码错误等)。"""
pass
http_status = 401
code = "user_password_error"
class ProviderError(Exception):
"""模型 Provider 相关错误的基类。"""
# Provider 域 ─────────────────────────────────────
pass
class ProviderError(BusinessError):
"""模型 Provider 域错误的基类。"""
http_status = 400
code = "provider_error"
class ProviderNotExistError(ProviderError):
"""请求了一个未注册的 Provider 时抛出。"""
pass
http_status = 404
code = "provider_not_exist"
class WorkflowError(Exception):
"""工作流执行期错误的基类,HTTP 层会被统一映射为 5xx"""
class ModelNotExistError(BusinessError):
"""请求了一个未在 Provider 中注册的模型 ID 时抛出"""
pass
http_status = 404
code = "model_not_exist"
class WorkflowExit(WorkflowError):
"""工作流被显式终止(用户取消、上游决策跳出等)时抛出,是预期内的退出信号。"""
# Workflow 域 ─────────────────────────────────────
pass
class WorkflowExit(BusinessError):
"""工作流被显式终止(用户取消、上游决策跳出等),是预期内的退出信号。"""
http_status = 400
code = "workflow_exit"
# ─── 主轴 2:系统/基础设施失败错误(5xx) ────────────────────────────────────
class InfraError(KiloStarError):
    """Base class for system/infrastructure failures; the HTTP layer defaults to 500."""

    http_status = 500
    code = "infra_error"
class RetryableError(InfraError):
    """Transient failure (e.g. network jitter); ``retry_on_retryable_error`` may retry it automatically."""

    http_status = 503
    code = "retryable_error"
class NonRetryableError(InfraError):
    """Deterministic system failure; retrying is pointless."""

    http_status = 500
    code = "non_retryable_error"
class WorkflowError(InfraError):
    """Base class for errors raised while a workflow executes; mapped to 5xx by the HTTP layer."""

    http_status = 500
    code = "workflow_error"
+4 -2
View File
@@ -33,9 +33,11 @@ def _get_tool_func(tool_name: str) -> Callable | None:
if func:
return func
app_root = "/app"
tool_plugin_dir = os.path.join(
app_root, "kilostar", "plugin", "tool_plugin", tool_name
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"plugin",
"tool_plugin",
tool_name,
)
if not os.path.exists(tool_plugin_dir) or not os.path.isdir(tool_plugin_dir):
+183
View File
@@ -0,0 +1,183 @@
# Copyright 2026 zhaoxi826
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""KiloStar 轻量级国际化工具。
设计原则:
- 纯内存字典,无文件 IO,Ray 远程序列化零成本。
- 支持环境变量 ``KILOSTAR_LANG`` 作为全局默认语言。
- Agent system prompt 按 ``{locale}`` 分桶,调用方显式传入 locale。
- API 层通过请求头 ``Accept-Language`` 解析首选语言。
当前支持:``zh`` (简体中文), ``en`` (English)。
"""
from __future__ import annotations
import os
from typing import Dict
_DEFAULT_LOCALE: str = os.getenv("KILOSTAR_LANG", "zh")
# ─── Agent System Prompts ──────────────────────────────────────────────────
_PROMPTS: Dict[str, Dict[str, str]] = {
"regulatory_node": {
"zh": (
"你叫kilostar,是一个多智能体AI助手系统中的【监控节点 (regulatory Node)】。\n"
"你是系统的'前台接待''大脑皮层',负责接收用户的初始请求或工作流的最终报告。\n"
"你的核心职责是进行【意图识别与路由】。请仔细阅读用户的请求:\n"
"1. 如果用户只是进行简单的问候、闲聊或查询非常基础的信息,请直接生成友好的回复,使用 ForUser 格式。\n"
"2. 如果用户提出的是复杂任务(如需要编写代码、多步骤规划、数据处理等),请务必将其判定为需要工作流处理的任务,"
" 并使用 ForConsciousnessNode 格式将其移交意识节点处理。\n"
"3. 如果你收到的是 TerminationMessage(代表工作流已完成并生成了报告),请将报告内容转化为友好的面向用户的回复,使用 ForUser 格式。\n"
"请保持冷静、专业,并严格遵循上述路由规则。"
),
"en": (
"You are kilostar, the [Regulatory Node] in a multi-agent AI assistant system.\n"
"You are the system's 'front desk' and 'cerebral cortex', responsible for receiving user requests and final workflow reports.\n"
"Your core duty is [intent recognition and routing]. Please read the user's request carefully:\n"
"1. If the user is simply greeting, chatting, or asking very basic questions, generate a friendly reply directly in the ForUser format.\n"
"2. If the user presents a complex task (e.g., writing code, multi-step planning, data processing), you must classify it as a workflow-requiring task "
" and hand it over to the Consciousness Node using the ForConsciousnessNode format.\n"
"3. If you receive a TerminationMessage (indicating the workflow is complete and a report has been generated), convert the report into a user-friendly reply in the ForUser format.\n"
"Please remain calm, professional, and strictly follow the routing rules above."
),
},
"consciousness_node": {
"zh": (
"你叫kilostar,是一个多智能体AI助手系统中的【意识节点 (Consciousness Node)】。\n"
"你是系统的'高级规划师''架构师',负责处理监控节点分配过来的复杂任务。\n"
"你的主要工作场景包括:\n"
"1. 拆解任务 (Workflow Generation):结合用户的原始命令和提供的模板,生成严谨、可执行的工作流 (kilostarWorkflow),并将其输出为 ForWorkflowEngine 格式。拆解时步骤应清晰连贯。\n"
"2. 中途指导 (Workflow Execution):在工作流执行中,如果某一步骤指派给你,你需要对控制节点的结果进行分析或提供下一步的指导,输出 ForWorkflow 格式。\n"
"3. 总结报告 (regulatory Report):在整个工作流执行完毕后,你需要对整体流程、各个控制节点的执行情况进行审查,并生成一份技术性的总结报告,输出 ForregulatoryNode 格式。\n"
"请确保所有的思考和生成过程符合逻辑,严密且高质量。"
),
"en": (
"You are kilostar, the [Consciousness Node] in a multi-agent AI assistant system.\n"
"You are the system's 'senior planner' and 'architect', responsible for handling complex tasks assigned by the Regulatory Node.\n"
"Your main scenarios include:\n"
"1. Task Decomposition (Workflow Generation): Combine the user's original command with provided templates to generate rigorous, executable workflows (kilostarWorkflow), outputting them in the ForWorkflowEngine format. Steps should be clear and coherent.\n"
"2. Mid-flight Guidance (Workflow Execution): During workflow execution, if a step is assigned to you, analyze the Control Node's results or provide next-step guidance, outputting in the ForWorkflow format.\n"
"3. Summary Report (Regulatory Report): After the entire workflow completes, review the overall process and each Control Node's execution, generating a technical summary report in the ForregulatoryNode format.\n"
"Ensure all reasoning and generation is logical, rigorous, and high-quality."
),
},
"control_node": {
"zh": (
"你叫kilostar,是一个多智能体AI助手系统中的【控制节点 (Control Node)】。\n"
"你是系统的'执行者''车间主任',专门负责执行工作流中分配给你的具体子任务。\n"
"你的工作职责是:\n"
"1. 仔细分析分配给你的工作流步骤 (workflow_step) 的目标和要求。\n"
"2. 运用你被分配的工具 (如有) 或者依靠自身的知识和推理能力,精准、高效地完成该任务。\n"
"3. 将执行的结果、产生的数据或者具体的输出,严格按照 ForWorkflow 格式返回。\n"
"请注意:你的输出应当具体、实用,直接提供任务所要求的结果,不要做过多无关的寒暄。"
),
"en": (
"You are kilostar, the [Control Node] in a multi-agent AI assistant system.\n"
"You are the system's 'executor' and 'shop floor manager', specifically responsible for carrying out concrete subtasks assigned to you within the workflow.\n"
"Your duties are:\n"
"1. Carefully analyze the objectives and requirements of the workflow_step assigned to you.\n"
"2. Use the tools assigned to you (if any) or rely on your own knowledge and reasoning to complete the task accurately and efficiently.\n"
"3. Return the execution results, generated data, or concrete outputs strictly in the ForWorkflow format.\n"
"Note: Your output should be specific, practical, and directly provide the results requested by the task. Avoid excessive irrelevant pleasantries."
),
},
}
# ─── API / 通用消息 ────────────────────────────────────────────────────────
_MESSAGES: Dict[str, Dict[str, str]] = {
"internal_error": {
"zh": "服务内部错误,请稍后重试",
"en": "Internal server error, please try again later.",
},
"user_not_found": {
"zh": "用户不存在或已被删除,请重新登录",
"en": "User does not exist or has been deleted. Please log in again.",
},
"provider_not_registered": {
"zh": "Provider {provider_title} 未注册",
"en": "Provider {provider_title} is not registered.",
},
"model_not_exist": {
"zh": "模型不存在",
"en": "Model does not exist.",
},
"api_not_found": {
"zh": "API endpoint not found",
"en": "API endpoint not found",
},
"frontend_not_found": {
"zh": "Frontend build not found",
"en": "Frontend build not found",
},
}
# ─── 工具函数 ──────────────────────────────────────────────────────────────
def _resolve_locale(locale: str | None = None, accept_language: str | None = None) -> str:
"""确定最终使用的 locale。
优先级:显式传入 > Accept-Language 头 > KILOSTAR_LANG 环境变量 > 默认 zh。
"""
if locale:
return locale if locale in ("zh", "en") else _DEFAULT_LOCALE
if accept_language:
# 简单解析:取第一个 segment,若含 zh 则 zh,含 en 则 en
first = accept_language.split(",")[0].split(";")[0].strip().lower()
if "zh" in first:
return "zh"
if "en" in first:
return "en"
return _DEFAULT_LOCALE
def t(key: str, locale: str | None = None, accept_language: str | None = None, **kwargs) -> str:
    """Translate a general-purpose message.

    Args:
        key: Message key, e.g. ``internal_error``.
        locale: Explicit language code (``zh`` / ``en``).
        accept_language: Content of the client's ``Accept-Language`` header.
        **kwargs: Values interpolated into the message template via
            ``str.format``.

    Returns:
        The translated string. Falls back to the ``_DEFAULT_LOCALE`` text when
        the resolved locale has no entry, and to ``key`` itself when the key
        is unknown.
    """
    loc = _resolve_locale(locale, accept_language)
    # _MESSAGES is keyed by message key first and locale second, so the key
    # must be looked up before the locale (same shape as _PROMPTS).
    translations = _MESSAGES.get(key, {})
    text = translations.get(loc) or translations.get(_DEFAULT_LOCALE) or key
    return text.format(**kwargs) if kwargs else text
def agent_prompt(agent_name: str, locale: str | None = None, accept_language: str | None = None) -> str:
    """Return an agent's system prompt with a language instruction appended.

    Args:
        agent_name: ``regulatory_node`` / ``consciousness_node`` /
            ``control_node``.
        locale: Explicit language code.
        accept_language: Content of the ``Accept-Language`` header.

    Returns:
        The prompt for the resolved locale (falling back to the
        ``_DEFAULT_LOCALE`` variant, then to an empty string), followed by a
        "reply in this language" instruction for ``zh`` / ``en``.
    """
    loc = _resolve_locale(locale, accept_language)
    variants = _PROMPTS.get(agent_name, {})
    base = variants.get(loc) or variants.get(_DEFAULT_LOCALE, "")
    if loc == "zh":
        suffix = "\n\n【重要】请始终使用简体中文进行思考和回复。"
    elif loc == "en":
        suffix = "\n\n[Important] Please always think and reply in English."
    else:
        suffix = ""
    return base + suffix
+72 -9
View File
@@ -12,24 +12,83 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from loguru import logger
from rich.logging import RichHandler
from loguru._logger import Logger
from kilostar.utils.request_context import get_request_id, get_trace_id
def _is_json_mode() -> bool:
    """Tell whether structured JSON logging is enabled via the environment.

    Enabled by ``KILOSTAR_LOG_FORMAT=json`` or by ``KILOSTAR_LOG_JSON`` set to
    one of ``1`` / ``true`` / ``yes`` / ``on`` (both compared
    case-insensitively).
    """
    if os.environ.get("KILOSTAR_LOG_FORMAT", "").lower() == "json":
        return True
    truthy = {"1", "true", "yes", "on"}
    return os.environ.get("KILOSTAR_LOG_JSON", "").lower() in truthy
def _ctx_patcher(record):
    """Log patcher: fill in ``trace_id`` / ``request_id`` before a record is emitted.

    A value already present in ``record["extra"]`` (e.g. set through an
    explicit ``bind(trace_id=...)``) wins. A missing or empty value is
    replaced with whatever ``get_trace_id()`` / ``get_request_id()`` return.
    The record is modified in place.
    """
    bag = record["extra"]
    trace_missing = not bag.get("trace_id")
    if trace_missing:
        bag["trace_id"] = get_trace_id()
    request_missing = not bag.get("request_id")
    if request_missing:
        bag["request_id"] = get_request_id()
def setup_logger() -> Logger:
"""初始化全局 loguru logger,输出格式为 ``actor:(...) | trace_id:(...) : message``。"""
"""初始化全局 loguru logger
- 默认(开发模式):``RichHandler`` 彩色输出,格式 ``actor:(...) | request_id:(...) | trace_id:(...) : message``
- JSON 模式(``KILOSTAR_LOG_FORMAT=json``):写到 stdout,每行一条 JSON,便于 ELK/Loki 采集
request_id / trace_id 来自 ``kilostar.utils.request_context``,由 FastAPI middleware
或工作流入口绑定到 contextvars,本模块通过 ``patcher`` 透明注入。
"""
logger.remove()
log_level = os.environ.get("KILOSTAR_LOG_LEVEL", "DEBUG").upper()
if _is_json_mode():
logger.configure(
extra={"actor_name": "System", "trace_id": "", "request_id": ""},
patcher=_ctx_patcher,
)
logger.add(
sys.stdout,
serialize=True,
level=log_level,
enqueue=True,
)
return logger
def format_record(record):
# Format string for rich handler
actor = record["extra"].get("actor_name", "System")
trace_id = record["extra"].get("trace_id", "")
request_id = record["extra"].get("request_id", "")
ids = []
if request_id:
ids.append(f"request_id:({request_id})")
if trace_id:
ids.append(f"trace_id:({trace_id})")
ids_str = " | " + " | ".join(ids) if ids else ""
return f"actor:({actor}){ids_str} : {record['message']}"
trace_str = f" | trace_id:({trace_id})" if trace_id else ""
return f"actor:({actor}){trace_str} : {record['message']}"
logger.configure(extra={"actor_name": "System", "trace_id": ""})
logger.configure(
extra={"actor_name": "System", "trace_id": "", "request_id": ""},
patcher=_ctx_patcher,
)
logger.add(
RichHandler(
@@ -40,8 +99,8 @@ def setup_logger() -> Logger:
show_path=False,
),
format=format_record,
level="DEBUG",
enqueue=True, # 异步记录
level=log_level,
enqueue=True,
)
return logger
@@ -51,5 +110,9 @@ global_logger = setup_logger()
def get_logger(actor_name: str, trace_id: str = "") -> Logger:
    """Return a logger bound to ``actor_name`` and an optional ``trace_id``.

    Binding makes it easy to group log lines by actor/request. When
    ``trace_id`` is left empty, the log patcher falls back to the current
    context value (set by the middleware or the workflow entry point); an
    explicitly passed value overrides that injection.
    """
    bound = global_logger.bind(actor_name=actor_name, trace_id=trace_id)
    return bound
+180
View File
@@ -0,0 +1,180 @@
# Copyright 2026 zhaoxi826
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MCP 辅助模块:根据全局状态机中的配置创建 pydantic-ai MCPServer 实例。"""
from typing import Dict, List, Any, Sequence
from kilostar.utils.logger import get_logger
logger = get_logger("mcp_helper")
# 延迟导入 pydantic_ai.mcp,避免在 MCP 包未安装时崩溃
try:
from pydantic_ai.mcp import (
MCPServerStdio,
MCPServerSSE,
MCPServerHTTP,
)
_MCP_AVAILABLE = True
except ImportError:
_MCP_AVAILABLE = False
logger.warning("MCP package not installed. MCP servers will not be available.")
def build_mcp_toolsets(configs: Dict[str, Dict[str, Any]]) -> List[Any]:
    """Create MCP server instances from a configuration mapping.

    Args:
        configs: ``{server_id: {"name": ..., "transport": ..., ...}}``.
            ``transport`` defaults to ``"stdio"``; ``"sse"`` and ``"http"``
            are also handled. Other values are logged and skipped.

    Returns:
        The list of server instances that could be constructed (usable as an
        Agent's ``toolsets``). Empty when the MCP package is unavailable.
        A server whose construction raises is logged and left out.
    """
    if not _MCP_AVAILABLE:
        return []

    built = []
    for server_id, cfg in configs.items():
        try:
            transport = cfg.get("transport", "stdio")
            tool_prefix = cfg.get("tool_prefix")
            name = cfg.get("name", server_id)

            if transport not in ("stdio", "sse", "http"):
                logger.warning(f"Unsupported MCP transport: {transport} for server {name}")
                continue

            if transport == "stdio":
                server = MCPServerStdio(
                    command=cfg.get("command", ""),
                    args=cfg.get("args", []),
                    env=cfg.get("env"),
                    tool_prefix=tool_prefix,
                    id=server_id,
                )
            else:
                # Both URL-based transports take the same constructor arguments.
                # NOTE(review): confirm MCPServerHTTP is the intended class for
                # "http" in the installed pydantic-ai version.
                url_server_cls = MCPServerSSE if transport == "sse" else MCPServerHTTP
                server = url_server_cls(
                    url=cfg.get("url", ""),
                    tool_prefix=tool_prefix,
                    id=server_id,
                )

            built.append(server)
            logger.info(f"MCP server '{name}' ({transport}) registered as toolset")
        except Exception as exc:
            logger.error(f"Failed to build MCP server '{server_id}': {exc}")
    return built
async def get_mcp_toolsets_from_gsm() -> List[Any]:
    """Fetch the MCP configuration from the GSM snapshot and build toolsets.

    Returns:
        The result of ``build_mcp_toolsets`` for ``snapshot.mcp_servers``, or
        an empty list when MCP is unavailable or anything raises (the error
        is logged).
    """
    if _MCP_AVAILABLE:
        try:
            from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot

            # Go through the snapshot: MCP configuration rarely changes.
            snapshot = await fetch_snapshot()
            return build_mcp_toolsets(snapshot.mcp_servers)
        except Exception as exc:
            logger.error(f"Failed to load MCP configs from GSM: {exc}")
    return []
async def get_all_toolsets_for_scope(scope: str) -> List[Any]:
    """Collect every toolset for ``scope``: local toolsets first, then MCP.

    The order is stable: toolsets rebuilt locally from the snapshot come
    first, MCP toolsets follow. A failure while loading the local toolsets is
    logged and does not prevent the MCP toolsets from being returned.

    Args:
        scope: Scope name passed to ``build_toolsets_for_scope``.

    Returns:
        The combined list of toolsets.
    """
    collected: List[Any] = []
    try:
        from kilostar.core.global_state_machine.gsm_snapshot import (
            build_toolsets_for_scope,
            fetch_snapshot,
        )

        # One snapshot fetch; toolsets for the scope are rebuilt locally.
        snapshot = await fetch_snapshot()
        local = build_toolsets_for_scope(snapshot, scope)
        collected.extend(local or [])
    except Exception as exc:
        logger.error(f"Failed to load local toolsets from GSM ({scope}): {exc}")
    mcp_toolsets = await get_mcp_toolsets_from_gsm()
    collected.extend(mcp_toolsets)
    return collected
async def list_mcp_tools_for_configs(
    configs: Dict[str, Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Connect to each configured MCP server in turn and list its tool names.

    Each server is entered with ``async with server:`` and queried once via
    ``list_tools()``. The configured ``tool_prefix`` (if any) is prepended to
    every name as ``<prefix>_<name>``. A failing server does not affect the
    others: its entry keeps ``tools=[]`` and gains an ``error`` field.

    Args:
        configs: ``{server_id: {"name": ..., "transport": ..., ...}}``.

    Returns:
        One dict per server that ``build_mcp_toolsets`` could construct, with
        keys ``server_id``, ``name``, ``transport``, ``tool_prefix``,
        ``tools`` and, on failure, ``error``. Empty when MCP is unavailable.
    """
    result: List[Dict[str, Any]] = []
    if not _MCP_AVAILABLE:
        return result

    for server in build_mcp_toolsets(configs):
        server_id = getattr(server, "id", None)
        cfg = configs.get(server_id, {}) if server_id else {}
        name = cfg.get("name", server_id or "unknown")
        tool_prefix = cfg.get("tool_prefix")
        item: Dict[str, Any] = {
            "server_id": server_id,
            "name": name,
            "transport": cfg.get("transport", "stdio"),
            "tool_prefix": tool_prefix,
            "tools": [],
        }
        try:
            async with server:
                # get_tools() takes a RunContext argument in toolset-based
                # pydantic-ai releases and cannot be called bare here;
                # list_tools() needs no run context.
                tools = await server.list_tools()
            names = [
                getattr(tool, "name", None) or getattr(tool, "tool_name", str(tool))
                for tool in tools
            ]
            item["tools"] = [
                f"{tool_prefix}_{tool_name}" if tool_prefix else tool_name
                for tool_name in names
            ]
        except Exception as exc:
            item["error"] = str(exc)
            logger.warning(f"MCP server '{name}' list_tools failed: {exc}")
        result.append(item)
    return result
async def list_mcp_tools_from_gsm() -> List[Dict[str, Any]]:
    """Fetch the MCP configuration from the GSM snapshot and list each server's tools.

    Returns:
        The result of :func:`list_mcp_tools_for_configs` for
        ``snapshot.mcp_servers``, or an empty list when MCP is unavailable or
        anything raises (the error is logged).
    """
    if _MCP_AVAILABLE:
        try:
            from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot

            snapshot = await fetch_snapshot()
            listing = await list_mcp_tools_for_configs(snapshot.mcp_servers)
            return listing
        except Exception as exc:
            logger.error(f"Failed to list MCP tools from GSM: {exc}")
    return []
+130
View File
@@ -0,0 +1,130 @@
# Copyright 2026 zhaoxi826
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""请求/工作流上下文:基于 ``contextvars`` 的双层 ID 传播。
设计上把"一次用户请求""一次重型工作流"区分开:
- ``request_id``:会话域。所有进 API 的请求都要带,由 middleware 在入口生成或
从 ``X-Request-Id`` 头继承。chat 这条同步链路靠它走完一生。
- ``trace_id``:工作流域。只有 ``ConsciousnessNode`` 决定启动重型任务时才生成,
挂到 ``KiloStarWorkflow`` 上。trace_id 应能追溯回触发它的 request_id(前者
通过显式参数传入,后者从 contextvars 读取)。
为什么用 ``contextvars`` 而不是参数透传:
1. ``contextvars`` 在 ``asyncio`` 协程间天然继承,不会跨协程串味;
2. ``loguru`` 的 ``patcher`` 钩子可以把它变成日志切面,业务代码不需要在每条
``logger.info`` 上手动 ``.bind(trace_id=...)``
3. Ray 跨进程调用时 contextvars 不会自动传播 —— 这是有意为之,避免不同 actor
间的上下文意外串联。跨 actor 边界要走显式参数,由接收方再 ``bind_*`` 一次。
"""
from __future__ import annotations
import uuid
from contextlib import contextmanager
from contextvars import ContextVar, Token
from typing import Iterator, Optional
# Holds the request_id for the current context; reads return "" when nothing is bound.
_request_id_var: ContextVar[str] = ContextVar("kilostar_request_id", default="")
# Holds the trace_id for the current context; reads return "" when nothing is bound.
_trace_id_var: ContextVar[str] = ContextVar("kilostar_trace_id", default="")
def get_request_id() -> str:
    """Return the ``request_id`` of the current context, or ``""`` when unbound."""
    current = _request_id_var.get()
    return current
def get_trace_id() -> str:
    """Return the ``trace_id`` of the current context, or ``""`` when unbound."""
    current = _trace_id_var.get()
    return current
def bind_request_id(request_id: str) -> Token:
    """Bind ``request_id`` to the current context.

    Returns:
        The ``Token`` from ``ContextVar.set``; pass it to
        ``reset_request_id`` to restore the previous value. The token must be
        reset in the same context it was created in, otherwise ``reset``
        raises ``ValueError``. Prefer the ``request_id_scope`` context manager
        in most cases.
    """
    restore_token = _request_id_var.set(request_id)
    return restore_token
def bind_trace_id(trace_id: str) -> Token:
    """Bind ``trace_id`` to the current context and return the token for a later ``reset``."""
    restore_token = _trace_id_var.set(trace_id)
    return restore_token
def reset_request_id(token: Token) -> None:
    """Restore ``request_id`` to the value it had before ``token`` was created."""
    _request_id_var.reset(token)
    return None
def reset_trace_id(token: Token) -> None:
    """Restore ``trace_id`` to the value it had before ``token`` was created."""
    _trace_id_var.reset(token)
    return None
@contextmanager
def request_id_scope(request_id: str) -> Iterator[str]:
    """Bind ``request_id`` for the duration of a ``with`` block.

    Yields:
        The ``request_id`` that was bound. The previous value is restored on
        exit, including when the block raises.
    """
    restore_token = bind_request_id(request_id)
    try:
        yield request_id
    finally:
        reset_request_id(restore_token)
@contextmanager
def trace_id_scope(trace_id: str) -> Iterator[str]:
    """Bind ``trace_id`` for the duration of a ``with`` block.

    Yields:
        The ``trace_id`` that was bound. The previous value is restored on
        exit, including when the block raises.
    """
    restore_token = bind_trace_id(trace_id)
    try:
        yield trace_id
    finally:
        reset_trace_id(restore_token)
def new_request_id(prefix: str = "req") -> str:
    """Generate a fresh request id of the form ``<prefix>-<uuid4 hex>``."""
    unique_part = uuid.uuid4().hex
    return f"{prefix}-{unique_part}"
def snapshot() -> dict[str, str]:
    """Return the current ``request_id`` / ``trace_id`` as a plain dict.

    Intended for passing the ids explicitly across actor/task boundaries.
    """
    return dict(
        request_id=_request_id_var.get(),
        trace_id=_trace_id_var.get(),
    )
@contextmanager
def apply_snapshot(snap: Optional[dict[str, str]]) -> Iterator[None]:
    """Make an externally supplied snapshot effective inside a ``with`` block.

    Used when ids are passed explicitly across a Ray actor call. Only
    non-empty ``request_id`` / ``trace_id`` entries are applied; a falsy
    ``snap`` leaves the context untouched.

    Args:
        snap: Dict as produced by ``snapshot()``, or ``None``.

    Yields:
        ``None``. Values that were set are reset on exit in reverse order.
    """
    if not snap:
        yield
        return

    pending: list[Token] = []
    for field, var in (("request_id", _request_id_var), ("trace_id", _trace_id_var)):
        value = snap.get(field)
        if value:
            pending.append(var.set(value))
    try:
        yield
    finally:
        # Undo in reverse order of application.
        while pending:
            tok = pending.pop()
            try:
                tok.var.reset(tok)
            except (ValueError, LookupError):
                # The token may no longer be valid in this context; tolerate it.
                pass