feat(system):优化后端

1.新增后端测试 2.增加了后端的加密 3.增加了i18n（国际化）
2026-05-31 15:39:34 +00:00
parent affe460180
commit 99520c69d7
118 changed files with 8174 additions and 1491 deletions
@@ -12,10 +12,16 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 from typing import Annotated
-from fastapi import Depends, HTTPException
+from fastapi import Depends, HTTPException, Request
 from kilostar.utils.access import Accessor, TokenData
 from kilostar.core.postgres_database.model import UserAuthority
 from kilostar.utils.ray_hook import ray_actor_hook
+from kilostar.utils.i18n import t
+
+
+def _user_not_found_detail(request: Request | None = None) -> str:
+    """Return the localized "user not found" message for *request*.
+
+    When a request is given, its ``accept-language`` header value is passed to
+    ``t`` so the message language can follow the caller's preference; without
+    a request, ``t`` is called with ``accept_language=None``.
+    """
+    accept_language = None
+    if request:
+        accept_language = request.headers.get("accept-language")
+    return t("user_not_found", accept_language=accept_language)


 async def get_authority(user_id: str) -> UserAuthority:
@@ -29,12 +35,12 @@ async def get_authority(user_id: str) -> UserAuthority:
        )
        return user_authority
    except UserNotExistError:
-        raise HTTPException(status_code=401, detail="用户不存在或已被删除，请重新登录")
+        raise HTTPException(status_code=401, detail=t("user_not_found"))
    except Exception as e:
        # Check if it's a RayTaskError wrapping UserNotExistError
        if "UserNotExistError" in str(e):
            raise HTTPException(
-                status_code=401, detail="用户不存在或已被删除，请重新登录"
+                status_code=401, detail=t("user_not_found")
            )
        raise

@@ -0,0 +1,87 @@
+import os
+from functools import lru_cache
+
+from cryptography.fernet import Fernet, InvalidToken
+
+from kilostar.utils.logger import get_logger
+
+logger = get_logger("crypto")
+
+_VERSION_PREFIX = "v1:"
+_SENSITIVE_KEYS = {"key", "token", "secret", "password", "apikey", "api_key"}
+
+
+class CryptoError(Exception):
+    """Raised when the secret key is missing or invalid, or when decryption fails."""
+
+    pass
+
+
+@lru_cache(maxsize=1)
+def _get_fernet() -> Fernet:
+    """Build the ``Fernet`` cipher from the ``KILOSTAR_SECRET_KEY`` environment variable.
+
+    The result is memoized by ``lru_cache``. A call that raises is not cached,
+    so a later call reads the environment again.
+
+    Raises:
+        CryptoError: If the variable is unset/empty, or ``Fernet`` rejects its value.
+    """
+    secret = os.environ.get("KILOSTAR_SECRET_KEY", "")
+    if secret:
+        try:
+            key_material = secret.encode() if isinstance(secret, str) else secret
+            return Fernet(key_material)
+        except Exception as e:
+            raise CryptoError(f"KILOSTAR_SECRET_KEY 格式无效: {e}") from e
+    raise CryptoError(
+        "环境变量 KILOSTAR_SECRET_KEY 未设置，无法进行加解密。"
+        "请生成一个密钥：python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
+    )
+
+
+def encrypt_secret(plaintext: str) -> str:
+    """Encrypt *plaintext* and return the version prefix followed by the Fernet token.
+
+    Falsy input (for example an empty string) is returned unchanged without
+    touching the cipher.
+
+    Raises:
+        CryptoError: Propagated from ``_get_fernet`` when the key is unusable.
+    """
+    if plaintext:
+        token_bytes = _get_fernet().encrypt(plaintext.encode("utf-8"))
+        return _VERSION_PREFIX + token_bytes.decode("utf-8")
+    return plaintext
+
+
+def decrypt_secret(ciphertext: str) -> str:
+    """Decrypt a value that carries the version prefix.
+
+    Falsy values and values that do not start with the version prefix are
+    returned unchanged.
+
+    Raises:
+        CryptoError: If Fernet reports an invalid token, or propagated from
+            ``_get_fernet`` when the key is unusable.
+    """
+    if not ciphertext or not ciphertext.startswith(_VERSION_PREFIX):
+        return ciphertext
+    token_text = ciphertext[len(_VERSION_PREFIX):]
+    cipher = _get_fernet()
+    try:
+        plain_bytes = cipher.decrypt(token_text.encode("utf-8"))
+    except InvalidToken as e:
+        raise CryptoError("解密失败：密钥不匹配或密文已损坏") from e
+    return plain_bytes.decode("utf-8")
+
+
+def is_encrypted(value: str) -> bool:
+    """Return ``True`` when *value* is a string that starts with the version prefix."""
+    if not isinstance(value, str):
+        return False
+    return value.startswith(_VERSION_PREFIX)
+
+
+def _is_sensitive_key(key: str) -> bool:
+    """Return ``True`` if *key* looks like the name of a secret-bearing field.
+
+    Matching is a case-insensitive substring test against ``_SENSITIVE_KEYS``.
+    Non-string keys are never treated as sensitive; without this guard they
+    raised ``AttributeError`` on ``.lower()`` and aborted the whole dict pass.
+    """
+    if not isinstance(key, str):
+        return False
+    lowered = key.lower()
+    return any(marker in lowered for marker in _SENSITIVE_KEYS)
+
+
+def encrypt_dict_secrets(data: dict) -> dict:
+    """Return a shallow copy of *data* with sensitive string values encrypted.
+
+    A value is encrypted only when its key passes ``_is_sensitive_key`` and the
+    value is a non-empty string that is not already encrypted. Nested
+    containers are not traversed. Non-dict input is returned unchanged.
+    """
+    if not isinstance(data, dict):
+        return data
+
+    def _needs_encryption(field, value) -> bool:
+        return (
+            _is_sensitive_key(field)
+            and isinstance(value, str)
+            and bool(value)
+            and not is_encrypted(value)
+        )
+
+    return {
+        field: encrypt_secret(value) if _needs_encryption(field, value) else value
+        for field, value in data.items()
+    }
+
+
+def decrypt_dict_secrets(data: dict) -> dict:
+    """Return a shallow copy of *data* with encrypted sensitive values decrypted.
+
+    Decryption is best-effort: when a field fails with ``CryptoError`` the
+    error is logged and the stored (still encrypted) value is kept.
+    Non-dict input is returned unchanged.
+    """
+    if not isinstance(data, dict):
+        return data
+    result: dict = {}
+    for field, value in data.items():
+        # Default to the stored value; only replace it on successful decryption.
+        result[field] = value
+        if not (_is_sensitive_key(field) and isinstance(value, str) and is_encrypted(value)):
+            continue
+        try:
+            result[field] = decrypt_secret(value)
+        except CryptoError as e:
+            logger.error(f"字段 {field} 解密失败: {e}")
+    return result
@@ -12,68 +12,133 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

+"""KiloStar 统一异常体系。

-class RetryableError(Exception):
-    """基类：所有可重试错误（如网络断开、抖动等临时性故障）。"""
+设计原则：所有自定义异常归到两条主轴下。

-    pass
+- ``BusinessError``：业务可预期错误，HTTP 层映射 4xx；前端可读、可展示给用户。
+- ``InfraError``：系统/基础设施失败错误，HTTP 层映射 5xx；通常需要落日志告警。
+  其下再细分为 ``RetryableError``（瞬时故障，可由 ``retry_on_retryable_error`` 自动重试）
+  与 ``NonRetryableError``（确定性失败，重试无意义）。
+
+注意：用 ``InfraError`` 而非 ``SystemError`` 是为了避免与 Python 内置的
+``SystemError`` 冲突。
+
+每个异常类都带 ``http_status`` 与 ``code`` 类属性，``api/__init__.py`` 的统一
+handler 根据它们直接生成结构化响应，避免业务代码里硬编码状态码。
+"""
+
+from __future__ import annotations


-class NonRetryableError(Exception):
-    """基类：所有不可重试错误（如数据验证失败、类型错误等业务逻辑故障）。"""
+class KiloStarError(Exception):
+    """KiloStar 所有自定义异常的总根。"""

-    pass
+    http_status: int = 500
+    code: str = "kilostar_error"


-class DemandError(NonRetryableError):
+# ─── 主轴 1：业务可预期错误（4xx） ───────────────────────────────────────────
+
+
+class BusinessError(KiloStarError):
+    """Base class for expected business-layer errors; defaults to HTTP 400."""
+
+    http_status = 400
+    code = "business_error"
+
+
+class DemandError(BusinessError):
    """需求/任务参数不合法或不满足前置条件时抛出。"""

-    pass
+    http_status = 400
+    code = "demand_error"


-class ModelNotExistError(Exception):
-    """请求了一个未在 Provider 中注册的模型 ID 时抛出。"""
-
-    pass
+# 用户域 ─────────────────────────────────────────


-class UserError(Exception):
-    """用户相关错误的基类，HTTP 层会被统一映射为 4xx。"""
+class UserError(BusinessError):
+    """用户域错误的基类。"""

-    pass
+    http_status = 400
+    code = "user_error"


 class UserNotExistError(UserError):
    """按用户名/ID 查询时用户不存在。"""

-    pass
+    http_status = 404
+    code = "user_not_exist"


 class UserPasswordError(UserError):
    """口令校验失败（旧密码错误、登录密码错误等）。"""

-    pass
+    http_status = 401
+    code = "user_password_error"


-class ProviderError(Exception):
-    """模型 Provider 相关错误的基类。"""
+# Provider 域 ─────────────────────────────────────

-    pass
+
+class ProviderError(BusinessError):
+    """Base class for errors in the model Provider domain."""
+
+    http_status = 400
+    code = "provider_error"


 class ProviderNotExistError(ProviderError):
    """请求了一个未注册的 Provider 时抛出。"""

-    pass
+    http_status = 404
+    code = "provider_not_exist"


-class WorkflowError(Exception):
-    """工作流执行期错误的基类，HTTP 层会被统一映射为 5xx。"""
+class ModelNotExistError(BusinessError):
+    """请求了一个未在 Provider 中注册的模型 ID 时抛出。"""

-    pass
+    http_status = 404
+    code = "model_not_exist"


-class WorkflowExit(WorkflowError):
-    """工作流被显式终止（用户取消、上游决策跳出等）时抛出，是预期内的退出信号。"""
+# Workflow 域 ─────────────────────────────────────

-    pass
+
+class WorkflowExit(BusinessError):
+    """Workflow was terminated explicitly (user cancel, upstream decision to bail out, ...); an expected exit signal."""
+
+    http_status = 400
+    code = "workflow_exit"
+
+
+# ─── 主轴 2：系统/基础设施失败错误（5xx） ────────────────────────────────────
+
+
+class InfraError(KiloStarError):
+    """Base class for system/infrastructure failures; defaults to HTTP 500."""
+
+    http_status = 500
+    code = "infra_error"
+
+
+class RetryableError(InfraError):
+    """Transient failure (e.g. network jitter); can be retried automatically by ``retry_on_retryable_error``."""
+
+    http_status = 503
+    code = "retryable_error"
+
+
+class NonRetryableError(InfraError):
+    """Deterministic system failure; retrying is pointless."""
+
+    http_status = 500
+    code = "non_retryable_error"
+
+
+class WorkflowError(InfraError):
+    """Base class for workflow execution-time errors; mapped to 5xx at the HTTP layer."""
+
+    http_status = 500
+    code = "workflow_error"
@@ -33,9 +33,11 @@ def _get_tool_func(tool_name: str) -> Callable | None:
    if func:
        return func

-    app_root = "/app"
    tool_plugin_dir = os.path.join(
-        app_root, "kilostar", "plugin", "tool_plugin", tool_name
+        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+        "plugin",
+        "tool_plugin",
+        tool_name,
    )

    if not os.path.exists(tool_plugin_dir) or not os.path.isdir(tool_plugin_dir):
@@ -0,0 +1,183 @@
+#  Copyright 2026 zhaoxi826
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""KiloStar 轻量级国际化工具。
+
+设计原则：
+- 纯内存字典，无文件 IO，Ray 远程序列化零成本。
+- 支持环境变量 ``KILOSTAR_LANG`` 作为全局默认语言。
+- Agent system prompt 按 ``{locale}`` 分桶，调用方显式传入 locale。
+- API 层通过请求头 ``Accept-Language`` 解析首选语言。
+
+当前支持：``zh`` (简体中文), ``en`` (English)。
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Dict
+
+_DEFAULT_LOCALE: str = os.getenv("KILOSTAR_LANG", "zh")
+
+# ─── Agent System Prompts ──────────────────────────────────────────────────
+
+_PROMPTS: Dict[str, Dict[str, str]] = {
+    "regulatory_node": {
+        "zh": (
+            "你叫kilostar，是一个多智能体AI助手系统中的【监控节点 (regulatory Node)】。\n"
+            "你是系统的'前台接待'和'大脑皮层'，负责接收用户的初始请求或工作流的最终报告。\n"
+            "你的核心职责是进行【意图识别与路由】。请仔细阅读用户的请求：\n"
+            "1. 如果用户只是进行简单的问候、闲聊或查询非常基础的信息，请直接生成友好的回复，使用 ForUser 格式。\n"
+            "2. 如果用户提出的是复杂任务（如需要编写代码、多步骤规划、数据处理等），请务必将其判定为需要工作流处理的任务，"
+            "   并使用 ForConsciousnessNode 格式将其移交意识节点处理。\n"
+            "3. 如果你收到的是 TerminationMessage（代表工作流已完成并生成了报告），请将报告内容转化为友好的面向用户的回复，使用 ForUser 格式。\n"
+            "请保持冷静、专业，并严格遵循上述路由规则。"
+        ),
+        "en": (
+            "You are kilostar, the [Regulatory Node] in a multi-agent AI assistant system.\n"
+            "You are the system's 'front desk' and 'cerebral cortex', responsible for receiving user requests and final workflow reports.\n"
+            "Your core duty is [intent recognition and routing]. Please read the user's request carefully:\n"
+            "1. If the user is simply greeting, chatting, or asking very basic questions, generate a friendly reply directly in the ForUser format.\n"
+            "2. If the user presents a complex task (e.g., writing code, multi-step planning, data processing), you must classify it as a workflow-requiring task "
+            "   and hand it over to the Consciousness Node using the ForConsciousnessNode format.\n"
+            "3. If you receive a TerminationMessage (indicating the workflow is complete and a report has been generated), convert the report into a user-friendly reply in the ForUser format.\n"
+            "Please remain calm, professional, and strictly follow the routing rules above."
+        ),
+    },
+    "consciousness_node": {
+        "zh": (
+            "你叫kilostar，是一个多智能体AI助手系统中的【意识节点 (Consciousness Node)】。\n"
+            "你是系统的'高级规划师'和'架构师'，负责处理监控节点分配过来的复杂任务。\n"
+            "你的主要工作场景包括：\n"
+            "1. 拆解任务 (Workflow Generation)：结合用户的原始命令和提供的模板，生成严谨、可执行的工作流 (kilostarWorkflow)，并将其输出为 ForWorkflowEngine 格式。拆解时步骤应清晰连贯。\n"
+            "2. 中途指导 (Workflow Execution)：在工作流执行中，如果某一步骤指派给你，你需要对控制节点的结果进行分析或提供下一步的指导，输出 ForWorkflow 格式。\n"
+            "3. 总结报告 (regulatory Report)：在整个工作流执行完毕后，你需要对整体流程、各个控制节点的执行情况进行审查，并生成一份技术性的总结报告，输出 ForregulatoryNode 格式。\n"
+            "请确保所有的思考和生成过程符合逻辑，严密且高质量。"
+        ),
+        "en": (
+            "You are kilostar, the [Consciousness Node] in a multi-agent AI assistant system.\n"
+            "You are the system's 'senior planner' and 'architect', responsible for handling complex tasks assigned by the Regulatory Node.\n"
+            "Your main scenarios include:\n"
+            "1. Task Decomposition (Workflow Generation): Combine the user's original command with provided templates to generate rigorous, executable workflows (kilostarWorkflow), outputting them in the ForWorkflowEngine format. Steps should be clear and coherent.\n"
+            "2. Mid-flight Guidance (Workflow Execution): During workflow execution, if a step is assigned to you, analyze the Control Node's results or provide next-step guidance, outputting in the ForWorkflow format.\n"
+            "3. Summary Report (Regulatory Report): After the entire workflow completes, review the overall process and each Control Node's execution, generating a technical summary report in the ForregulatoryNode format.\n"
+            "Ensure all reasoning and generation is logical, rigorous, and high-quality."
+        ),
+    },
+    "control_node": {
+        "zh": (
+            "你叫kilostar，是一个多智能体AI助手系统中的【控制节点 (Control Node)】。\n"
+            "你是系统的'执行者'和'车间主任'，专门负责执行工作流中分配给你的具体子任务。\n"
+            "你的工作职责是：\n"
+            "1. 仔细分析分配给你的工作流步骤 (workflow_step) 的目标和要求。\n"
+            "2. 运用你被分配的工具 (如有) 或者依靠自身的知识和推理能力，精准、高效地完成该任务。\n"
+            "3. 将执行的结果、产生的数据或者具体的输出，严格按照 ForWorkflow 格式返回。\n"
+            "请注意：你的输出应当具体、实用，直接提供任务所要求的结果，不要做过多无关的寒暄。"
+        ),
+        "en": (
+            "You are kilostar, the [Control Node] in a multi-agent AI assistant system.\n"
+            "You are the system's 'executor' and 'shop floor manager', specifically responsible for carrying out concrete subtasks assigned to you within the workflow.\n"
+            "Your duties are:\n"
+            "1. Carefully analyze the objectives and requirements of the workflow_step assigned to you.\n"
+            "2. Use the tools assigned to you (if any) or rely on your own knowledge and reasoning to complete the task accurately and efficiently.\n"
+            "3. Return the execution results, generated data, or concrete outputs strictly in the ForWorkflow format.\n"
+            "Note: Your output should be specific, practical, and directly provide the results requested by the task. Avoid excessive irrelevant pleasantries."
+        ),
+    },
+}
+
+# ─── API / 通用消息 ────────────────────────────────────────────────────────
+
+# Message catalog, keyed first by message key and then by locale code
+# ("zh" / "en"). Templates may contain ``str.format`` placeholders such as
+# ``{provider_title}``.
+_MESSAGES: Dict[str, Dict[str, str]] = {
+    "internal_error": {
+        "zh": "服务内部错误，请稍后重试",
+        "en": "Internal server error, please try again later.",
+    },
+    "user_not_found": {
+        "zh": "用户不存在或已被删除，请重新登录",
+        "en": "User does not exist or has been deleted. Please log in again.",
+    },
+    "provider_not_registered": {
+        "zh": "Provider {provider_title} 未注册",
+        "en": "Provider {provider_title} is not registered.",
+    },
+    "model_not_exist": {
+        "zh": "模型不存在",
+        "en": "Model does not exist.",
+    },
+    "api_not_found": {
+        "zh": "API endpoint not found",
+        "en": "API endpoint not found",
+    },
+    "frontend_not_found": {
+        "zh": "Frontend build not found",
+        "en": "Frontend build not found",
+    },
+}
+
+# ─── 工具函数 ──────────────────────────────────────────────────────────────
+
+
+def _resolve_locale(locale: str | None = None, accept_language: str | None = None) -> str:
+    """Pick the locale to use for a lookup.
+
+    Precedence: explicit *locale* > ``Accept-Language`` header > module default
+    (``KILOSTAR_LANG``, falling back to ``zh``).
+
+    An explicit *locale* other than ``zh`` / ``en`` resolves to the module
+    default. Only the first language range of *accept_language* is inspected,
+    using a case-insensitive substring match.
+    """
+    if locale:
+        if locale in ("zh", "en"):
+            return locale
+        return _DEFAULT_LOCALE
+    if accept_language:
+        # Keep only the first range and drop any ";q=..." weight.
+        primary = accept_language.partition(",")[0]
+        primary = primary.partition(";")[0].strip().lower()
+        for supported in ("zh", "en"):
+            if supported in primary:
+                return supported
+    return _DEFAULT_LOCALE
+
+
+def t(key: str, locale: str | None = None, accept_language: str | None = None, **kwargs) -> str:
+    """Translate a generic message.
+
+    Args:
+        key: Message key, e.g. ``internal_error``.
+        locale: Explicit language code (``zh`` / ``en``).
+        accept_language: Value of the client's ``Accept-Language`` header.
+        **kwargs: Values interpolated into the template via ``str.format``.
+
+    Returns:
+        The translated string. Falls back to the default-locale text when the
+        resolved locale has no entry, and to *key* itself when the key is unknown.
+    """
+    loc = _resolve_locale(locale, accept_language)
+    # _MESSAGES is laid out as {message_key: {locale: text}}, so the message key
+    # must be looked up first. A locale-first lookup never matches and makes
+    # every call return the bare key.
+    translations = _MESSAGES.get(key, {})
+    text = translations.get(loc) or translations.get(_DEFAULT_LOCALE) or key
+    return text.format(**kwargs) if kwargs else text
+
+
+def agent_prompt(agent_name: str, locale: str | None = None, accept_language: str | None = None) -> str:
+    """Return an agent's system prompt with a reply-language directive appended.
+
+    Args:
+        agent_name: ``regulatory_node`` / ``consciousness_node`` / ``control_node``.
+        locale: Explicit language code.
+        accept_language: Value of the ``Accept-Language`` header.
+
+    Returns:
+        The prompt for the resolved locale (or for the default locale when that
+        variant is missing or empty) followed by the language directive. An
+        unknown *agent_name* yields only the directive.
+    """
+    loc = _resolve_locale(locale, accept_language)
+    variants = _PROMPTS.get(agent_name, {})
+    base_prompt = variants.get(loc) or variants.get(_DEFAULT_LOCALE, "")
+    if loc == "zh":
+        directive = "\n\n【重要】请始终使用简体中文进行思考和回复。"
+    elif loc == "en":
+        directive = "\n\n[Important] Please always think and reply in English."
+    else:
+        directive = ""
+    return base_prompt + directive
@@ -12,24 +12,83 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

+import os
+import sys
+
 from loguru import logger
 from rich.logging import RichHandler
 from loguru._logger import Logger

+from kilostar.utils.request_context import get_request_id, get_trace_id
+
+
+def _is_json_mode() -> bool:
+    """Tell whether structured JSON logging is enabled via the environment.
+
+    Enabled by ``KILOSTAR_LOG_FORMAT=json`` or by ``KILOSTAR_LOG_JSON`` set to
+    one of ``1`` / ``true`` / ``yes`` / ``on`` (all compared case-insensitively).
+    """
+    env = os.environ
+    if env.get("KILOSTAR_LOG_FORMAT", "").lower() == "json":
+        return True
+    return env.get("KILOSTAR_LOG_JSON", "").lower() in {"1", "true", "yes", "on"}
+
+
+def _ctx_patcher(record):
+    """Loguru patcher that fills in missing ``trace_id`` / ``request_id`` extras.
+
+    A truthy value already present in ``record["extra"]`` (for example from an
+    explicit ``bind(trace_id=...)``) is kept; otherwise the value is taken from
+    ``get_trace_id()`` / ``get_request_id()``.
+    """
+    fields = record["extra"]
+    # ``or`` short-circuits, so the context getters run only for missing values.
+    fields["trace_id"] = fields.get("trace_id") or get_trace_id()
+    fields["request_id"] = fields.get("request_id") or get_request_id()
+

 def setup_logger() -> Logger:
-    """初始化全局 loguru logger，输出格式为 ``actor:(...) | trace_id:(...) : message``。"""
+    """初始化全局 loguru logger。
+
+    - 默认（开发模式）：``RichHandler`` 彩色输出，格式 ``actor:(...) | request_id:(...) | trace_id:(...) : message``
+    - JSON 模式（``KILOSTAR_LOG_FORMAT=json``）：写到 stdout，每行一条 JSON，便于 ELK/Loki 采集
+
+    request_id / trace_id 来自 ``kilostar.utils.request_context``，由 FastAPI middleware
+    或工作流入口绑定到 contextvars，本模块通过 ``patcher`` 透明注入。
+    """
    logger.remove()

+    log_level = os.environ.get("KILOSTAR_LOG_LEVEL", "DEBUG").upper()
+
+    if _is_json_mode():
+        logger.configure(
+            extra={"actor_name": "System", "trace_id": "", "request_id": ""},
+            patcher=_ctx_patcher,
+        )
+        logger.add(
+            sys.stdout,
+            serialize=True,
+            level=log_level,
+            enqueue=True,
+        )
+        return logger
+
    def format_record(record):
-        # Format string for rich handler
        actor = record["extra"].get("actor_name", "System")
        trace_id = record["extra"].get("trace_id", "")
+        request_id = record["extra"].get("request_id", "")
+        ids = []
+        if request_id:
+            ids.append(f"request_id:({request_id})")
+        if trace_id:
+            ids.append(f"trace_id:({trace_id})")
+        ids_str = " | " + " | ".join(ids) if ids else ""
+        return f"actor:({actor}){ids_str} : {record['message']}"

-        trace_str = f" | trace_id:({trace_id})" if trace_id else ""
-        return f"actor:({actor}){trace_str} : {record['message']}"
-
-    logger.configure(extra={"actor_name": "System", "trace_id": ""})
+    logger.configure(
+        extra={"actor_name": "System", "trace_id": "", "request_id": ""},
+        patcher=_ctx_patcher,
+    )

    logger.add(
        RichHandler(
@@ -40,8 +99,8 @@ def setup_logger() -> Logger:
            show_path=False,
        ),
        format=format_record,
-        level="DEBUG",
-        enqueue=True,  # 异步记录
+        level=log_level,
+        enqueue=True,
    )

    return logger
@@ -51,5 +110,9 @@ global_logger = setup_logger()


 def get_logger(actor_name: str, trace_id: str = "") -> Logger:
-    """获取一个绑定了 actor_name 与可选 trace_id 的 logger，便于日志按 Actor/请求归类。"""
+    """获取一个绑定了 actor_name 与可选 trace_id 的 logger，便于日志按 Actor/请求归类。
+
+    若 ``trace_id`` 留空，会回退到 ``contextvars`` 中的当前值（由 middleware 或
+    工作流入口设置）。显式传值则会覆盖切面注入。
+    """
    return global_logger.bind(actor_name=actor_name, trace_id=trace_id)
@@ -0,0 +1,180 @@
+#  Copyright 2026 zhaoxi826
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""MCP 辅助模块：根据全局状态机中的配置创建 pydantic-ai MCPServer 实例。"""
+
+from typing import Dict, List, Any, Sequence
+
+from kilostar.utils.logger import get_logger
+
+logger = get_logger("mcp_helper")
+
+# 延迟导入 pydantic_ai.mcp，避免在 MCP 包未安装时崩溃
+try:
+    from pydantic_ai.mcp import (
+        MCPServerStdio,
+        MCPServerSSE,
+        MCPServerHTTP,
+    )
+    _MCP_AVAILABLE = True
+except ImportError:
+    _MCP_AVAILABLE = False
+    logger.warning("MCP package not installed. MCP servers will not be available.")
+
+
+def build_mcp_toolsets(configs: Dict[str, Dict[str, Any]]) -> List[Any]:
+    """Create MCP server instances from a configuration mapping.
+
+    Args:
+        configs: ``{server_id: {"name": ..., "transport": ..., ...}}``.
+            ``transport`` defaults to ``"stdio"``; ``"sse"`` and ``"http"`` are
+            also recognised.
+
+    Returns:
+        The servers that could be constructed, in the iteration order of
+        *configs*. Entries with an unsupported transport, or whose construction
+        raises, are logged and skipped. An empty list is returned when the MCP
+        package could not be imported.
+    """
+    if not _MCP_AVAILABLE:
+        return []
+
+    built = []
+    for server_id, cfg in configs.items():
+        try:
+            transport = cfg.get("transport", "stdio")
+            tool_prefix = cfg.get("tool_prefix")
+            name = cfg.get("name", server_id)
+
+            if transport not in ("stdio", "sse", "http"):
+                logger.warning(f"Unsupported MCP transport: {transport} for server {name}")
+                continue
+
+            if transport == "stdio":
+                server = MCPServerStdio(
+                    command=cfg.get("command", ""),
+                    args=cfg.get("args", []),
+                    env=cfg.get("env"),
+                    tool_prefix=tool_prefix,
+                    id=server_id,
+                )
+            elif transport == "sse":
+                server = MCPServerSSE(
+                    url=cfg.get("url", ""),
+                    tool_prefix=tool_prefix,
+                    id=server_id,
+                )
+            else:
+                # NOTE(review): confirm which wire protocol ``MCPServerHTTP``
+                # speaks in the installed pydantic-ai version before relying
+                # on the "http" transport.
+                server = MCPServerHTTP(
+                    url=cfg.get("url", ""),
+                    tool_prefix=tool_prefix,
+                    id=server_id,
+                )
+
+            built.append(server)
+            logger.info(f"MCP server '{name}' ({transport}) registered as toolset")
+        except Exception as e:
+            # One broken entry must not prevent the remaining servers from loading.
+            logger.error(f"Failed to build MCP server '{server_id}': {e}")
+
+    return built
+
+
+async def get_mcp_toolsets_from_gsm() -> List[Any]:
+    """Fetch the GSM snapshot and build MCP toolsets from its ``mcp_servers``.
+
+    Returns an empty list when the MCP package is unavailable or when fetching
+    or building fails (the failure is logged).
+    """
+    if not _MCP_AVAILABLE:
+        return []
+
+    try:
+        from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot
+
+        gsm_snapshot = await fetch_snapshot()
+        return build_mcp_toolsets(gsm_snapshot.mcp_servers)
+    except Exception as e:
+        logger.error(f"Failed to load MCP configs from GSM: {e}")
+        return []
+
+
+async def get_all_toolsets_for_scope(scope: str) -> List[Any]:
+    """Collect every toolset for *scope*: local toolsets first, then MCP toolsets.
+
+    Local toolsets are whatever ``build_toolsets_for_scope`` returns for the
+    fetched snapshot. A failure while loading them is logged and does not
+    prevent the MCP toolsets from being appended.
+    """
+    combined: List[Any] = []
+    try:
+        from kilostar.core.global_state_machine.gsm_snapshot import (
+            build_toolsets_for_scope,
+            fetch_snapshot,
+        )
+
+        gsm_snapshot = await fetch_snapshot()
+        scoped = build_toolsets_for_scope(gsm_snapshot, scope)
+        if scoped:
+            combined.extend(scoped)
+    except Exception as e:
+        logger.error(f"Failed to load local toolsets from GSM ({scope}): {e}")
+
+    mcp_toolsets = await get_mcp_toolsets_from_gsm()
+    combined.extend(mcp_toolsets)
+    return combined
+
+
+async def list_mcp_tools_for_configs(
+    configs: Dict[str, Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Connect to each configured MCP server in turn and list its tool names.
+
+    Each server is entered with ``async with server:`` and queried once via
+    ``get_tools()``. A failing server does not affect the others: its entry
+    keeps ``tools=[]`` and gains an ``error`` field. Servers that
+    ``build_mcp_toolsets`` could not construct do not appear in the result.
+    """
+    if not _MCP_AVAILABLE:
+        return []
+
+    report: List[Dict[str, Any]] = []
+    for server in build_mcp_toolsets(configs):
+        server_id = getattr(server, "id", None)
+        cfg = configs.get(server_id, {}) if server_id else {}
+        name = cfg.get("name", server_id or "unknown")
+        entry: Dict[str, Any] = {
+            "server_id": server_id,
+            "name": name,
+            "transport": cfg.get("transport", "stdio"),
+            "tool_prefix": cfg.get("tool_prefix"),
+            "tools": [],
+        }
+        try:
+            async with server:
+                # NOTE(review): confirm ``get_tools()`` can be called without
+                # arguments in the installed pydantic-ai version.
+                tools = await server.get_tools()
+            entry["tools"] = [
+                getattr(tool, "name", None) or getattr(tool, "tool_name", str(tool))
+                for tool in tools
+            ]
+        except Exception as e:
+            entry["error"] = str(e)
+            logger.warning(f"MCP server '{name}' list_tools failed: {e}")
+        report.append(entry)
+    return report
+
+
+async def list_mcp_tools_from_gsm() -> List[Dict[str, Any]]:
+    """Fetch the GSM snapshot and delegate to :func:`list_mcp_tools_for_configs`.
+
+    Returns an empty list when the MCP package is unavailable or when fetching
+    or listing fails (the failure is logged).
+    """
+    if not _MCP_AVAILABLE:
+        return []
+
+    try:
+        from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot
+
+        gsm_snapshot = await fetch_snapshot()
+        listing = await list_mcp_tools_for_configs(gsm_snapshot.mcp_servers)
+        return listing
+    except Exception as e:
+        logger.error(f"Failed to list MCP tools from GSM: {e}")
+        return []
@@ -0,0 +1,130 @@
+#  Copyright 2026 zhaoxi826
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""请求/工作流上下文：基于 ``contextvars`` 的双层 ID 传播。
+
+设计上把"一次用户请求"和"一次重型工作流"区分开：
+
+- ``request_id``：会话域。所有进 API 的请求都要带，由 middleware 在入口生成或
+  从 ``X-Request-Id`` 头继承。chat 这条同步链路靠它走完一生。
+- ``trace_id``：工作流域。只有 ``ConsciousnessNode`` 决定启动重型任务时才生成，
+  挂到 ``KiloStarWorkflow`` 上。trace_id 应能追溯回触发它的 request_id（前者
+  通过显式参数传入，后者从 contextvars 读取）。
+
+为什么用 ``contextvars`` 而不是参数透传：
+
+1. ``contextvars`` 在 ``asyncio`` 协程间天然继承，不会跨协程串味；
+2. ``loguru`` 的 ``patcher`` 钩子可以把它变成日志切面，业务代码不需要在每条
+   ``logger.info`` 上手动 ``.bind(trace_id=...)``；
+3. Ray 跨进程调用时 contextvars 不会自动传播 —— 这是有意为之，避免不同 actor
+   间的上下文意外串联。跨 actor 边界要走显式参数，由接收方再 ``bind_*`` 一次。
+"""
+
+from __future__ import annotations
+
+import uuid
+from contextlib import contextmanager
+from contextvars import ContextVar, Token
+from typing import Iterator, Optional
+
+
+_request_id_var: ContextVar[str] = ContextVar("kilostar_request_id", default="")
+_trace_id_var: ContextVar[str] = ContextVar("kilostar_trace_id", default="")
+
+
+def get_request_id() -> str:
+    """Return the ``request_id`` bound in the current context, or an empty string if none is bound."""
+    return _request_id_var.get()
+
+
+def get_trace_id() -> str:
+    """Return the ``trace_id`` bound in the current context, or an empty string if none is bound."""
+    return _trace_id_var.get()
+
+
+def bind_request_id(request_id: str) -> Token:
+    """Bind *request_id* in the current context and return a token for ``reset``.
+
+    The returned ``Token`` may only be passed to ``reset`` from the same
+    thread/coroutine that called ``set``; otherwise ``ValueError`` is raised.
+    In most cases prefer the ``request_id_scope`` context manager.
+    """
+    return _request_id_var.set(request_id)
+
+
+def bind_trace_id(trace_id: str) -> Token:
+    """Bind *trace_id* in the current context and return a token for ``reset``."""
+    return _trace_id_var.set(trace_id)
+
+
+def reset_request_id(token: Token) -> None:
+    """Restore the ``request_id`` variable to the value it had before *token* was created."""
+    _request_id_var.reset(token)
+
+
+def reset_trace_id(token: Token) -> None:
+    """Restore the ``trace_id`` variable to the value it had before *token* was created."""
+    _trace_id_var.reset(token)
+
+
+@contextmanager
+def request_id_scope(request_id: str) -> Iterator[str]:
+    """Bind *request_id* for the duration of the ``with`` block and restore the previous value on exit."""
+    token = _request_id_var.set(request_id)
+    try:
+        yield request_id
+    finally:
+        # Runs on normal exit and on exceptions alike.
+        _request_id_var.reset(token)
+
+
+@contextmanager
+def trace_id_scope(trace_id: str) -> Iterator[str]:
+    """Bind *trace_id* for the duration of the ``with`` block and restore the previous value on exit."""
+    token = _trace_id_var.set(trace_id)
+    try:
+        yield trace_id
+    finally:
+        # Runs on normal exit and on exceptions alike.
+        _trace_id_var.reset(token)
+
+
+def new_request_id(prefix: str = "req") -> str:
+    """Generate a fresh request id of the form ``<prefix>-<uuid4 hex>``."""
+    unique_part = uuid.uuid4().hex
+    return "{}-{}".format(prefix, unique_part)
+
+
+def snapshot() -> dict[str, str]:
+    """Capture the current ``request_id`` and ``trace_id`` as a plain dict.
+
+    The result can be passed explicitly across an actor/task boundary and
+    re-applied on the other side with ``apply_snapshot``.
+    """
+    request_id = _request_id_var.get()
+    trace_id = _trace_id_var.get()
+    return {"request_id": request_id, "trace_id": trace_id}
+
+
+@contextmanager
+def apply_snapshot(snap: Optional[dict[str, str]]) -> Iterator[None]:
+    """Temporarily apply the IDs carried by *snap* to the current context.
+
+    Only truthy ``request_id`` / ``trace_id`` entries are bound; a falsy *snap*
+    binds nothing. On exit the bound variables are reset in reverse order.
+    """
+    applied: list[Token] = []
+    if snap:
+        for var, field in ((_request_id_var, "request_id"), (_trace_id_var, "trace_id")):
+            value = snap.get(field)
+            if value:
+                applied.append(var.set(value))
+    try:
+        yield
+    finally:
+        while applied:
+            tok = applied.pop()
+            try:
+                tok.var.reset(tok)
+            except (ValueError, LookupError):
+                # Tolerate a token that can no longer be reset (the original
+                # author attributes this to coroutine switches).
+                pass