feat(system):优化后端
1.新增后端测试 2.增加了后端的加密 3.增加了i18n(国际化)
This commit is contained in:
@@ -12,10 +12,16 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import Annotated
|
||||
from fastapi import Depends, HTTPException
|
||||
from fastapi import Depends, HTTPException, Request
|
||||
from kilostar.utils.access import Accessor, TokenData
|
||||
from kilostar.core.postgres_database.model import UserAuthority
|
||||
from kilostar.utils.ray_hook import ray_actor_hook
|
||||
from kilostar.utils.i18n import t
|
||||
|
||||
|
||||
def _user_not_found_detail(request: Request | None = None) -> str:
    """Return the localized "user not found" text for an HTTP error detail.

    Args:
        request: The incoming request, if one is available. Its
            ``accept-language`` header is forwarded to ``t`` so the message
            language can follow the client preference.

    Returns:
        The translated ``user_not_found`` message.
    """
    if request:
        accept_language = request.headers.get("accept-language")
    else:
        # No request at hand: let ``t`` fall back to its default locale.
        accept_language = None
    return t("user_not_found", accept_language=accept_language)
|
||||
|
||||
|
||||
async def get_authority(user_id: str) -> UserAuthority:
|
||||
@@ -29,12 +35,12 @@ async def get_authority(user_id: str) -> UserAuthority:
|
||||
)
|
||||
return user_authority
|
||||
except UserNotExistError:
|
||||
raise HTTPException(status_code=401, detail="用户不存在或已被删除,请重新登录")
|
||||
raise HTTPException(status_code=401, detail=t("user_not_found"))
|
||||
except Exception as e:
|
||||
# Check if it's a RayTaskError wrapping UserNotExistError
|
||||
if "UserNotExistError" in str(e):
|
||||
raise HTTPException(
|
||||
status_code=401, detail="用户不存在或已被删除,请重新登录"
|
||||
status_code=401, detail=t("user_not_found")
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
import os
|
||||
from functools import lru_cache
|
||||
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
|
||||
from kilostar.utils.logger import get_logger
|
||||
|
||||
logger = get_logger("crypto")
|
||||
|
||||
_VERSION_PREFIX = "v1:"
|
||||
_SENSITIVE_KEYS = {"key", "token", "secret", "password", "apikey", "api_key"}
|
||||
|
||||
|
||||
class CryptoError(Exception):
    """Raised when secret encryption or decryption cannot be performed."""
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _get_fernet() -> Fernet:
    """Build the Fernet cipher from the ``KILOSTAR_SECRET_KEY`` variable.

    The function is wrapped in ``lru_cache(maxsize=1)``, so once a cipher has
    been built successfully the same instance is returned on later calls.

    Returns:
        A ``Fernet`` instance keyed with the configured secret.

    Raises:
        CryptoError: If the variable is unset or empty, or if ``Fernet``
            rejects the key.
    """
    secret = os.environ.get("KILOSTAR_SECRET_KEY", "")
    if secret:
        try:
            # os.environ values are always str, so encode before handing to Fernet.
            return Fernet(secret.encode())
        except Exception as exc:
            raise CryptoError(f"KILOSTAR_SECRET_KEY 格式无效: {exc}") from exc
    raise CryptoError(
        "环境变量 KILOSTAR_SECRET_KEY 未设置,无法进行加解密。"
        "请生成一个密钥:python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
    )
|
||||
|
||||
|
||||
def encrypt_secret(plaintext: str) -> str:
    """Encrypt a secret and tag it with the version prefix.

    Args:
        plaintext: The value to protect. Falsy values (empty string, ``None``)
            are returned untouched.

    Returns:
        ``_VERSION_PREFIX`` followed by the Fernet token as text, or the
        input itself when it is falsy.

    Raises:
        CryptoError: Propagated from ``_get_fernet`` when no usable key is
            configured.
    """
    if plaintext:
        encrypted = _get_fernet().encrypt(plaintext.encode("utf-8"))
        return _VERSION_PREFIX + encrypted.decode("utf-8")
    return plaintext
|
||||
|
||||
|
||||
def decrypt_secret(ciphertext: str) -> str:
    """Decrypt a value produced by ``encrypt_secret``.

    Values that are falsy or that do not start with ``_VERSION_PREFIX`` are
    treated as not encrypted and returned unchanged.

    Args:
        ciphertext: The stored value, possibly prefixed and encrypted.

    Returns:
        The decrypted text, or the input itself when it carries no prefix.

    Raises:
        CryptoError: If the token cannot be decrypted (``InvalidToken``), or
            propagated from ``_get_fernet`` when no usable key is configured.
    """
    if not ciphertext or not ciphertext.startswith(_VERSION_PREFIX):
        return ciphertext
    token = ciphertext[len(_VERSION_PREFIX):]
    cipher = _get_fernet()
    try:
        plain_bytes = cipher.decrypt(token.encode("utf-8"))
        return plain_bytes.decode("utf-8")
    except InvalidToken as exc:
        raise CryptoError("解密失败:密钥不匹配或密文已损坏") from exc
|
||||
|
||||
|
||||
def is_encrypted(value: str) -> bool:
    """Tell whether ``value`` is a string that starts with ``_VERSION_PREFIX``.

    Non-string values are never considered encrypted.
    """
    if not isinstance(value, str):
        return False
    return value.startswith(_VERSION_PREFIX)
|
||||
|
||||
|
||||
def _is_sensitive_key(key: str) -> bool:
|
||||
lower = key.lower()
|
||||
return any(s in lower for s in _SENSITIVE_KEYS)
|
||||
|
||||
|
||||
def encrypt_dict_secrets(data: dict) -> dict:
    """Return a shallow copy of ``data`` with sensitive string values encrypted.

    A value is encrypted only when its key is sensitive (per
    ``_is_sensitive_key``), the value is a non-empty string, and it is not
    already marked as encrypted. Nested containers are not inspected.

    Args:
        data: Mapping to process. Anything that is not a ``dict`` is returned
            as-is.

    Returns:
        A new dict with the same keys in the same order.
    """
    if not isinstance(data, dict):
        return data

    def _needs_encryption(field, value) -> bool:
        return bool(
            _is_sensitive_key(field)
            and isinstance(value, str)
            and value
            and not is_encrypted(value)
        )

    return {
        field: encrypt_secret(value) if _needs_encryption(field, value) else value
        for field, value in data.items()
    }
|
||||
|
||||
|
||||
def decrypt_dict_secrets(data: dict) -> dict:
    """Return a shallow copy of ``data`` with encrypted sensitive values decrypted.

    Only string values under sensitive keys that carry the encryption prefix
    are decrypted. When decryption of a field fails, the error is logged and
    the stored value is kept unchanged.

    Args:
        data: Mapping to process. Anything that is not a ``dict`` is returned
            as-is.

    Returns:
        A new dict with the same keys in the same order.
    """
    if not isinstance(data, dict):
        return data
    result: dict = {}
    for field, value in data.items():
        # Default to passing the value through; overwritten on successful decrypt.
        result[field] = value
        if not (_is_sensitive_key(field) and isinstance(value, str) and is_encrypted(value)):
            continue
        try:
            result[field] = decrypt_secret(value)
        except CryptoError as exc:
            logger.error(f"字段 {field} 解密失败: {exc}")
    return result
|
||||
+92
-27
@@ -12,68 +12,133 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""KiloStar 统一异常体系。
|
||||
|
||||
class RetryableError(Exception):
|
||||
"""基类:所有可重试错误(如网络断开、抖动等临时性故障)。"""
|
||||
设计原则:所有自定义异常归到两条主轴下。
|
||||
|
||||
pass
|
||||
- ``BusinessError``:业务可预期错误,HTTP 层映射 4xx;前端可读、可展示给用户。
|
||||
- ``InfraError``:系统/基础设施失败错误,HTTP 层映射 5xx;通常需要落日志告警。
|
||||
其下再细分为 ``RetryableError``(瞬时故障,可由 ``retry_on_retryable_error`` 自动重试)
|
||||
与 ``NonRetryableError``(确定性失败,重试无意义)。
|
||||
|
||||
注意:用 ``InfraError`` 而非 ``SystemError`` 是为了避免与 Python 内置的
|
||||
``SystemError`` 冲突。
|
||||
|
||||
每个异常类都带 ``http_status`` 与 ``code`` 类属性,``api/__init__.py`` 的统一
|
||||
handler 根据它们直接生成结构化响应,避免业务代码里硬编码状态码。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class NonRetryableError(Exception):
|
||||
"""基类:所有不可重试错误(如数据验证失败、类型错误等业务逻辑故障)。"""
|
||||
class KiloStarError(Exception):
|
||||
"""KiloStar 所有自定义异常的总根。"""
|
||||
|
||||
pass
|
||||
http_status: int = 500
|
||||
code: str = "kilostar_error"
|
||||
|
||||
|
||||
class DemandError(NonRetryableError):
|
||||
# ─── 主轴 1:业务可预期错误(4xx) ───────────────────────────────────────────
|
||||
|
||||
|
||||
class BusinessError(KiloStarError):
|
||||
"""业务层可预期错误的基类,HTTP 层默认 400。"""
|
||||
|
||||
http_status = 400
|
||||
code = "business_error"
|
||||
|
||||
|
||||
class DemandError(BusinessError):
|
||||
"""需求/任务参数不合法或不满足前置条件时抛出。"""
|
||||
|
||||
pass
|
||||
http_status = 400
|
||||
code = "demand_error"
|
||||
|
||||
|
||||
class ModelNotExistError(Exception):
|
||||
"""请求了一个未在 Provider 中注册的模型 ID 时抛出。"""
|
||||
|
||||
pass
|
||||
# 用户域 ─────────────────────────────────────────
|
||||
|
||||
|
||||
class UserError(Exception):
|
||||
"""用户相关错误的基类,HTTP 层会被统一映射为 4xx。"""
|
||||
class UserError(BusinessError):
|
||||
"""用户域错误的基类。"""
|
||||
|
||||
pass
|
||||
http_status = 400
|
||||
code = "user_error"
|
||||
|
||||
|
||||
class UserNotExistError(UserError):
|
||||
"""按用户名/ID 查询时用户不存在。"""
|
||||
|
||||
pass
|
||||
http_status = 404
|
||||
code = "user_not_exist"
|
||||
|
||||
|
||||
class UserPasswordError(UserError):
|
||||
"""口令校验失败(旧密码错误、登录密码错误等)。"""
|
||||
|
||||
pass
|
||||
http_status = 401
|
||||
code = "user_password_error"
|
||||
|
||||
|
||||
class ProviderError(Exception):
|
||||
"""模型 Provider 相关错误的基类。"""
|
||||
# Provider 域 ─────────────────────────────────────
|
||||
|
||||
pass
|
||||
|
||||
class ProviderError(BusinessError):
|
||||
"""模型 Provider 域错误的基类。"""
|
||||
|
||||
http_status = 400
|
||||
code = "provider_error"
|
||||
|
||||
|
||||
class ProviderNotExistError(ProviderError):
|
||||
"""请求了一个未注册的 Provider 时抛出。"""
|
||||
|
||||
pass
|
||||
http_status = 404
|
||||
code = "provider_not_exist"
|
||||
|
||||
|
||||
class WorkflowError(Exception):
|
||||
"""工作流执行期错误的基类,HTTP 层会被统一映射为 5xx。"""
|
||||
class ModelNotExistError(BusinessError):
|
||||
"""请求了一个未在 Provider 中注册的模型 ID 时抛出。"""
|
||||
|
||||
pass
|
||||
http_status = 404
|
||||
code = "model_not_exist"
|
||||
|
||||
|
||||
class WorkflowExit(WorkflowError):
|
||||
"""工作流被显式终止(用户取消、上游决策跳出等)时抛出,是预期内的退出信号。"""
|
||||
# Workflow 域 ─────────────────────────────────────
|
||||
|
||||
pass
|
||||
|
||||
class WorkflowExit(BusinessError):
|
||||
"""工作流被显式终止(用户取消、上游决策跳出等),是预期内的退出信号。"""
|
||||
|
||||
http_status = 400
|
||||
code = "workflow_exit"
|
||||
|
||||
|
||||
# ─── 主轴 2:系统/基础设施失败错误(5xx) ────────────────────────────────────
|
||||
|
||||
|
||||
class InfraError(KiloStarError):
|
||||
"""系统/基础设施失败错误的基类,HTTP 层默认 500。"""
|
||||
|
||||
http_status = 500
|
||||
code = "infra_error"
|
||||
|
||||
|
||||
class RetryableError(InfraError):
|
||||
"""瞬时故障(如网络抖动),可由 ``retry_on_retryable_error`` 自动重试。"""
|
||||
|
||||
http_status = 503
|
||||
code = "retryable_error"
|
||||
|
||||
|
||||
class NonRetryableError(InfraError):
|
||||
"""确定性的系统失败,重试无意义。"""
|
||||
|
||||
http_status = 500
|
||||
code = "non_retryable_error"
|
||||
|
||||
|
||||
class WorkflowError(InfraError):
|
||||
"""工作流执行期错误的基类,HTTP 层映射为 5xx。"""
|
||||
|
||||
http_status = 500
|
||||
code = "workflow_error"
|
||||
|
||||
@@ -33,9 +33,11 @@ def _get_tool_func(tool_name: str) -> Callable | None:
|
||||
if func:
|
||||
return func
|
||||
|
||||
app_root = "/app"
|
||||
tool_plugin_dir = os.path.join(
|
||||
app_root, "kilostar", "plugin", "tool_plugin", tool_name
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
||||
"plugin",
|
||||
"tool_plugin",
|
||||
tool_name,
|
||||
)
|
||||
|
||||
if not os.path.exists(tool_plugin_dir) or not os.path.isdir(tool_plugin_dir):
|
||||
|
||||
@@ -0,0 +1,183 @@
|
||||
# Copyright 2026 zhaoxi826
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""KiloStar 轻量级国际化工具。
|
||||
|
||||
设计原则:
|
||||
- 纯内存字典,无文件 IO,Ray 远程序列化零成本。
|
||||
- 支持环境变量 ``KILOSTAR_LANG`` 作为全局默认语言。
|
||||
- Agent system prompt 按 ``{locale}`` 分桶,调用方显式传入 locale。
|
||||
- API 层通过请求头 ``Accept-Language`` 解析首选语言。
|
||||
|
||||
当前支持:``zh`` (简体中文), ``en`` (English)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Dict
|
||||
|
||||
_DEFAULT_LOCALE: str = os.getenv("KILOSTAR_LANG", "zh")
|
||||
|
||||
# ─── Agent System Prompts ──────────────────────────────────────────────────
|
||||
|
||||
_PROMPTS: Dict[str, Dict[str, str]] = {
|
||||
"regulatory_node": {
|
||||
"zh": (
|
||||
"你叫kilostar,是一个多智能体AI助手系统中的【监控节点 (regulatory Node)】。\n"
|
||||
"你是系统的'前台接待'和'大脑皮层',负责接收用户的初始请求或工作流的最终报告。\n"
|
||||
"你的核心职责是进行【意图识别与路由】。请仔细阅读用户的请求:\n"
|
||||
"1. 如果用户只是进行简单的问候、闲聊或查询非常基础的信息,请直接生成友好的回复,使用 ForUser 格式。\n"
|
||||
"2. 如果用户提出的是复杂任务(如需要编写代码、多步骤规划、数据处理等),请务必将其判定为需要工作流处理的任务,"
|
||||
" 并使用 ForConsciousnessNode 格式将其移交意识节点处理。\n"
|
||||
"3. 如果你收到的是 TerminationMessage(代表工作流已完成并生成了报告),请将报告内容转化为友好的面向用户的回复,使用 ForUser 格式。\n"
|
||||
"请保持冷静、专业,并严格遵循上述路由规则。"
|
||||
),
|
||||
"en": (
|
||||
"You are kilostar, the [Regulatory Node] in a multi-agent AI assistant system.\n"
|
||||
"You are the system's 'front desk' and 'cerebral cortex', responsible for receiving user requests and final workflow reports.\n"
|
||||
"Your core duty is [intent recognition and routing]. Please read the user's request carefully:\n"
|
||||
"1. If the user is simply greeting, chatting, or asking very basic questions, generate a friendly reply directly in the ForUser format.\n"
|
||||
"2. If the user presents a complex task (e.g., writing code, multi-step planning, data processing), you must classify it as a workflow-requiring task "
|
||||
" and hand it over to the Consciousness Node using the ForConsciousnessNode format.\n"
|
||||
"3. If you receive a TerminationMessage (indicating the workflow is complete and a report has been generated), convert the report into a user-friendly reply in the ForUser format.\n"
|
||||
"Please remain calm, professional, and strictly follow the routing rules above."
|
||||
),
|
||||
},
|
||||
"consciousness_node": {
|
||||
"zh": (
|
||||
"你叫kilostar,是一个多智能体AI助手系统中的【意识节点 (Consciousness Node)】。\n"
|
||||
"你是系统的'高级规划师'和'架构师',负责处理监控节点分配过来的复杂任务。\n"
|
||||
"你的主要工作场景包括:\n"
|
||||
"1. 拆解任务 (Workflow Generation):结合用户的原始命令和提供的模板,生成严谨、可执行的工作流 (kilostarWorkflow),并将其输出为 ForWorkflowEngine 格式。拆解时步骤应清晰连贯。\n"
|
||||
"2. 中途指导 (Workflow Execution):在工作流执行中,如果某一步骤指派给你,你需要对控制节点的结果进行分析或提供下一步的指导,输出 ForWorkflow 格式。\n"
|
||||
"3. 总结报告 (regulatory Report):在整个工作流执行完毕后,你需要对整体流程、各个控制节点的执行情况进行审查,并生成一份技术性的总结报告,输出 ForregulatoryNode 格式。\n"
|
||||
"请确保所有的思考和生成过程符合逻辑,严密且高质量。"
|
||||
),
|
||||
"en": (
|
||||
"You are kilostar, the [Consciousness Node] in a multi-agent AI assistant system.\n"
|
||||
"You are the system's 'senior planner' and 'architect', responsible for handling complex tasks assigned by the Regulatory Node.\n"
|
||||
"Your main scenarios include:\n"
|
||||
"1. Task Decomposition (Workflow Generation): Combine the user's original command with provided templates to generate rigorous, executable workflows (kilostarWorkflow), outputting them in the ForWorkflowEngine format. Steps should be clear and coherent.\n"
|
||||
"2. Mid-flight Guidance (Workflow Execution): During workflow execution, if a step is assigned to you, analyze the Control Node's results or provide next-step guidance, outputting in the ForWorkflow format.\n"
|
||||
"3. Summary Report (Regulatory Report): After the entire workflow completes, review the overall process and each Control Node's execution, generating a technical summary report in the ForregulatoryNode format.\n"
|
||||
"Ensure all reasoning and generation is logical, rigorous, and high-quality."
|
||||
),
|
||||
},
|
||||
"control_node": {
|
||||
"zh": (
|
||||
"你叫kilostar,是一个多智能体AI助手系统中的【控制节点 (Control Node)】。\n"
|
||||
"你是系统的'执行者'和'车间主任',专门负责执行工作流中分配给你的具体子任务。\n"
|
||||
"你的工作职责是:\n"
|
||||
"1. 仔细分析分配给你的工作流步骤 (workflow_step) 的目标和要求。\n"
|
||||
"2. 运用你被分配的工具 (如有) 或者依靠自身的知识和推理能力,精准、高效地完成该任务。\n"
|
||||
"3. 将执行的结果、产生的数据或者具体的输出,严格按照 ForWorkflow 格式返回。\n"
|
||||
"请注意:你的输出应当具体、实用,直接提供任务所要求的结果,不要做过多无关的寒暄。"
|
||||
),
|
||||
"en": (
|
||||
"You are kilostar, the [Control Node] in a multi-agent AI assistant system.\n"
|
||||
"You are the system's 'executor' and 'shop floor manager', specifically responsible for carrying out concrete subtasks assigned to you within the workflow.\n"
|
||||
"Your duties are:\n"
|
||||
"1. Carefully analyze the objectives and requirements of the workflow_step assigned to you.\n"
|
||||
"2. Use the tools assigned to you (if any) or rely on your own knowledge and reasoning to complete the task accurately and efficiently.\n"
|
||||
"3. Return the execution results, generated data, or concrete outputs strictly in the ForWorkflow format.\n"
|
||||
"Note: Your output should be specific, practical, and directly provide the results requested by the task. Avoid excessive irrelevant pleasantries."
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
# ─── API / 通用消息 ────────────────────────────────────────────────────────
|
||||
|
||||
_MESSAGES: Dict[str, Dict[str, str]] = {
|
||||
"internal_error": {
|
||||
"zh": "服务内部错误,请稍后重试",
|
||||
"en": "Internal server error, please try again later.",
|
||||
},
|
||||
"user_not_found": {
|
||||
"zh": "用户不存在或已被删除,请重新登录",
|
||||
"en": "User does not exist or has been deleted. Please log in again.",
|
||||
},
|
||||
"provider_not_registered": {
|
||||
"zh": "Provider {provider_title} 未注册",
|
||||
"en": "Provider {provider_title} is not registered.",
|
||||
},
|
||||
"model_not_exist": {
|
||||
"zh": "模型不存在",
|
||||
"en": "Model does not exist.",
|
||||
},
|
||||
"api_not_found": {
|
||||
"zh": "API endpoint not found",
|
||||
"en": "API endpoint not found",
|
||||
},
|
||||
"frontend_not_found": {
|
||||
"zh": "Frontend build not found",
|
||||
"en": "Frontend build not found",
|
||||
},
|
||||
}
|
||||
|
||||
# ─── 工具函数 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _resolve_locale(locale: str | None = None, accept_language: str | None = None) -> str:
|
||||
"""确定最终使用的 locale。
|
||||
|
||||
优先级:显式传入 > Accept-Language 头 > KILOSTAR_LANG 环境变量 > 默认 zh。
|
||||
"""
|
||||
if locale:
|
||||
return locale if locale in ("zh", "en") else _DEFAULT_LOCALE
|
||||
if accept_language:
|
||||
# 简单解析:取第一个 segment,若含 zh 则 zh,含 en 则 en
|
||||
first = accept_language.split(",")[0].split(";")[0].strip().lower()
|
||||
if "zh" in first:
|
||||
return "zh"
|
||||
if "en" in first:
|
||||
return "en"
|
||||
return _DEFAULT_LOCALE
|
||||
|
||||
|
||||
def t(key: str, locale: str | None = None, accept_language: str | None = None, **kwargs) -> str:
    """Translate a general-purpose message.

    Args:
        key: Message key, e.g. ``internal_error``.
        locale: Explicit language code (``zh`` / ``en``).
        accept_language: Content of the client's ``Accept-Language`` header.
        **kwargs: Values interpolated into the message template via
            ``str.format``.

    Returns:
        The translated string. Falls back to the ``_DEFAULT_LOCALE``
        translation when the resolved locale has none, and to ``key`` itself
        when the key is unknown.
    """
    loc = _resolve_locale(locale, accept_language)
    # _MESSAGES is nested as {message_key: {locale: text}}, so the message key
    # must be looked up first; indexing by locale first never finds anything
    # and would always return the bare key.
    translations = _MESSAGES.get(key, {})
    text = translations.get(loc) or translations.get(_DEFAULT_LOCALE) or key
    return text.format(**kwargs) if kwargs else text
|
||||
|
||||
|
||||
def agent_prompt(agent_name: str, locale: str | None = None, accept_language: str | None = None) -> str:
    """Return an agent's system prompt with a language instruction appended.

    Args:
        agent_name: ``regulatory_node`` / ``consciousness_node`` / ``control_node``.
        locale: Explicit language code.
        accept_language: Content of the ``Accept-Language`` header.

    Returns:
        The prompt for the resolved locale (falling back to the
        ``_DEFAULT_LOCALE`` variant, then to an empty string), followed by an
        instruction to think and reply in that language. No instruction is
        appended for locales other than ``zh`` and ``en``.
    """
    loc = _resolve_locale(locale, accept_language)
    variants = _PROMPTS.get(agent_name, {})
    prompt = variants.get(loc) or variants.get(_DEFAULT_LOCALE, "")
    if loc == "zh":
        suffix = "\n\n【重要】请始终使用简体中文进行思考和回复。"
    elif loc == "en":
        suffix = "\n\n[Important] Please always think and reply in English."
    else:
        suffix = ""
    return prompt + suffix
|
||||
@@ -12,24 +12,83 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from loguru import logger
|
||||
from rich.logging import RichHandler
|
||||
from loguru._logger import Logger
|
||||
|
||||
from kilostar.utils.request_context import get_request_id, get_trace_id
|
||||
|
||||
|
||||
def _is_json_mode() -> bool:
|
||||
"""根据环境变量决定是否启用 JSON 结构化日志。
|
||||
|
||||
支持开关:``KILOSTAR_LOG_FORMAT=json`` 或 ``KILOSTAR_LOG_JSON=1/true``。
|
||||
"""
|
||||
fmt = os.environ.get("KILOSTAR_LOG_FORMAT", "").lower()
|
||||
if fmt == "json":
|
||||
return True
|
||||
flag = os.environ.get("KILOSTAR_LOG_JSON", "").lower()
|
||||
return flag in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _ctx_patcher(record):
|
||||
"""日志切面:每条日志写出前,把 contextvars 里的 request_id / trace_id 注入。
|
||||
|
||||
显式 ``bind(trace_id=...)`` 的 logger 优先(业务代码可以覆盖切面值);
|
||||
没有 bind 时回退到 contextvars,没有 contextvars 时为空串。
|
||||
"""
|
||||
extra = record["extra"]
|
||||
if not extra.get("trace_id"):
|
||||
extra["trace_id"] = get_trace_id()
|
||||
if not extra.get("request_id"):
|
||||
extra["request_id"] = get_request_id()
|
||||
|
||||
|
||||
def setup_logger() -> Logger:
|
||||
"""初始化全局 loguru logger,输出格式为 ``actor:(...) | trace_id:(...) : message``。"""
|
||||
"""初始化全局 loguru logger。
|
||||
|
||||
- 默认(开发模式):``RichHandler`` 彩色输出,格式 ``actor:(...) | request_id:(...) | trace_id:(...) : message``
|
||||
- JSON 模式(``KILOSTAR_LOG_FORMAT=json``):写到 stdout,每行一条 JSON,便于 ELK/Loki 采集
|
||||
|
||||
request_id / trace_id 来自 ``kilostar.utils.request_context``,由 FastAPI middleware
|
||||
或工作流入口绑定到 contextvars,本模块通过 ``patcher`` 透明注入。
|
||||
"""
|
||||
logger.remove()
|
||||
|
||||
log_level = os.environ.get("KILOSTAR_LOG_LEVEL", "DEBUG").upper()
|
||||
|
||||
if _is_json_mode():
|
||||
logger.configure(
|
||||
extra={"actor_name": "System", "trace_id": "", "request_id": ""},
|
||||
patcher=_ctx_patcher,
|
||||
)
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
serialize=True,
|
||||
level=log_level,
|
||||
enqueue=True,
|
||||
)
|
||||
return logger
|
||||
|
||||
def format_record(record):
|
||||
# Format string for rich handler
|
||||
actor = record["extra"].get("actor_name", "System")
|
||||
trace_id = record["extra"].get("trace_id", "")
|
||||
request_id = record["extra"].get("request_id", "")
|
||||
ids = []
|
||||
if request_id:
|
||||
ids.append(f"request_id:({request_id})")
|
||||
if trace_id:
|
||||
ids.append(f"trace_id:({trace_id})")
|
||||
ids_str = " | " + " | ".join(ids) if ids else ""
|
||||
return f"actor:({actor}){ids_str} : {record['message']}"
|
||||
|
||||
trace_str = f" | trace_id:({trace_id})" if trace_id else ""
|
||||
return f"actor:({actor}){trace_str} : {record['message']}"
|
||||
|
||||
logger.configure(extra={"actor_name": "System", "trace_id": ""})
|
||||
logger.configure(
|
||||
extra={"actor_name": "System", "trace_id": "", "request_id": ""},
|
||||
patcher=_ctx_patcher,
|
||||
)
|
||||
|
||||
logger.add(
|
||||
RichHandler(
|
||||
@@ -40,8 +99,8 @@ def setup_logger() -> Logger:
|
||||
show_path=False,
|
||||
),
|
||||
format=format_record,
|
||||
level="DEBUG",
|
||||
enqueue=True, # 异步记录
|
||||
level=log_level,
|
||||
enqueue=True,
|
||||
)
|
||||
|
||||
return logger
|
||||
@@ -51,5 +110,9 @@ global_logger = setup_logger()
|
||||
|
||||
|
||||
def get_logger(actor_name: str, trace_id: str = "") -> Logger:
    """Return a logger bound to ``actor_name`` and an optional ``trace_id``.

    Args:
        actor_name: Name recorded in the ``actor_name`` extra of every record.
        trace_id: Trace identifier to bind. When left empty, the value is
            expected to be filled in at log time by the patcher configured on
            the global logger; an explicit value takes precedence over it.

    Returns:
        The global logger with both extras bound.
    """
    bindings = {"actor_name": actor_name, "trace_id": trace_id}
    return global_logger.bind(**bindings)
|
||||
|
||||
@@ -0,0 +1,180 @@
|
||||
# Copyright 2026 zhaoxi826
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MCP 辅助模块:根据全局状态机中的配置创建 pydantic-ai MCPServer 实例。"""
|
||||
|
||||
from typing import Dict, List, Any, Sequence
|
||||
|
||||
from kilostar.utils.logger import get_logger
|
||||
|
||||
logger = get_logger("mcp_helper")
|
||||
|
||||
# 延迟导入 pydantic_ai.mcp,避免在 MCP 包未安装时崩溃
|
||||
try:
|
||||
from pydantic_ai.mcp import (
|
||||
MCPServerStdio,
|
||||
MCPServerSSE,
|
||||
MCPServerHTTP,
|
||||
)
|
||||
_MCP_AVAILABLE = True
|
||||
except ImportError:
|
||||
_MCP_AVAILABLE = False
|
||||
logger.warning("MCP package not installed. MCP servers will not be available.")
|
||||
|
||||
|
||||
def build_mcp_toolsets(configs: Dict[str, Dict[str, Any]]) -> List[Any]:
    """Create MCP server instances from a configuration mapping.

    Args:
        configs: ``{server_id: {"name": ..., "transport": ..., ...}}``.
            ``transport`` defaults to ``stdio``; ``sse`` and ``http`` are the
            other supported values.

    Returns:
        The server instances that could be built, in config order. Entries
        with an unsupported transport, or whose construction raises, are
        logged and skipped. Returns an empty list when the MCP package is not
        available.
    """
    if not _MCP_AVAILABLE:
        return []

    built: List[Any] = []
    for server_id, cfg in configs.items():
        try:
            transport = cfg.get("transport", "stdio")
            display_name = cfg.get("name", server_id)
            # Keyword arguments shared by every transport.
            shared = {"tool_prefix": cfg.get("tool_prefix"), "id": server_id}

            if transport == "stdio":
                server = MCPServerStdio(
                    command=cfg.get("command", ""),
                    args=cfg.get("args", []),
                    env=cfg.get("env"),
                    **shared,
                )
            elif transport in ("sse", "http"):
                server_cls = MCPServerSSE if transport == "sse" else MCPServerHTTP
                server = server_cls(url=cfg.get("url", ""), **shared)
            else:
                logger.warning(f"Unsupported MCP transport: {transport} for server {display_name}")
                continue

            built.append(server)
            logger.info(f"MCP server '{display_name}' ({transport}) registered as toolset")
        except Exception as exc:
            # One broken config must not prevent the other servers from loading.
            logger.error(f"Failed to build MCP server '{server_id}': {exc}")

    return built
|
||||
|
||||
|
||||
async def get_mcp_toolsets_from_gsm() -> List[Any]:
    """Fetch the MCP configuration snapshot and build toolsets from it.

    Returns:
        The result of ``build_mcp_toolsets`` on ``snapshot.mcp_servers``, or
        an empty list when MCP is unavailable or anything fails (the failure
        is logged).
    """
    if _MCP_AVAILABLE:
        try:
            # Imported lazily inside the function, as in the rest of this module.
            from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot

            snapshot = await fetch_snapshot()
            return build_mcp_toolsets(snapshot.mcp_servers)
        except Exception as exc:
            logger.error(f"Failed to load MCP configs from GSM: {exc}")
    return []
|
||||
|
||||
|
||||
async def get_all_toolsets_for_scope(scope: str) -> List[Any]:
    """Collect every toolset for ``scope``: local toolsets first, then MCP.

    The order is stable: whatever ``build_toolsets_for_scope`` returns for the
    snapshot comes first, followed by the MCP toolsets. A failure while
    loading the local toolsets is logged and does not prevent the MCP
    toolsets from being returned.

    Args:
        scope: Scope name passed through to ``build_toolsets_for_scope``.

    Returns:
        The combined list of toolsets.
    """
    collected: List[Any] = []
    try:
        from kilostar.core.global_state_machine.gsm_snapshot import (
            build_toolsets_for_scope,
            fetch_snapshot,
        )

        local = build_toolsets_for_scope(await fetch_snapshot(), scope)
        if local:
            collected.extend(local)
    except Exception as exc:
        logger.error(f"Failed to load local toolsets from GSM ({scope}): {exc}")

    collected.extend(await get_mcp_toolsets_from_gsm())
    return collected
|
||||
|
||||
|
||||
async def list_mcp_tools_for_configs(
    configs: Dict[str, Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Connect to each configured MCP server and list the tools it exposes.

    Each server is entered with ``async with server:`` and queried once via
    ``get_tools()``. A failing server does not affect the others: its entry
    keeps ``tools=[]`` and gains an ``error`` field.

    Args:
        configs: ``{server_id: {...}}`` as accepted by ``build_mcp_toolsets``.

    Returns:
        One dict per successfully built server with the keys ``server_id``,
        ``name``, ``transport``, ``tool_prefix``, ``tools`` and, on failure,
        ``error``. Empty when the MCP package is not available.
    """
    if not _MCP_AVAILABLE:
        return []

    def _tool_label(tool: Any) -> Any:
        # Prefer ``name``, then ``tool_name``, then the string form of the item.
        return getattr(tool, "name", None) or getattr(tool, "tool_name", str(tool))

    report: List[Dict[str, Any]] = []
    for server in build_mcp_toolsets(configs):
        server_id = getattr(server, "id", None)
        cfg = configs.get(server_id, {}) if server_id else {}
        display_name = cfg.get("name", server_id or "unknown")
        entry: Dict[str, Any] = {
            "server_id": server_id,
            "name": display_name,
            "transport": cfg.get("transport", "stdio"),
            "tool_prefix": cfg.get("tool_prefix"),
            "tools": [],
        }
        try:
            async with server:
                # NOTE(review): the signature and return shape of get_tools()
                # depend on the installed pydantic-ai version — confirm.
                tools = await server.get_tools()
                entry["tools"] = [_tool_label(tool) for tool in tools]
        except Exception as exc:
            entry["error"] = str(exc)
            logger.warning(f"MCP server '{display_name}' list_tools failed: {exc}")
        report.append(entry)
    return report
|
||||
|
||||
|
||||
async def list_mcp_tools_from_gsm() -> List[Dict[str, Any]]:
    """Fetch the MCP configuration snapshot and list each server's tools.

    Returns:
        The result of :func:`list_mcp_tools_for_configs` on
        ``snapshot.mcp_servers``, or an empty list when MCP is unavailable or
        anything fails (the failure is logged).
    """
    if _MCP_AVAILABLE:
        try:
            from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot

            snapshot = await fetch_snapshot()
            return await list_mcp_tools_for_configs(snapshot.mcp_servers)
        except Exception as exc:
            logger.error(f"Failed to list MCP tools from GSM: {exc}")
    return []
|
||||
@@ -0,0 +1,130 @@
|
||||
# Copyright 2026 zhaoxi826
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""请求/工作流上下文:基于 ``contextvars`` 的双层 ID 传播。
|
||||
|
||||
设计上把"一次用户请求"和"一次重型工作流"区分开:
|
||||
|
||||
- ``request_id``:会话域。所有进 API 的请求都要带,由 middleware 在入口生成或
|
||||
从 ``X-Request-Id`` 头继承。chat 这条同步链路靠它走完一生。
|
||||
- ``trace_id``:工作流域。只有 ``ConsciousnessNode`` 决定启动重型任务时才生成,
|
||||
挂到 ``KiloStarWorkflow`` 上。trace_id 应能追溯回触发它的 request_id(前者
|
||||
通过显式参数传入,后者从 contextvars 读取)。
|
||||
|
||||
为什么用 ``contextvars`` 而不是参数透传:
|
||||
|
||||
1. ``contextvars`` 在 ``asyncio`` 协程间天然继承,不会跨协程串味;
|
||||
2. ``loguru`` 的 ``patcher`` 钩子可以把它变成日志切面,业务代码不需要在每条
|
||||
``logger.info`` 上手动 ``.bind(trace_id=...)``;
|
||||
3. Ray 跨进程调用时 contextvars 不会自动传播 —— 这是有意为之,避免不同 actor
|
||||
间的上下文意外串联。跨 actor 边界要走显式参数,由接收方再 ``bind_*`` 一次。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from contextvars import ContextVar, Token
|
||||
from typing import Iterator, Optional
|
||||
|
||||
|
||||
_request_id_var: ContextVar[str] = ContextVar("kilostar_request_id", default="")
|
||||
_trace_id_var: ContextVar[str] = ContextVar("kilostar_trace_id", default="")
|
||||
|
||||
|
||||
def get_request_id() -> str:
    """Return the ``request_id`` bound in the current context, or ``""`` if none."""
    current = _request_id_var.get()
    return current
|
||||
|
||||
|
||||
def get_trace_id() -> str:
    """Return the ``trace_id`` bound in the current context, or ``""`` if none."""
    current = _trace_id_var.get()
    return current
|
||||
|
||||
|
||||
def bind_request_id(request_id: str) -> Token:
    """Bind ``request_id`` in the current context.

    Prefer the ``request_id_scope`` context manager, which restores the
    previous value automatically.

    Args:
        request_id: The identifier to bind.

    Returns:
        The ``Token`` from ``ContextVar.set``; pass it to ``reset_request_id``
        to restore the previous value. ``ContextVar.reset`` raises
        ``ValueError`` if the token was created in a different context.
    """
    token = _request_id_var.set(request_id)
    return token
|
||||
|
||||
|
||||
def bind_trace_id(trace_id: str) -> Token:
    """Bind ``trace_id`` in the current context.

    Args:
        trace_id: The identifier to bind.

    Returns:
        The ``Token`` from ``ContextVar.set``; pass it to ``reset_trace_id``
        to restore the previous value.
    """
    token = _trace_id_var.set(trace_id)
    return token
|
||||
|
||||
|
||||
def reset_request_id(token: Token) -> None:
    """Restore ``request_id`` to the value it had before ``token`` was created.

    Args:
        token: A token previously returned by ``bind_request_id``.
    """
    _request_id_var.reset(token)
|
||||
|
||||
|
||||
def reset_trace_id(token: Token) -> None:
    """Restore ``trace_id`` to the value it had before ``token`` was created.

    Args:
        token: A token previously returned by ``bind_trace_id``.
    """
    _trace_id_var.reset(token)
|
||||
|
||||
|
||||
@contextmanager
def request_id_scope(request_id: str) -> Iterator[str]:
    """Bind ``request_id`` for the duration of a ``with`` block.

    Args:
        request_id: The identifier to bind.

    Yields:
        The same ``request_id``. The previous value is restored on exit, even
        if the block raises.
    """
    previous = _request_id_var.set(request_id)
    try:
        yield request_id
    finally:
        _request_id_var.reset(previous)
|
||||
|
||||
|
||||
@contextmanager
def trace_id_scope(trace_id: str) -> Iterator[str]:
    """Bind ``trace_id`` for the duration of a ``with`` block.

    Args:
        trace_id: The identifier to bind.

    Yields:
        The same ``trace_id``. The previous value is restored on exit, even
        if the block raises.
    """
    previous = _trace_id_var.set(trace_id)
    try:
        yield trace_id
    finally:
        _trace_id_var.reset(previous)
|
||||
|
||||
|
||||
def new_request_id(prefix: str = "req") -> str:
    """Generate a fresh request id of the form ``<prefix>-<uuid4 hex>``.

    Args:
        prefix: Text placed before the dash.

    Returns:
        The prefix, a dash, and the 32-character hex form of a random UUID4.
    """
    unique_part = uuid.uuid4().hex
    return f"{prefix}-{unique_part}"
|
||||
|
||||
|
||||
def snapshot() -> dict[str, str]:
    """Capture the current context ids for explicit hand-off across actor/task boundaries.

    Returns:
        A dict with the keys ``request_id`` and ``trace_id`` holding the
        values currently bound in this context.
    """
    request_id = _request_id_var.get()
    trace_id = _trace_id_var.get()
    return {"request_id": request_id, "trace_id": trace_id}
|
||||
|
||||
|
||||
@contextmanager
def apply_snapshot(snap: Optional[dict[str, str]]) -> Iterator[None]:
    """Apply a snapshot from ``snapshot()`` to the current context for a ``with`` block.

    Only non-empty ``request_id`` / ``trace_id`` entries are bound. On exit
    the bound variables are reset in reverse order.

    Args:
        snap: Snapshot dict, or ``None`` / empty to make the block a no-op.

    Yields:
        ``None``.
    """
    if not snap:
        yield
        return

    bound: list[Token] = []
    for field, var in (("request_id", _request_id_var), ("trace_id", _trace_id_var)):
        value = snap.get(field)
        if value:
            bound.append(var.set(value))
    try:
        yield
    finally:
        while bound:
            token = bound.pop()
            try:
                token.var.reset(token)
            except (ValueError, LookupError):
                # Best effort: the token may no longer be valid for this
                # context; ignore rather than mask the block's own outcome.
                pass
|
||||
Reference in New Issue
Block a user