feat: 人设模板系统、节点调度标签、pydantic-settings收敛、错误处理增强

新增persona_template表和CRUD API,BaseIndividualModel增加node_affinity和template_origin_id字段,
WorkerCluster支持多集群Ray资源调度,环境变量收敛到pydantic-settings统一校验,
数据库异常转换为结构化BusinessError/RetryableError,系统节点支持custom_system_prompt。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-04 06:07:46 +00:00
parent f3a92a793e
commit 8f1398c591
23 changed files with 582 additions and 48 deletions
+4 -3
View File
@@ -21,6 +21,7 @@ from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from kilostar.utils.standalone_proxy import _STANDALONE
from kilostar.utils.settings import get_settings
if not _STANDALONE:
from ray import serve
@@ -51,13 +52,13 @@ _api_logger = get_logger("api")
def _get_locale(request: Request) -> str | None:
"""从请求头解析首选语言,供异常 handler 使用。"""
return request.headers.get("accept-language") or None
app = FastAPI()
_cors_origins_env = os.environ.get("KILOSTAR_CORS_ORIGINS", "")
_is_dev = os.environ.get("KILOSTAR_ENV", "production").lower() in ("dev", "development")
_settings = get_settings()
_cors_origins_env = _settings.kilostar_cors_origins
_is_dev = _settings.security.kilostar_env.lower() in ("dev", "development")
if not _cors_origins_env and _is_dev:
_cors_origins_env = "*"
elif not _cors_origins_env:
+4 -2
View File
@@ -266,8 +266,10 @@ async def send_message(
if not user_id and not group_id:
raise ValueError("必须指定 user_id 或 group_id 之一")
base = base_url or os.environ.get("ONEBOT_HTTP_URL", "http://127.0.0.1:5700")
token = access_token or os.environ.get("ONEBOT_ACCESS_TOKEN")
from kilostar.utils.settings import get_settings
_ob = get_settings().onebot
base = base_url or _ob.onebot_http_url
token = access_token or _ob.onebot_access_token or None
if group_id:
action = "send_group_msg"
+7
View File
@@ -106,3 +106,10 @@ async def query_system_logs(
offset=offset,
)
return {"logs": logs, "count": len(logs)}
@system_router.get("/api/v1/system/node-labels")
async def get_node_labels(
_: TokenData = Depends(Accessor.get_current_user),
):
return {"node_labels": ["cpu", "core", "gpu"]}
@@ -48,8 +48,9 @@ class ConsciousnessNode:
tools_list: list[str] = None,
toolsets=None,
locale: str | None = None,
custom_system_prompt: str | None = None,
) -> None:
system_prompt: str = agent_prompt("consciousness_node", locale=locale)
system_prompt: str = agent_prompt("consciousness_node", locale=locale, custom_system_prompt=custom_system_prompt)
output_type = Union[ForregulatoryNode, ForWorkflow, ForWorkflowEngine]
from kilostar.utils.get_tool import load_tools_from_list
from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot
@@ -47,6 +47,7 @@ class ControlNode:
tools_list: list[str] = None,
toolsets=None,
locale: str | None = None,
custom_system_prompt: str | None = None,
) -> None:
"""
create_agent方法,将agent对象装配到Control的属性内
@@ -58,11 +59,12 @@ class ControlNode:
provider_title: 供应商名
model_id: 模型id
locale: 语言代码(zh/en),控制system prompt语言
custom_system_prompt: 管理员自定义追加提示词(可选)
Returns:
无返回
"""
system_prompt: str = agent_prompt("control_node", locale=locale)
system_prompt: str = agent_prompt("control_node", locale=locale, custom_system_prompt=custom_system_prompt)
output_type = ForWorkflow
from kilostar.utils.get_tool import load_tools_from_list
from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot
@@ -49,6 +49,7 @@ class RegulatoryNode:
tools_list: list[str] = None,
toolsets=None,
locale: str | None = None,
custom_system_prompt: str | None = None,
) -> None:
"""
create_agent方法,将agent对象装配到regulatoryNode的属性内
@@ -60,10 +61,11 @@ class RegulatoryNode:
model_id: 模型id
tools_list: 工具列表
locale: 语言代码(zh/en),控制system prompt语言
custom_system_prompt: 管理员自定义追加提示词(可选)
Returns:
无返回
"""
system_prompt: str = agent_prompt("regulatory_node", locale=locale)
system_prompt: str = agent_prompt("regulatory_node", locale=locale, custom_system_prompt=custom_system_prompt)
output_type = Union[MessageResponse]
from kilostar.utils.get_tool import load_tools_from_list
from kilostar.core.global_state_machine.gsm_snapshot import fetch_snapshot
@@ -14,7 +14,7 @@
from sqlalchemy.exc import IntegrityError, OperationalError
from pydantic import ValidationError
from kilostar.utils.error import UserNotExistError
from kilostar.utils.error import UserNotExistError, BusinessError, RetryableError
from kilostar.utils.logger import get_logger
@@ -31,14 +31,16 @@ def database_exception(func):
logger.error(f"对象校验失败:{e}")
raise e
except IntegrityError as e:
logger.error(f"数据库完整性错误 (如重复记录): {e}")
raise e
logger.warning(f"数据库完整性冲突: {e.orig}")
err = BusinessError(str(e.orig))
err.http_status = 409
err.code = "conflict"
raise err from e
except OperationalError as e:
logger.error(f"数据库连接异常: {e}")
raise e
except UserNotExistError as e:
logger.error(f"更改密码失败,用户不存在:{e}")
raise e
raise RetryableError(f"数据库暂时不可用,请稍后重试: {e}") from e
except (UserNotExistError, BusinessError):
raise
except Exception as e:
logger.exception(f"未预期的数据库错误: {e}")
raise e
@@ -34,6 +34,7 @@ from kilostar.core.postgres_database.model.mcp_server import MCPServerModel
from kilostar.core.postgres_database.model.tool_config import ToolConfigModel
from kilostar.core.postgres_database.model.custom_toolset import CustomToolsetModel
from kilostar.core.postgres_database.model.system_event_log import SystemEventLog
from kilostar.core.postgres_database.model.persona_template import PersonaTemplate
# 兼容旧代码的别名
Provider = ProviderModel
@@ -63,5 +64,6 @@ __all__ = [
"ToolConfigModel",
"CustomToolsetModel",
"SystemEventLog",
"PersonaTemplate",
"AgentType",
]
@@ -43,6 +43,12 @@ class BaseIndividualModel(BaseDataModel):
owner_id: Mapped[str] = mapped_column(String(64), index=True)
agent_type: Mapped[str] = mapped_column(String(32))
node_affinity: Mapped[str] = mapped_column(String(32), nullable=False, default="cpu")
template_origin_id: Mapped[Optional[str]] = mapped_column(
ForeignKey("persona_template.template_id", ondelete="SET NULL"),
nullable=True,
index=True,
)
__mapper_args__ = {"polymorphic_on": "agent_type", "polymorphic_identity": "base"}
@@ -0,0 +1,26 @@
from typing import List, Optional
from sqlalchemy import String, Text, Boolean, text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column
from .base import BaseDataModel
class PersonaTemplate(BaseDataModel):
__tablename__ = "persona_template"
template_id: Mapped[str] = mapped_column(String(64), primary_key=True)
name: Mapped[str] = mapped_column(String(100), nullable=False, index=True)
description: Mapped[str] = mapped_column(Text, nullable=False, default="")
system_prompt: Mapped[str] = mapped_column(Text, nullable=False, default="")
agent_type: Mapped[str] = mapped_column(String(32), nullable=False, default="ordinary")
provider_title: Mapped[Optional[str]] = mapped_column(String(50))
model_id: Mapped[Optional[str]] = mapped_column(String(100))
tools: Mapped[Optional[List[str]]] = mapped_column(
JSONB, default=list, server_default=text("'[]'::jsonb")
)
tags: Mapped[Optional[List[str]]] = mapped_column(
JSONB, default=list, server_default=text("'[]'::jsonb")
)
is_builtin: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
owner_id: Mapped[Optional[str]] = mapped_column(String(64), index=True)
@@ -0,0 +1,73 @@
from sqlalchemy import select
from ulid import ULID
from kilostar.core.postgres_database.model.persona_template import PersonaTemplate
from kilostar.core.postgres_database.database_exception import database_exception
class PersonaTemplateDatabase:
def __init__(self, async_session_maker):
self.async_session_maker = async_session_maker
@database_exception
async def add_template(self, **kwargs) -> PersonaTemplate:
async with self.async_session_maker() as session:
tpl = PersonaTemplate(template_id=str(ULID()), **kwargs)
session.add(tpl)
await session.commit()
await session.refresh(tpl)
return tpl
@database_exception
async def get_template(self, template_id: str):
async with self.async_session_maker() as session:
result = await session.execute(
select(PersonaTemplate).where(PersonaTemplate.template_id == template_id)
)
return result.scalar_one_or_none()
@database_exception
async def list_templates(self, owner_id: str = None, include_builtin: bool = True):
async with self.async_session_maker() as session:
stmt = select(PersonaTemplate)
if owner_id and include_builtin:
from sqlalchemy import or_
stmt = stmt.where(
or_(PersonaTemplate.owner_id == owner_id, PersonaTemplate.is_builtin == True)
)
elif owner_id:
stmt = stmt.where(PersonaTemplate.owner_id == owner_id)
elif include_builtin:
stmt = stmt.where(PersonaTemplate.is_builtin == True)
result = await session.execute(stmt)
return list(result.scalars().all())
@database_exception
async def update_template(self, template_id: str, **kwargs):
async with self.async_session_maker() as session:
result = await session.execute(
select(PersonaTemplate).where(PersonaTemplate.template_id == template_id)
)
tpl = result.scalar_one_or_none()
if not tpl:
return None
for k, v in kwargs.items():
if v is not None:
setattr(tpl, k, v)
session.add(tpl)
await session.commit()
await session.refresh(tpl)
return tpl
@database_exception
async def delete_template(self, template_id: str) -> bool:
async with self.async_session_maker() as session:
result = await session.execute(
select(PersonaTemplate).where(PersonaTemplate.template_id == template_id)
)
tpl = result.scalar_one_or_none()
if not tpl:
return False
await session.delete(tpl)
await session.commit()
return True
+2 -6
View File
@@ -42,12 +42,8 @@ class TokenData(BaseModel):
def _get_secret_key() -> str:
"""读取并校验 SECRET_KEY 环境变量。
校验在首次实际使用 JWT 时进行,避免在模块导入阶段抛错,
从而把"环境约束""模块加载"解耦。
"""
key = os.getenv("SECRET_KEY")
from kilostar.utils.settings import get_settings
key = get_settings().security.secret_key
if not key or key in _INSECURE_SECRETS:
raise RuntimeError(
"未提供有效的 SECRET_KEY 或使用了不安全的默认值,请设置一个高熵的随机字符串"
+15 -11
View File
@@ -25,10 +25,11 @@
from __future__ import annotations
import os
from typing import Dict
_DEFAULT_LOCALE: str = os.getenv("KILOSTAR_LANG", "zh")
from kilostar.utils.settings import get_settings
_DEFAULT_LOCALE: str = get_settings().kilostar_lang
# ─── Agent System Prompts ──────────────────────────────────────────────────
@@ -163,16 +164,16 @@ def t(key: str, locale: str | None = None, accept_language: str | None = None, *
return text.format(**kwargs) if kwargs else text
def agent_prompt(agent_name: str, locale: str | None = None, accept_language: str | None = None) -> str:
def agent_prompt(
agent_name: str,
locale: str | None = None,
accept_language: str | None = None,
custom_system_prompt: str | None = None,
) -> str:
"""获取指定 Agent 的 system prompt,并追加语言指令。
Args:
agent_name: ``regulatory_node`` / ``consciousness_node`` / ``control_node``
locale: 显式指定语言代码。
accept_language: ``Accept-Language`` 头内容。
Returns:
完整 system prompt(含 "请使用 XX 语言回复" 的追加指令)。
若 ``custom_system_prompt`` 不为空,追加在默认 prompt 和语言指令之后,
使管理员自定义内容能够覆盖/补充默认行为,同时保留角色定义。
"""
loc = _resolve_locale(locale, accept_language)
prompt = _PROMPTS.get(agent_name, {}).get(loc) or _PROMPTS.get(agent_name, {}).get(_DEFAULT_LOCALE, "")
@@ -180,4 +181,7 @@ def agent_prompt(agent_name: str, locale: str | None = None, accept_language: st
"zh": "\n\n【重要】请始终使用简体中文进行思考和回复。",
"en": "\n\n[Important] Please always think and reply in English.",
}.get(loc, "")
return prompt + lang_instruction
result = prompt + lang_instruction
if custom_system_prompt and custom_system_prompt.strip():
result += f"\n\n{custom_system_prompt.strip()}"
return result
+6 -10
View File
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from loguru import logger
@@ -23,15 +22,11 @@ from kilostar.utils.request_context import get_request_id, get_trace_id
def _is_json_mode() -> bool:
"""根据环境变量决定是否启用 JSON 结构化日志。
支持开关:``KILOSTAR_LOG_FORMAT=json`` 或 ``KILOSTAR_LOG_JSON=1/true``。
"""
fmt = os.environ.get("KILOSTAR_LOG_FORMAT", "").lower()
if fmt == "json":
from kilostar.utils.settings import get_settings
s = get_settings().log
if s.kilostar_log_format.lower() == "json":
return True
flag = os.environ.get("KILOSTAR_LOG_JSON", "").lower()
return flag in {"1", "true", "yes", "on"}
return s.kilostar_log_json.lower() in {"1", "true", "yes", "on"}
def _ctx_patcher(record):
@@ -58,7 +53,8 @@ def setup_logger() -> Logger:
"""
logger.remove()
log_level = os.environ.get("KILOSTAR_LOG_LEVEL", "DEBUG").upper()
from kilostar.utils.settings import get_settings
log_level = get_settings().log.kilostar_log_level.upper()
if _is_json_mode():
logger.configure(
+15
View File
@@ -125,3 +125,18 @@ def ray_actor_hook(*actor_names: str, timeout: float = 0.0, interval: float = 0.
handle = _get_cached_actor_handle(actor_name)
setattr(actor_list, actor_name, handle)
return actor_list
def get_worker_cluster(affinity: str = "cpu"):
"""按 node_affinity 标签取对应的 WorkerCluster actor 句柄。
单机模式统一返回唯一的 worker_cluster 实例。
分布式模式按 affinity 路由到 worker_cluster_cpu / _core / _gpu。
未知标签降级到 cpu。
"""
if _STANDALONE:
return _standalone_registry.get("worker_cluster")
_valid = {"cpu", "core", "gpu"}
node_type = affinity if affinity in _valid else "cpu"
return _get_cached_actor_handle(f"worker_cluster_{node_type}")
+55
View File
@@ -0,0 +1,55 @@
"""KiloStar 集中式环境变量管理。
所有散落在各模块的 os.getenv/os.environ 收敛到此处,
通过 pydantic-settings 统一校验、类型转换、默认值管理。
"""
from __future__ import annotations
from functools import lru_cache
from pydantic import Field
from pydantic_settings import BaseSettings
class DatabaseSettings(BaseSettings):
postgres_user: str = "postgres"
postgres_password: str = ""
postgres_host: str = "db"
postgres_port: int = 5432
postgres_db: str = "postgres"
class SecuritySettings(BaseSettings):
secret_key: str = ""
kilostar_secret_key: str = ""
kilostar_env: str = "production"
class LogSettings(BaseSettings):
kilostar_log_level: str = "DEBUG"
kilostar_log_format: str = ""
kilostar_log_json: str = ""
class OnebotSettings(BaseSettings):
onebot_access_token: str = ""
onebot_http_url: str = "http://127.0.0.1:5700"
class AppSettings(BaseSettings):
kilostar_mode: str = "distributed"
kilostar_lang: str = "zh"
kilostar_cors_origins: str = ""
db: DatabaseSettings = Field(default_factory=DatabaseSettings)
security: SecuritySettings = Field(default_factory=SecuritySettings)
log: LogSettings = Field(default_factory=LogSettings)
onebot: OnebotSettings = Field(default_factory=OnebotSettings)
model_config = {"env_nested_delimiter": "__"}
@lru_cache(maxsize=1)
def get_settings() -> AppSettings:
return AppSettings()
+8 -1
View File
@@ -36,10 +36,15 @@ class WorkerCluster:
"""
工作集群 Actor:管理和调度所有的 worker_individual
设计理念:按需加载,内存 LRU 淘汰,避免 Actor 爆炸
分布式模式下每种 node_type 对应一个独立实例,Ray 根据自定义资源
``kilostar_node_cpu`` / ``kilostar_node_core`` / ``kilostar_node_gpu``
将 Actor 调度到声明了对应资源的节点上。
"""
def __init__(self, max_capacity: int = 200, num_runners: int = 10):
def __init__(self, max_capacity: int = 200, num_runners: int = 10, node_type: str = "cpu"):
self.max_capacity = max_capacity
self.node_type = node_type
self._active_workers: OrderedDict[str, BaseIndividual] = OrderedDict()
self.status = "running"
self.task_queue = None
@@ -76,6 +81,8 @@ class WorkerCluster:
raise ValueError(f"无法唤醒 Agent {agent_id}:数据库中不存在该档案")
worker_type = agent_config.get("type", "ordinary")
node_affinity = agent_config.get("node_affinity", "cpu")
self.logger.debug(f"[WorkerCluster] 唤醒 Agent {agent_id}, node_affinity={node_affinity}")
if worker_type == "skill":
worker = SkillIndividual(agent_config)
elif worker_type == "special":