# Copyright 2026 zhaoxi826 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """系统级端点:健康探针 + 集群/系统信息。 健康探针路径刻意保持在根(``/health/live`` / ``/health/ready``),不加 ``/api/v1`` 前缀——这是 k8s liveness/readiness probe 的惯例配置,加前缀会 让运维侧探针 URL 变复杂。系统信息类端点则走 ``/api/v1/system`` 前缀。 """ from __future__ import annotations from fastapi import APIRouter, Depends from fastapi.responses import JSONResponse from kilostar.utils.ray_hook import ray_actor_hook from kilostar.utils.access import Accessor, TokenData from kilostar.utils.check_user.role_check import RoleChecker from kilostar.core.postgres_database.model import UserAuthority from kilostar.utils.config_loader import ( get_workflow_config, save_workflow_config, WorkflowConfig, ) system_router = APIRouter(tags=["system"]) @system_router.get("/health/live", include_in_schema=True) async def liveness(): """存活探针:进程能响应即视为存活。""" return {"status": "alive"} @system_router.get("/health/ready", include_in_schema=True) async def readiness(): """就绪探针:检查关键依赖(Postgres / GSM Actor)是否可达。""" checks = {"postgres": False, "global_state_machine": False} try: postgres_database = ray_actor_hook("postgres_database").postgres_database await postgres_database.ping.remote() checks["postgres"] = True except Exception: pass try: gsm = ray_actor_hook("global_state_machine").global_state_machine await gsm.get_skill_list.remote() checks["global_state_machine"] = True except Exception: pass all_ok = all(checks.values()) return JSONResponse( status_code=200 if all_ok else 503, content={"status": "ready" if all_ok else "not_ready", "checks": checks}, ) @system_router.get("/config/workflow") async def get_workflow_config_endpoint( _: TokenData = Depends(Accessor.get_current_user), ): config = get_workflow_config() return {"config": config.model_dump()} @system_router.put("/config/workflow") async def update_workflow_config_endpoint( update: WorkflowConfig, _: TokenData = Depends(RoleChecker(allowed_roles=UserAuthority.USER)), ): save_workflow_config(update) return {"status": "ok", "config": update.model_dump()} @system_router.get("/logs") async def query_system_logs( trace_id: str | None = None, event_type: str | None = None, level: str | None = None, limit: int = 100, offset: int = 0, _: TokenData = Depends(Accessor.get_current_user), ): from kilostar.utils.ray_hook import ray_actor_hook pg = await ray_actor_hook.get_actor("postgres_database") logs = await pg.query_event_logs.remote( trace_id=trace_id, event_type=event_type, level=level, limit=limit, offset=offset, ) return {"logs": logs, "count": len(logs)}