# KiloStar launcher.
#
# Order matters in this file: the SECRET_KEY check and the application-config
# validation run before the remaining project modules are imported, so a
# misconfigured process terminates before any of them is loaded.
import os
import secrets
import sys

# Placeholder values that must never be accepted as a real secret.
_INSECURE_SECRETS = {"secret", "114514", "changethiskey12345"}

_secret_key = os.getenv("SECRET_KEY")
_is_dev = os.getenv("KILOSTAR_ENV", "production").lower() in ("dev", "development")

if not _secret_key or _secret_key in _INSECURE_SECRETS:
    if _is_dev:
        # Development only: generate a throwaway key and export it through the
        # environment so later os.getenv("SECRET_KEY") lookups see it.
        _secret_key = secrets.token_urlsafe(32)
        os.environ["SECRET_KEY"] = _secret_key
        print(
            "⚠️ [开发模式] 未提供有效的 SECRET_KEY,已生成临时随机密钥(重启后失效)。"
        )
    else:
        # Outside development a missing or known-insecure key is fatal.
        print(
            "❌ [致命错误] 未提供有效的 SECRET_KEY 或使用了不安全的默认值。\n"
            " 请设置环境变量 SECRET_KEY 为一个高熵的随机字符串。\n"
            " 可使用: python -c \"import secrets; print(secrets.token_urlsafe(32))\"\n"
            " 若为开发环境,请设置 KILOSTAR_ENV=dev 以允许自动生成临时密钥。"
        )
        sys.exit(1)

from kilostar.utils.config_loader import get_app_config

# Validate the application configuration up front; any failure aborts startup.
try:
    _app_cfg = get_app_config()
except Exception as e:
    print(f"❌ [致命错误] 配置文件校验失败:{e}")
    sys.exit(1)

import asyncio

# "standalone" selects the asyncio-only path; every other value is treated as
# distributed (Ray) mode.
KILOSTAR_MODE = os.environ.get("KILOSTAR_MODE", "distributed")

from kilostar.worker_cluster import WorkerCluster
from kilostar.utils.banner import print_banner
from kilostar.core.postgres_database import PostgresDatabase
from kilostar.core.global_state_machine import GlobalStateMachine
from kilostar.core.global_workflow_manager import GlobalWorkflowManager
from kilostar.core.individual.regulatory_node import RegulatoryNode
from kilostar.core.individual.consciousness_node import ConsciousnessNode

# Ray and the Serve gateway are imported only when they will actually be used.
if KILOSTAR_MODE != "standalone":
    import ray
    from ray import serve
    from kilostar.api import KiloStarGateway


async def start_standalone():
    """Standalone mode: pure asyncio, with no dependency on Ray.

    Creates every component in-process, registers each one under its name via
    ``register_standalone``, then serves the API app with uvicorn on
    0.0.0.0:8000 until the server stops.
    """
    import uvicorn
    from kilostar.utils.ray_hook import register_standalone
    from kilostar.api import app

    postgres_database = PostgresDatabase()
    await postgres_database.init_db()
    register_standalone("postgres_database", postgres_database)

    from kilostar.utils.standalone_proxy import StandaloneProxy

    # The state machine is handed a proxy around the database object rather
    # than the object itself.
    postgres_proxy = StandaloneProxy(postgres_database)
    global_state_machine = GlobalStateMachine(postgres_proxy)
    await global_state_machine.init_state_machine()
    register_standalone("global_state_machine", global_state_machine)

    global_workflow_manager = GlobalWorkflowManager()
    await global_workflow_manager.init_manager()
    register_standalone("global_workflow_manager", global_workflow_manager)

    regulatory_node = RegulatoryNode()
    register_standalone("regulatory_node", regulatory_node)

    consciousness_node = ConsciousnessNode()
    register_standalone("consciousness_node", consciousness_node)

    worker_cluster = WorkerCluster(node_type="cpu")
    await worker_cluster.start()
    register_standalone("worker_cluster", worker_cluster)
    # In standalone mode the three labels share the same instance.
    register_standalone("worker_cluster_cpu", worker_cluster)
    register_standalone("worker_cluster_core", worker_cluster)
    register_standalone("worker_cluster_gpu", worker_cluster)

    print("✅ KiloStar 单机模式启动完成,监听 0.0.0.0:8000")
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()


async def start_distributed():
    """Distributed mode: Ray actors plus Ray Serve.

    Initialises Ray, creates the named actors, starts one WorkerCluster per
    node type, exposes the gateway through Ray Serve on 0.0.0.0:8000 and then
    sleeps forever to keep the driver process alive.

    Returns early, after printing an error, when GlobalStateMachine or
    GlobalWorkflowManager fails to initialise.
    """
    # Environment forwarded to Ray workers through runtime_env.
    env_vars = {
        "POSTGRES_USER": os.getenv("POSTGRES_USER", "postgres"),
        "POSTGRES_PASSWORD": os.getenv("POSTGRES_PASSWORD", ""),
        "POSTGRES_HOST": os.getenv("POSTGRES_HOST", "db"),
        "POSTGRES_PORT": os.getenv("POSTGRES_PORT", "5432"),
        "POSTGRES_DB": os.getenv("POSTGRES_DB", "postgres"),
        "SECRET_KEY": os.getenv("SECRET_KEY"),
    }

    ray.init(
        ignore_reinit_error=True,
        namespace="kilostar",
        dashboard_host="0.0.0.0",
        dashboard_port=8265,
        runtime_env={"env_vars": env_vars},
        # Custom resources matched by the WorkerCluster actors created below.
        resources={
            "kilostar_node_cpu": 1,
            "kilostar_node_core": 1,
            "kilostar_node_gpu": 1,
        },
    )

    postgres_database = PostgresDatabase.options(
        name="postgres_database"
    ).remote()
    await postgres_database.init_db.remote()

    global_state_machine = GlobalStateMachine.options(
        name="global_state_machine", namespace="kilostar", lifetime="detached"
    ).remote(postgres_database)

    print("正在等待 GlobalStateMachine 初始化并加载注册表...")
    try:
        await global_state_machine.init_state_machine.remote()
        print("GlobalStateMachine 初始化成功!")
    except Exception as e:
        print(f"\n[致命错误] GlobalStateMachine 启动失败!\n{e}\n")
        # NOTE(review): returning here lets the process exit with status 0
        # after a fatal error; consider a non-zero exit code.
        return

    global_workflow_manager = GlobalWorkflowManager.options(
        name="global_workflow_manager", namespace="kilostar", lifetime="detached"
    ).remote()

    # Keep the handles bound for the lifetime of this coroutine: these actors
    # are not detached, and Ray garbage-collects a non-detached actor once no
    # handle to it remains, even when the actor is named.
    regulatory_node = RegulatoryNode.options(name="regulatory_node").remote()
    consciousness_node = ConsciousnessNode.options(
        name="consciousness_node"
    ).remote()

    try:
        for node_type in ("cpu", "core", "gpu"):
            actor_name = f"worker_cluster_{node_type}"
            resource_key = f"kilostar_node_{node_type}"
            try:
                WorkerCluster.options(
                    name=actor_name,
                    lifetime="detached",
                    resources={resource_key: 1},
                ).remote(node_type=node_type)
                print(f"✅ WorkerCluster[{node_type}] 已成功启动并注册!")
            except ValueError:
                # Treated as "an actor with this name already exists".
                print(f"WorkerCluster[{node_type}] 已经存在。")
    except Exception as e:
        # Best effort: a WorkerCluster failure is reported but does not abort
        # startup.
        print(f"WorkerCluster 启动失败: {e}")

    print("正在等待 GlobalWorkflowManager 初始化与恢复工作流...")
    try:
        await global_workflow_manager.init_manager.remote()
        print("GlobalWorkflowManager 初始化成功!")
    except Exception as e:
        print(f"\n[致命错误] GlobalWorkflowManager 启动失败!\n{e}\n")
        # NOTE(review): same as above, the process still exits with status 0.
        return

    serve.start(http_options={"host": "0.0.0.0", "port": 8000})
    serve.run(KiloStarGateway.bind())

    # Park the driver so the actor handles held above stay alive.
    while True:
        await asyncio.sleep(3600)


def main():
    """Print the banner and run the coroutine selected by KILOSTAR_MODE."""
    print_banner()
    mode = KILOSTAR_MODE
    print(f"启动模式: {mode}")
    try:
        if mode == "standalone":
            asyncio.run(start_standalone())
        else:
            asyncio.run(start_distributed())
    except KeyboardInterrupt:
        print("系统已退出。")


if __name__ == "__main__":
    main()