feat(standalone): 新增单机模式,KILOSTAR_MODE=standalone 时去掉 Ray 依赖

通过 StandaloneProxy 适配层让 .remote() 调用在单机模式下透明降级为
asyncio 协程调用,7 个 Actor 和 workflow task 均可在纯 asyncio 环境运行,
启动快、资源占用低。分布式模式行为完全不变。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-03 15:52:41 +00:00
parent 76a67e8237
commit 457d12834f
14 changed files with 390 additions and 108 deletions
+62 -21
View File
@@ -31,8 +31,8 @@ except Exception as e:
sys.exit(1)
import asyncio
import ray
from ray import serve
KILOSTAR_MODE = os.environ.get("KILOSTAR_MODE", "distributed")
from kilostar.worker_cluster import WorkerCluster
from kilostar.utils.banner import print_banner
@@ -42,10 +42,53 @@ from kilostar.core.global_workflow_manager import GlobalWorkflowManager
from kilostar.core.individual.regulatory_node import RegulatoryNode
from kilostar.core.individual.consciousness_node import ConsciousnessNode
from kilostar.core.individual.control_node import ControlNode
from kilostar.api import KiloStarGateway
if KILOSTAR_MODE != "standalone":
import ray
from ray import serve
from kilostar.api import KiloStarGateway
async def start_system():
async def start_standalone():
    """Start the system in standalone mode: plain asyncio, no Ray.

    Each component is created as an ordinary in-process object, its async
    initializer is awaited where one is called, and the instance is then
    registered under its name with ``register_standalone``. Once every
    component is registered, the FastAPI ``app`` is served with uvicorn and
    this coroutine stays pending until the server exits.
    """
    # Imported here so these modules are only loaded when standalone mode
    # is actually started.
    import uvicorn
    from kilostar.utils.ray_hook import register_standalone
    from kilostar.api import app

    # Single source of truth for the listen address, shared by the startup
    # message and the uvicorn configuration.
    host = "0.0.0.0"
    port = 8000

    # The database is brought up first; the state machine receives it.
    postgres_database = PostgresDatabase()
    await postgres_database.init_db()
    register_standalone("postgres_database", postgres_database)

    global_state_machine = GlobalStateMachine(postgres_database)
    await global_state_machine.init_state_machine()
    register_standalone("global_state_machine", global_state_machine)

    global_workflow_manager = GlobalWorkflowManager()
    await global_workflow_manager.init_manager()
    register_standalone("global_workflow_manager", global_workflow_manager)

    # Core nodes are only constructed and registered; no async initializer
    # is invoked on them here.
    regulatory_node = RegulatoryNode()
    register_standalone("regulatory_node", regulatory_node)

    consciousness_node = ConsciousnessNode()
    register_standalone("consciousness_node", consciousness_node)

    control_node = ControlNode()
    register_standalone("control_node", control_node)

    worker_cluster = WorkerCluster()
    await worker_cluster.start()
    register_standalone("worker_cluster", worker_cluster)

    print(f"✅ KiloStar 单机模式启动完成,监听 {host}:{port}")

    config = uvicorn.Config(app, host=host, port=port, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()
async def start_distributed():
"""分布式模式:使用 Ray Actor + Ray Serve。"""
env_vars = {
"POSTGRES_USER": os.getenv("POSTGRES_USER", "postgres"),
"POSTGRES_PASSWORD": os.getenv("POSTGRES_PASSWORD", ""),
@@ -63,8 +106,9 @@ async def start_system():
runtime_env={"env_vars": env_vars},
)
# 2. 启动数据库组件
postgres_database = PostgresDatabase.options(name="postgres_database").remote()
postgres_database = PostgresDatabase.options(
name="postgres_database"
).remote()
await postgres_database.init_db.remote()
global_state_machine = GlobalStateMachine.options(
@@ -73,55 +117,52 @@ async def start_system():
print("正在等待 GlobalStateMachine 初始化并加载注册表...")
try:
# 强制执行初始化方法并阻塞等待结果。
# 如果 __init__ 或 init_state_machine 中有任何报错,会立刻在这里抛出!
await global_state_machine.init_state_machine.remote()
print("GlobalStateMachine 初始化成功!")
except Exception as e:
print(f"\n[致命错误] GlobalStateMachine 启动失败!真实报错如下:\n{e}\n")
print(f"\n[致命错误] GlobalStateMachine 启动失败!\n{e}\n")
return
global_workflow_manager = GlobalWorkflowManager.options(
name="global_workflow_manager", namespace="kilostar", lifetime="detached"
).remote()
# 4. 启动核心节点
regulatory_node = RegulatoryNode.options(name="regulatory_node").remote()
consciousness_node = ConsciousnessNode.options(name="consciousness_node").remote()
control_node = ControlNode.options(name="control_node").remote()
RegulatoryNode.options(name="regulatory_node").remote()
ConsciousnessNode.options(name="consciousness_node").remote()
ControlNode.options(name="control_node").remote()
try:
WorkerCluster.options(
name="worker_cluster",
lifetime="detached", # 保证它在后台一直运行
name="worker_cluster", lifetime="detached"
).remote()
print("✅ WorkerCluster 已成功启动并注册!")
except ValueError:
print("WorkerCluster 已经存在。")
# 工作流以一次性 ray task 形式由 ConsciousnessNode 直接 fire,不再需要常驻 engine actor。
print("正在等待 GlobalWorkflowManager 初始化与恢复工作流...")
try:
await global_workflow_manager.init_manager.remote()
print("GlobalWorkflowManager 初始化成功!")
except Exception as e:
print(f"\n[致命错误] GlobalWorkflowManager 启动失败!真实报错如下:\n{e}\n")
print(f"\n[致命错误] GlobalWorkflowManager 启动失败!\n{e}\n")
return
# 6. 启动 FastAPI 网关 (使用 Ray Serve)
serve.start(http_options={"host": "0.0.0.0", "port": 8000})
serve.run(KiloStarGateway.bind())
# 挂起主线程以保持系统运行
while True:
await asyncio.sleep(3600)
def main():
    """Print the banner and run the system in the mode set by KILOSTAR_MODE.

    ``"standalone"`` runs ``start_standalone``; any other value runs
    ``start_distributed``. A ``KeyboardInterrupt`` ends the run with an
    exit message instead of a traceback.
    """
    print_banner()
    mode = KILOSTAR_MODE
    print(f"启动模式: {mode}")

    # Only the exact value "standalone" selects the Ray-free path; every
    # other value falls back to the distributed entry point.
    entry_point = start_standalone if mode == "standalone" else start_distributed
    try:
        asyncio.run(entry_point())
    except KeyboardInterrupt:
        print("系统已退出。")