feat(standalone): 新增单机模式,KILOSTAR_MODE=standalone 时去掉 Ray 依赖
通过 StandaloneProxy 适配层让 .remote() 调用在单机模式下透明降级为 asyncio 协程调用,7 个 Actor 和 workflow task 均可在纯 asyncio 环境运行,启动快、资源占用低。分布式模式行为完全不变。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -31,8 +31,8 @@ except Exception as e:
|
||||
sys.exit(1)
|
||||
|
||||
import asyncio
|
||||
import ray
|
||||
from ray import serve
|
||||
|
||||
KILOSTAR_MODE = os.environ.get("KILOSTAR_MODE", "distributed")
|
||||
|
||||
from kilostar.worker_cluster import WorkerCluster
|
||||
from kilostar.utils.banner import print_banner
|
||||
@@ -42,10 +42,53 @@ from kilostar.core.global_workflow_manager import GlobalWorkflowManager
|
||||
from kilostar.core.individual.regulatory_node import RegulatoryNode
|
||||
from kilostar.core.individual.consciousness_node import ConsciousnessNode
|
||||
from kilostar.core.individual.control_node import ControlNode
|
||||
from kilostar.api import KiloStarGateway
|
||||
|
||||
if KILOSTAR_MODE != "standalone":
|
||||
import ray
|
||||
from ray import serve
|
||||
from kilostar.api import KiloStarGateway
|
||||
|
||||
|
||||
async def start_system():
|
||||
async def start_standalone(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Start KiloStar in standalone mode: pure asyncio, no Ray dependency.

    Each core component is created as a plain in-process object and handed
    to ``register_standalone`` under its component name. Afterwards the
    ``app`` object exported by ``kilostar.api`` is served with uvicorn, and
    this coroutine does not return until that server stops.

    Args:
        host: Interface uvicorn binds to. Defaults to all interfaces.
        port: TCP port uvicorn listens on. Defaults to 8000.
    """
    # Function-scope imports: only needed when standalone mode is selected.
    import uvicorn
    from kilostar.utils.ray_hook import register_standalone
    from kilostar.api import app

    # The database comes first because the state machine is constructed
    # from it.
    postgres_database = PostgresDatabase()
    await postgres_database.init_db()
    register_standalone("postgres_database", postgres_database)

    global_state_machine = GlobalStateMachine(postgres_database)
    await global_state_machine.init_state_machine()
    register_standalone("global_state_machine", global_state_machine)

    global_workflow_manager = GlobalWorkflowManager()
    await global_workflow_manager.init_manager()
    register_standalone("global_workflow_manager", global_workflow_manager)

    # The three nodes are only constructed and registered; no separate
    # async initialisation call is made for them here.
    regulatory_node = RegulatoryNode()
    register_standalone("regulatory_node", regulatory_node)

    consciousness_node = ConsciousnessNode()
    register_standalone("consciousness_node", consciousness_node)

    control_node = ControlNode()
    register_standalone("control_node", control_node)

    worker_cluster = WorkerCluster()
    await worker_cluster.start()
    register_standalone("worker_cluster", worker_cluster)

    # Printed before serve() because serve() is awaited until shutdown.
    # The address shown is built from the same values passed to uvicorn.
    print(f"✅ KiloStar 单机模式启动完成,监听 {host}:{port}")

    config = uvicorn.Config(app, host=host, port=port, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()
|
||||
|
||||
|
||||
async def start_distributed():
|
||||
"""分布式模式:使用 Ray Actor + Ray Serve。"""
|
||||
env_vars = {
|
||||
"POSTGRES_USER": os.getenv("POSTGRES_USER", "postgres"),
|
||||
"POSTGRES_PASSWORD": os.getenv("POSTGRES_PASSWORD", ""),
|
||||
@@ -63,8 +106,9 @@ async def start_system():
|
||||
runtime_env={"env_vars": env_vars},
|
||||
)
|
||||
|
||||
# 2. 启动数据库组件
|
||||
postgres_database = PostgresDatabase.options(name="postgres_database").remote()
|
||||
postgres_database = PostgresDatabase.options(
|
||||
name="postgres_database"
|
||||
).remote()
|
||||
await postgres_database.init_db.remote()
|
||||
|
||||
global_state_machine = GlobalStateMachine.options(
|
||||
@@ -73,55 +117,52 @@ async def start_system():
|
||||
|
||||
print("正在等待 GlobalStateMachine 初始化并加载注册表...")
|
||||
try:
|
||||
# 强制执行初始化方法并阻塞等待结果。
|
||||
# 如果 __init__ 或 init_state_machine 中有任何报错,会立刻在这里抛出!
|
||||
await global_state_machine.init_state_machine.remote()
|
||||
print("GlobalStateMachine 初始化成功!")
|
||||
except Exception as e:
|
||||
print(f"\n[致命错误] GlobalStateMachine 启动失败!真实报错如下:\n{e}\n")
|
||||
print(f"\n[致命错误] GlobalStateMachine 启动失败!\n{e}\n")
|
||||
return
|
||||
|
||||
global_workflow_manager = GlobalWorkflowManager.options(
|
||||
name="global_workflow_manager", namespace="kilostar", lifetime="detached"
|
||||
).remote()
|
||||
|
||||
# 4. 启动核心节点
|
||||
regulatory_node = RegulatoryNode.options(name="regulatory_node").remote()
|
||||
consciousness_node = ConsciousnessNode.options(name="consciousness_node").remote()
|
||||
control_node = ControlNode.options(name="control_node").remote()
|
||||
RegulatoryNode.options(name="regulatory_node").remote()
|
||||
ConsciousnessNode.options(name="consciousness_node").remote()
|
||||
ControlNode.options(name="control_node").remote()
|
||||
|
||||
try:
|
||||
WorkerCluster.options(
|
||||
name="worker_cluster",
|
||||
lifetime="detached", # 保证它在后台一直运行
|
||||
name="worker_cluster", lifetime="detached"
|
||||
).remote()
|
||||
print("✅ WorkerCluster 已成功启动并注册!")
|
||||
except ValueError:
|
||||
print("WorkerCluster 已经存在。")
|
||||
|
||||
# 工作流以一次性 ray task 形式由 ConsciousnessNode 直接 fire,不再需要常驻 engine actor。
|
||||
|
||||
print("正在等待 GlobalWorkflowManager 初始化与恢复工作流...")
|
||||
try:
|
||||
await global_workflow_manager.init_manager.remote()
|
||||
print("GlobalWorkflowManager 初始化成功!")
|
||||
except Exception as e:
|
||||
print(f"\n[致命错误] GlobalWorkflowManager 启动失败!真实报错如下:\n{e}\n")
|
||||
print(f"\n[致命错误] GlobalWorkflowManager 启动失败!\n{e}\n")
|
||||
return
|
||||
|
||||
# 6. 启动 FastAPI 网关 (使用 Ray Serve)
|
||||
serve.start(http_options={"host": "0.0.0.0", "port": 8000})
|
||||
serve.run(KiloStarGateway.bind())
|
||||
|
||||
# 挂起主线程以保持系统运行
|
||||
while True:
|
||||
await asyncio.sleep(3600)
|
||||
|
||||
|
||||
def main() -> None:
    """Print the banner, then run the startup coroutine for the chosen mode.

    The mode is read from the module-level ``KILOSTAR_MODE``. The value
    ``"standalone"`` runs ``start_standalone``; any other value runs
    ``start_distributed``. A ``KeyboardInterrupt`` (Ctrl-C) is caught so
    the process ends with a short message instead of a traceback.
    """
    print_banner()
    mode = KILOSTAR_MODE
    print(f"启动模式: {mode}")

    # Exactly one event loop is started. The earlier unconditional
    # asyncio.run(start_system()) call is gone: that entry point was
    # replaced by the two mode-specific coroutines.
    entry_point = start_standalone if mode == "standalone" else start_distributed
    try:
        asyncio.run(entry_point())
    except KeyboardInterrupt:
        print("系统已退出。")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user