feat(standalone): 新增单机模式,KILOSTAR_MODE=standalone 时去掉 Ray 依赖

通过 StandaloneProxy 适配层让 .remote() 调用在单机模式下透明降级为
asyncio 协程调用,7 个 Actor 和 workflow task 均可在纯 asyncio 环境运行,
启动快、资源占用低。分布式模式行为完全不变。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-03 15:52:41 +00:00
parent 76a67e8237
commit 457d12834f
14 changed files with 390 additions and 108 deletions
@@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import ray
import os
from typing import Any, Dict, List, Optional, Tuple
from kilostar.utils.standalone_proxy import actor_class
_STANDALONE = os.environ.get("KILOSTAR_MODE", "distributed") == "standalone"
if not _STANDALONE:
import ray
from kilostar.core.global_state_machine.individual_manager import (
GlobalIndividualManager,
@@ -25,7 +30,7 @@ from kilostar.core.global_state_machine.gsm_snapshot import GSMSnapshot
from kilostar.core.postgres_database import PostgresDatabase
@ray.remote
@actor_class
class GlobalStateMachine:
"""全局状态机 Actor,统一持有 Provider/Tool/Skill/Individual/MCP/CustomToolset 注册表。
@@ -44,10 +49,9 @@ class GlobalStateMachine:
self._tool_configs: Dict[str, Dict[str, Any]] = {}
self._custom_toolsets: Dict[str, Dict[str, Any]] = {}
# 配置快照与版本号:每次写入 → version+=1 → ray.put 新 snapshot
# 读端通过 current_config_ref 拿 ref 后用 ray.get 直读,绕开 actor 单线程瓶颈
# 配置快照与版本号:每次写入 → version+=1 → 发布新 snapshot
self._config_version: int = 0
self._current_ref: Optional[ray.ObjectRef] = None
self._current_ref = None
self.postgres_database = postgres_database
@@ -113,19 +117,19 @@ class GlobalStateMachine:
)
def _publish_snapshot(self) -> None:
"""版本号 +1 并把当前状态 put 到 Ray Object Store。
旧 ref 会因为引用计数归零而进入回收队列;正在执行的 task 已经把 ref
拷贝到了自己的进程,dec 不会影响它们的读取。
"""
"""版本号 +1 并发布当前状态快照。"""
self._config_version += 1
self._current_ref = ray.put(self._build_snapshot())
snapshot = self._build_snapshot()
if _STANDALONE:
self._current_ref = snapshot
else:
self._current_ref = ray.put(snapshot)
async def current_config_ref(self) -> Tuple[int, ray.ObjectRef]:
"""返回 ``(version, ObjectRef)``,调用方拿了 ref 后用 ``ray.get`` 自取
async def current_config_ref(self) -> Tuple[int, Any]:
"""返回 ``(version, ObjectRef 或 snapshot)``
**不要**直接返回 snapshot 对象 —— 那样会走 actor RPC 反序列化,丧失
object store 的共享内存优势。返回 ref 才能让调用方在自己进程里 ray.get
分布式模式返回 ObjectRef,调用方用 ``ray.get`` 自取;
单机模式直接返回 snapshot 对象
"""
if self._current_ref is None:
self._publish_snapshot()
@@ -30,10 +30,13 @@ GSM 仍然是 source of truth + 写入串行化器,但读路径解耦:
from __future__ import annotations
import asyncio
import os
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple
import ray
_STANDALONE = os.environ.get("KILOSTAR_MODE", "distributed") == "standalone"
if not _STANDALONE:
import ray
from kilostar.core.global_state_machine.model_provider.base_provider import Provider
from kilostar.utils.logger import get_logger
@@ -113,14 +116,19 @@ async def fetch_snapshot(
):
return _local_cache["snapshot"]
version, ref = await gsm_actor.current_config_ref.remote()
snapshot = ray.get(ref)
version, ref_or_snapshot = await gsm_actor.current_config_ref.remote()
if _STANDALONE:
snapshot = ref_or_snapshot
else:
snapshot = ray.get(ref_or_snapshot)
_local_cache["version"] = version
_local_cache["snapshot"] = snapshot
return snapshot
version, ref = await gsm_actor.current_config_ref.remote()
return ray.get(ref)
version, ref_or_snapshot = await gsm_actor.current_config_ref.remote()
if _STANDALONE:
return ref_or_snapshot
return ray.get(ref_or_snapshot)
def reset_local_cache() -> None: