Refactor Workflow and Chat Architecture (#68)

* refactor: overhaul workflow and chat architecture

- Separate Chat and Workflow API endpoints and database models
- Use JSONB to store workflow execution context in Postgres
- Convert workflow engine to use pydantic-ai execution graphs inside a Ray task
- Update frontend React components to support standalone workflow creation
- Remove obsolete and broken workflow runner tests

Co-authored-by: zhaoxi826 <198742034+zhaoxi826@users.noreply.github.com>

* refactor: overhaul workflow and chat architecture

- Separate Chat and Workflow API endpoints and database models
- Use JSONB to store workflow execution context in Postgres
- Convert workflow engine to use pydantic-ai execution graphs inside a Ray task
- Update frontend React components to support standalone workflow creation
- Remove obsolete and broken workflow runner tests

Co-authored-by: zhaoxi826 <198742034+zhaoxi826@users.noreply.github.com>

* refactor: overhaul workflow and chat architecture

- Separate Chat and Workflow API endpoints and database models
- Use JSONB to store workflow execution context in Postgres
- Convert workflow engine to use pydantic-ai execution graphs inside a Ray task
- Update frontend React components to support standalone workflow creation
- Move workflow_engine inside workflow package to keep core root clean
- Remove obsolete and broken workflow runner tests

Co-authored-by: zhaoxi826 <198742034+zhaoxi826@users.noreply.github.com>

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
Co-authored-by: zhaoxi826 <198742034+zhaoxi826@users.noreply.github.com>
This commit is contained in:
2026-05-12 15:47:17 +08:00
committed by GitHub
parent ee9bbbf676
commit ff1ede47a0
33 changed files with 995 additions and 412 deletions
+9 -2
View File
@@ -16,13 +16,17 @@ from pydantic import BaseModel, Field
from typing import Literal, Optional
from enum import Enum
class LogicGate(BaseModel):
"""
LogicGate 类。
跳转逻辑,标记该步骤运行成功或失败的动作
"""
if_fail: str = Field(..., description="失败跳转目标,如 'jump_to_step_1'")
if_pass: Literal["continue", "exit"] = Field(default="continue", description="成功后的动作")
if_pass: Literal["continue", "exit"] = Field(
default="continue", description="成功后的动作"
)
class WorkflowMetadata(BaseModel):
@@ -30,6 +34,7 @@ class WorkflowMetadata(BaseModel):
WorkflowMetadata类
workflow的元数据类,保存与用户有关的数据
"""
user_id: Optional[str] = Field(default=None, description="创建工作流的用户的ulid")
command: Optional[str] = Field(default=None, description="创建工作流的原始命令")
@@ -44,6 +49,7 @@ class WorkStepStatus(str, Enum):
COMPLETED: 完成
FAILED = 失败
"""
PENDING = "pending"
WORKING = "working"
HANGUP = "hang_up"
@@ -61,9 +67,10 @@ class WorkflowStatus(str, Enum):
CREATING = 创建中
PENDING = 等待中
"""
RUNNING = "running"
HANGUP = "hang_up"
COMPLETED = "completed"
FAILED = "failed"
CREATING = "creating"
PENDING = "pending"
PENDING = "pending"
+31 -9
View File
@@ -18,19 +18,32 @@ from .model import LogicGate, WorkflowMetadata, WorkStepStatus, WorkflowStatus
from ulid import ULID
from datetime import datetime
class WorkflowContext(BaseModel):
"""
WorkflowContext 类
作为workflow运行时的数据部分,使得数据和计算分离
"""
trace_id: str = Field(description="工作流的trace_id")
workflow_status: Dict[str, WorkflowStatus] = Field(default_factory=lambda: {datetime.now().strftime("%Y-%m-%d %H:%M:%S"):WorkflowStatus.CREATING} ,description="工作流状态")
workflow_status: Dict[str, WorkflowStatus] = Field(
default_factory=lambda: {
datetime.now().strftime("%Y-%m-%d %H:%M:%S"): WorkflowStatus.CREATING
},
description="工作流状态",
)
blackboard: Dict[str, Any] = Field(description="大模型输出的存储区")
work_step_status: Optional[Dict[int, tuple[str, WorkStepStatus]]] = Field(default= None,description="工作流运行状态")
work_step_status: Optional[Dict[int, tuple[str, WorkStepStatus]]] = Field(
default=None, description="工作流运行状态"
)
"""work_step_status:字典,键为整个工作流的运行步骤,值为元组,包含两个字段:
1.字符串,更新时间的字符串;2.WorkflowStatus枚举类,当前步骤的运行情况"""
workflow_pointer: Optional[int] = Field(description="工作流指针,指向具体的workflow位置")
workflow_log: List[Dict[int, tuple[str, WorkflowStatus, str]]] = Field(default=[], description="工作流运行日志")
workflow_pointer: Optional[int] = Field(
description="工作流指针,指向具体的workflow位置"
)
workflow_log: List[Dict[int, tuple[str, WorkflowStatus, str]]] = Field(
default=[], description="工作流运行日志"
)
"""workflow_log:一个列表,内部元素为一个字典,键为步骤序号,值为一个元组,包含三个字段:
1.字符串,更新时间的字符串;2.WorkflowStatus枚举类,当前步骤的运行情况;3.字符串,当前步骤运行完后的输出总结或失败原因"""
@@ -40,12 +53,18 @@ class WorkflowStep(BaseModel):
WorkflowStep 类
workflow每一个步骤的模型,为workflow的最小执行单位
"""
step: int = Field(..., gt=0, description="步骤序号,严格自增")
name: str = Field(..., description="步骤名称")
action: str = Field(..., description="执行的原子动作")
inputs: Optional[Union[str, List[str]]] = Field(default=None, description="前置依赖输出")
inputs: Optional[Union[str, List[str]]] = Field(
default=None, description="前置依赖输出"
)
outputs: Optional[str] = Field(default=None, description="当前步骤产出物变量名")
agent_id: Optional[str] = Field(default=None,description="分配给 skill_individual 的 Skill Individual 真实 agent_id,不可用名称代替",)
agent_id: Optional[str] = Field(
default=None,
description="分配给 skill_individual 的 Skill Individual 真实 agent_id,不可用名称代替",
)
logic_gate: Optional[LogicGate] = Field(default=None, description="逻辑跳转控制")
@@ -54,9 +73,12 @@ class KiloStarWorkflow(BaseModel):
KiloStarWorkflow 类
kilostar的workflow核心类,由consciousness_node创建
"""
trace_id: str = Field(default_factory=lambda: str(ULID()), description="系统自动生成的追溯ID")
trace_id: str = Field(
default_factory=lambda: str(ULID()), description="系统自动生成的追溯ID"
)
version: str = Field(default="v1.0", description="系统协议版本号")
#-------------------
# -------------------
title: str = Field(..., description="工作流标题")
work_link: List[WorkflowStep] = Field(..., description="工作链")
workflow_metadata: WorkflowMetadata
@@ -86,4 +108,4 @@ class KiloStarWorkflow(BaseModel):
if "越界" in str(e):
raise e
raise ValueError(f"LogicGate 格式错误: {s.logic_gate.if_fail}")
return self
return self
@@ -0,0 +1,177 @@
# Copyright 2026 zhaoxi826
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import ray
from kilostar.core.work.workflow.workflow import KiloStarWorkflow
from typing import Dict, Any, List
@ray.remote
def run_workflow_task(workflow_data: dict, trace_id: str):
from kilostar.utils.ray_hook import ray_actor_hook
from kilostar.core.work.workflow.model import WorkflowStatus
import datetime
from pydantic import BaseModel
# State passed through graph nodes
class WorkflowGraphState(BaseModel):
trace_id: str
blackboard: Dict[str, Any]
work_link: List[Dict[str, Any]]
current_step_index: int = 0
status: str = "running"
logs: List[Dict[str, Any]] = []
async def save_context(state: WorkflowGraphState):
postgres_database = ray_actor_hook("postgres_database").postgres_database
await postgres_database.upsert_workflow_context.remote(
state.trace_id,
workflow_pointer=state.current_step_index,
blackboard=state.blackboard,
work_link=state.work_link,
workflow_status={str(datetime.datetime.now()): state.status},
workflow_log=state.logs,
)
await postgres_database.update_workflow_status.remote(
state.trace_id, state.status
)
global_workflow_manager = ray_actor_hook(
"global_workflow_manager"
).global_workflow_manager
await global_workflow_manager.put_received.remote(
state.trace_id, f"执行步骤 {state.current_step_index + 1}..."
)
async def execute_step(state: WorkflowGraphState):
"""执行单一工作流节点逻辑"""
if state.current_step_index >= len(state.work_link):
state.status = WorkflowStatus.COMPLETED
return state
step = state.work_link[state.current_step_index]
step.get("node", "")
action = step.get("action", "")
# 记录开始状态
state.logs.append(
{
str(state.current_step_index): [
str(datetime.datetime.now()),
"working",
f"开始执行: {step.get('name', '未命名步骤')}",
]
}
)
await save_context(state)
try:
# TODO: 实际对接不同节点执行逻辑 (例如: control_node, agent 技能)
# 这里是简化版,向控制节点或指定 skill 发送指令
# ... 模拟执行逻辑 ...
await asyncio.sleep(2)
# 记录结果
state.blackboard[
step.get("outputs", f"step_{state.current_step_index}_result")
] = "Success execution of " + action
state.logs[-1][str(state.current_step_index)] = [
str(datetime.datetime.now()),
"completed",
f"成功: {action}",
]
# 判断逻辑跳转
logic_gate = step.get("logic_gate")
if logic_gate and logic_gate.get("if_pass") == "exit":
state.status = WorkflowStatus.COMPLETED
else:
state.current_step_index += 1
except Exception as e:
state.logs[-1][str(state.current_step_index)] = [
str(datetime.datetime.now()),
"failed",
str(e),
]
state.status = WorkflowStatus.FAILED
logic_gate = step.get("logic_gate")
if logic_gate and logic_gate.get("if_fail"):
fail_target = logic_gate.get("if_fail")
if "jump_to_step_" in fail_target:
target_step = int(fail_target.split("_")[-1]) - 1
state.current_step_index = target_step
state.status = WorkflowStatus.RUNNING
await save_context(state)
return state
async def _run():
postgres_database = ray_actor_hook("postgres_database").postgres_database
await postgres_database.update_workflow_status.remote(
trace_id, WorkflowStatus.RUNNING
)
state = WorkflowGraphState(
trace_id=trace_id,
blackboard={},
work_link=workflow_data.get("work_link", []),
)
await save_context(state)
# 简单的图执行驱动 (模拟 pydantic-ai.graph.run 行为,直至 Graph 库正式稳定)
while state.status == WorkflowStatus.RUNNING and state.current_step_index < len(
state.work_link
):
state = await execute_step(state)
await postgres_database.update_workflow_status.remote(trace_id, state.status)
global_workflow_manager = ray_actor_hook(
"global_workflow_manager"
).global_workflow_manager
msg = (
"工作流执行完成!"
if state.status == WorkflowStatus.COMPLETED
else "工作流执行失败。"
)
await global_workflow_manager.put_received.remote(trace_id, msg)
asyncio.run(_run())
@ray.remote
class WorkflowRunningEngine:
def __init__(
self, consciousness_node=None, control_node=None, regulatory_node=None
):
self.consciousness_node = consciousness_node
self.control_node = control_node
self.regulatory_node = regulatory_node
self.events_queue = asyncio.Queue()
async def put_event(self, event):
await self.events_queue.put(event)
async def run(self):
"""引擎循环提取事件"""
while True:
await self.events_queue.get()
await asyncio.sleep(1)
async def execute_workflow(self, workflow: KiloStarWorkflow):
# 这个方法可以由意识节点调用来提交一个完整的运行任务
workflow_dict = workflow.model_dump()
trace_id = workflow.trace_id
run_workflow_task.remote(workflow_dict, trace_id)