feat(workflow): refactor iteration runtime to support independent subgraph per task
feat(app): support file metadata in chat messages and DSL app overwrite
- Extended chat message file objects with `name`, `size`, and `file_type` fields across app_chat_service and workflow_service
- Added the ability to overwrite existing app configurations via DSL import in app_dsl_service, including type validation and config update logic for AgentConfig, MultiAgentConfig, and WorkflowConfig
This commit is contained in:
@@ -28,86 +28,135 @@ class IterationRuntime:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
start_id: str,
|
||||
stream: bool,
|
||||
graph: CompiledStateGraph,
|
||||
node_id: str,
|
||||
config: dict[str, Any],
|
||||
state: WorkflowState,
|
||||
variable_pool: VariablePool,
|
||||
child_variable_pool: VariablePool,
|
||||
cycle_nodes: list,
|
||||
cycle_edges: list,
|
||||
):
|
||||
"""
|
||||
Initialize the iteration runtime.
|
||||
|
||||
Args:
|
||||
graph: Compiled workflow graph capable of async invocation.
|
||||
node_id: Unique identifier of the loop node.
|
||||
config: Dictionary containing iteration node configuration.
|
||||
state: Current workflow state at the point of iteration.
|
||||
stream: Whether to run in streaming mode. When True, each iteration
|
||||
uses graph.astream and emits cycle_item events in real time.
|
||||
When False, graph.ainvoke is used instead.
|
||||
node_id: The unique identifier of the iteration node in the workflow.
|
||||
Also used as the variable namespace for item/index inside
|
||||
the subgraph (e.g. {{ node_id.item }}).
|
||||
config: Raw configuration dict for the iteration node, parsed into
|
||||
IterationNodeConfig. Controls input/output variable selectors,
|
||||
parallel execution settings, and output flattening.
|
||||
state: The parent workflow state at the point the iteration node is
|
||||
entered. Each task receives a copy of this state as its
|
||||
starting point.
|
||||
variable_pool: The parent VariablePool containing all variables available
|
||||
at the time the iteration node executes, including sys.*,
|
||||
conv.*, and outputs from upstream nodes. Used as the source
|
||||
for deep-copying into each task's independent child pool.
|
||||
cycle_nodes: List of node config dicts belonging to this iteration's
|
||||
subgraph (i.e. nodes whose cycle field equals node_id).
|
||||
Passed to GraphBuilder when constructing each task's subgraph.
|
||||
cycle_edges: List of edge config dicts connecting nodes within the subgraph.
|
||||
Passed to GraphBuilder alongside cycle_nodes.
|
||||
"""
|
||||
self.start_id = start_id
|
||||
self.stream = stream
|
||||
self.graph = graph
|
||||
self.state = state
|
||||
self.node_id = node_id
|
||||
self.typed_config = IterationNodeConfig(**config)
|
||||
self.looping = True
|
||||
self.variable_pool = variable_pool
|
||||
self.child_variable_pool = child_variable_pool
|
||||
self.cycle_nodes = cycle_nodes
|
||||
self.cycle_edges = cycle_edges
|
||||
self.event_write = get_stream_writer()
|
||||
self.checkpoint = RunnableConfig(
|
||||
configurable={
|
||||
"thread_id": uuid.uuid4()
|
||||
}
|
||||
)
|
||||
|
||||
self.output_value = None
|
||||
self.result: list = []
|
||||
|
||||
async def _init_iteration_state(self, item, idx):
|
||||
def _build_child_graph(self) -> tuple[CompiledStateGraph, VariablePool, str]:
|
||||
"""
|
||||
Initialize a per-iteration copy of the workflow state.
|
||||
Build an independent compiled subgraph for a single iteration task.
|
||||
|
||||
Args:
|
||||
item: Current element from the input array for this iteration.
|
||||
idx: Index of the element in the input array.
|
||||
Each call creates a brand-new VariablePool by deep-copying the parent pool,
|
||||
then passes it to GraphBuilder. GraphBuilder binds this pool to every node's
|
||||
execution closure at build time, so the pool and the subgraph always reference
|
||||
the same object. This is the key design invariant: item/index written into the
|
||||
pool after build will be visible to all nodes inside the subgraph.
|
||||
|
||||
Returns:
|
||||
A copy of the workflow state with iteration-specific variables set.
|
||||
graph: The compiled LangGraph subgraph ready for invocation.
|
||||
child_pool: The VariablePool bound to this subgraph's node closures.
|
||||
Callers must write item/index into this pool before invoking
|
||||
the graph, and read output from it after invocation.
|
||||
start_node_id: The ID of the CYCLE_START node inside the subgraph,
|
||||
used to set the initial activation signal in workflow state.
|
||||
"""
|
||||
loopstate = WorkflowState(
|
||||
**self.state
|
||||
from app.core.workflow.engine.graph_builder import GraphBuilder
|
||||
child_pool = VariablePool()
|
||||
child_pool.copy(self.variable_pool)
|
||||
builder = GraphBuilder(
|
||||
{"nodes": self.cycle_nodes, "edges": self.cycle_edges},
|
||||
stream=self.stream,
|
||||
variable_pool=child_pool,
|
||||
cycle=self.node_id,
|
||||
)
|
||||
self.child_variable_pool.copy(self.variable_pool)
|
||||
await self.child_variable_pool.new(self.node_id, "item", item, VariableType.type_map(item), mut=True)
|
||||
await self.child_variable_pool.new(self.node_id, "index", item, VariableType.type_map(item), mut=True)
|
||||
loopstate["node_outputs"][self.node_id] = {
|
||||
"item": item,
|
||||
"index": idx,
|
||||
}
|
||||
graph = builder.build()
|
||||
return graph, builder.variable_pool, builder.start_node_id
|
||||
|
||||
# Prepare the per-iteration inputs: registers item/index in the given child pool
# and returns the state object that the caller passes to the subgraph.
async def _init_iteration_state(self, item, idx, child_pool: VariablePool, start_id: str):
|
||||
"""
|
||||
Initialize the workflow state for a single iteration.
|
||||
|
||||
Writes the current item and its index into child_pool under the iteration
|
||||
node's namespace (e.g. iteration_xxx.item, iteration_xxx.index), making them
|
||||
accessible to downstream nodes inside the subgraph via variable selectors.
|
||||
|
||||
Also prepares a copy of the parent workflow state with:
|
||||
- node_outputs[node_id] set to {item, index} so the state snapshot is consistent
|
||||
with the pool values.
|
||||
|
||||
- looping flag set to 1 (active) to signal the subgraph is inside a cycle.
|
||||
- activate[start_id] set to True to trigger the CYCLE_START node.
|
||||
|
||||
Args:
|
||||
item: The current element from the input array.
|
||||
idx: The zero-based index of this element in the input array.
|
||||
child_pool: The VariablePool bound to this iteration's subgraph.
|
||||
Must be the same object returned by _build_child_graph.
|
||||
start_id: The ID of the CYCLE_START node inside the subgraph.
|
||||
|
||||
Returns:
|
||||
A WorkflowState instance ready to be passed to graph.ainvoke or graph.astream.
|
||||
"""
|
||||
# NOTE(review): WorkflowState(**self.state) looks like a top-level copy only; if so,
# the nested "node_outputs" and "activate" containers written below are still the
# parent's objects and are shared by every task started from this runtime,
# including tasks run concurrently. Confirm whether a deeper copy is needed.
loopstate = WorkflowState(**self.state)
|
||||
# Register the current element and its position under this node's namespace in the
# task's own pool; both are created with mut=True.
await child_pool.new(self.node_id, "item", item, VariableType.type_map(item), mut=True)
|
||||
await child_pool.new(self.node_id, "index", idx, VariableType.type_map(idx), mut=True)
|
||||
# Mirror the same item/index pair into the state snapshot.
loopstate["node_outputs"][self.node_id] = {"item": item, "index": idx}
|
||||
loopstate["looping"] = 1
|
||||
# NOTE(review): the next assignment uses self.start_id, while the docstring and the
# assignment after it use the start_id argument. This looks like a leftover from
# the version that had a single shared graph; confirm it should be removed.
loopstate["activate"][self.start_id] = True
|
||||
loopstate["activate"][start_id] = True
|
||||
return loopstate
|
||||
|
||||
def merge_conv_vars(self):
|
||||
self.variable_pool.variables["conv"].update(
|
||||
self.child_variable_pool.variables["conv"]
|
||||
)
|
||||
# Merge the "conv" entries of one task's child pool back into the parent pool.
# The parent's "conv" mapping is updated in place via .update(); presumably both
# are dicts, so keys present in both end up with the child's value. TODO confirm.
def _merge_conv_vars(self, child_pool: VariablePool):
|
||||
self.variable_pool.variables["conv"].update(child_pool.variables["conv"])
|
||||
|
||||
async def run_task(self, item, idx):
|
||||
"""
|
||||
Execute a single iteration asynchronously.
|
||||
Each task builds its own subgraph so the variable pool closure is independent.
|
||||
|
||||
Args:
|
||||
item: The input element for this iteration.
|
||||
idx: The index of this iteration.
|
||||
Returns:
|
||||
Tuple of (idx, output, result, child_pool, stopped)
|
||||
"""
|
||||
graph, child_pool, start_id = self._build_child_graph()
|
||||
checkpoint = RunnableConfig(configurable={"thread_id": uuid.uuid4()})
|
||||
init_state = await self._init_iteration_state(item, idx, child_pool, start_id)
|
||||
|
||||
if self.stream:
|
||||
async for event in self.graph.astream(
|
||||
await self._init_iteration_state(item, idx),
|
||||
async for event in graph.astream(
|
||||
init_state,
|
||||
stream_mode=["debug"],
|
||||
config=self.checkpoint
|
||||
config=checkpoint
|
||||
):
|
||||
if isinstance(event, tuple) and len(event) == 2:
|
||||
mode, data = event
|
||||
@@ -117,7 +166,6 @@ class IterationRuntime:
|
||||
event_type = data.get("type")
|
||||
payload = data.get("payload", {})
|
||||
node_name = payload.get("name")
|
||||
|
||||
if node_name and node_name.startswith("nop"):
|
||||
continue
|
||||
if event_type == "task_result":
|
||||
@@ -140,17 +188,13 @@ class IterationRuntime:
|
||||
"token_usage": result.get("node_outputs", {}).get(node_name, {}).get("token_usage")
|
||||
}
|
||||
})
|
||||
result = self.graph.get_state(config=self.checkpoint).values
|
||||
result = graph.get_state(config=checkpoint).values
|
||||
else:
|
||||
result = await self.graph.ainvoke(await self._init_iteration_state(item, idx))
|
||||
output = self.child_variable_pool.get_value(self.output_value)
|
||||
if isinstance(output, list) and self.typed_config.flatten:
|
||||
self.result.extend(output)
|
||||
else:
|
||||
self.result.append(output)
|
||||
if result["looping"] == 2:
|
||||
self.looping = False
|
||||
return result
|
||||
result = await graph.ainvoke(init_state)
|
||||
|
||||
output = child_pool.get_value(self.output_value)
|
||||
stopped = result["looping"] == 2
|
||||
return idx, output, result, child_pool, stopped
|
||||
|
||||
def _create_iteration_tasks(self, array_obj, idx):
|
||||
"""
|
||||
@@ -196,16 +240,32 @@ class IterationRuntime:
|
||||
tasks = self._create_iteration_tasks(array_obj, idx)
|
||||
logger.info(f"Iteration node {self.node_id}: running, concurrency {len(tasks)}")
|
||||
idx += self.typed_config.parallel_count
|
||||
child_state.extend(await asyncio.gather(*tasks))
|
||||
self.merge_conv_vars()
|
||||
batch = await asyncio.gather(*tasks)
|
||||
# Sort by idx to preserve order, then collect results
|
||||
batch_sorted = sorted(batch, key=lambda x: x[0])
|
||||
for _, output, result, child_pool, stopped in batch_sorted:
|
||||
if isinstance(output, list) and self.typed_config.flatten:
|
||||
self.result.extend(output)
|
||||
else:
|
||||
self.result.append(output)
|
||||
child_state.append(result)
|
||||
if stopped:
|
||||
self.looping = False
|
||||
self._merge_conv_vars(batch_sorted[-1][3])
|
||||
else:
|
||||
# Execute iterations sequentially
|
||||
while idx < len(array_obj) and self.looping:
|
||||
logger.info(f"Iteration node {self.node_id}: running")
|
||||
item = array_obj[idx]
|
||||
result = await self.run_task(item, idx)
|
||||
self.merge_conv_vars()
|
||||
_, output, result, child_pool, stopped = await self.run_task(item, idx)
|
||||
if isinstance(output, list) and self.typed_config.flatten:
|
||||
self.result.extend(output)
|
||||
else:
|
||||
self.result.append(output)
|
||||
self._merge_conv_vars(child_pool)
|
||||
child_state.append(result)
|
||||
if stopped:
|
||||
self.looping = False
|
||||
idx += 1
|
||||
logger.info(f"Iteration node {self.node_id}: execution completed")
|
||||
return {
|
||||
|
||||
@@ -123,7 +123,7 @@ class CycleGraphNode(BaseNode):
|
||||
|
||||
return cycle_nodes, cycle_edges
|
||||
|
||||
def build_graph(self):
|
||||
def build_graph(self, variable_pool: VariablePool):
|
||||
"""
|
||||
Build and compile the internal subgraph for this cycle node.
|
||||
|
||||
@@ -135,6 +135,7 @@ class CycleGraphNode(BaseNode):
|
||||
from app.core.workflow.engine.graph_builder import GraphBuilder
|
||||
|
||||
self.child_variable_pool = VariablePool()
|
||||
self.child_variable_pool.copy(variable_pool)
|
||||
builder = GraphBuilder(
|
||||
{
|
||||
"nodes": self.cycle_nodes,
|
||||
@@ -165,8 +166,8 @@ class CycleGraphNode(BaseNode):
|
||||
Raises:
|
||||
RuntimeError: If the node type is unsupported.
|
||||
"""
|
||||
self.build_graph()
|
||||
if self.node_type == NodeType.LOOP:
|
||||
self.build_graph(variable_pool)
|
||||
return await LoopRuntime(
|
||||
start_id=self.start_node_id,
|
||||
stream=False,
|
||||
@@ -179,20 +180,19 @@ class CycleGraphNode(BaseNode):
|
||||
).run()
|
||||
if self.node_type == NodeType.ITERATION:
|
||||
return await IterationRuntime(
|
||||
start_id=self.start_node_id,
|
||||
stream=False,
|
||||
graph=self.graph,
|
||||
node_id=self.node_id,
|
||||
config=self.config,
|
||||
state=state,
|
||||
variable_pool=variable_pool,
|
||||
child_variable_pool=self.child_variable_pool
|
||||
cycle_nodes=self.cycle_nodes,
|
||||
cycle_edges=self.cycle_edges,
|
||||
).run()
|
||||
raise RuntimeError("Unknown cycle node type")
|
||||
|
||||
async def execute_stream(self, state: WorkflowState, variable_pool: VariablePool):
|
||||
self.build_graph()
|
||||
if self.node_type == NodeType.LOOP:
|
||||
self.build_graph(variable_pool)
|
||||
yield {
|
||||
"__final__": True,
|
||||
"result": await LoopRuntime(
|
||||
@@ -211,14 +211,13 @@ class CycleGraphNode(BaseNode):
|
||||
yield {
|
||||
"__final__": True,
|
||||
"result": await IterationRuntime(
|
||||
start_id=self.start_node_id,
|
||||
stream=True,
|
||||
graph=self.graph,
|
||||
node_id=self.node_id,
|
||||
config=self.config,
|
||||
state=state,
|
||||
variable_pool=variable_pool,
|
||||
child_variable_pool=self.child_variable_pool
|
||||
cycle_nodes=self.cycle_nodes,
|
||||
cycle_edges=self.cycle_edges,
|
||||
).run()
|
||||
}
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user