Merge pull request #1012 from SuanmoSuanyangTechnology/fix/wxy-032
feat(workflow): augment logging queries and ameliorate error handling
This commit is contained in:
@@ -16,6 +16,7 @@ from app.core.workflow.engine.runtime_schema import ExecutionContext
|
||||
from app.core.workflow.engine.state_manager import WorkflowStateManager
|
||||
from app.core.workflow.engine.stream_output_coordinator import StreamOutputCoordinator
|
||||
from app.core.workflow.engine.variable_pool import VariablePool, VariablePoolInitializer
|
||||
from app.core.workflow.nodes.base_node import NodeExecutionError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -326,10 +327,43 @@ class WorkflowExecutor:
|
||||
|
||||
logger.error(f"Workflow execution failed: execution_id={self.execution_context.execution_id}, error={e}",
|
||||
exc_info=True)
|
||||
|
||||
# 1) 尝试从 checkpoint 回补已成功节点的 node_outputs
|
||||
recovered: dict[str, Any] = {}
|
||||
try:
|
||||
if self.graph is not None:
|
||||
recovered = self.graph.get_state(
|
||||
self.execution_context.checkpoint_config
|
||||
).values or {}
|
||||
except Exception as recover_err:
|
||||
logger.warning(
|
||||
f"Recover state on failure failed: {recover_err}, "
|
||||
f"execution_id={self.execution_context.execution_id}"
|
||||
)
|
||||
|
||||
if result is None:
|
||||
result = {"error": str(e)}
|
||||
result = dict(recovered) if recovered else {}
|
||||
else:
|
||||
result["error"] = str(e)
|
||||
# 已有 result 与 recovered 合并,node_outputs 深度合并
|
||||
for k, v in recovered.items():
|
||||
if k == "node_outputs" and isinstance(v, dict):
|
||||
existing = result.get("node_outputs") or {}
|
||||
result["node_outputs"] = {**v, **existing}
|
||||
else:
|
||||
result.setdefault(k, v)
|
||||
|
||||
# 2) 如果是节点抛出的 NodeExecutionError,把失败节点的 node_output 注入 node_outputs
|
||||
failed_node_id: str | None = None
|
||||
if isinstance(e, NodeExecutionError):
|
||||
failed_node_id = e.node_id
|
||||
node_outputs = result.setdefault("node_outputs", {})
|
||||
# 不覆盖已有(理论上不会有),保底写入失败节点记录
|
||||
node_outputs.setdefault(e.node_id, e.node_output)
|
||||
|
||||
result["error"] = str(e)
|
||||
if failed_node_id:
|
||||
result["error_node"] = failed_node_id
|
||||
|
||||
yield {
|
||||
"event": "workflow_end",
|
||||
"data": self.result_builder.build_final_output(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
@@ -22,6 +23,20 @@ from app.services.multimodal_service import MultimodalService
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NodeExecutionError(Exception):
    """Raised when a workflow node fails to execute.

    Carries the failed node's complete ``node_output`` so that the executor
    can inject it into ``node_outputs`` as a fallback, keeping a record of
    the failed node in the persisted workflow output.

    Attributes:
        node_id: Identifier of the node that failed.
        node_output: The output record built for the failed node.
        error_message: The raw error message, without the
            ``"Node <id> execution failed: "`` prefix used by ``str(exc)``.
    """

    def __init__(self, node_id: str, node_output: dict[str, Any], error_message: str):
        # Only the formatted message goes into ``args`` so that ``str(exc)``
        # stays identical to the plain ``Exception`` raised previously.
        super().__init__(f"Node {node_id} execution failed: {error_message}")
        self.node_id = node_id
        self.node_output = node_output
        self.error_message = error_message

    def __reduce__(self):
        """Rebuild the exception from its three constructor arguments.

        The default ``BaseException.__reduce__`` re-creates the instance as
        ``cls(*self.args)``; ``args`` holds only the formatted message, so
        pickling or copying would fail with a ``TypeError`` for the missing
        ``node_output`` / ``error_message`` arguments.
        """
        return (
            type(self),
            (self.node_id, self.node_output, self.error_message),
            # Preserve any extra instance attributes set after construction.
            self.__dict__,
        )
|
||||
|
||||
|
||||
class BaseNode(ABC):
|
||||
"""Base class for workflow nodes.
|
||||
|
||||
@@ -396,6 +411,8 @@ class BaseNode(ABC):
|
||||
"elapsed_time": elapsed_time,
|
||||
"token_usage": token_usage,
|
||||
"error": None,
|
||||
# 单调递增序号,用于日志按执行顺序排序(JSONB 不保证 key 顺序)
|
||||
"execution_order": time.monotonic_ns(),
|
||||
**self._extract_extra_fields(business_result),
|
||||
}
|
||||
final_output = {
|
||||
@@ -444,7 +461,9 @@ class BaseNode(ABC):
|
||||
"output": None,
|
||||
"elapsed_time": elapsed_time,
|
||||
"token_usage": None,
|
||||
"error": error_message
|
||||
"error": error_message,
|
||||
# 单调递增序号,用于日志按执行顺序排序
|
||||
"execution_order": time.monotonic_ns(),
|
||||
}
|
||||
|
||||
# if error_edge:
|
||||
@@ -466,7 +485,12 @@ class BaseNode(ABC):
|
||||
**node_output
|
||||
})
|
||||
logger.error(f"Node {self.node_id} execution failed, stopping workflow: {error_message}")
|
||||
raise Exception(f"Node {self.node_id} execution failed: {error_message}")
|
||||
# 抛出自定义异常,把 node_output 带给 executor,供其写入 node_outputs
|
||||
raise NodeExecutionError(
|
||||
node_id=self.node_id,
|
||||
node_output=node_output,
|
||||
error_message=error_message,
|
||||
)
|
||||
|
||||
def _extract_input(self, state: WorkflowState, variable_pool: VariablePool) -> dict[str, Any]:
|
||||
"""Extracts the input data for this node (used for logging or audit).
|
||||
|
||||
@@ -174,6 +174,9 @@ class IterationRuntime:
|
||||
continue
|
||||
node_type = result.get("node_outputs", {}).get(node_name, {}).get("node_type")
|
||||
cycle_variable = {"item": item} if node_type == NodeType.CYCLE_START else None
|
||||
node_cfg = next(
|
||||
(n for n in self.cycle_nodes if n.get("id") == node_name), None
|
||||
)
|
||||
self.event_write({
|
||||
"type": "cycle_item",
|
||||
"data": {
|
||||
|
||||
@@ -255,9 +255,18 @@ class HttpRequestNode(BaseNode):
|
||||
case HttpContentType.NONE:
|
||||
return {}
|
||||
case HttpContentType.JSON:
|
||||
content["json"] = json.loads(self._render_template(
|
||||
rendered = self._render_template(
|
||||
self.typed_config.body.data, variable_pool
|
||||
))
|
||||
)
|
||||
if not rendered or not rendered.strip():
|
||||
# 第三方导入的工作流可能出现 content_type=json 但 data 为空的情况,视为无 body
|
||||
return {}
|
||||
try:
|
||||
content["json"] = json.loads(rendered)
|
||||
except json.JSONDecodeError as e:
|
||||
raise RuntimeError(
|
||||
f"Invalid JSON body for HTTP request node: {e.msg} (data={rendered!r})"
|
||||
)
|
||||
case HttpContentType.FROM_DATA:
|
||||
data = {}
|
||||
files = []
|
||||
|
||||
Reference in New Issue
Block a user