- Add app-type filtering to log search so keyword searches can run against workflow_executions. - Introduce execution-order markers so logs are displayed in correct chronological order. - Improve error handling to capture successful node outputs alongside failure details. - Fix handling of empty JSON bodies in HTTP request nodes.
413 lines
15 KiB
Python
413 lines
15 KiB
Python
"""应用日志服务层"""
|
||
import uuid
|
||
import datetime as dt
|
||
from typing import Optional, Tuple
|
||
|
||
from sqlalchemy import select
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.core.logging_config import get_business_logger
|
||
from app.models.app_model import AppType
|
||
from app.models.conversation_model import Conversation, Message
|
||
from app.models.workflow_model import WorkflowExecution
|
||
from app.repositories.conversation_repository import ConversationRepository, MessageRepository
|
||
from app.schemas.app_log_schema import AppLogMessage, AppLogNodeExecution
|
||
|
||
logger = get_business_logger()
|
||
|
||
|
||
class AppLogService:
    """Application log service.

    Assembles conversation logs for the app admin/log UI. For WORKFLOW
    apps, chat-style messages are reconstructed from ``workflow_executions``
    rows; for other app types the persisted ``messages`` table is used and
    node execution records are matched to assistant messages by timing.
    """

    def __init__(self, db: Session):
        self.db = db
        self.conversation_repository = ConversationRepository(db)
        self.message_repository = MessageRepository(db)

    def list_conversations(
        self,
        app_id: uuid.UUID,
        workspace_id: uuid.UUID,
        page: int = 1,
        pagesize: int = 20,
        is_draft: Optional[bool] = None,
        keyword: Optional[str] = None,
        app_type: Optional[str] = None,
    ) -> Tuple[list[Conversation], int]:
        """List conversations for an app's log view.

        Args:
            app_id: Application id.
            workspace_id: Workspace id.
            page: 1-based page number.
            pagesize: Page size.
            is_draft: Whether to filter draft conversations (None returns all).
            keyword: Search keyword matched against message content; for
                WORKFLOW apps the repository searches workflow_executions
                instead.
            app_type: Application type (WORKFLOW switches the keyword search
                to workflow_executions).

        Returns:
            Tuple[list[Conversation], int]: (conversations, total count).
        """
        logger.info(
            "查询应用日志会话列表",
            extra={
                "app_id": str(app_id),
                "workspace_id": str(workspace_id),
                "page": page,
                "pagesize": pagesize,
                "is_draft": is_draft,
                "keyword": keyword,
                "app_type": app_type,
            }
        )

        # All filtering/pagination is delegated to the repository layer.
        conversations, total = self.conversation_repository.list_app_conversations(
            app_id=app_id,
            workspace_id=workspace_id,
            is_draft=is_draft,
            keyword=keyword,
            page=page,
            pagesize=pagesize,
            app_type=app_type,
        )

        logger.info(
            "查询应用日志会话列表成功",
            extra={
                "app_id": str(app_id),
                "total": total,
                "returned": len(conversations)
            }
        )

        return conversations, total

    def get_conversation_detail(
        self,
        app_id: uuid.UUID,
        conversation_id: uuid.UUID,
        workspace_id: uuid.UUID,
        app_type: str = AppType.AGENT
    ) -> Tuple[Conversation, list, dict[str, list[AppLogNodeExecution]]]:
        """Fetch a conversation with its messages and per-message node logs.

        Returns:
            Tuple[Conversation, list[AppLogMessage|Message], dict[str, list[AppLogNodeExecution]]]:
            (conversation, messages, node_executions_map). ``messages`` holds
            AppLogMessage objects for WORKFLOW apps and ORM Message rows
            otherwise; the map groups node execution records by assistant
            message id.
        """
        logger.info(
            "查询应用日志会话详情",
            extra={
                "app_id": str(app_id),
                "conversation_id": str(conversation_id),
                "workspace_id": str(workspace_id),
                "app_type": app_type
            }
        )

        conversation = self.conversation_repository.get_conversation_for_app_log(
            conversation_id=conversation_id,
            app_id=app_id,
            workspace_id=workspace_id
        )

        if app_type == AppType.WORKFLOW:
            # Workflow apps have no full chat transcript in the messages
            # table; rebuild messages from workflow_executions.
            messages, node_executions_map = self._get_workflow_messages_and_nodes(conversation_id)
        else:
            messages = self.message_repository.get_messages_by_conversation(
                conversation_id=conversation_id
            )
            node_executions_map = self._get_workflow_node_executions_with_map(
                conversation_id, messages
            )

        logger.info(
            "查询应用日志会话详情成功",
            extra={
                "app_id": str(app_id),
                "conversation_id": str(conversation_id),
                "message_count": len(messages),
                "message_with_nodes_count": len(node_executions_map)
            }
        )

        return conversation, messages, node_executions_map

    def _list_finished_executions(self, conversation_id: uuid.UUID) -> list[WorkflowExecution]:
        """Return finished (completed/failed) executions of a conversation, oldest first.

        Shared by ``_get_workflow_messages_and_nodes`` and
        ``_get_workflow_node_executions_with_map``, which previously
        duplicated this query.
        """
        stmt = (
            select(WorkflowExecution)
            .where(
                WorkflowExecution.conversation_id == conversation_id,
                WorkflowExecution.status.in_(["completed", "failed"])
            )
            .order_by(WorkflowExecution.started_at.asc())
        )
        return list(self.db.scalars(stmt).all())

    def _get_workflow_messages_and_nodes(
        self,
        conversation_id: uuid.UUID,
    ) -> Tuple[list[AppLogMessage], dict[str, list[AppLogNodeExecution]]]:
        """Workflow apps only: build messages and node logs from workflow_executions.

        Each WorkflowExecution maps to one conversation round:
        - user message: from execution.input_data (content from the
          ``message`` field, files stored in meta_data)
        - assistant message: from execution.output_data (error text when the
          execution failed)
        The opening statement's suggested_questions are merged into the
        first assistant message's meta_data.

        Returns:
            (messages, node_executions_map)
        """
        executions = self._list_finished_executions(conversation_id)

        # Opening statement: the first assistant Message whose meta_data
        # carries suggested_questions.
        opening_stmt = (
            select(Message)
            .where(
                Message.conversation_id == conversation_id,
                Message.role == "assistant",
            )
            .order_by(Message.created_at.asc())
            .limit(10)
        )
        early_messages = list(self.db.scalars(opening_stmt).all())
        opening_msg = next(
            (m for m in early_messages
             if isinstance(m.meta_data, dict) and "suggested_questions" in m.meta_data),
            None
        )

        messages: list[AppLogMessage] = []
        node_executions_map: dict[str, list[AppLogNodeExecution]] = {}

        # Insert the opening statement as the first assistant message.
        if opening_msg:
            suggested_questions = opening_msg.meta_data.get("suggested_questions") or []
            messages.append(AppLogMessage(
                id=opening_msg.id,
                conversation_id=conversation_id,
                role="assistant",
                content=opening_msg.content,
                status=None,
                meta_data={"suggested_questions": suggested_questions},
                created_at=opening_msg.created_at,
            ))

        for execution in executions:
            # NOTE(review): naive local time fallback — confirm DB
            # timestamps are also naive before comparing them.
            started_at = execution.started_at or dt.datetime.now()
            completed_at = execution.completed_at or started_at

            # Deterministic assistant-message id derived from the execution
            # id; doubles as the node_executions_map key.
            assistant_msg_id = uuid.uuid5(execution.id, "assistant")

            # --- user message (input) ---
            input_data = execution.input_data or {}
            input_content = input_data.get("message") or _extract_text(input_data)

            # Skip executions with no user input (e.g. opening-statement runs).
            if not input_content or not input_content.strip():
                continue

            files = input_data.get("files") or []
            user_msg = AppLogMessage(
                id=uuid.uuid5(execution.id, "user"),
                conversation_id=conversation_id,
                role="user",
                content=input_content,
                meta_data={"files": files} if files else None,
                created_at=started_at,
            )
            messages.append(user_msg)

            # --- assistant message (output) ---
            if execution.status == "completed":
                output_content = _extract_text(execution.output_data)
                meta = {"usage": execution.token_usage or {}, "elapsed_time": execution.elapsed_time}
            else:
                output_content = _extract_text(execution.output_data) or ""
                meta = {"error": execution.error_message, "error_node_id": execution.error_node_id}

            assistant_msg = AppLogMessage(
                id=assistant_msg_id,
                conversation_id=conversation_id,
                role="assistant",
                content=output_content,
                status=execution.status,
                meta_data=meta,
                created_at=completed_at,
            )
            messages.append(assistant_msg)

            # Node execution records, read from output_data["node_outputs"].
            execution_nodes = _build_nodes_from_output_data(execution.output_data)

            if execution_nodes:
                node_executions_map[str(assistant_msg_id)] = execution_nodes

        return messages, node_executions_map

    def _get_workflow_node_executions_with_map(
        self,
        conversation_id: uuid.UUID,
        messages: list[Message]
    ) -> dict[str, list[AppLogNodeExecution]]:
        """Extract node execution records and group them by assistant message.

        Args:
            conversation_id: Conversation id.
            messages: Messages of the conversation (ORM rows).

        Returns:
            dict[str, list[AppLogNodeExecution]]: node execution records
            keyed by assistant message id, or by ``execution_<id>`` for
            failed executions that produced no assistant message.
        """
        node_executions_map: dict[str, list[AppLogNodeExecution]] = {}

        # All finished executions of this conversation, in chronological order.
        executions = self._list_finished_executions(conversation_id)

        logger.info(
            f"查询到 {len(executions)} 条工作流执行记录",
            extra={
                "conversation_id": str(conversation_id),
                "execution_count": len(executions),
                "execution_ids": [str(e.id) for e in executions]
            }
        )

        # Assistant messages produced by workflow runs (opening statements
        # excluded): run results carry "usage" in meta_data while opening
        # statements carry "suggested_questions".
        assistant_messages = [
            m for m in messages
            if m.role == "assistant" and m.meta_data and "usage" in m.meta_data
        ]

        # Match executions to assistant messages by timing.
        used_message_ids: set[str] = set()

        for execution in executions:
            # Node records come from workflow_executions.output_data["node_outputs"].
            execution_nodes = _build_nodes_from_output_data(execution.output_data)

            if not execution_nodes:
                continue

            # Failed executions have no assistant message; key by execution id.
            if execution.status == "failed":
                node_executions_map[f"execution_{str(execution.id)}"] = execution_nodes
                continue

            # completed: pick the nearest unused assistant message created
            # at or after execution.started_at.
            best_msg = None
            best_dt = None
            for msg in assistant_messages:
                msg_id_str = str(msg.id)
                if msg_id_str in used_message_ids:
                    continue
                # Guard both timestamps so a missing started_at cannot make
                # the comparison raise TypeError.
                if msg.created_at and execution.started_at and msg.created_at >= execution.started_at:
                    delta = (msg.created_at - execution.started_at).total_seconds()
                    if best_dt is None or delta < best_dt:
                        best_dt = delta
                        best_msg = msg

            if not best_msg:
                continue

            msg_id_str = str(best_msg.id)
            used_message_ids.add(msg_id_str)
            node_executions_map[msg_id_str] = execution_nodes

        return node_executions_map
|
||
|
||
|
||
def _extract_text(data: Optional[dict]) -> str:
|
||
"""从 workflow execution 的 input_data / output_data 中提取可读文本。
|
||
|
||
优先取 'text'、'content'、'output' 字段;若都没有则 JSON 序列化整个 dict。
|
||
"""
|
||
if not data:
|
||
return ""
|
||
for key in ("message", "text", "content", "output", "result", "answer"):
|
||
if key in data and isinstance(data[key], str):
|
||
return data[key]
|
||
import json
|
||
return json.dumps(data, ensure_ascii=False)
|
||
|
||
|
||
def _build_nodes_from_output_data(output_data: Optional[dict]) -> list[AppLogNodeExecution]:
    """Build node execution records from workflow_executions.output_data["node_outputs"].

    Expected output_data structure::

        {
            "node_outputs": {
                "<node_id>": {
                    "node_type": ...,
                    "node_name": ...,
                    "status": ...,
                    "input": ...,
                    "output": ...,
                    "elapsed_time": ...,
                    "token_usage": ...,
                    "error": ...,
                    "cycle_items": [...],
                    ...
                }
            },
            "error": ...,
            ...
        }

    Nodes are sorted by ``execution_order`` (a monotonically increasing
    sequence written while the node ran). PostgreSQL JSONB does not preserve
    key order, so dict insertion order cannot be relied upon; legacy records
    without ``execution_order`` — or with an explicit null — degrade to 0
    and stay at the front.
    """
    if not output_data:
        return []
    node_outputs: dict = output_data.get("node_outputs") or {}

    def _order_key(item) -> int:
        """Sort key: execution_order of a node entry, 0 for malformed/legacy data."""
        _, node_data = item
        if isinstance(node_data, dict):
            # "or 0" also tolerates an explicit null execution_order, which
            # would otherwise make sorted() compare None against int.
            return node_data.get("execution_order") or 0
        return 0

    result: list[AppLogNodeExecution] = []
    for node_id, node_data in sorted(node_outputs.items(), key=_order_key):
        if not isinstance(node_data, dict):
            continue
        # Peel the known top-level fields off a copy; the remainder is the
        # node's output payload.
        output = dict(node_data)
        cycle_items = output.pop("cycle_items", None)
        node_type = output.pop("node_type", "unknown")
        node_name = output.pop("node_name", None)
        status = output.pop("status", "completed")
        error = output.pop("error", None)
        inp = output.pop("input", None)
        elapsed_time = output.pop("elapsed_time", None)
        token_usage = output.pop("token_usage", None)
        # execution_order is only used for sorting; not exposed to the frontend.
        output.pop("execution_order", None)
        result.append(AppLogNodeExecution(
            node_id=node_id,
            node_type=node_type,
            node_name=node_name,
            status=status,
            error=error,
            input=inp,
            process=None,
            output=output if output else None,
            cycle_items=cycle_items,
            elapsed_time=elapsed_time,
            token_usage=token_usage,
        ))
    return result
|