From 531d785629461beaef298717f13a2939e9bb1747 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Mon, 27 Apr 2026 17:56:58 +0800 Subject: [PATCH] fix(multimodal): support HTML image tags in document extraction and chat responses - Replace plain image URLs with `<img>` HTML tags in multimodal and document extractor services - Propagate citations from workflow end events to client responses - Update system prompts to instruct LLMs to render images using Markdown `![alt](url)` with strict UUID-preserving URL copying --- api/app/controllers/service/app_api_controller.py | 2 +- .../core/workflow/nodes/document_extractor/node.py | 2 +- api/app/services/app_chat_service.py | 10 ++++++++-- api/app/services/draft_run_service.py | 10 ++++++++-- api/app/services/multimodal_service.py | 2 +- api/app/services/workflow_service.py | 13 ++++++++----- 6 files changed, 27 insertions(+), 12 deletions(-) diff --git a/api/app/controllers/service/app_api_controller.py b/api/app/controllers/service/app_api_controller.py index 93e88dc5..c2755bdc 100644 --- a/api/app/controllers/service/app_api_controller.py +++ b/api/app/controllers/service/app_api_controller.py @@ -296,7 +296,7 @@ async def chat( } ) - # 多 Agent 非流式返回 + # workflow 非流式返回 result = await app_chat_service.workflow_chat( message=payload.message, diff --git a/api/app/core/workflow/nodes/document_extractor/node.py b/api/app/core/workflow/nodes/document_extractor/node.py index ea1070f4..5fefbc94 100644 --- a/api/app/core/workflow/nodes/document_extractor/node.py +++ b/api/app/core/workflow/nodes/document_extractor/node.py @@ -182,7 +182,7 @@ class DocExtractorNode(BaseNode): mime_type=f"image/{ext}", is_file=True, ).model_dump()) - text = text + f"\n{placeholder}: {url}" + text = text + f"\n{placeholder}: " except Exception as e: logger.error(f"Node {self.node_id}: failed to save image {placeholder}: {e}") diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index 
12f54c03..cc2b02f1 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -161,7 +161,10 @@ class AppChatService: f.type == FileType.DOCUMENT for f in files ): system_prompt += ( - "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...,请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: ," + "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "重要:图片 URL 中包含 UUID(如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)," + "必须将 src 属性的值原封不动复制到 Markdown 的括号中,不得增删任何字符。" ) # 创建 LangChain Agent @@ -448,7 +451,10 @@ class AppChatService: ): from langchain.agents import create_agent system_prompt += ( - "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...,请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: ," + "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "重要:图片 URL 中包含 UUID(如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)," + "必须将 src 属性的值原封不动复制到 Markdown 的括号中,不得增删任何字符。" ) # 创建 LangChain Agent diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 2566a50f..16d856ca 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -650,7 +650,10 @@ class AgentRunService: ) if has_doc_with_images: system_prompt += ( - "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...,请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: ," + "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "重要:图片 URL 中包含 UUID(如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)," + "必须将 src 属性的值原封不动复制到 Markdown 的括号中,不得增删任何字符。" ) agent = LangChainAgent( @@ -924,7 +927,10 @@ class AgentRunService: ) if has_doc_with_images: system_prompt += ( - "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...,请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: ," + "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。" + "重要:图片 URL 中包含 UUID(如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)," + "必须将 src 属性的值原封不动复制到 Markdown 的括号中,不得增删任何字符。" ) # 
创建 LangChain Agent diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index c362158c..dd021357 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -400,7 +400,7 @@ class MultimodalService: # 在文本内容中追加图片位置标记 if result and result[-1].get("type") in ("text", "document"): key = "text" if "text" in result[-1] else list(result[-1].keys())[-1] - result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: {img_url}" + result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: " # 将图片以视觉格式追加到消息内容中 img_file = FileInput( type=FileType.IMAGE, diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py index b35656d9..27327e99 100644 --- a/api/app/services/workflow_service.py +++ b/api/app/services/workflow_service.py @@ -554,13 +554,16 @@ class WorkflowService: } } case "workflow_end": + data = { + "elapsed_time": payload.get("elapsed_time"), + "message_length": len(payload.get("output", "")), + "error": payload.get("error", "") + } + if "citations" in payload and payload["citations"]: + data["citations"] = payload["citations"] return { "event": "end", - "data": { - "elapsed_time": payload.get("elapsed_time"), - "message_length": len(payload.get("output", "")), - "error": payload.get("error", "") - } + "data": data } case "node_start" | "node_end" | "node_error" | "cycle_item": return None