From 531d785629461beaef298717f13a2939e9bb1747 Mon Sep 17 00:00:00 2001
From: Timebomb2018 <18868801967@163.com>
Date: Mon, 27 Apr 2026 17:56:58 +0800
Subject: [PATCH] fix(multimodal): support HTML image tags in document
 extraction and chat responses

- Replace plain image URLs with `<img src="..." data-url="...">` HTML tags in multimodal and document extractor services
- Propagate citations from workflow end events to client responses
- Update system prompts to instruct LLMs to render images as Markdown `![alt](url)`, copying the `<img>` tag's `src` value verbatim so that UUIDs embedded in the URL are preserved
---
 api/app/controllers/service/app_api_controller.py   |  2 +-
 .../core/workflow/nodes/document_extractor/node.py  |  2 +-
 api/app/services/app_chat_service.py                | 10 ++++++++--
 api/app/services/draft_run_service.py               | 10 ++++++++--
 api/app/services/multimodal_service.py              |  2 +-
 api/app/services/workflow_service.py                | 13 ++++++++-----
 6 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/api/app/controllers/service/app_api_controller.py b/api/app/controllers/service/app_api_controller.py
index 93e88dc5..c2755bdc 100644
--- a/api/app/controllers/service/app_api_controller.py
+++ b/api/app/controllers/service/app_api_controller.py
@@ -296,7 +296,7 @@ async def chat(
                 }
             )
 
-        # 多 Agent 非流式返回
+        # workflow 非流式返回
         result = await app_chat_service.workflow_chat(
 
             message=payload.message,
diff --git a/api/app/core/workflow/nodes/document_extractor/node.py b/api/app/core/workflow/nodes/document_extractor/node.py
index ea1070f4..5fefbc94 100644
--- a/api/app/core/workflow/nodes/document_extractor/node.py
+++ b/api/app/core/workflow/nodes/document_extractor/node.py
@@ -182,7 +182,7 @@ class DocExtractorNode(BaseNode):
                                     mime_type=f"image/{ext}",
                                     is_file=True,
                                 ).model_dump())
-                                text = text + f"\n{placeholder}: {url}"
+                                text = text + f"\n{placeholder}: <img src=\"{url}\" data-url=\"{url}\">"
                             except Exception as e:
                                 logger.error(f"Node {self.node_id}: failed to save image {placeholder}: {e}")
 
diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py
index 12f54c03..cc2b02f1 100644
--- a/api/app/services/app_chat_service.py
+++ b/api/app/services/app_chat_service.py
@@ -161,7 +161,10 @@ class AppChatService:
                 f.type == FileType.DOCUMENT for f in files
             ):
                 system_prompt += (
-                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                    "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                    "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                 )
 
         # 创建 LangChain Agent
@@ -448,7 +451,10 @@ class AppChatService:
                 ):
                     from langchain.agents import create_agent
                     system_prompt += (
-                        "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                        "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                        "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                        "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                        "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                     )
 
             # 创建 LangChain Agent
diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py
index 2566a50f..16d856ca 100644
--- a/api/app/services/draft_run_service.py
+++ b/api/app/services/draft_run_service.py
@@ -650,7 +650,10 @@ class AgentRunService:
                 )
             if has_doc_with_images:
                 system_prompt += (
-                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                    "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                    "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                 )
 
             agent = LangChainAgent(
@@ -924,7 +927,10 @@ class AgentRunService:
                 )
             if has_doc_with_images:
                 system_prompt += (
-                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                    "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                    "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                 )
 
             # 创建 LangChain Agent
diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py
index c362158c..dd021357 100644
--- a/api/app/services/multimodal_service.py
+++ b/api/app/services/multimodal_service.py
@@ -400,7 +400,7 @@ class MultimodalService:
                                 # 在文本内容中追加图片位置标记
                                 if result and result[-1].get("type") in ("text", "document"):
                                     key = "text" if "text" in result[-1] else list(result[-1].keys())[-1]
-                                    result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: {img_url}"
+                                    result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: <img src=\"{img_url}\" data-url=\"{img_url}\">"
                                 # 将图片以视觉格式追加到消息内容中
                                 img_file = FileInput(
                                     type=FileType.IMAGE,
diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py
index b35656d9..27327e99 100644
--- a/api/app/services/workflow_service.py
+++ b/api/app/services/workflow_service.py
@@ -554,13 +554,16 @@ class WorkflowService:
                     }
                 }
             case "workflow_end":
+                data = {
+                    "elapsed_time": payload.get("elapsed_time"),
+                    "message_length": len(payload.get("output", "")),
+                    "error": payload.get("error", "")
+                }
+                if "citations" in payload and payload["citations"]:
+                    data["citations"] = payload["citations"]
                 return {
                     "event": "end",
-                    "data": {
-                        "elapsed_time": payload.get("elapsed_time"),
-                        "message_length": len(payload.get("output", "")),
-                        "error": payload.get("error", "")
-                    }
+                    "data": data
                 }
             case "node_start" | "node_end" | "node_error" | "cycle_item":
                 return None