fix(multimodal): support HTML image tags in document extraction and chat responses

- Replace plain image URLs with `<img src="..." data-url="...">` HTML tags in multimodal and document extractor services
- Propagate citations from workflow end events to client responses
- Update system prompts to instruct LLMs to render images using Markdown `![alt](url)` with strict UUID-preserving URL copying
2026-04-27 17:56:58 +08:00
parent 3d9882643e
commit 531d785629
6 changed files with 27 additions and 12 deletions
--- a/api/app/controllers/service/app_api_controller.py
+++ b/api/app/controllers/service/app_api_controller.py
@@ -296,7 +296,7 @@ async def chat(
                }
            )

-        # 多 Agent 非流式返回
+        # workflow 非流式返回
        result = await app_chat_service.workflow_chat(

            message=payload.message,
--- a/api/app/core/workflow/nodes/document_extractor/node.py
+++ b/api/app/core/workflow/nodes/document_extractor/node.py
@@ -182,7 +182,7 @@ class DocExtractorNode(BaseNode):
                                    mime_type=f"image/{ext}",
                                    is_file=True,
                                ).model_dump())
-                                text = text + f"\n{placeholder}: {url}"
+                                text = text + f"\n{placeholder}: <img src=\"{url}\" data-url=\"{url}\">"
                            except Exception as e:
                                logger.error(f"Node {self.node_id}: failed to save image {placeholder}: {e}")

--- a/api/app/services/app_chat_service.py
+++ b/api/app/services/app_chat_service.py
@@ -161,7 +161,10 @@ class AppChatService:
                f.type == FileType.DOCUMENT for f in files
            ):
                system_prompt += (
-                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                    "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                    "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                )

        # 创建 LangChain Agent
@@ -448,7 +451,10 @@ class AppChatService:
                ):
                    from langchain.agents import create_agent
                    system_prompt += (
-                        "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                        "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                        "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                        "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                        "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                    )

            # 创建 LangChain Agent
--- a/api/app/services/draft_run_service.py
+++ b/api/app/services/draft_run_service.py
@@ -650,7 +650,10 @@ class AgentRunService:
                )
            if has_doc_with_images:
                system_prompt += (
-                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                    "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                    "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                )

            agent = LangChainAgent(
@@ -924,7 +927,10 @@ class AgentRunService:
                )
            if has_doc_with_images:
                system_prompt += (
-                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: http://...，请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "\n\n文档文字中包含图片位置标记如 [图片 第2页 第1张]: <img src=\"url\"...>，"
+                    "请在回答中用 Markdown 格式 ![图片描述](url) 展示对应图片。"
+                    "重要：图片 URL 中包含 UUID（如 /storage/permanent/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx），"
+                    "必须将 src 属性的值原封不动复制到 Markdown 的括号中，不得增删任何字符。"
                )

            # 创建 LangChain Agent
--- a/api/app/services/multimodal_service.py
+++ b/api/app/services/multimodal_service.py
@@ -400,7 +400,7 @@ class MultimodalService:
                                # 在文本内容中追加图片位置标记
                                if result and result[-1].get("type") in ("text", "document"):
                                    key = "text" if "text" in result[-1] else list(result[-1].keys())[-1]
-                                    result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: {img_url}"
+                                    result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: <img src=\"{img_url}\" data-url=\"{img_url}\">"
                                # 将图片以视觉格式追加到消息内容中
                                img_file = FileInput(
                                    type=FileType.IMAGE,
--- a/api/app/services/workflow_service.py
+++ b/api/app/services/workflow_service.py
@@ -554,13 +554,16 @@ class WorkflowService:
                    }
                }
            case "workflow_end":
+                data = {
+                    "elapsed_time": payload.get("elapsed_time"),
+                    "message_length": len(payload.get("output", "")),
+                    "error": payload.get("error", "")
+                }
+                if "citations" in payload and payload["citations"]:
+                    data["citations"] = payload["citations"]
                return {
                    "event": "end",
-                    "data": {
-                        "elapsed_time": payload.get("elapsed_time"),
-                        "message_length": len(payload.get("output", "")),
-                        "error": payload.get("error", "")
-                    }
+                    "data": data
                }
            case "node_start" | "node_end" | "node_error" | "cycle_item":
                return None