fix(multimodal): support HTML image tags in document extraction and chat responses

- Replace plain image URLs with `<img src="..." data-url="...">` HTML tags in multimodal and document extractor services - Propagate citations from workflow end events to client responses - Update system prompts to instruct LLMs to render images using Markdown `![alt](url)` with strict UUID-preserving URL copying
2026-04-27 17:56:58 +08:00
parent 3d9882643e
commit 531d785629
6 changed files with 27 additions and 12 deletions
--- a/api/app/services/multimodal_service.py
+++ b/api/app/services/multimodal_service.py
@@ -400,7 +400,7 @@ class MultimodalService:
                                # 在文本内容中追加图片位置标记
                                if result and result[-1].get("type") in ("text", "document"):
                                    key = "text" if "text" in result[-1] else list(result[-1].keys())[-1]
-                                    result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: {img_url}"
+                                    result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: <img src=\"{img_url}\" data-url=\"{img_url}\">"
                                # 将图片以视觉格式追加到消息内容中
                                img_file = FileInput(
                                    type=FileType.IMAGE,