fix(multimodal): support HTML image tags in document extraction and chat responses

- Replace plain image URLs with `<img src="..." data-url="...">` HTML tags in multimodal and document extractor services
- Propagate citations from workflow end events to client responses
- Update system prompts to instruct LLMs to render images as Markdown `![alt](url)`, copying each image URL verbatim so that any UUID embedded in it is preserved
This commit is contained in:
Timebomb2018
2026-04-27 17:56:58 +08:00
parent 3d9882643e
commit 531d785629
6 changed files with 27 additions and 12 deletions

View File

@@ -400,7 +400,7 @@ class MultimodalService:
# 在文本内容中追加图片位置标记
if result and result[-1].get("type") in ("text", "document"):
key = "text" if "text" in result[-1] else list(result[-1].keys())[-1]
-result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: {img_url}"
+result[-1][key] = result[-1].get(key, "") + f"\n[图片 {placeholder}]: <img src=\"{img_url}\" data-url=\"{img_url}\">"
# 将图片以视觉格式追加到消息内容中
img_file = FileInput(
type=FileType.IMAGE,