From a268d0f7f187a4e05df973d0ead15a97ecef9e3f Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Mon, 27 Apr 2026 12:25:27 +0800 Subject: [PATCH] =?UTF-8?q?fix(multimodal=5Fservice):=20add=20'=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E5=86=85=E5=AE=B9=EF=BC=9A'=20prefix=20to=20document?= =?UTF-8?q?=20text=20and=20simplify=20image=20placeholder=20text?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/services/multimodal_service.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index 08a33a48..c362158c 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -95,7 +95,7 @@ class DashScopeFormatStrategy(MultimodalFormatStrategy): """通义千问文档格式""" return True, { "type": "text", - "text": f"\n{text}\n" + "text": f"\n文档内容:\n{text}\n" } async def format_audio( @@ -167,6 +167,7 @@ class BedrockFormatStrategy(MultimodalFormatStrategy): async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """Bedrock/Anthropic 文档格式(需要 base64 编码)""" # Bedrock 文档需要 base64 编码 + text = f"文档内容:\n{text}\n" text_bytes = text.encode('utf-8') base64_text = base64.b64encode(text_bytes).decode('utf-8') @@ -223,7 +224,7 @@ class OpenAIFormatStrategy(MultimodalFormatStrategy): """OpenAI 文档格式""" return True, { "type": "text", - "text": f"\n{text}\n" + "text": f"\n文档内容:\n{text}\n" } async def format_audio( @@ -395,7 +396,7 @@ class MultimodalService: ext = img_info.get("ext", "png") try: _, img_url = await self._save_doc_image_to_storage(img_info["bytes"], ext, tenant_id, workspace_id) - placeholder = f"第{page}页 第{index + 1}张图片" if page > 0 else f"第{index + 1}张图片" + placeholder = f"第{page}页 第{index + 1}张" if page > 0 else f"第{index + 1}张" # 在文本内容中追加图片位置标记 if result and result[-1].get("type") in ("text", "document"): key = "text" if "text" in result[-1] else list(result[-1].keys())[-1]