fix(app): Multimodal file storage

This commit is contained in:
Timebomb2018
2026-03-20 19:45:41 +08:00
parent 726148d7ee
commit 240f1d431b
5 changed files with 250 additions and 72 deletions

View File

@@ -118,28 +118,54 @@ class AppChatService:
)
model_info = ModelInfo(
model_name=api_key_obj.model_name,
provider=api_key_obj.provider,
api_key=api_key_obj.api_key,
api_base=api_key_obj.api_base,
capability=api_key_obj.capability,
is_omni=api_key_obj.is_omni,
model_type=ModelType.LLM
)
# 加载历史消息
messages = self.conversation_service.get_messages(
conversation_id=conversation_id,
limit=10
)
history = [
{"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])}
for msg in messages
]
history = []
for msg in messages:
content = [{"type": "text", "text": msg.content}]
# 处理 meta_data 中的 files
if msg.meta_data and msg.meta_data.get("files"):
files = msg.meta_data.get("files", [])
# 使用 MultimodalService 处理文件
multimodal_service = MultimodalService(self.db, api_config=model_info)
# 将 files 转换为 FileInput 格式
file_inputs = []
for file in files:
from app.schemas.app_schema import FileInput, TransferMethod
file_input = FileInput(
type=file.get("type"),
transfer_method=TransferMethod.REMOTE_URL,
url=file.get("url")
)
file_inputs.append(file_input)
history_processed_files = await multimodal_service.history_process_files(files=file_inputs)
content.extend(history_processed_files)
history.append({
"role": msg.role,
"content": content
})
# 处理多模态文件
processed_files = None
if files:
model_info = ModelInfo(
model_name=api_key_obj.model_name,
provider=api_key_obj.provider,
api_key=api_key_obj.api_key,
api_base=api_key_obj.api_base,
capability=api_key_obj.capability,
is_omni=api_key_obj.is_omni,
model_type=ModelType.LLM
)
multimodal_service = MultimodalService(self.db, model_info)
processed_files = await multimodal_service.process_files(user_id, files)
logger.info(f"处理了 {len(processed_files)} 个文件")
@@ -187,8 +213,13 @@ class AppChatService:
"usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
"audio_url": None
}
if processed_files:
human_meta["files"].extend(processed_files)
if files:
for f in files:
# url = await MultimodalService(self.db).get_file_url(f)
human_meta["files"].append({
"type": f.type,
"url": f.url
})
# 保存消息
if audio_url:
@@ -308,31 +339,54 @@ class AppChatService:
streaming=True
)
model_info = ModelInfo(
model_name=api_key_obj.model_name,
provider=api_key_obj.provider,
api_key=api_key_obj.api_key,
api_base=api_key_obj.api_base,
capability=api_key_obj.capability,
is_omni=api_key_obj.is_omni,
model_type=ModelType.LLM
)
# 加载历史消息
messages = self.conversation_service.get_messages(
conversation_id=conversation_id,
limit=10
)
history = []
memory_config = {"enabled": True, 'max_history': 10}
if memory_config.get("enabled"):
messages = self.conversation_service.get_messages(
conversation_id=conversation_id,
limit=memory_config.get("max_history", 10)
)
history = [
{"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])}
for msg in messages
]
for msg in messages:
content = [{"type": "text", "text": msg.content}]
# 处理 meta_data 中的 files
if msg.meta_data and msg.meta_data.get("files"):
files = msg.meta_data.get("files", [])
# 使用 MultimodalService 处理文件
multimodal_service = MultimodalService(self.db, api_config=model_info)
# 将 files 转换为 FileInput 格式
file_inputs = []
for file in files:
from app.schemas.app_schema import FileInput, TransferMethod
file_input = FileInput(
type=file.get("type"),
transfer_method=TransferMethod.REMOTE_URL,
url=file.get("url")
)
file_inputs.append(file_input)
history_processed_files = await multimodal_service.history_process_files(files=file_inputs)
content.extend(history_processed_files)
history.append({
"role": msg.role,
"content": content
})
# 处理多模态文件
processed_files = None
if files:
model_info = ModelInfo(
model_name=api_key_obj.model_name,
provider=api_key_obj.provider,
api_key=api_key_obj.api_key,
api_base=api_key_obj.api_base,
capability=api_key_obj.capability,
is_omni=api_key_obj.is_omni,
model_type=ModelType.LLM
)
multimodal_service = MultimodalService(self.db, model_info)
processed_files = await multimodal_service.process_files(user_id, files)
logger.info(f"处理了 {len(processed_files)} 个文件")
@@ -342,8 +396,14 @@ class AppChatService:
total_tokens = 0
text_queue: asyncio.Queue = asyncio.Queue()
api_key_config = {
"model_name": api_key_obj.model_name,
"api_key": api_key_obj.api_key,
"api_base": api_key_obj.api_base,
"provider": api_key_obj.provider,
}
stream_audio_url, tts_task = await self.agent_service._generate_tts_streaming(
features_config, api_key_obj,
features_config, api_key_config,
text_queue=text_queue,
tenant_id=tenant_id, workspace_id=workspace_id
)
@@ -395,8 +455,13 @@ class AppChatService:
"audio_url": None
}
if processed_files:
human_meta["files"].extend(processed_files)
if files:
for f in files:
# url = await MultimodalService(self.db).get_file_url(f)
human_meta["files"].append({
"type": f.type,
"url": f.url
})
if stream_audio_url:
assistant_meta["audio_url"] = stream_audio_url