fix(app): Multimodal file storage
@@ -118,28 +118,54 @@ class AppChatService:
         )
 
+        model_info = ModelInfo(
+            model_name=api_key_obj.model_name,
+            provider=api_key_obj.provider,
+            api_key=api_key_obj.api_key,
+            api_base=api_key_obj.api_base,
+            capability=api_key_obj.capability,
+            is_omni=api_key_obj.is_omni,
+            model_type=ModelType.LLM
+        )
+
         # Load history messages
         messages = self.conversation_service.get_messages(
             conversation_id=conversation_id,
             limit=10
         )
-        history = [
-            {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])}
-            for msg in messages
-        ]
+        history = []
+        for msg in messages:
+            content = [{"type": "text", "text": msg.content}]
+
+            # Handle files stored in meta_data
+            if msg.meta_data and msg.meta_data.get("files"):
+                files = msg.meta_data.get("files", [])
+                # Process the files with MultimodalService
+                multimodal_service = MultimodalService(self.db, api_config=model_info)
+
+                # Convert files to FileInput objects
+                file_inputs = []
+                for file in files:
+                    from app.schemas.app_schema import FileInput, TransferMethod
+                    file_input = FileInput(
+                        type=file.get("type"),
+                        transfer_method=TransferMethod.REMOTE_URL,
+                        url=file.get("url")
+                    )
+                    file_inputs.append(file_input)
+
+                history_processed_files = await multimodal_service.history_process_files(files=file_inputs)
+
+                content.extend(history_processed_files)
+
+            history.append({
+                "role": msg.role,
+                "content": content
+            })
 
         # Process multimodal files
         processed_files = None
         if files:
-            model_info = ModelInfo(
-                model_name=api_key_obj.model_name,
-                provider=api_key_obj.provider,
-                api_key=api_key_obj.api_key,
-                api_base=api_key_obj.api_base,
-                capability=api_key_obj.capability,
-                is_omni=api_key_obj.is_omni,
-                model_type=ModelType.LLM
-            )
             multimodal_service = MultimodalService(self.db, model_info)
             processed_files = await multimodal_service.process_files(user_id, files)
             logger.info(f"Processed {len(processed_files)} files")
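Review note: a minimal sketch of the history entry the new loop builds, assuming a user message with one stored image reference. The part dicts are illustrative only; the exact file-part shape depends on the provider strategy selected by history_process_files.

    # Illustrative shapes: text part first, then provider-formatted file parts
    history_entry = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this picture?"},
            # appended from multimodal_service.history_process_files(...)
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }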
@@ -187,8 +213,13 @@ class AppChatService:
                 "usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
                 "audio_url": None
             }
-            if processed_files:
-                human_meta["files"].extend(processed_files)
+            if files:
+                for f in files:
+                    # url = await MultimodalService(self.db).get_file_url(f)
+                    human_meta["files"].append({
+                        "type": f.type,
+                        "url": f.url
+                    })
 
             # Save messages
             if audio_url:
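Review note: message metadata now persists only the original file reference rather than the processed payload. Assuming two attachments and string-serialized file types (illustrative values), human_meta ends up roughly like:

    # Sketch (assumed values): original type/url pairs, not processed content
    human_meta = {
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
        "audio_url": None,
        "files": [
            {"type": "image", "url": "https://example.com/a.png"},
            {"type": "document", "url": "https://example.com/b.pdf"},
        ],
    }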
@@ -308,31 +339,54 @@ class AppChatService:
             streaming=True
         )
 
+        model_info = ModelInfo(
+            model_name=api_key_obj.model_name,
+            provider=api_key_obj.provider,
+            api_key=api_key_obj.api_key,
+            api_base=api_key_obj.api_base,
+            capability=api_key_obj.capability,
+            is_omni=api_key_obj.is_omni,
+            model_type=ModelType.LLM
+        )
+
         # Load history messages
+        messages = self.conversation_service.get_messages(
+            conversation_id=conversation_id,
+            limit=10
+        )
         history = []
-        memory_config = {"enabled": True, 'max_history': 10}
-        if memory_config.get("enabled"):
-            messages = self.conversation_service.get_messages(
-                conversation_id=conversation_id,
-                limit=memory_config.get("max_history", 10)
-            )
-            history = [
-                {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])}
-                for msg in messages
-            ]
+        for msg in messages:
+            content = [{"type": "text", "text": msg.content}]
+
+            # Handle files stored in meta_data
+            if msg.meta_data and msg.meta_data.get("files"):
+                files = msg.meta_data.get("files", [])
+                # Process the files with MultimodalService
+                multimodal_service = MultimodalService(self.db, api_config=model_info)
+
+                # Convert files to FileInput objects
+                file_inputs = []
+                for file in files:
+                    from app.schemas.app_schema import FileInput, TransferMethod
+                    file_input = FileInput(
+                        type=file.get("type"),
+                        transfer_method=TransferMethod.REMOTE_URL,
+                        url=file.get("url")
+                    )
+                    file_inputs.append(file_input)
+
+                history_processed_files = await multimodal_service.history_process_files(files=file_inputs)
+
+                content.extend(history_processed_files)
+
+            history.append({
+                "role": msg.role,
+                "content": content
+            })
 
         # Process multimodal files
         processed_files = None
         if files:
-            model_info = ModelInfo(
-                model_name=api_key_obj.model_name,
-                provider=api_key_obj.provider,
-                api_key=api_key_obj.api_key,
-                api_base=api_key_obj.api_base,
-                capability=api_key_obj.capability,
-                is_omni=api_key_obj.is_omni,
-                model_type=ModelType.LLM
-            )
             multimodal_service = MultimodalService(self.db, model_info)
             processed_files = await multimodal_service.process_files(user_id, files)
             logger.info(f"Processed {len(processed_files)} files")
@@ -342,8 +396,14 @@ class AppChatService:
         total_tokens = 0
 
         text_queue: asyncio.Queue = asyncio.Queue()
+        api_key_config = {
+            "model_name": api_key_obj.model_name,
+            "api_key": api_key_obj.api_key,
+            "api_base": api_key_obj.api_base,
+            "provider": api_key_obj.provider,
+        }
         stream_audio_url, tts_task = await self.agent_service._generate_tts_streaming(
-            features_config, api_key_obj,
+            features_config, api_key_config,
             text_queue=text_queue,
             tenant_id=tenant_id, workspace_id=workspace_id
         )
@@ -395,8 +455,13 @@ class AppChatService:
                 "audio_url": None
             }
 
-            if processed_files:
-                human_meta["files"].extend(processed_files)
+            if files:
+                for f in files:
+                    # url = await MultimodalService(self.db).get_file_url(f)
+                    human_meta["files"].append({
+                        "type": f.type,
+                        "url": f.url
+                    })
 
             if stream_audio_url:
                 assistant_meta["audio_url"] = stream_audio_url
@@ -21,6 +21,7 @@ from app.models.conversation_model import ConversationDetail
 from app.models.prompt_optimizer_model import RoleType
 from app.repositories.conversation_repository import ConversationRepository, MessageRepository
 from app.schemas.conversation_schema import ConversationOut
+from app.schemas.model_schema import ModelInfo
 from app.services import workspace_service
 from app.services.model_service import ModelConfigService
@@ -269,10 +270,11 @@ class ConversationService:
 
         return messages
 
-    def get_conversation_history(
+    async def get_conversation_history(
         self,
         conversation_id: uuid.UUID,
-        max_history: Optional[int] = None
+        max_history: Optional[int] = None,
+        api_config: Optional[ModelInfo] = None
     ) -> List[dict]:
         """
         Retrieve historical conversation messages formatted as dictionaries.
@@ -280,6 +282,7 @@ class ConversationService:
         Args:
             conversation_id (uuid.UUID): Conversation UUID.
             max_history (Optional[int]): Maximum number of messages to retrieve.
+            api_config (Optional[ModelInfo]): Model API configuration for multimodal processing.
 
         Returns:
             List[dict]: List of message dictionaries with keys 'role' and 'content'.
@@ -290,13 +293,37 @@ class ConversationService:
         )
 
         # Convert to dict format
-        history = [
-            {
-                "role": msg.role,
-                "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])
-            }
-            for msg in messages
-        ]
+        history = []
+        for msg in messages:
+            content = [{"type": "text", "text": msg.content}]
+
+            # Handle files stored in meta_data
+            if msg.meta_data and msg.meta_data.get("files"):
+                files = msg.meta_data.get("files", [])
+                if api_config:
+                    # Process the files with MultimodalService
+                    from app.services.multimodal_service import MultimodalService
+                    multimodal_service = MultimodalService(self.db, api_config=api_config)
+
+                    # Convert files to FileInput objects
+                    file_inputs = []
+                    for file in files:
+                        from app.schemas.app_schema import FileInput, TransferMethod
+                        file_input = FileInput(
+                            type=file.get("type"),
+                            transfer_method=TransferMethod.REMOTE_URL,
+                            url=file.get("url")
+                        )
+                        file_inputs.append(file_input)
+
+                    processed_files = await multimodal_service.history_process_files(files=file_inputs)
+
+                    content.extend(processed_files)
+
+            history.append({
+                "role": msg.role,
+                "content": content
+            })
 
         return history
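Review note: get_conversation_history is now a coroutine, so every caller has to await it, as the two call-site hunks below show. A minimal sketch of the updated call (names as in the surrounding services):

    history = await conversation_service.get_conversation_history(
        conversation_id=conversation_id,
        max_history=10,
        api_config=model_info,  # when omitted, stored files are left out of the content
    )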
@@ -524,9 +551,18 @@ class ConversationService:
             type=ModelType(model_type)
         )
 
-        conversation_messages = self.get_conversation_history(
+        conversation_messages = await self.get_conversation_history(
             conversation_id=conversation_id,
-            max_history=20
+            max_history=20,
+            api_config=ModelInfo(
+                model_name=model_name,
+                provider=provider,
+                api_key=api_key,
+                api_base=api_base,
+                capability=api_config.capability,
+                is_omni=api_config.is_omni,
+                model_type=model_type
+            )
         )
         if len(conversation_messages) == 0:
             return ConversationOut(
@@ -579,9 +579,20 @@ class AgentRunService:
             user_id=user_id
         )
 
+        model_info = ModelInfo(
+            model_name=api_key_config["model_name"],
+            provider=api_key_config["provider"],
+            api_key=api_key_config["api_key"],
+            api_base=api_key_config["api_base"],
+            capability=api_key_config["capability"],
+            is_omni=api_key_config["is_omni"],
+            model_type=model_config.type
+        )
+
         # 6. Load history messages
         history = await self._load_conversation_history(
             conversation_id=conversation_id,
+            api_config=model_info,
             max_history=10
         )
@@ -589,15 +600,6 @@ class AgentRunService:
         processed_files = None
         if files:
-            # Get provider info
-            model_info = ModelInfo(
-                model_name=api_key_config["model_name"],
-                provider=api_key_config["provider"],
-                api_key=api_key_config["api_key"],
-                api_base=api_key_config["api_base"],
-                capability=api_key_config["capability"],
-                is_omni=api_key_config["is_omni"],
-                model_type=ModelType.LLM
-            )
-            provider = api_key_config.get("provider", "openai")
             multimodal_service = MultimodalService(self.db, model_info)
             processed_files = await multimodal_service.process_files(user_id, files)
@@ -658,7 +660,7 @@ class AgentRunService:
                     "total_tokens": 0
                 })
             },
-            files=processed_files,
+            files=files,
             audio_url=audio_url
         )
@@ -815,9 +817,20 @@ class AgentRunService:
             sub_agent=sub_agent
         )
 
+        model_info = ModelInfo(
+            model_name=api_key_config["model_name"],
+            provider=api_key_config["provider"],
+            api_key=api_key_config["api_key"],
+            api_base=api_key_config["api_base"],
+            capability=api_key_config["capability"],
+            is_omni=api_key_config["is_omni"],
+            model_type=model_config.type
+        )
+
         # 6. Load history messages
         history = await self._load_conversation_history(
             conversation_id=conversation_id,
+            api_config=model_info,
             max_history=memory_config.get("max_history", 10)
         )
@@ -825,15 +838,6 @@ class AgentRunService:
         processed_files = None
         if files:
-            # Get provider info
-            model_info = ModelInfo(
-                model_name=api_key_config["model_name"],
-                provider=api_key_config["provider"],
-                api_key=api_key_config["api_key"],
-                api_base=api_key_config["api_base"],
-                capability=api_key_config["capability"],
-                is_omni=api_key_config["is_omni"],
-                model_type=ModelType.LLM
-            )
-            provider = api_key_config.get("provider", "openai")
             multimodal_service = MultimodalService(self.db, model_info)
             processed_files = await multimodal_service.process_files(user_id, files)
@@ -904,7 +908,7 @@ class AgentRunService:
             meta_data={
                 "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
             },
-            files=processed_files,
+            files=files,
             audio_url=stream_audio_url
         )
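Review note: together with the human_meta changes above, both run paths now persist the original file references (type and URL) on the saved message instead of the processed payloads; history_process_files (added below) re-derives the provider-specific content parts whenever history is loaded.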
@@ -1115,6 +1119,7 @@ class AgentRunService:
     async def _load_conversation_history(
         self,
         conversation_id: str,
+        api_config: ModelInfo | None = None,
         max_history: int = 10
     ) -> List[Dict[str, str]]:
         """Load conversation history messages
@@ -1129,9 +1134,11 @@ class AgentRunService:
         try:
 
             conversation_service = ConversationService(self.db)
-            history = conversation_service.get_conversation_history(
+            # Pass the API config along for multimodal processing
+            history = await conversation_service.get_conversation_history(
                 conversation_id=uuid.UUID(conversation_id),
-                max_history=max_history
+                max_history=max_history,
+                api_config=api_config
             )
 
             logger.debug(
@@ -1182,7 +1189,12 @@ class AgentRunService:
                 "files": []
             }
             if files:
-                human_meta["files"].extend(files)
+                for f in files:
+                    # url = await MultimodalService(self.db).get_file_url(f)
+                    human_meta["files"].append({
+                        "type": f.type,
+                        "url": f.url
+                    })
             # Save the user message
             conversation_service.add_message(
                 conversation_id=conv_uuid,
@@ -418,6 +418,71 @@ class MultimodalService:
         logger.info(f"Successfully processed {len(result)}/{len(files)} files, provider={self.provider}")
         return result
 
+    async def history_process_files(
+        self,
+        files: Optional[List[FileInput]],
+    ) -> List[Dict[str, Any]]:
+        """
+        Process a list of files and return them in an LLM-ready format
+
+        Args:
+            files: List of file inputs
+
+        Returns:
+            List[Dict]: List of LLM-ready content items (format depends on the provider)
+        """
+        if not files:
+            return []
+
+        # Pick the matching strategy
+        # dashscope omni models use the OpenAI-compatible format
+        if self.provider == "dashscope" and self.is_omni:
+            strategy_class = OpenAIFormatStrategy
+        else:
+            strategy_class = PROVIDER_STRATEGIES.get(self.provider)
+            if not strategy_class:
+                logger.warning(f"No strategy found for provider '{self.provider}', falling back to the default strategy")
+                strategy_class = DashScopeFormatStrategy
+
+        result = []
+        for idx, file in enumerate(files):
+            strategy = strategy_class(file)
+            if not file.url:
+                file.url = await self.get_file_url(file)
+            try:
+                if file.type == FileType.IMAGE and "vision" in self.capability:
+                    is_support, content = await self._process_image(file, strategy)
+                    result.append(content)
+                elif file.type == FileType.DOCUMENT:
+                    is_support, content = await self._process_document(file, strategy)
+                    result.append(content)
+                elif file.type == FileType.AUDIO and "audio" in self.capability:
+                    is_support, content = await self._process_audio(file, strategy)
+                    result.append(content)
+                elif file.type == FileType.VIDEO and "video" in self.capability:
+                    is_support, content = await self._process_video(file, strategy)
+                    result.append(content)
+                else:
+                    logger.warning(f"Unsupported file type: {file.type}")
+            except Exception as e:
+                logger.error(
+                    "Failed to process file",
+                    extra={
+                        "file_index": idx,
+                        "file_type": file.type,
+                        "error": str(e)
+                    },
+                    exc_info=True
+                )
+                # Keep processing the remaining files instead of aborting the whole flow
+                result.append({
+                    "type": "text",
+                    "text": f"[File processing failed: {str(e)}]"
+                })
+
+        logger.info(f"Successfully processed {len(result)}/{len(files)} files, provider={self.provider}")
+        return result
+
     def write_perceptual_memory(
         self,
         end_user_id: str,
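Review note: a minimal usage sketch of the new method, assuming FileType, FileInput, and TransferMethod from app.schemas.app_schema, and db/model_info supplied by the surrounding service (the URL is illustrative):

    # Rebuild LLM-ready content parts from a stored file reference
    service = MultimodalService(db, api_config=model_info)
    parts = await service.history_process_files(files=[
        FileInput(
            type=FileType.IMAGE,
            transfer_method=TransferMethod.REMOTE_URL,
            url="https://example.com/cat.png",
        ),
    ])
    content = [{"type": "text", "text": "Describe the image"}] + parts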
@@ -264,7 +264,7 @@ class SharedChatService:
                 limit=memory_config.get("max_history", 10)
             )
             history = [
-                {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])}
+                {"role": msg.role, "content": msg.content}
                 for msg in messages
             ]
@@ -472,7 +472,7 @@ class SharedChatService:
                 limit=memory_config.get("max_history", 10)
             )
             history = [
-                {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])}
+                {"role": msg.role, "content": msg.content}
                 for msg in messages
             ]