fix(agent features):
1. Include historical multimodal messages (text plus attached files) in the conversation context; 2. Fix csv, json, and txt files that could not be recognized because of encoding problems; 3. Add a configurable file-count limit; 4. Return the original error detail in error responses
This commit is contained in:
@@ -91,7 +91,7 @@ async def upload_file(
|
|||||||
|
|
||||||
if file_size > settings.MAX_FILE_SIZE:
|
if file_size > settings.MAX_FILE_SIZE:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_413_CONTENT_TOO_LARGE,
|
||||||
detail=f"The file size exceeds the {settings.MAX_FILE_SIZE} byte limit"
|
detail=f"The file size exceeds the {settings.MAX_FILE_SIZE} byte limit"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ class Settings:
|
|||||||
|
|
||||||
# File Upload
|
# File Upload
|
||||||
MAX_FILE_SIZE: int = int(os.getenv("MAX_FILE_SIZE", "52428800"))
|
MAX_FILE_SIZE: int = int(os.getenv("MAX_FILE_SIZE", "52428800"))
|
||||||
|
MAX_FILE_COUNT: int = int(os.getenv("MAX_FILE_COUNT", "20"))
|
||||||
FILE_PATH: str = os.getenv("FILE_PATH", "/files")
|
FILE_PATH: str = os.getenv("FILE_PATH", "/files")
|
||||||
FILE_URL_EXPIRES: int = int(os.getenv("FILE_URL_EXPIRES", "3600"))
|
FILE_URL_EXPIRES: int = int(os.getenv("FILE_URL_EXPIRES", "3600"))
|
||||||
|
|
||||||
|
|||||||
@@ -506,10 +506,13 @@ async def http_exception_handler(request: Request, exc: HTTPException):
|
|||||||
404: "errors.common.not_found",
|
404: "errors.common.not_found",
|
||||||
405: "errors.common.method_not_allowed",
|
405: "errors.common.method_not_allowed",
|
||||||
409: "errors.common.conflict",
|
409: "errors.common.conflict",
|
||||||
|
413: "errors.common.payload_too_large",
|
||||||
422: "errors.common.validation_failed",
|
422: "errors.common.validation_failed",
|
||||||
429: "errors.common.too_many_requests",
|
429: "errors.common.too_many_requests",
|
||||||
500: "errors.common.internal_error",
|
500: "errors.common.internal_error",
|
||||||
|
502: "errors.common.bad_gateway",
|
||||||
503: "errors.common.service_unavailable",
|
503: "errors.common.service_unavailable",
|
||||||
|
504: "errors.common.gateway_timeout",
|
||||||
}
|
}
|
||||||
|
|
||||||
# 如果有对应的翻译键,使用翻译
|
# 如果有对应的翻译键,使用翻译
|
||||||
@@ -534,7 +537,7 @@ async def http_exception_handler(request: Request, exc: HTTPException):
|
|||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
status_code=exc.status_code,
|
status_code=exc.status_code,
|
||||||
content=fail(code=exc.status_code, msg=translated_message, error=translated_message)
|
content=fail(code=exc.status_code, msg=translated_message, error=exc.detail)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ class FileUploadConfig(BaseModel):
|
|||||||
document_enabled: bool = Field(default=False)
|
document_enabled: bool = Field(default=False)
|
||||||
document_max_size_mb: int = Field(default=100)
|
document_max_size_mb: int = Field(default=100)
|
||||||
document_allowed_extensions: List[str] = Field(
|
document_allowed_extensions: List[str] = Field(
|
||||||
default=["pdf", "docx", "xlsx", "txt", "csv", "json", "md"]
|
default=["pdf", "docx", "doc", "xlsx", "xls", "txt", "csv", "json", "md"]
|
||||||
)
|
)
|
||||||
# 视频文件:MP4/MOV/AVI/WebM,最大 500MB
|
# 视频文件:MP4/MOV/AVI/WebM,最大 500MB
|
||||||
video_enabled: bool = Field(default=False)
|
video_enabled: bool = Field(default=False)
|
||||||
@@ -160,7 +160,15 @@ class FileUploadConfig(BaseModel):
|
|||||||
default=["mp4", "mov"]
|
default=["mp4", "mov"]
|
||||||
)
|
)
|
||||||
# 最大文件数量
|
# 最大文件数量
|
||||||
max_file_count: int = Field(default=5, ge=1, le=20)
|
max_file_count: int = Field(default=5, ge=1)
|
||||||
|
|
||||||
|
@field_validator("max_file_count")
@classmethod
def validate_max_file_count(cls, v: int) -> int:
    """Reject a ``max_file_count`` larger than ``settings.MAX_FILE_COUNT``.

    Args:
        v: The candidate value for the ``max_file_count`` field.

    Returns:
        ``v`` unchanged when it does not exceed the configured ceiling.

    Raises:
        ValueError: If ``v`` is greater than ``settings.MAX_FILE_COUNT``.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from app.core.config import settings

    ceiling = settings.MAX_FILE_COUNT
    if v <= ceiling:
        return v
    raise ValueError(f"max_file_count 不能超过 {ceiling}")
|
||||||
|
|
||||||
|
|
||||||
class OpeningStatementConfig(BaseModel):
|
class OpeningStatementConfig(BaseModel):
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ class AppChatService:
|
|||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
history = [
|
history = [
|
||||||
{"role": msg.role, "content": msg.content}
|
{"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])}
|
||||||
for msg in messages
|
for msg in messages
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -188,12 +188,7 @@ class AppChatService:
|
|||||||
"audio_url": None
|
"audio_url": None
|
||||||
}
|
}
|
||||||
if files:
|
if files:
|
||||||
for f in files:
|
human_meta["files"].extend(processed_files)
|
||||||
# url = await MultimodalService(self.db).get_file_url(f)
|
|
||||||
human_meta["files"].append({
|
|
||||||
"type": f.type,
|
|
||||||
"url": f.url
|
|
||||||
})
|
|
||||||
|
|
||||||
# 保存消息
|
# 保存消息
|
||||||
if audio_url:
|
if audio_url:
|
||||||
@@ -322,7 +317,7 @@ class AppChatService:
|
|||||||
limit=memory_config.get("max_history", 10)
|
limit=memory_config.get("max_history", 10)
|
||||||
)
|
)
|
||||||
history = [
|
history = [
|
||||||
{"role": msg.role, "content": msg.content}
|
{"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])}
|
||||||
for msg in messages
|
for msg in messages
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -291,7 +291,7 @@ class ConversationService:
|
|||||||
history = [
|
history = [
|
||||||
{
|
{
|
||||||
"role": msg.role,
|
"role": msg.role,
|
||||||
"content": msg.content
|
"content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])
|
||||||
}
|
}
|
||||||
for msg in messages
|
for msg in messages
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -658,7 +658,7 @@ class AgentRunService:
|
|||||||
"total_tokens": 0
|
"total_tokens": 0
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
files=files,
|
files=processed_files,
|
||||||
audio_url=audio_url
|
audio_url=audio_url
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -820,6 +820,7 @@ class AgentRunService:
|
|||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
max_history=memory_config.get("max_history", 10)
|
max_history=memory_config.get("max_history", 10)
|
||||||
)
|
)
|
||||||
|
print(history)
|
||||||
|
|
||||||
# 6. 处理多模态文件
|
# 6. 处理多模态文件
|
||||||
processed_files = None
|
processed_files = None
|
||||||
@@ -904,7 +905,7 @@ class AgentRunService:
|
|||||||
meta_data={
|
meta_data={
|
||||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
|
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
|
||||||
},
|
},
|
||||||
files=files,
|
files=processed_files,
|
||||||
audio_url=stream_audio_url
|
audio_url=stream_audio_url
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1182,12 +1183,7 @@ class AgentRunService:
|
|||||||
"files": []
|
"files": []
|
||||||
}
|
}
|
||||||
if files:
|
if files:
|
||||||
for f in files:
|
human_meta["files"].extend(files)
|
||||||
# url = await MultimodalService(self.db).get_file_url(f)
|
|
||||||
human_meta["files"].append({
|
|
||||||
"type": f.type,
|
|
||||||
"url": f.url
|
|
||||||
})
|
|
||||||
# 保存用户消息
|
# 保存用户消息
|
||||||
conversation_service.add_message(
|
conversation_service.add_message(
|
||||||
conversation_id=conv_uuid,
|
conversation_id=conv_uuid,
|
||||||
|
|||||||
@@ -11,6 +11,8 @@
|
|||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
import uuid
|
import uuid
|
||||||
|
import zipfile
|
||||||
|
import chardet
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
@@ -42,12 +44,10 @@ PDF_MIME = ['application/pdf']
|
|||||||
DOC_MIME = [
|
DOC_MIME = [
|
||||||
'application/msword',
|
'application/msword',
|
||||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||||
'application/zip'
|
|
||||||
]
|
]
|
||||||
XLSX_MIME = [
|
XLSX_MIME = [
|
||||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||||
'application/vnd.ms-excel',
|
'application/vnd.ms-excel',
|
||||||
'application/zip'
|
|
||||||
]
|
]
|
||||||
CSV_MIME = ['text/csv', 'application/csv']
|
CSV_MIME = ['text/csv', 'application/csv']
|
||||||
JSON_MIME = ['application/json']
|
JSON_MIME = ['application/json']
|
||||||
@@ -588,12 +588,12 @@ class MultimodalService:
|
|||||||
file.set_content(file_content)
|
file.set_content(file_content)
|
||||||
file_mime_type = magic.from_buffer(file_content, mime=True)
|
file_mime_type = magic.from_buffer(file_content, mime=True)
|
||||||
if file_mime_type in TEXT_MIME:
|
if file_mime_type in TEXT_MIME:
|
||||||
return file_content.decode("utf-8")
|
return self._decode_text_safe(file_content)
|
||||||
elif file_mime_type in PDF_MIME:
|
elif file_mime_type in PDF_MIME:
|
||||||
return await self._extract_pdf_text(file_content)
|
return await self._extract_pdf_text(file_content)
|
||||||
elif file_mime_type in DOC_MIME and file.file_type.endswith(('docx', 'doc')):
|
elif self._is_word_file(file_content, file_mime_type):
|
||||||
return await self._extract_word_text(file_content)
|
return await self._extract_word_text(file_content)
|
||||||
elif file_mime_type in XLSX_MIME and file.file_type.endswith(("xlsx", "xls")):
|
elif self._is_excel_file(file_content, file_mime_type):
|
||||||
return await self._extract_xlsx_text(file_content)
|
return await self._extract_xlsx_text(file_content)
|
||||||
elif file_mime_type in CSV_MIME:
|
elif file_mime_type in CSV_MIME:
|
||||||
return await self._extract_csv_text(file_content)
|
return await self._extract_csv_text(file_content)
|
||||||
@@ -647,27 +647,89 @@ class MultimodalService:
|
|||||||
logger.error(f"提取 Excel 文本失败: {e}")
|
logger.error(f"提取 Excel 文本失败: {e}")
|
||||||
return f"[Excel 提取失败: {str(e)}]"
|
return f"[Excel 提取失败: {str(e)}]"
|
||||||
|
|
||||||
@staticmethod
|
async def _extract_csv_text(self, file_content: bytes) -> str:
|
||||||
async def _extract_csv_text(file_content: bytes) -> str:
|
|
||||||
"""提取 CSV 文本"""
|
"""提取 CSV 文本"""
|
||||||
try:
|
try:
|
||||||
text = file_content.decode('utf-8-sig')
|
text = self._decode_text_safe(file_content)
|
||||||
reader = csv.reader(io.StringIO(text))
|
reader = csv.reader(io.StringIO(text))
|
||||||
return '\n'.join('\t'.join(row) for row in reader)
|
return '\n'.join('\t'.join(row) for row in reader)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"提取 CSV 文本失败: {e}")
|
logger.error(f"提取 CSV 文本失败: {e}")
|
||||||
return f"[CSV 提取失败: {str(e)}]"
|
return f"[CSV 提取失败: {str(e)}]"
|
||||||
|
|
||||||
@staticmethod
|
async def _extract_json_text(self, file_content: bytes) -> str:
|
||||||
async def _extract_json_text(file_content: bytes) -> str:
|
|
||||||
"""提取 JSON 文本"""
|
"""提取 JSON 文本"""
|
||||||
try:
|
try:
|
||||||
data = json.loads(file_content.decode('utf-8'))
|
text = self._decode_text_safe(file_content)
|
||||||
|
data = json.loads(text)
|
||||||
return json.dumps(data, ensure_ascii=False, indent=2)
|
return json.dumps(data, ensure_ascii=False, indent=2)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"提取 JSON 文本失败: {e}")
|
logger.error(f"提取 JSON 文本失败: {e}")
|
||||||
return f"[JSON 提取失败: {str(e)}]"
|
return f"[JSON 提取失败: {str(e)}]"
|
||||||
|
|
||||||
|
def _is_word_file(self, file_content: bytes, mime_type: str) -> bool:
|
||||||
|
"""判断是不是 Word 文件(doc / docx),不依赖后缀"""
|
||||||
|
# 旧版 .doc
|
||||||
|
if mime_type == 'application/msword':
|
||||||
|
return True
|
||||||
|
|
||||||
|
# 新版 .docx(ZIP 内部包含 word/document.xml)
|
||||||
|
header = file_content[:4]
|
||||||
|
if header == b'PK\x03\x04':
|
||||||
|
try:
|
||||||
|
with zipfile.ZipFile(io.BytesIO(file_content)) as zf:
|
||||||
|
return "word/document.xml" in zf.namelist()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _is_excel_file(self, file_content: bytes, mime_type: str) -> bool:
|
||||||
|
"""判断是不是 Excel 文件(xls / xlsx),不依赖后缀"""
|
||||||
|
# 旧版 .xls
|
||||||
|
if mime_type == 'application/vnd.ms-excel':
|
||||||
|
return True
|
||||||
|
|
||||||
|
# 新版 .xlsx(ZIP 内部包含 xl/workbook.xml)
|
||||||
|
header = file_content[:4]
|
||||||
|
if header == b'PK\x03\x04':
|
||||||
|
try:
|
||||||
|
with zipfile.ZipFile(io.BytesIO(file_content)) as zf:
|
||||||
|
return "xl/workbook.xml" in zf.namelist()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
@staticmethod
def _decode_text_safe(file_content: bytes) -> str:
    """Decode raw file bytes to text on a best-effort basis.

    The encoding guessed by ``chardet`` is tried first, followed by a fixed
    list of fallbacks (utf-8, gbk, gb18030, gb2312, ascii, latin-1). The
    first encoding that decodes without error wins.

    Decoding never raises, but the result is only as good as the guess:
    a wrong guess that still decodes cleanly yields garbled text.

    Args:
        file_content: Raw bytes to decode.

    Returns:
        The decoded text, or an empty string for empty input.
    """
    if not file_content:
        return ""

    # chardet returns {"encoding": None} when it cannot make a guess. The key
    # is present, so a ``.get(key, default)`` default is never used and
    # calling ``.lower()`` on the result would raise AttributeError.
    detected = chardet.detect(file_content).get("encoding") or ""

    candidates = [
        detected.strip().lower(),
        "utf-8",
        "gbk",
        "gb18030",
        "gb2312",
        "ascii",
        "latin-1",
    ]
    for enc in candidates:
        if not enc:
            continue
        try:
            return file_content.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError covers codec names chardet knows but Python lacks.
            continue

    # latin-1 maps every byte value, so the loop above normally returns;
    # this is kept as a last-resort safety net.
    return file_content.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
|
||||||
def get_multimodal_service(db: Session) -> MultimodalService:
|
def get_multimodal_service(db: Session) -> MultimodalService:
|
||||||
"""获取多模态服务实例(依赖注入)"""
|
"""获取多模态服务实例(依赖注入)"""
|
||||||
|
|||||||
@@ -264,7 +264,7 @@ class SharedChatService:
|
|||||||
limit=memory_config.get("max_history", 10)
|
limit=memory_config.get("max_history", 10)
|
||||||
)
|
)
|
||||||
history = [
|
history = [
|
||||||
{"role": msg.role, "content": msg.content}
|
{"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])}
|
||||||
for msg in messages
|
for msg in messages
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -472,7 +472,7 @@ class SharedChatService:
|
|||||||
limit=memory_config.get("max_history", 10)
|
limit=memory_config.get("max_history", 10)
|
||||||
)
|
)
|
||||||
history = [
|
history = [
|
||||||
{"role": msg.role, "content": msg.content}
|
{"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])}
|
||||||
for msg in messages
|
for msg in messages
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user