feat(multimodal): support document image extraction and inline vision processing

Added document image extraction capability for PDF and DOCX files, including page/index metadata and storage integration. Extended `process_files` with `document_image_recognition` flag to conditionally enable vision-based image processing when model supports it. Updated knowledge repository and workflow node logic to enforce status=1 checks. Added PyMuPDF dependency.
This commit is contained in:
Timebomb2018
2026-04-24 11:18:50 +08:00
parent 9fdb952396
commit 767eb5e6f2
13 changed files with 397 additions and 52 deletions

View File

@@ -155,6 +155,10 @@ class FileUploadConfig(BaseModel):
document_allowed_extensions: List[str] = Field(
default=["pdf", "docx", "doc", "xlsx", "xls", "txt", "csv", "json", "md"]
)
document_image_recognition: bool = Field(
default=False,
description="是否识别文档中的图片(需配置视觉模型)"
)
# 视频文件MP4/MOV/AVI/WebM最大 500MB
video_enabled: bool = Field(default=False)
video_max_size_mb: int = Field(default=50)