feat(multimodal): support document image extraction and inline vision processing
Added image extraction for PDF and DOCX documents, including page/index metadata and storage integration. Extended `process_files` with a `document_image_recognition` flag that conditionally enables vision-based image processing when the model supports it. Updated the knowledge repository and workflow node logic to enforce `status=1` checks. Added the PyMuPDF dependency.
This commit is contained in:
@@ -155,6 +155,10 @@ class FileUploadConfig(BaseModel):
|
||||
document_allowed_extensions: List[str] = Field(
|
||||
default=["pdf", "docx", "doc", "xlsx", "xls", "txt", "csv", "json", "md"]
|
||||
)
|
||||
document_image_recognition: bool = Field(
|
||||
default=False,
|
||||
description="是否识别文档中的图片(需配置视觉模型)"
|
||||
)
|
||||
# Video files: MP4/MOV/AVI/WebM, max 500MB (NOTE(review): video_max_size_mb below defaults to 50 — confirm which limit is intended)
|
||||
video_enabled: bool = Field(default=False)
|
||||
video_max_size_mb: int = Field(default=50)
|
||||
|
||||
Reference in New Issue
Block a user