feat(multimodal): support document image extraction and inline vision processing

Added document image extraction for PDF and DOCX files, including page/index metadata and storage integration. Extended `process_files` with a `document_image_recognition` flag that conditionally enables vision-based image processing when the model supports it. Updated the knowledge repository and workflow node logic to enforce status=1 checks. Added the PyMuPDF dependency.
2026-04-24 11:18:50 +08:00
parent 9fdb952396
commit 767eb5e6f2
13 changed files with 397 additions and 52 deletions
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -147,7 +147,8 @@ dependencies = [
    "modelscope>=1.34.0",
    "python-magic>=0.4.14; sys_platform == 'linux' or sys_platform == 'darwin'",
    "python-magic-bin>=0.4.14; sys_platform=='win32'",
-    "volcengine-python-sdk[ark]==5.0.19"
+    "volcengine-python-sdk[ark]==5.0.19",
+    "pymupdf>=1.27.2.2",
 ]

 [tool.pytest.ini_options]