From 1df3fc416ade2bd39b165f189b8c1e95e727e712 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 26 Mar 2026 16:19:40 +0800 Subject: [PATCH] feat(workflow): Document extraction node --- api/app/core/workflow/nodes/document_extractor/config.py | 7 +------ api/app/core/workflow/nodes/document_extractor/node.py | 4 +++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/api/app/core/workflow/nodes/document_extractor/config.py b/api/app/core/workflow/nodes/document_extractor/config.py index dd946422..69f7f76d 100644 --- a/api/app/core/workflow/nodes/document_extractor/config.py +++ b/api/app/core/workflow/nodes/document_extractor/config.py @@ -7,17 +7,12 @@ class DocExtractorNodeConfig(BaseNodeConfig): ..., description="File variable selector, e.g. {{ sys.files }} or {{ node_id.file }}" ) - output_format: str = Field( - default="text", - description="Output format: 'text' | 'markdown'" - ) class Config: json_schema_extra = { "examples": [ { - "file_selector": "{{ sys.files }}", - "output_format": "text" + "file_selector": "{{ sys.files }}" } ] } diff --git a/api/app/core/workflow/nodes/document_extractor/node.py b/api/app/core/workflow/nodes/document_extractor/node.py index 050f693f..40641f3c 100644 --- a/api/app/core/workflow/nodes/document_extractor/node.py +++ b/api/app/core/workflow/nodes/document_extractor/node.py @@ -29,12 +29,14 @@ def _normalise_files(val: Any) -> list[FileObject]: if isinstance(val, dict) and val.get("is_file"): return [FileObject(**val)] if isinstance(val, list): - result = [] + result: list[FileObject] = [] for item in val: if isinstance(item, FileObject): result.append(item) elif isinstance(item, dict) and item.get("is_file"): result.append(FileObject(**item)) + else: + logger.warning("Ignoring non-file entry in file list for document extractor: %r", item) return result return []