feat(workflow): Document extraction node

This commit is contained in:
Timebomb2018
2026-03-26 16:19:40 +08:00
parent 68489f1b28
commit 1df3fc416a
2 changed files with 4 additions and 7 deletions

View File

@@ -7,17 +7,12 @@ class DocExtractorNodeConfig(BaseNodeConfig):
..., ...,
description="File variable selector, e.g. {{ sys.files }} or {{ node_id.file }}" description="File variable selector, e.g. {{ sys.files }} or {{ node_id.file }}"
) )
output_format: str = Field(
default="text",
description="Output format: 'text' | 'markdown'"
)
class Config: class Config:
json_schema_extra = { json_schema_extra = {
"examples": [ "examples": [
{ {
"file_selector": "{{ sys.files }}", "file_selector": "{{ sys.files }}"
"output_format": "text"
} }
] ]
} }

View File

@@ -29,12 +29,14 @@ def _normalise_files(val: Any) -> list[FileObject]:
if isinstance(val, dict) and val.get("is_file"): if isinstance(val, dict) and val.get("is_file"):
return [FileObject(**val)] return [FileObject(**val)]
if isinstance(val, list): if isinstance(val, list):
result = [] result: list[FileObject] = []
for item in val: for item in val:
if isinstance(item, FileObject): if isinstance(item, FileObject):
result.append(item) result.append(item)
elif isinstance(item, dict) and item.get("is_file"): elif isinstance(item, dict) and item.get("is_file"):
result.append(FileObject(**item)) result.append(FileObject(**item))
else:
logger.warning("Ignoring non-file entry in file list for document extractor: %r", item)
return result return result
return [] return []