feat(workflow): Document extraction node
This commit is contained in:
@@ -7,17 +7,12 @@ class DocExtractorNodeConfig(BaseNodeConfig):
|
|||||||
...,
|
...,
|
||||||
description="File variable selector, e.g. {{ sys.files }} or {{ node_id.file }}"
|
description="File variable selector, e.g. {{ sys.files }} or {{ node_id.file }}"
|
||||||
)
|
)
|
||||||
output_format: str = Field(
|
|
||||||
default="text",
|
|
||||||
description="Output format: 'text' | 'markdown'"
|
|
||||||
)
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
json_schema_extra = {
|
json_schema_extra = {
|
||||||
"examples": [
|
"examples": [
|
||||||
{
|
{
|
||||||
"file_selector": "{{ sys.files }}",
|
"file_selector": "{{ sys.files }}"
|
||||||
"output_format": "text"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,12 +29,14 @@ def _normalise_files(val: Any) -> list[FileObject]:
|
|||||||
if isinstance(val, dict) and val.get("is_file"):
|
if isinstance(val, dict) and val.get("is_file"):
|
||||||
return [FileObject(**val)]
|
return [FileObject(**val)]
|
||||||
if isinstance(val, list):
|
if isinstance(val, list):
|
||||||
result = []
|
result: list[FileObject] = []
|
||||||
for item in val:
|
for item in val:
|
||||||
if isinstance(item, FileObject):
|
if isinstance(item, FileObject):
|
||||||
result.append(item)
|
result.append(item)
|
||||||
elif isinstance(item, dict) and item.get("is_file"):
|
elif isinstance(item, dict) and item.get("is_file"):
|
||||||
result.append(FileObject(**item))
|
result.append(FileObject(**item))
|
||||||
|
else:
|
||||||
|
logger.warning("Ignoring non-file entry in file list for document extractor: %r", item)
|
||||||
return result
|
return result
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user