feat(workflow,app): add MIME-based file handling and HTTP response files

This commit is contained in:
Eternity
2026-03-10 18:28:16 +08:00
committed by GitHub
parent cfd5c1bc93
commit 99e94b3567
10 changed files with 347 additions and 142 deletions

View File

@@ -1,5 +1,6 @@
import asyncio
import logging
import uuid
from abc import ABC, abstractmethod
from datetime import datetime
from functools import cached_property
@@ -643,15 +644,18 @@ class BaseNode(ABC):
return content.content_cache[provider]
with get_db_read() as db:
multimodel_service = MultimodalService(db, provider, is_omni=is_omni)
message = await multimodel_service.process_files(
[FileInput.model_construct(
type=content.type,
url=content.url,
transfer_method=content.transfer_method,
file_type=content.origin_file_type,
upload_file_id=content.file_id
)]
file_obj = FileInput(
type=content.type,
url=content.url,
transfer_method=content.transfer_method,
origin_file_type=content.origin_file_type,
upload_file_id=uuid.UUID(content.file_id) if content.file_id else None,
)
file_obj.set_content(content.get_content())
message = await multimodel_service.process_files(
[file_obj]
)
content.set_content(file_obj.get_content())
if message:
content.content_cache[provider] = message
return message

View File

@@ -4,6 +4,7 @@ from pydantic import Field, BaseModel, field_validator
from app.core.workflow.nodes.base_config import BaseNodeConfig
from app.core.workflow.nodes.enums import HttpRequestMethod, HttpAuthType, HttpContentType, HttpErrorHandle
from app.core.workflow.variable.base_variable import FileObject
class HttpAuthConfig(BaseModel):
@@ -260,6 +261,11 @@ class HttpRequestNodeOutput(BaseModel):
description="Http response headers"
)
files: list[FileObject] = Field(
default_factory=list,
description="List of files",
)
output: str = Field(
default="SUCCESS",
description="HTTP response body",

View File

@@ -1,24 +1,146 @@
import asyncio
import json
import logging
import mimetypes
import uuid
import imghdr
from email.message import Message
from typing import Any, Callable, Coroutine
import httpx
# import filetypes # TODO: File support (Feature)
from httpx import AsyncClient, Response, Timeout
import magic
from app.core.workflow.engine.state_manager import WorkflowState
from app.core.workflow.engine.variable_pool import VariablePool
from app.core.workflow.nodes.base_node import BaseNode
from app.core.workflow.nodes.enums import HttpRequestMethod, HttpErrorHandle, HttpAuthType, HttpContentType
from app.core.workflow.nodes.http_request.config import HttpRequestNodeConfig, HttpRequestNodeOutput
from app.core.workflow.variable.base_variable import VariableType
from app.core.workflow.utils.file_processer import mime_to_file_type
from app.core.workflow.variable.base_variable import VariableType, FileObject
from app.core.workflow.variable.variable_objects import FileVariable, ArrayVariable
from app.schemas import FileType, TransferMethod
# Module-level logger. Named after the module (``__name__``) rather than the
# source file path so that it takes part in the dotted logger hierarchy and
# can be configured through parent loggers.
logger = logging.getLogger(__name__)
class HttpResponse:
    """Wrap an ``httpx.Response`` and classify its payload as text or file.

    The wrapper exposes the response headers as a plain ``dict`` and offers
    heuristics (``is_file``, ``is_image``) plus derived views (``body``,
    ``files``) built on top of them.
    """

    # Number of leading body bytes inspected when sniffing for text content.
    _SNIFF_BYTES = 1024
    # ``application/*`` subtypes that are always treated as text.
    _TEXT_SUBTYPES = ("json", "xml", "javascript", "x-www-form-urlencoded", "yaml", "graphql")
    # Byte sequences whose presence in a decodable sample marks it as text.
    _TEXT_MARKERS = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ")

    def __init__(self, response: httpx.Response):
        self.response = response
        self.headers = dict(response.headers)
        # Memoised result of the ``is_file`` heuristic; ``None`` until computed.
        self._is_file: bool | None = None

    @property
    def content_type(self) -> str:
        """Return the raw ``content-type`` header, or ``""`` when absent."""
        return self.headers.get("content-type", "")

    @property
    def content_disposition(self) -> Message | None:
        """Return the ``content-disposition`` header wrapped in a ``Message``.

        Returns ``None`` when the header is missing or empty. The ``Message``
        wrapper gives access to ``get_content_disposition()`` and
        ``get_filename()`` for parsing.
        """
        raw_value = self.headers.get("content-disposition", "")
        if not raw_value:
            return None
        msg = Message()
        msg["content-disposition"] = raw_value
        return msg

    @property
    def is_file(self) -> bool:
        """Return whether the response looks like a downloadable file.

        The heuristic runs once; every outcome is cached in ``_is_file`` so
        repeated access never re-inspects the headers or body.
        """
        if self._is_file is None:
            self._is_file = self._detect_is_file()
        return self._is_file

    def _detect_is_file(self) -> bool:
        """Run the file-vs-text heuristic without touching the cache."""
        import codecs

        content_type = self.content_type.split(";")[0].strip().lower()

        # An explicit attachment disposition or a filename wins over everything.
        disposition = self.content_disposition
        if disposition is not None:
            if disposition.get_content_disposition() == "attachment" or disposition.get_filename():
                return True

        # NOTE(review): text/csv skips this shortcut, but the mimetypes fallback
        # below only accepts application/image/audio/video main types, so CSV
        # presumably still ends up as non-file -- confirm the intent.
        if content_type.startswith("text/") and "csv" not in content_type:
            return False

        if content_type.startswith("application/"):
            if any(subtype in content_type for subtype in self._TEXT_SUBTYPES):
                return False

            payload = self.response.content
            sample = payload[: self._SNIFF_BYTES]
            try:
                # Decode incrementally so that a multi-byte character cut in
                # half by the sample boundary is not mistaken for binary data.
                # ``final`` is only True when the sample is the whole body.
                codecs.getincrementaldecoder("utf-8")().decode(
                    sample, final=len(payload) <= len(sample)
                )
            except UnicodeDecodeError:
                return True
            if any(marker in sample for marker in self._TEXT_MARKERS):
                return False

        # Round-trip the content type through the mimetypes registry to get a
        # canonical main type for types the registry knows about.
        extension = mimetypes.guess_extension(content_type) or ""
        guessed_type, _ = mimetypes.guess_type("dummy" + extension)
        if guessed_type:
            return guessed_type.split("/")[0] in ("application", "image", "audio", "video")

        return any(media_type in content_type for media_type in ("image/", "audio/", "video/"))

    @property
    def is_image(self) -> bool:
        """Return whether the response is a file whose bytes look like an image."""
        if not self.is_file:
            return False
        # NOTE(review): ``imghdr`` is deprecated since Python 3.11 and removed
        # in 3.13 (PEP 594); this check needs a replacement before upgrading.
        return imghdr.what(None, h=self.response.content) is not None

    @property
    def url(self) -> str:
        """Return the response URL as a string."""
        return str(self.response.url)

    @property
    def body(self) -> str:
        """Return the response text, or a markdown link for file responses.

        File responses yield ``[file](url)``, prefixed with ``!`` when the
        payload is an image.
        """
        if self.is_file:
            return f"{'!' if self.is_image else ''}[file]({self.url})"
        return self.response.text

    @staticmethod
    def get_file_type(file_bytes) -> tuple[FileType | None, str | None]:
        """Sniff ``file_bytes`` with libmagic and map the MIME type to a ``FileType``.

        Returns ``(file_type, mime)`` for image, video, audio and the listed
        document MIME types, and ``(None, None)`` for anything else.
        """
        mime = magic.from_buffer(file_bytes, mime=True)
        if mime.startswith("image"):
            return FileType.IMAGE, mime
        if mime.startswith("video"):
            return FileType.VIDEO, mime
        if mime.startswith("audio"):
            return FileType.AUDIO, mime
        if mime in (
            "application/pdf",
            "application/msword",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/vnd.ms-excel",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "text/plain",
        ):
            return FileType.DOCUMENT, mime
        return None, None

    @property
    def files(self) -> list[FileObject]:
        """Return the response as a one-element ``FileObject`` list, or ``[]``.

        A file object is produced only when the response is classified as a
        file, its sniffed type is recognised by ``get_file_type`` and its MIME
        type is accepted by ``mime_to_file_type``.
        """
        # Skip content sniffing entirely for responses that are not files.
        if not self.is_file:
            return []

        payload = self.response.content
        file_type, mime_type = self.get_file_type(payload)
        origin_file_type = mime_to_file_type(mime_type)
        if not (file_type and origin_file_type):
            return []

        file_obj = FileObject(
            type=file_type,
            url=self.url,
            transfer_method=TransferMethod.REMOTE_URL.value,
            origin_file_type=origin_file_type,
            file_id=None,
            is_file=True
        )
        # NOTE(review): the raw bytes are attached through set_content();
        # confirm they survive any later serialisation of the FileObject.
        file_obj.set_content(payload)
        return [file_obj]
class HttpRequestNode(BaseNode):
"""
HTTP Request Workflow Node.
@@ -44,6 +166,7 @@ class HttpRequestNode(BaseNode):
"body": VariableType.STRING,
"status_code": VariableType.NUMBER,
"headers": VariableType.OBJECT,
"files": VariableType.ARRAY_FILE,
"output": VariableType.STRING
}
@@ -232,10 +355,12 @@ class HttpRequestNode(BaseNode):
)
resp.raise_for_status()
logger.info(f"Node {self.node_id}: HTTP request succeeded")
response = HttpResponse(resp)
return HttpRequestNodeOutput(
body=resp.text,
body=response.body,
status_code=resp.status_code,
headers=resp.headers,
files=response.files
).model_dump()
except (httpx.HTTPStatusError, httpx.RequestError) as e:
logger.error(f"HTTP request node exception: {e}")

View File

@@ -0,0 +1,56 @@
# -*- coding: UTF-8 -*-
# Author: Eternity
# @Email: 1533512157@qq.com
# @Time : 2026/3/10 13:36
# Document MIME types and the internal ``document/*`` label each one maps to.
TRANSFORM_FILE_TYPE = {
    'text/plain': 'document/text',
    'text/markdown': 'document/markdown',
    'text/x-markdown': 'document/x-markdown',
    'application/pdf': 'document/pdf',
    'application/msword': 'document/doc',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'document/docx',
    'application/vnd.ms-powerpoint': 'document/ppt',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'document/pptx',
}

# Every accepted MIME type: the document types above (in the same order),
# followed by the image, video and audio types that are passed through as-is.
ALLOWED_FILE_TYPES = [
    *TRANSFORM_FILE_TYPE,
    # images
    'image/jpg', 'image/jpeg', 'image/png', 'image/gif',
    'image/bmp', 'image/webp', 'image/svg+xml',
    # videos
    'video/mp4', 'video/quicktime', 'video/x-msvideo', 'video/x-matroska',
    'video/webm', 'video/x-flv', 'video/x-ms-wmv',
    # audio
    'audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/aac',
    'audio/flac', 'audio/mp4', 'audio/x-ms-wma', 'audio/x-m4a',
]


def mime_to_file_type(mime_type):
    """Translate a MIME type into the file-type label used by the workflow.

    Returns ``None`` when ``mime_type`` is not in ``ALLOWED_FILE_TYPES``.
    Allowed document types are renamed through ``TRANSFORM_FILE_TYPE``; any
    other allowed type is returned unchanged.
    """
    if mime_type in ALLOWED_FILE_TYPES:
        return TRANSFORM_FILE_TYPE.get(mime_type, mime_type)
    return None

View File

@@ -114,9 +114,16 @@ class FileObject(BaseModel):
file_id: str | None
content_cache: dict = Field(default_factory=dict)
is_file: bool
_byte_content: bytes | None = None
def get_content(self):
return self._byte_content
def set_content(self, byte_content):
self._byte_content = byte_content
class BaseVariable(ABC):
"""Abstract base class for all workflow variables.