Merge branch 'release/v0.3.0' into develop

* release/v0.3.0: (44 commits) Revert "fix(web): prompt editor" fix(web): prompt editor fix(prompt-optimizer): handle escaped quotes in JSON parsing fix(custom-tools): remove parameter coercion in custom tool base class fix(core): conditionally apply thinking parameters based on model support refactor(custom-tools): coerce query and request body parameters to schema types fix(prompt-optimizer): support list content type in prompt optimizer refactor(memory): unify user placeholder names and harden alias sync logic fix(rag): replace semicolon separators with newlines in Excel parser output fix(web): Compatible with Windows whitespace fix(memory): make PgSQL the single source of truth for user entity aliases refactor(rag): simplify Excel parsing logic and remove redundant chunk_token_num assignment fix(web): Hide error message when workflow node error message equals empty string ci(wechat-notify): add Sourcery summary extraction with Qwen fallback fix(http-request,embedding,naive): tighten form-data validation, reduce truncation length to 8000, and disable chunking for Excel fix(web): adjust the value of End User Name fix(http-request): support array and file variables in form-data files upload fix(web): change http body key name fix(web): header user name fix(web): calculate using the filtered breadcrumbs length ... # Conflicts: # web/src/views/UserMemoryDetail/Neo4j.tsx # web/src/views/UserMemoryDetail/components/EndUserProfile.tsx # web/src/views/UserMemoryDetail/types.ts
2026-04-15 19:31:38 +08:00
parent bef6a50deb e0d7a5a91f
commit c7230659e3
48 changed files with 702 additions and 452 deletions
--- a/api/app/core/memory/agent/utils/write_tools.py
+++ b/api/app/core/memory/agent/utils/write_tools.py
@@ -14,6 +14,7 @@ from dotenv import load_dotenv

 from app.core.logging_config import get_agent_logger
 from app.core.memory.agent.utils.get_dialogs import get_chunked_dialogs
+from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
 from app.core.memory.storage_services.extraction_engine.extraction_orchestrator import ExtractionOrchestrator
 from app.core.memory.storage_services.extraction_engine.knowledge_extraction.memory_summary import \
    memory_summary_generation
@@ -191,15 +192,37 @@ async def write(
            if success:
                logger.info("Successfully saved all data to Neo4j")
                
-                # 使用 Celery 异步任务触发聚类（不阻塞主流程）
                if all_entity_nodes:
+                    end_user_id = all_entity_nodes[0].end_user_id
+
+                    # Neo4j 写入完成后，用 PgSQL 权威 aliases 覆盖 Neo4j 用户实体
+                    try:
+                        from app.repositories.end_user_info_repository import EndUserInfoRepository
+                        if end_user_id:
+                            with get_db_context() as db_session:
+                                info = EndUserInfoRepository(db_session).get_by_end_user_id(uuid.UUID(end_user_id))
+                                pg_aliases = info.aliases if info and info.aliases else []
+                            if info is not None:
+                                # 将 Python 侧占位名集合作为参数传入，避免 Cypher 硬编码
+                                placeholder_names = list(_USER_PLACEHOLDER_NAMES)
+                                await neo4j_connector.execute_query(
+                                    """
+                                    MATCH (e:ExtractedEntity)
+                                    WHERE e.end_user_id = $end_user_id AND toLower(e.name) IN $placeholder_names
+                                    SET e.aliases = $aliases
+                                    """,
+                                    end_user_id=end_user_id, aliases=pg_aliases,
+                                    placeholder_names=placeholder_names,
+                                )
+                                logger.info(f"[AliasSync] Neo4j 用户实体 aliases 已用 PgSQL 权威源覆盖: {pg_aliases}")
+                    except Exception as sync_err:
+                        logger.warning(f"[AliasSync] PgSQL→Neo4j aliases 同步失败（不影响主流程）: {sync_err}")
+
+                    # 使用 Celery 异步任务触发聚类（不阻塞主流程）
                    try:
                        from app.tasks import run_incremental_clustering
                        
-                        end_user_id = all_entity_nodes[0].end_user_id
                        new_entity_ids = [e.id for e in all_entity_nodes]
-                        
-                        # 异步提交 Celery 任务
                        task = run_incremental_clustering.apply_async(
                            kwargs={
                                "end_user_id": end_user_id,
@@ -207,7 +230,6 @@ async def write(
                                "llm_model_id": str(memory_config.llm_model_id) if memory_config.llm_model_id else None,
                                "embedding_model_id": str(memory_config.embedding_model_id) if memory_config.embedding_model_id else None,
                            },
-                            # 设置任务优先级（低优先级，不影响主业务）
                            priority=3,
                        )
                        logger.info(
@@ -215,7 +237,6 @@ async def write(
                            f"task_id={task.id}, end_user_id={end_user_id}, entity_count={len(new_entity_ids)}"
                        )
                    except Exception as e:
-                        # 聚类任务提交失败不影响主流程
                        logger.error(f"[Clustering] 提交聚类任务失败（不影响主流程）: {e}", exc_info=True)
                
                break
--- a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py
+++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py
@@ -82,51 +82,38 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode):
            canonical.connect_strength = next(iter(pair))

    # 别名合并（去重保序，使用标准化工具）
+    # 用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源，去重合并时不修改
    try:
        canonical_name = (getattr(canonical, "name", "") or "").strip()
-        incoming_name = (getattr(ent, "name", "") or "").strip()
-        
-        # 收集所有需要合并的别名
-        all_aliases = []
-        
-        # 1. 添加canonical现有的别名
-        existing = getattr(canonical, "aliases", []) or []
-        all_aliases.extend(existing)
-        
-        # 2. 添加incoming实体的名称（如果不同于canonical的名称）
-        if incoming_name and incoming_name != canonical_name:
-            all_aliases.append(incoming_name)
-        
-        # 3. 添加incoming实体的所有别名
-        incoming = getattr(ent, "aliases", []) or []
-        all_aliases.extend(incoming)
-        
-        # 4. 标准化并去重（优先使用alias_utils工具函数）
-        try:
-            from app.core.memory.utils.alias_utils import normalize_aliases
-            canonical.aliases = normalize_aliases(canonical_name, all_aliases)
-        except Exception:
-            # 如果导入失败，使用增强的去重逻辑
-            seen_normalized = set()
-            unique_aliases = []
+        if canonical_name.lower() not in _USER_PLACEHOLDER_NAMES:
+            incoming_name = (getattr(ent, "name", "") or "").strip()
            
-            for alias in all_aliases:
-                if not alias:
-                    continue
-                
-                alias_stripped = str(alias).strip()
-                if not alias_stripped or alias_stripped == canonical_name:
-                    continue
-                
-                # 标准化：转小写用于去重判断
-                alias_normalized = alias_stripped.lower()
-                
-                if alias_normalized not in seen_normalized:
-                    seen_normalized.add(alias_normalized)
-                    unique_aliases.append(alias_stripped)
+            # 收集所有需要合并的别名，过滤掉用户占位名避免污染非用户实体
+            all_aliases = list(getattr(canonical, "aliases", []) or [])
+            if incoming_name and incoming_name != canonical_name and incoming_name.lower() not in _USER_PLACEHOLDER_NAMES:
+                all_aliases.append(incoming_name)
+            all_aliases.extend(
+                a for a in (getattr(ent, "aliases", []) or [])
+                if a and a.strip().lower() not in _USER_PLACEHOLDER_NAMES
+            )
            
-            # 排序并赋值
-            canonical.aliases = sorted(unique_aliases)
+            try:
+                from app.core.memory.utils.alias_utils import normalize_aliases
+                canonical.aliases = normalize_aliases(canonical_name, all_aliases)
+            except Exception:
+                seen_normalized = set()
+                unique_aliases = []
+                for alias in all_aliases:
+                    if not alias:
+                        continue
+                    alias_stripped = str(alias).strip()
+                    if not alias_stripped or alias_stripped == canonical_name:
+                        continue
+                    alias_normalized = alias_stripped.lower()
+                    if alias_normalized not in seen_normalized:
+                        seen_normalized.add(alias_normalized)
+                        unique_aliases.append(alias_stripped)
+                canonical.aliases = sorted(unique_aliases)
    except Exception:
        pass

@@ -733,66 +720,37 @@ def fuzzy_match(


    def _merge_entities_with_aliases(canonical: ExtractedEntityNode, losing: ExtractedEntityNode):
-        """ 模糊匹配中的实体合并。
+        """模糊匹配中的实体合并（别名部分）。
        
-        合并策略：
-        1. 保留canonical的主名称不变
-        2. 将losing的主名称添加为alias（如果不同）
-        3. 合并两个实体的所有aliases
-        4. 自动去重（case-insensitive）并排序
-        
-        Args:
-            canonical: 规范实体（保留）
-            losing: 被合并实体（删除）
-            
-        Note:
-            使用alias_utils.normalize_aliases进行标准化去重
+        用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源，跳过合并。
        """
-        # 获取规范实体的名称
        canonical_name = (getattr(canonical, "name", "") or "").strip()
+        if canonical_name.lower() in _USER_PLACEHOLDER_NAMES:
+            return
+
        losing_name = (getattr(losing, "name", "") or "").strip()
        
-        # 收集所有需要合并的别名
-        all_aliases = []
-        
-        # 1. 添加canonical现有的别名
-        current_aliases = getattr(canonical, "aliases", []) or []
-        all_aliases.extend(current_aliases)
-        
-        # 2. 添加losing实体的名称（如果不同于canonical的名称）
+        all_aliases = list(getattr(canonical, "aliases", []) or [])
        if losing_name and losing_name != canonical_name:
            all_aliases.append(losing_name)
+        all_aliases.extend(getattr(losing, "aliases", []) or [])
        
-        # 3. 添加losing实体的所有别名
-        losing_aliases = getattr(losing, "aliases", []) or []
-        all_aliases.extend(losing_aliases)
-        
-        # 4. 标准化并去重（使用标准化后的字符串进行去重）
        try:
            from app.core.memory.utils.alias_utils import normalize_aliases
            canonical.aliases = normalize_aliases(canonical_name, all_aliases)
        except Exception:
-            # 如果导入失败，使用增强的去重逻辑
-            # 使用标准化后的字符串作为key进行去重
            seen_normalized = set()
            unique_aliases = []
-            
            for alias in all_aliases:
                if not alias:
                    continue
-                
                alias_stripped = str(alias).strip()
                if not alias_stripped or alias_stripped == canonical_name:
                    continue
-                
-                # 标准化：转小写用于去重判断
                alias_normalized = alias_stripped.lower()
-                
                if alias_normalized not in seen_normalized:
                    seen_normalized.add(alias_normalized)
                    unique_aliases.append(alias_stripped)
-            
-            # 排序并赋值
            canonical.aliases = sorted(unique_aliases)
    
    # ========== 主循环：遍历所有实体对进行模糊匹配 ==========
--- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
+++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
@@ -1391,18 +1391,18 @@ class ExtractionOrchestrator:
        """
        将本轮提取的用户别名同步到 end_user 和 end_user_info 表。

-        注意：此方法在 Neo4j 写入之前调用，因此不能依赖 Neo4j 作为别名的权威数据源。
-        改为直接使用内存中去重后的 entity_nodes 的 aliases，与 PgSQL 已有的 aliases 合并。
+        PgSQL end_user_info.aliases 是用户别名的唯一权威源。
+        此方法仅将本轮 LLM 从对话中新提取的别名增量追加到 PgSQL，
+        不再从 Neo4j 二层去重合并历史别名，避免脏数据反向污染 PgSQL。

        策略：
-        1. 从内存中的 entity_nodes 提取本轮用户别名（current_aliases）
-        2. 从去重后的 entity_nodes 中提取完整别名（含 Neo4j 二层去重合并的历史别名）
-        3. 从 PgSQL end_user_info 读取已有的 aliases（db_aliases）
-        4. 合并 db_aliases + deduped_aliases + current_aliases，去重保序
-        5. 写回 PgSQL
+        1. 从本轮对话原始发言中提取用户别名（current_aliases）
+        2. 从 PgSQL end_user_info 读取已有的 aliases（db_aliases）
+        3. 合并 db_aliases + current_aliases，去重保序
+        4. 写回 PgSQL

        Args:
-            entity_nodes: 去重后的实体节点列表（内存中，含二层去重合并结果）
+            entity_nodes: 去重后的实体节点列表（内存中）
            dialog_data_list: 对话数据列表
        """
        try:
@@ -1418,11 +1418,6 @@ class ExtractionOrchestrator:
            # 1. 提取本轮对话的用户别名（保持 LLM 提取的原始顺序，不排序）
            current_aliases = self._extract_current_aliases(entity_nodes, dialog_data_list)

-            # 1.5 从去重后的 entity_nodes 中提取完整别名
-            # 二层去重会将 Neo4j 中已有的历史别名合并到 entity_nodes 中，
-            # 这里提取出来确保 PgSQL 与 Neo4j 的别名保持同步
-            deduped_aliases = self._extract_deduped_entity_aliases(entity_nodes)
-
            # 1.6 从 Neo4j 查询已有的 AI 助手别名，作为额外的排除源
            # （防止 LLM 未提取出 AI 助手实体时，AI 别名泄漏到用户别名中）
            neo4j_assistant_aliases = await self._fetch_neo4j_assistant_aliases(end_user_id)
@@ -1434,19 +1429,12 @@ class ExtractionOrchestrator:
                ]
                if len(current_aliases) < before_count:
                    logger.info(f"通过 Neo4j AI 助手别名排除了 {before_count - len(current_aliases)} 个误归属别名")
-                # 同样过滤 deduped_aliases
-                deduped_aliases = [
-                    a for a in deduped_aliases
-                    if a.strip().lower() not in neo4j_assistant_aliases
-                ]

-            if not current_aliases and not deduped_aliases:
+            if not current_aliases:
                logger.debug(f"本轮未提取到用户别名，跳过同步: end_user_id={end_user_id}")
                return

            logger.info(f"本轮对话提取的 aliases: {current_aliases}")
-            if deduped_aliases:
-                logger.info(f"去重后实体的完整 aliases（含历史）: {deduped_aliases}")

            # 2. 同步到数据库
            end_user_uuid = uuid.UUID(end_user_id)
@@ -1457,21 +1445,15 @@ class ExtractionOrchestrator:
                    logger.warning(f"未找到 end_user_id={end_user_id} 的用户记录")
                    return

-                # 3. 从 PgSQL 读取已有 aliases 并与本轮合并
+                # 3. 从 PgSQL 读取已有 aliases 并与本轮新增合并
                info = EndUserInfoRepository(db).get_by_end_user_id(end_user_uuid)
                db_aliases = (info.aliases if info and info.aliases else [])
                # 过滤掉占位名称
                db_aliases = [a for a in db_aliases if a.strip().lower() not in self.USER_PLACEHOLDER_NAMES]

-                # 合并：已有 + 去重后完整别名 + 本轮新增，去重保序
+                # 合并：PgSQL 已有 + 本轮新增，去重保序（不再合并 Neo4j 历史别名）
                merged_aliases = list(db_aliases)
                seen_lower = {a.strip().lower() for a in merged_aliases}
-                # 先合并去重后实体的完整别名（含 Neo4j 历史别名）
-                for alias in deduped_aliases:
-                    if alias.strip().lower() not in seen_lower:
-                        merged_aliases.append(alias)
-                        seen_lower.add(alias.strip().lower())
-                # 再合并本轮新提取的别名
                for alias in current_aliases:
                    if alias.strip().lower() not in seen_lower:
                        merged_aliases.append(alias)
@@ -1505,9 +1487,7 @@ class ExtractionOrchestrator:
                        info.aliases = merged_aliases
                        logger.info(f"同步合并后 aliases 到 end_user_info: {merged_aliases}")
                else:
-                    first_alias = current_aliases[0].strip() if current_aliases else (
-                        deduped_aliases[0].strip() if deduped_aliases else ""
-                    )
+                    first_alias = current_aliases[0].strip() if current_aliases else ""
                    # 确保 first_alias 不是占位名称
                    if first_alias and first_alias.lower() not in self.USER_PLACEHOLDER_NAMES:
                        db.add(EndUserInfo(
--- a/api/app/core/models/base.py
+++ b/api/app/core/models/base.py
@@ -112,22 +112,23 @@ class RedBearModelFactory:
                params["stream_usage"] = True
            # 深度思考模式
            is_streaming = bool(config.extra_params.get("streaming"))
-            if is_streaming and not config.is_omni:
-                if provider == ModelProvider.VOLCANO:
-                    # 火山引擎深度思考仅流式调用支持，非流式时不传 thinking 参数
-                    thinking_config: Dict[str, Any] = {
-                        "type": "enabled" if config.deep_thinking else "disabled"
-                    }
-                    if config.deep_thinking and config.thinking_budget_tokens:
-                        thinking_config["budget_tokens"] = config.thinking_budget_tokens
-                    params["extra_body"] = {"thinking": thinking_config}
-                else:
-                    # 始终显式传递 enable_thinking，不支持该参数的模型（如 DeepSeek-R1）会直接忽略
-                    model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
-                    model_kwargs["enable_thinking"] = config.deep_thinking
-                    if config.deep_thinking and config.thinking_budget_tokens:
-                        model_kwargs["thinking_budget"] = config.thinking_budget_tokens
-                    params["model_kwargs"] = model_kwargs
+            if config.support_thinking:
+                if is_streaming and not config.is_omni:
+                    if provider == ModelProvider.VOLCANO:
+                        # 火山引擎深度思考仅流式调用支持，非流式时不传 thinking 参数
+                        thinking_config: Dict[str, Any] = {
+                            "type": "enabled" if config.deep_thinking else "disabled"
+                        }
+                        if config.deep_thinking and config.thinking_budget_tokens:
+                            thinking_config["budget_tokens"] = config.thinking_budget_tokens
+                        params["extra_body"] = {"thinking": thinking_config}
+                    else:
+                        # 始终显式传递 enable_thinking，不支持该参数的模型（如 DeepSeek-R1）会直接忽略
+                        model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
+                        model_kwargs["enable_thinking"] = config.deep_thinking
+                        if config.deep_thinking and config.thinking_budget_tokens:
+                            model_kwargs["thinking_budget"] = config.thinking_budget_tokens
+                        params["model_kwargs"] = model_kwargs
            return params
        elif provider == ModelProvider.DASHSCOPE:
            params = {
--- a/api/app/core/rag/app/naive.py
+++ b/api/app/core/rag/app/naive.py
@@ -672,10 +672,15 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        excel_parser = ExcelParser()
        if parser_config.get("html4excel") and parser_config.get("html4excel").lower() == "true":
            sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
-            parser_config["chunk_token_num"] = 0
        else:
            sections = [(_, "") for _ in excel_parser(binary) if _]
-        parser_config["chunk_token_num"] = 12800
+        callback(0.8, "Finish parsing.")
+        # Excel 每行直接作为一个 chunk，不经过 naive_merge 避免被 delimiter 拆分
+        chunks = [s for s, _ in sections]
+        res.extend(tokenize_chunks(chunks, doc, is_english, None))
+        res.extend(embed_res)
+        res.extend(url_res)
+        return res

    elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
--- a/api/app/core/rag/deepdoc/parser/excel_parser.py
+++ b/api/app/core/rag/deepdoc/parser/excel_parser.py
@@ -232,14 +232,14 @@ class RAGExcelParser:
                        t = str(ti[i].value) if i < len(ti) else ""
                        t += ("：" if t else "") + str(c.value)
                        fields.append(t)
-                    line = "; ".join(fields)
+                    line = "\n".join(fields)
                    if sheetname.lower().find("sheet") < 0:
-                        line += " ——" + sheetname
+                        line += "\n——" + sheetname
                    res.append(line)
            else:
                # 只有表头的情况
                if header_fields:
-                    line = "; ".join(header_fields)
+                    line = "\n".join(header_fields)
                    if sheetname.lower().find("sheet") < 0:
                        line += " ——" + sheetname
                    res.append(line)
--- a/api/app/core/rag/llm/embedding_model.py
+++ b/api/app/core/rag/llm/embedding_model.py
@@ -50,7 +50,9 @@ class OpenAIEmbed(Base):
    def encode(self, texts: list):
        # OpenAI requires batch size <=16
        batch_size = 16
-        texts = [truncate(t, 8191) for t in texts]
+        # Use 8000 instead of 8191 to leave safety margin for tokenizer differences
+        # between cl100k_base (used by truncate) and the actual embedding model
+        texts = [truncate(t, 8000) for t in texts]
        ress = []
        total_tokens = 0
        for i in range(0, len(texts), batch_size):
@@ -63,7 +65,7 @@ class OpenAIEmbed(Base):
        return np.array(ress), total_tokens

    def encode_queries(self, text):
-        res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True})
+        res = self.client.embeddings.create(input=[truncate(text, 8000)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True})
        return np.array(res.data[0].embedding), self.total_token_count(res)


@@ -79,6 +81,7 @@ class LocalAIEmbed(Base):

    def encode(self, texts: list):
        batch_size = 16
+        texts = [truncate(t, 8000) for t in texts]
        ress = []
        for i in range(0, len(texts), batch_size):
            res = self.client.embeddings.create(input=texts[i : i + batch_size], model=self.model_name)
@@ -173,6 +176,7 @@ class XinferenceEmbed(Base):

    def encode(self, texts: list):
        batch_size = 16
+        texts = [truncate(t, 8000) for t in texts]
        ress = []
        total_tokens = 0
        for i in range(0, len(texts), batch_size):
@@ -188,7 +192,7 @@ class XinferenceEmbed(Base):
    def encode_queries(self, text):
        res = None
        try:
-            res = self.client.embeddings.create(input=[text], model=self.model_name)
+            res = self.client.embeddings.create(input=[truncate(text, 8000)], model=self.model_name)
            return np.array(res.data[0].embedding), self.total_token_count(res)
        except Exception as _e:
            log_exception(_e, res)
--- a/api/app/core/workflow/nodes/http_request/config.py
+++ b/api/app/core/workflow/nodes/http_request/config.py
@@ -72,8 +72,9 @@ class HttpContentTypeConfig(BaseModel):
    @classmethod
    def validate_data(cls, v, info):
        content_type = info.data.get("content_type")
-        if content_type == HttpContentType.FROM_DATA and not isinstance(v, HttpFormData):
-            raise ValueError("When content_type is 'form-data', data must be of type HttpFormData")
+        if content_type == HttpContentType.FROM_DATA and (
+                not isinstance(v, list) or not all(isinstance(item, HttpFormData) for item in v)):
+            raise ValueError("When content_type is 'form-data', data must be a list of HttpFormData")
        elif content_type in [HttpContentType.JSON] and not isinstance(v, str):
            raise ValueError("When content_type is JSON, data must be of type str")
        elif content_type in [HttpContentType.WWW_FORM] and not isinstance(v, dict):
--- a/api/app/core/workflow/nodes/http_request/node.py
+++ b/api/app/core/workflow/nodes/http_request/node.py
@@ -260,17 +260,22 @@ class HttpRequestNode(BaseNode):
                ))
            case HttpContentType.FROM_DATA:
                data = {}
-                content["files"] = {}
+                files = []
                for item in self.typed_config.body.data:
+                    key = self._render_template(item.key, variable_pool)
                    if item.type == "text":
-                        data[self._render_template(item.key, variable_pool)] = self._render_template(item.value,
-                                                                                                     variable_pool)
+                        data[key] = self._render_template(item.value, variable_pool)
                    elif item.type == "file":
-                        content["files"][self._render_template(item.key, variable_pool)] = (
-                            uuid.uuid4().hex,
-                            await variable_pool.get_instance(item.value).get_content()
-                        )
+                        file_instance = variable_pool.get_instance(item.value)
+                        if isinstance(file_instance, ArrayVariable):
+                            for v in file_instance.value:
+                                if isinstance(v, FileVariable):
+                                    files.append((key, (uuid.uuid4().hex, await v.get_content())))
+                        elif isinstance(file_instance, FileVariable):
+                            files.append((key, (uuid.uuid4().hex, await file_instance.get_content())))
                content["data"] = data
+                if files:
+                    content["files"] = files
            case HttpContentType.BINARY:
                content["files"] = []
                file_instence = variable_pool.get_instance(self.typed_config.body.data)
--- a/api/app/core/workflow/variable/variable_objects.py
+++ b/api/app/core/workflow/variable/variable_objects.py
@@ -84,7 +84,7 @@ class FileVariable(BaseVariable):
        total_bytes = 0
        chunks = []

-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(follow_redirects=True) as client:
            async with client.stream("GET", self.value.url) as resp:
                resp.raise_for_status()
                async for chunk in resp.aiter_bytes(8192):