feat(memory, model): update multi-modal memory write and model list API

- Adjust multi-modal memory write behavior for text and visual data
- Mask API keys in model list response to prevent exposure
- Add capability-based filtering to the model list API
2026-03-24 13:54:15 +08:00
parent 2ff81ba101
commit 6bba574ca6
21 changed files with 389 additions and 401 deletions
--- a/api/app/core/memory/agent/utils/get_dialogs.py
+++ b/api/app/core/memory/agent/utils/get_dialogs.py
@@ -11,7 +11,7 @@ async def get_chunked_dialogs(
        chunker_strategy: str = "RecursiveChunker",
        end_user_id: str = "group_1",
        messages: list = None,
-        ref_id: str = "wyl_20251027",
+        ref_id: str = "",
        config_id: str = None
 ) -> List[DialogData]:
    """Generate chunks from structured messages using the specified chunker strategy.
@@ -40,12 +40,13 @@ async def get_chunked_dialogs(

        role = msg['role']
        content = msg['content']
+        files = msg.get("file_content", [])

        if role not in ['user', 'assistant']:
            raise ValueError(f"Message {idx} role must be 'user' or 'assistant', got: {role}")

        if content.strip():
-            conversation_messages.append(ConversationMessage(role=role, msg=content.strip()))
+            conversation_messages.append(ConversationMessage(role=role, msg=content.strip(), files=files))

    if not conversation_messages:
        raise ValueError("Message list cannot be empty after filtering")
--- a/api/app/core/memory/agent/utils/write_tools.py
+++ b/api/app/core/memory/agent/utils/write_tools.py
@@ -5,8 +5,8 @@ This module provides the main write function for executing the knowledge extract
 pipeline. Only MemoryConfig is needed - clients are constructed internally.
 """
 import asyncio
-import uuid
 import time
+import uuid
 from datetime import datetime

 from dotenv import load_dotenv
@@ -19,10 +19,8 @@ from app.core.memory.storage_services.extraction_engine.knowledge_extraction.mem
 from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
 from app.core.memory.utils.log.logging_utils import log_time
 from app.db import get_db_context
-from app.models import MemoryPerceptualModel
 from app.repositories.neo4j.add_edges import add_memory_summary_statement_edges
-from app.repositories.neo4j.add_nodes import add_memory_summary_nodes, add_perceptual_nodes, \
-    add_perceptual_dialogue_edges
+from app.repositories.neo4j.add_nodes import add_memory_summary_nodes
 from app.repositories.neo4j.graph_saver import save_dialog_and_statements_to_neo4j, schedule_clustering_after_write
 from app.repositories.neo4j.neo4j_connector import Neo4jConnector
 from app.schemas.memory_config_schema import MemoryConfig
@@ -36,7 +34,6 @@ async def write(
        end_user_id: str,
        memory_config: MemoryConfig,
        messages: list,
-        file_content: list[MemoryPerceptualModel],
        ref_id: str = "",
        language: str = "zh",
 ) -> None:
@@ -47,7 +44,6 @@ async def write(
        end_user_id: Group identifier
        memory_config: MemoryConfig object containing all configuration
        messages: Structured message list [{"role": "user", "content": "..."}, ...]
-        file_content: mutilmodal message list
        ref_id: Reference ID, defaults to ""
        language: 语言类型 ("zh" 中文, "en" 英文)，默认中文
    """
@@ -142,9 +138,11 @@ async def write(
        all_chunk_nodes,
        all_statement_nodes,
        all_entity_nodes,
+        all_perceptual_nodes,
        all_statement_chunk_edges,
        all_statement_entity_edges,
        all_entity_entity_edges,
+        all_perceptual_edges,
        all_dedup_details,
    ) = await orchestrator.run(chunked_dialogs, is_pilot_run=False)

@@ -169,9 +167,11 @@ async def write(
                chunk_nodes=all_chunk_nodes,
                statement_nodes=all_statement_nodes,
                entity_nodes=all_entity_nodes,
+                perceptual_nodes=all_perceptual_nodes,
                statement_chunk_edges=all_statement_chunk_edges,
                statement_entity_edges=all_statement_entity_edges,
                entity_edges=all_entity_entity_edges,
+                perceptual_edges=all_perceptual_edges,
                connector=neo4j_connector,
            )
            if success:
@@ -230,34 +230,6 @@ async def write(
    finally:
        log_time("Memory Summary (Neo4j)", time.time() - step_start, log_file)

-    # Step 5: Save perceptual memory to Neo4j
-    step_start = time.time()
-    if file_content:
-        try:
-            pc_connector = Neo4jConnector()
-            try:
-                created_ids = await add_perceptual_nodes(
-                    perceptuals=file_content,
-                    connector=pc_connector,
-                    embedder_client=embedder_client,
-                )
-                # 如果有 ref_id，建立感知记忆与对话的关联
-                if ref_id and created_ids:
-                    await add_perceptual_dialogue_edges(
-                        perceptuals=file_content,
-                        dialog_id=ref_id,
-                        connector=pc_connector,
-                    )
-                logger.info(f"Successfully saved {len(created_ids or [])} perceptual memory nodes to Neo4j")
-            finally:
-                try:
-                    await pc_connector.close()
-                except Exception:
-                    pass
-        except Exception as e:
-            logger.error(f"Perceptual memory Neo4j save failed: {e}", exc_info=True)
-    log_time("Perceptual Memory (Neo4j)", time.time() - step_start, log_file)
-
    # Log total pipeline time
    total_time = time.time() - pipeline_start
    log_time("TOTAL PIPELINE TIME", total_time, log_file)
--- a/api/app/core/memory/llm_tools/chunker_client.py
+++ b/api/app/core/memory/llm_tools/chunker_client.py
@@ -1,10 +1,10 @@
-from typing import Any, List
-import re
-import os
 import asyncio
 import json
-import numpy as np
 import logging
+import os
+from typing import Any, List
+
+import numpy as np

 # Fix tokenizer parallelism warning
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -246,6 +246,7 @@ class ChunkerClient:
                            "total_sub_chunks": len(sub_chunks),
                            "chunker_strategy": self.chunker_config.chunker_strategy,
                        },
+                        files=msg.files
                    )
                    dialogue.chunks.append(chunk)
            else:
@@ -258,6 +259,7 @@ class ChunkerClient:
                        "message_role": msg.role,
                        "chunker_strategy": self.chunker_config.chunker_strategy,
                    },
+                    files=msg.files
                )
                dialogue.chunks.append(chunk)

--- a/api/app/core/memory/models/graph_models.py
+++ b/api/app/core/memory/models/graph_models.py
@@ -114,7 +114,7 @@ class Edge(BaseModel):
    end_user_id: str = Field(..., description="The end user ID of the edge.")
    run_id: str = Field(default_factory=lambda: uuid4().hex, description="Unique identifier for this pipeline run.")
    created_at: datetime = Field(..., description="The valid time of the edge from system perspective.")
-    expired_at: Optional[datetime] = Field(None, description="The expired time of the edge from system perspective.")
+    expired_at: Optional[datetime] = Field(default=None, description="The expired time of the edge from system perspective.")


 class ChunkEdge(Edge):
@@ -175,6 +175,12 @@ class EntityEntityEdge(Edge):
        return parse_historical_datetime(v)


+class PerceptualEdge(Edge):
+    """Edge connecting perceptual nodes to their source chunks
+    """
+    pass
+
+
 class Node(BaseModel):
    """Base class for all graph nodes in the knowledge graph.

@@ -555,19 +561,16 @@ class MemorySummaryNode(Node):
    )


-class MutlimodalNode(Node):
+class PerceptualNode(Node):
    """Node representing a multimodal message in the knowledge graph.
-
-    Attributes:
-        dialog_id: ID of the parent dialog
-        message_id: ID of the message
-        metadata: Additional message metadata
-        embedding: Optional embedding vector for the message
    """
-    dialog_id: str = Field(..., description="ID of the parent dialog")
-    message_id: str = Field(..., description="ID of the message")
-    summary: str = Field(..., description="The text content of the message")
-    file_type: str = Field(..., description="Type of the message (e.g., 'text', 'image', 'audio', 'video')")
-    file_path: List[str] = Field(..., description="List of file paths for multimodal content")
-    metadata: dict = Field(default_factory=dict, description="Additional message metadata")
-    embedding: Optional[List[float]] = Field(None, description="Embedding vector for the message")
+    perceptual_type: int
+    file_path: str
+    file_name: str
+    file_ext: str
+    summary: str
+    keywords: list[str]
+    topic: str
+    domain: str
+    file_type: str
+    summary_embedding: list[float] | None
--- a/api/app/core/memory/models/message_models.py
+++ b/api/app/core/memory/models/message_models.py
@@ -30,6 +30,7 @@ class ConversationMessage(BaseModel):
    """
    role: str = Field(..., description="The role of the speaker (e.g., 'user', 'assistant').")
    msg: str = Field(..., description="The text content of the message.")
+    files: list[tuple] = Field(default_factory=list, description="The file content of the message", exclude=True)


 class TemporalValidityRange(BaseModel):
@@ -130,7 +131,8 @@ class Chunk(BaseModel):
    content: str = Field(..., description="The content of the chunk as a string.")
    speaker: Optional[str] = Field(None, description="The speaker/role for this chunk (user/assistant).")
    statements: List[Statement] = Field(default_factory=list, description="A list of statements in the chunk.")
-    chunk_embedding: Optional[List[float]] = Field(None, description="The embedding vector of the chunk.")
+    files: list[tuple] = Field(default_factory=list, description="List of files in the chunk.")
+    chunk_embedding: Optional[List[float]] = Field(default=None, description="The embedding vector of the chunk.")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata for the chunk.")

    @classmethod
--- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
+++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
@@ -31,7 +31,9 @@ from app.core.memory.models.graph_models import (
    ExtractedEntityNode,
    StatementChunkEdge,
    StatementEntityEdge,
-    StatementNode
+    StatementNode,
+    PerceptualEdge,
+    PerceptualNode
 )
 from app.core.memory.models.message_models import DialogData
 from app.core.memory.models.ontology_extraction_models import OntologyTypeList
@@ -170,9 +172,11 @@ class ExtractionOrchestrator:
        list[ChunkNode],
        list[StatementNode],
        list[ExtractedEntityNode],
+        list[PerceptualNode],
        list[StatementChunkEdge],
        list[StatementEntityEdge],
        list[EntityEntityEdge],
+        list[PerceptualEdge],
        dict
    ]:
        """
@@ -259,9 +263,11 @@ class ExtractionOrchestrator:
                chunk_nodes,
                statement_nodes,
                entity_nodes,
+                perceptual_nodes,
                statement_chunk_edges,
                statement_entity_edges,
                entity_entity_edges,
+                perceptual_edges
            ) = await self._create_nodes_and_edges(dialog_data_list)

            # 导出去重前的测试输入文档（试运行和正式模式都需要，用于生成结果汇总）
@@ -275,7 +281,16 @@ class ExtractionOrchestrator:

            # 注意：deduplication 消息已在创建节点和边完成后立即发送

-            result = await self._run_dedup_and_write_summary(
+            (
+                dialogue_nodes,
+                chunk_nodes,
+                statement_nodes,
+                entity_nodes,
+                statement_chunk_edges,
+                statement_entity_edges,
+                entity_entity_edges,
+                dialog_data_list,
+            ) = await self._run_dedup_and_write_summary(
                dialogue_nodes,
                chunk_nodes,
                statement_nodes,
@@ -287,7 +302,18 @@ class ExtractionOrchestrator:
            )

            logger.info(f"知识提取流水线运行完成（{mode_str}）")
-            return result
+            return (
+                dialogue_nodes,
+                chunk_nodes,
+                statement_nodes,
+                entity_nodes,
+                perceptual_nodes,
+                statement_chunk_edges,
+                statement_entity_edges,
+                entity_entity_edges,
+                perceptual_edges,
+                dialog_data_list,
+            )

        except Exception as e:
            logger.error(f"知识提取流水线运行失败: {e}", exc_info=True)
@@ -1000,9 +1026,11 @@ class ExtractionOrchestrator:
        List[ChunkNode],
        List[StatementNode],
        List[ExtractedEntityNode],
+        List[PerceptualNode],
        List[StatementChunkEdge],
        List[StatementEntityEdge],
-        List[EntityEntityEdge]
+        List[EntityEntityEdge],
+        List[PerceptualEdge]
    ]:
        """
        创建图数据库节点和边
@@ -1026,6 +1054,8 @@ class ExtractionOrchestrator:
        statement_chunk_edges = []
        statement_entity_edges = []
        entity_entity_edges = []
+        perceptual_nodes = []
+        perceptual_edges = []

        # 用于去重的集合
        entity_id_set = set()
@@ -1069,6 +1099,46 @@ class ExtractionOrchestrator:
                    metadata=chunk.metadata,
                )
                chunk_nodes.append(chunk_node)
+                logger.error(f"chunk file: {chunk.files}")
+
+                for p, file_type in chunk.files:
+
+                    meta = p.meta_data or {}
+                    content_meta = meta.get("content", {})
+
+                    # 生成 summary embedding（如果有 embedder_client）
+                    summary_embedding = None
+                    if self.embedder_client and p.summary:
+                        try:
+                            summary_embedding = (await self.embedder_client.response([p.summary]))[0]
+                        except Exception as emb_err:
+                            print(f"Failed to embed perceptual summary: {emb_err}")
+
+                    perceptual = PerceptualNode(
+                        name=f"Perceptual_{p.id}",
+                        **{
+                        "id": str(p.id),
+                        "end_user_id": str(p.end_user_id),
+                        "perceptual_type": p.perceptual_type,
+                        "file_path": p.file_path or "",
+                        "file_name": p.file_name or "",
+                        "file_ext": p.file_ext or "",
+                        "summary": p.summary or "",
+                        "keywords": content_meta.get("keywords", []),
+                        "topic": content_meta.get("topic", ""),
+                        "domain": content_meta.get("domain", ""),
+                        "created_at": p.created_time.isoformat() if p.created_time else None,
+                        "file_type": file_type,
+                        "summary_embedding": summary_embedding,
+                    })
+                    perceptual_nodes.append(perceptual)
+                    perceptual_edges.append(PerceptualEdge(
+                        source=perceptual.id,
+                        target=chunk.id,
+                        end_user_id=dialog_data.end_user_id,
+                        run_id=dialog_data.run_id,
+                        created_at=dialog_data.created_at,
+                    ))

                # 处理每个陈述句
                for statement in chunk.statements:
@@ -1248,9 +1318,11 @@ class ExtractionOrchestrator:
            chunk_nodes,
            statement_nodes,
            entity_nodes,
+            perceptual_nodes,
            statement_chunk_edges,
            statement_entity_edges,
            entity_entity_edges,
+            perceptual_edges
        )

    async def _run_dedup_and_write_summary(