Feature/episodic memory (#64)

* [feature]episodic memory * [feature]episodic memory * [changes]AI review and modify code
2026-01-10 16:35:32 +08:00
parent 7d28717030
commit 539821454a
10 changed files with 825 additions and 9 deletions
--- a/api/app/core/memory/models/graph_models.py
+++ b/api/app/core/memory/models/graph_models.py
@@ -474,6 +474,8 @@ class MemorySummaryNode(Node):
        dialog_id: ID of the parent dialog
        chunk_ids: List of chunk IDs used to generate this summary
        content: Summary text content
+        name: Title/name of the memory summary (generated by LLM, used as title in API)
+        memory_type: Type/category of the episodic memory (e.g., Conversation, Project/Work, Learning, Decision, Important Event)
        summary_embedding: Optional embedding vector for the summary
        metadata: Additional metadata for the summary
        config_id: Configuration ID used to process this summary
@@ -492,6 +494,7 @@ class MemorySummaryNode(Node):
    dialog_id: str = Field(..., description="ID of the parent dialog")
    chunk_ids: List[str] = Field(default_factory=list, description="List of chunk IDs used in the summary")
    content: str = Field(..., description="Summary text content")
+    memory_type: Optional[str] = Field(None, description="Type/category of the episodic memory")
    summary_embedding: Optional[List[float]] = Field(None, description="Embedding vector for the summary")
    metadata: dict = Field(default_factory=dict, description="Additional metadata for the summary")
    config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this summary (integer or string)")
--- a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/memory_summary.py
+++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/memory_summary.py
@@ -59,13 +59,28 @@ async def _process_chunk_summary(
        )
        summary_text = structured.summary.strip()

+        # Generate title and type for the summary
+        title = None
+        episodic_type = None
+        try:
+            from app.services.user_memory_service import UserMemoryService
+            title, episodic_type = await UserMemoryService.generate_title_and_type_for_summary(
+                content=summary_text,
+                end_user_id=dialog.group_id
+            )
+            logger.info(f"Generated title and type for MemorySummary: title={title}, type={episodic_type}")
+        except Exception as e:
+            logger.warning(f"Failed to generate title and type for chunk {chunk.id}: {e}")
+            # Continue without title and type
+
        # Embed the summary
        embedding = (await embedder.response([summary_text]))[0]

        # Build node per chunk
+        # Note: title is stored in the 'name' field, type is stored in 'memory_type' field
        node = MemorySummaryNode(
            id=uuid4().hex,
-            name=f"MemorySummaryChunk_{chunk.id}",
+            name=title if title else f"MemorySummaryChunk_{chunk.id}",
            group_id=dialog.group_id,
            user_id=dialog.user_id,
            apply_id=dialog.apply_id,
@@ -75,6 +90,7 @@ async def _process_chunk_summary(
            dialog_id=dialog.id,
            chunk_ids=[chunk.id],
            content=summary_text,
+            memory_type=episodic_type,
            summary_embedding=embedding,
            metadata={"ref_id": dialog.ref_id},
            config_id=dialog.config_id,  # 添加 config_id
--- a/api/app/core/memory/storage_services/forgetting_engine/forgetting_strategy.py
+++ b/api/app/core/memory/storage_services/forgetting_engine/forgetting_strategy.py
@@ -247,6 +247,9 @@ class ForgettingStrategy:
        entity_activation = entity_node['entity_activation']
        entity_importance = entity_node['entity_importance']
        
+        # 获取 group_id（从 statement 或 entity 节点）
+        group_id = statement_node.get('group_id') or entity_node.get('group_id')
+        
        # 生成摘要内容
        summary_text = await self._generate_summary(
            statement_text=statement_text,
@@ -256,6 +259,19 @@ class ForgettingStrategy:
            db=db
        )
        
+        # 生成标题和类型（使用LLM）
+        from app.services.user_memory_service import UserMemoryService
+        try:
+            title, episodic_type = await UserMemoryService.generate_title_and_type_for_summary(
+                content=summary_text,
+                end_user_id=group_id
+            )
+            logger.info(f"成功为MemorySummary生成标题和类型: title={title}, type={episodic_type}")
+        except Exception as e:
+            logger.error(f"生成标题和类型失败，使用默认值: {str(e)}")
+            title = "未命名"
+            episodic_type = "其他"
+        
        # 计算继承的激活值和重要性（取较高值）
        inherited_activation = max(statement_activation, entity_activation)
        inherited_importance = max(statement_importance, entity_importance)
@@ -268,9 +284,6 @@ class ForgettingStrategy:
        import uuid
        summary_id = f"summary_{uuid.uuid4().hex[:16]}"
        
-        # 获取 group_id（从 statement 或 entity 节点）
-        group_id = statement_node.get('group_id') or entity_node.get('group_id')
-        
        # 使用事务创建 MemorySummary 并删除原节点
        async def merge_transaction(tx, **params):
            """事务函数：创建摘要节点并删除原节点"""
@@ -287,6 +300,8 @@ class ForgettingStrategy:
            CREATE (ms:MemorySummary {
                id: $summary_id,
                summary: $summary_text,
+                name: $title,
+                memory_type: $episodic_type,
                original_statement_id: $statement_id,
                original_entity_id: $entity_id,
                activation_value: $inherited_activation,
@@ -386,6 +401,8 @@ class ForgettingStrategy:
        params = {
            'summary_id': summary_id,
            'summary_text': summary_text,
+            'title': title,
+            'episodic_type': episodic_type,
            'statement_id': statement_id,
            'entity_id': entity_id,
            'inherited_activation': inherited_activation,
--- a/api/app/core/memory/utils/prompt/prompt_utils.py
+++ b/api/app/core/memory/utils/prompt/prompt_utils.py
@@ -386,3 +386,26 @@ async def render_memory_insight_prompt(
    })
    
    return rendered_prompt
+
+
+async def render_episodic_title_and_type_prompt(content: str) -> str:
+    """
+    Renders the episodic title and type classification prompt using the episodic_type_classification.jinja2 template.
+
+    Args:
+        content: The content of the episodic memory summary to analyze
+
+    Returns:
+        Rendered prompt content as string
+    """
+    template = prompt_env.get_template("episodic_type_classification.jinja2")
+    rendered_prompt = template.render(content=content)
+    
+    # Record the rendered prompt in the prompt log
+    log_prompt_rendering('episodic title and type classification', rendered_prompt)
+    # Optionally log template-rendering metadata (only the content length, not the text itself)
+    log_template_rendering('episodic_type_classification.jinja2', {
+        'content_len': len(content) if content else 0
+    })
+    
+    return rendered_prompt
--- a/api/app/core/memory/utils/prompt/prompts/episodic_type_classification.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/episodic_type_classification.jinja2
@@ -0,0 +1,57 @@
+=== Task ===
+Generate a concise title and classify the episodic memory into the most appropriate category.
+
+=== Requirements ===
+- Extract a clear, concise title (10-20 characters) that captures the core content
+- Classify into exactly one category based on the primary theme
+- Be specific and avoid ambiguity
+- Output must be valid JSON conforming to the schema below
+
+=== Input ===
+{{ content }}
+
+=== Category Definitions ===
+
+1. **conversation**: Daily communication, chat, discussion, and social interactions
+   - Keywords: chat, communication, discussion, dialogue, exchange
+
+2. **project_work**: Work-related tasks, projects, meetings, and collaboration
+   - Keywords: project, task, work, meeting, collaboration, business, client
+
+3. **learning**: Acquiring new knowledge, skill development, reading, and research
+   - Keywords: learning, reading, research, knowledge, skill, course, training
+
+4. **decision**: Making important decisions, choices, and planning
+   - Keywords: decision, choice, planning, consideration, evaluation, weighing
+
+5. **important_event**: Major events, milestones, and special experiences
+   - Keywords: important, major, milestone, special, memorable, celebration
+
+=== Analysis Steps ===
+1. Read the episodic memory content carefully
+2. Identify the core theme and context
+3. Extract a concise title
+4. Compare against category definitions and keywords
+5. Select the best matching category
+6. If multiple categories apply, choose the primary one
+
+=== Output Schema ===
+**CRITICAL JSON FORMATTING REQUIREMENTS:**
+1. Use only standard ASCII double quotes (") for JSON structure
+2. Escape any quotation marks within string values using backslashes (\")
+3. Ensure all JSON strings are properly closed and comma-separated
+4. Do not include line breaks within JSON string values
+
+Return only a JSON object with title and type fields:
+{
+  "title": "Generated title here",
+  "type": "Category type here"
+}
+
+The type field must be exactly one of:
+- conversation
+- project_work
+- learning
+- decision
+- important_event
+