Feature/episodic memory (#70)

* [feature] episodic memory
* [feature] episodic memory
* [changes] AI review and modify code
* [feature] Explicit memory
* [feature] Explicit memory
2026-01-12 12:27:33 +08:00
parent 2a12be310d
commit 9722601bae
8 changed files with 510 additions and 28 deletions
--- a/api/app/core/memory/models/graph_models.py
+++ b/api/app/core/memory/models/graph_models.py
@@ -405,6 +405,10 @@ class ExtractedEntityNode(Node):
    statement_id: str = Field(..., description="Statement this entity was extracted from")
    entity_type: str = Field(..., description="Type of the entity")
    description: str = Field(..., description="Entity description")
+    example: str = Field(
+        default="", 
+        description="A concise example (around 20 characters) to help understand the entity"
+    )
    aliases: List[str] = Field(
        default_factory=list, 
        description="Entity aliases - alternative names for this entity"
@@ -441,6 +445,12 @@ class ExtractedEntityNode(Node):
        description="Total number of times this node has been accessed"
    )
    
+    # Explicit Memory Classification
+    is_explicit_memory: bool = Field(
+        default=False,
+        description="Whether this entity represents explicit/semantic memory (knowledge, concepts, definitions, theories, principles)"
+    )
+    
    @field_validator('aliases', mode='before')
    @classmethod
    def validate_aliases_field(cls, v): # 字段验证器 自动清理和验证 aliases 字段
--- a/api/app/core/memory/models/triplet_models.py
+++ b/api/app/core/memory/models/triplet_models.py
@@ -38,10 +38,20 @@ class Entity(BaseModel):
    name_embedding: Optional[List[float]] = Field(None, description="Embedding vector for the entity name")
    type: str = Field(..., description="Type/category of the entity")
    description: str = Field(..., description="Description of the entity")
+    example: str = Field(
+        default="",
+        description="A concise example (around 20 characters) to help understand the entity"
+    )
    aliases: List[str] = Field(
        default_factory=list,
        description="Alternative names for this entity (abbreviations, full names, translations, etc.)"
    )
+    
+    # Explicit Memory Classification
+    is_explicit_memory: bool = Field(
+        default=False,
+        description="Whether this entity represents explicit/semantic memory (knowledge, concepts, definitions, theories, principles)"
+    )


 class Triplet(BaseModel):
--- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
+++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
@@ -42,7 +42,6 @@ from app.core.memory.storage_services.extraction_engine.deduplication.two_stage_
 )
 from app.core.memory.storage_services.extraction_engine.knowledge_extraction.embedding_generation import (
    embedding_generation,
-    embedding_generation_all,
    generate_entity_embeddings_from_triplets,
 )

@@ -179,7 +178,7 @@ class ExtractionOrchestrator:
            for dialog in dialog_data_list:
                for chunk in dialog.chunks:
                    all_statements_list.extend(chunk.statements)
-            total_statements = len(all_statements_list)
+            len(all_statements_list)

            # 步骤 2: 并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成
            logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取、情绪提取和嵌入生成")
@@ -201,9 +200,9 @@ class ExtractionOrchestrator:
                        all_entities_list.extend(triplet_info.entities)
                        all_triplets_list.extend(triplet_info.triplets)
            
-            total_entities = len(all_entities_list)
-            total_triplets = len(all_triplets_list)
-            total_temporal = sum(len(temporal_map) for temporal_map in temporal_maps)
+            len(all_entities_list)
+            len(all_triplets_list)
+            sum(len(temporal_map) for temporal_map in temporal_maps)

            # 步骤 3: 生成实体嵌入（依赖三元组提取结果）
            logger.info("步骤 3/6: 生成实体嵌入")
@@ -385,7 +384,7 @@ class ExtractionOrchestrator:
        
        # 用于跟踪已完成的陈述句数量
        completed_statements = 0
-        total_statements = len(all_statements)
+        len(all_statements)

        # 全局并行处理所有陈述句
        async def extract_for_statement(stmt_data, stmt_index):
@@ -497,7 +496,7 @@ class ExtractionOrchestrator:
        
        # 用于跟踪已完成的时间提取数量
        completed_temporal = 0
-        total_temporal_statements = len(all_statements)
+        len(all_statements)

        # 全局并行处理所有陈述句
        async def extract_for_statement(stmt_data, stmt_index):
@@ -1082,10 +1081,12 @@ class ExtractionOrchestrator:
                                    statement_id=statement.id,  # 添加必需的 statement_id 字段
                                    entity_type=getattr(entity, 'type', 'unknown'),  # 使用 type 而不是 entity_type
                                    description=getattr(entity, 'description', ''),  # 添加必需的 description 字段
+                                    example=getattr(entity, 'example', ''),  # 新增：传递示例字段
                                    fact_summary=getattr(entity, 'fact_summary', ''),  # 添加必需的 fact_summary 字段
                                    connect_strength=entity_connect_strength if entity_connect_strength is not None else 'Strong',  # 添加必需的 connect_strength 字段
                                    aliases=getattr(entity, 'aliases', []) or [],  # 传递从三元组提取阶段获取的aliases
                                    name_embedding=getattr(entity, 'name_embedding', None),
+                                    is_explicit_memory=getattr(entity, 'is_explicit_memory', False),  # 新增：传递语义记忆标记
                                    group_id=dialog_data.group_id,
                                    user_id=dialog_data.user_id,
                                    apply_id=dialog_data.apply_id,
--- a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2
@@ -12,7 +12,34 @@ Extract entities and knowledge triplets from the given statement.
 ===Guidelines===

 **Entity Extraction:**
- Extract entities with their types, context-independent descriptions, and aliases
+- Extract entities with their types, context-independent descriptions, **concise examples**, aliases, and semantic memory classification
+- **Semantic Memory Classification (is_explicit_memory):**
+  * Set to `true` if the entity represents **explicit/semantic memory**:
+    - **Concepts:** "Machine Learning", "Photosynthesis", "Democracy", "人工智能", "光合作用", "民主"
+    - **Knowledge:** "Python Programming Language", "Theory of Relativity", "Python编程语言", "相对论"
+    - **Definitions:** "API (Application Programming Interface)", "REST API", "应用程序接口"
+    - **Principles:** "SOLID Principles", "First Law of Thermodynamics", "SOLID原则", "热力学第一定律"
+    - **Theories:** "Evolution Theory", "Quantum Mechanics", "进化论", "量子力学"
+    - **Methods/Techniques:** "Agile Development", "Machine Learning Algorithm", "敏捷开发", "机器学习算法"
+    - **Technical Terms:** "Neural Network", "Database", "神经网络", "数据库"
+  * Set to `false` for:
+    - **People:** "John Smith", "Dr. Wang", "张明", "王博士"
+    - **Organizations:** "Microsoft", "Harvard University", "微软", "哈佛大学"
+    - **Locations:** "Beijing", "Central Park", "北京", "中央公园"
+    - **Events:** "2024 Conference", "Project Meeting", "2024会议", "项目会议"
+    - **Specific objects:** "iPhone 15", "Building A", "A栋"
+- **Example Generation (IMPORTANT for semantic memory entities):**
+  * For entities where `is_explicit_memory=true`, generate a **concise example (around 20 characters)** to help understand the concept
+  * The example should be:
+    - **Specific and concrete**: Use real-world scenarios or applications
+    - **Brief**: Around 20 characters (can be slightly longer if needed for clarity)
+    - **In the same language as the entity name**
+  * Examples:
+    - Entity: "机器学习" → example: "如：用神经网络识别图片中的猫狗"
+    - Entity: "SOLID Principles" → example: "e.g., Single Responsibility, Open-Closed"
+    - Entity: "Photosynthesis" → example: "e.g., plants convert sunlight to energy"
+    - Entity: "人工智能" → example: "如：智能客服、自动驾驶"
+  * For non-semantic entities (`is_explicit_memory=false`), set the example field to an empty string (`""`), as shown in the output examples below
 - **Aliases Extraction (Important):**
  * **CRITICAL: Extract aliases ONLY in the SAME LANGUAGE as the input text**
  * **DO NOT translate or add aliases in different languages**
@@ -84,21 +111,27 @@ Output:
      "name": "I",
      "type": "Person",
      "description": "The user",
-      "aliases": []
+      "example": "",
+      "aliases": [],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 1,
      "name": "Paris",
      "type": "Location",
      "description": "Capital city of France",
-      "aliases": []
+      "example": "",
+      "aliases": [],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 2,
      "name": "Louvre",
      "type": "Location",
      "description": "World-famous museum located in Paris",
-      "aliases": ["Louvre Museum"]
+      "example": "",
+      "aliases": ["Louvre Museum"],
+      "is_explicit_memory": false
    }
  ]
 }
@@ -130,21 +163,27 @@ Output:
      "name": "John Smith",
      "type": "Person",
      "description": "Individual person name",
-      "aliases": []
+      "example": "",
+      "aliases": [],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 1,
      "name": "Google",
      "type": "Organization",
      "description": "American technology company",
-      "aliases": ["Google LLC", "Alphabet Inc."]
+      "example": "",
+      "aliases": ["Google LLC", "Alphabet Inc."],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 2,
      "name": "AI product development",
-      "type": "WorkRole",
+      "type": "Concept",
      "description": "Artificial intelligence product development work",
-      "aliases": []
+      "example": "e.g., developing chatbots, recommendation systems",
+      "aliases": [],
+      "is_explicit_memory": true
    }
  ]
 }
@@ -176,21 +215,27 @@ Output:
      "name": "我",
      "type": "Person",
      "description": "用户本人",
-      "aliases": []
+      "example": "",
+      "aliases": [],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 1,
      "name": "巴黎",
      "type": "Location",
      "description": "法国首都城市",
-      "aliases": []
+      "example": "",
+      "aliases": [],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 2,
      "name": "卢浮宫",
      "type": "Location",
      "description": "位于巴黎的世界著名博物馆",
-      "aliases": []
+      "example": "",
+      "aliases": [],
+      "is_explicit_memory": false
    }
  ]
 }
@@ -222,21 +267,27 @@ Output:
      "name": "张明",
      "type": "Person",
      "description": "个人姓名",
-      "aliases": []
+      "example": "",
+      "aliases": [],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 1,
      "name": "腾讯",
      "type": "Organization",
      "description": "中国科技公司",
-      "aliases": ["腾讯控股", "腾讯公司"]
+      "example": "",
+      "aliases": ["腾讯控股", "腾讯公司"],
+      "is_explicit_memory": false
    },
    {
      "entity_idx": 2,
      "name": "AI产品开发",
-      "type": "WorkRole",
+      "type": "Concept",
      "description": "人工智能产品研发工作",
-      "aliases": []
+      "example": "如：开发智能客服机器人、推荐系统",
+      "aliases": [],
+      "is_explicit_memory": true
    }
  ]
 }
@@ -251,7 +302,9 @@ Output:
      "name": "Tripod",
      "type": "Equipment",
      "description": "Photography equipment accessory",
-      "aliases": ["Camera Tripod"]
+      "example": "",
+      "aliases": ["Camera Tripod"],
+      "is_explicit_memory": false
    }
  ]
 }
@@ -266,7 +319,9 @@ Output:
      "name": "三脚架",
      "type": "Equipment",
      "description": "摄影器材配件",
-      "aliases": ["相机三脚架"]
+      "example": "",
+      "aliases": ["相机三脚架"],
+      "is_explicit_memory": false
    }
  ]
 }