Feature/episodic memory (#70)
* [feature] episodic memory * [feature] episodic memory * [changes] AI review and modify code * [feature] Explicit memory * [feature] Explicit memory
This commit is contained in:
@@ -405,6 +405,10 @@ class ExtractedEntityNode(Node):
|
||||
statement_id: str = Field(..., description="Statement this entity was extracted from")
|
||||
entity_type: str = Field(..., description="Type of the entity")
|
||||
description: str = Field(..., description="Entity description")
|
||||
example: str = Field(
|
||||
default="",
|
||||
description="A concise example (around 20 characters) to help understand the entity"
|
||||
)
|
||||
aliases: List[str] = Field(
|
||||
default_factory=list,
|
||||
description="Entity aliases - alternative names for this entity"
|
||||
@@ -441,6 +445,12 @@ class ExtractedEntityNode(Node):
|
||||
description="Total number of times this node has been accessed"
|
||||
)
|
||||
|
||||
# Explicit Memory Classification
|
||||
is_explicit_memory: bool = Field(
|
||||
default=False,
|
||||
description="Whether this entity represents explicit/semantic memory (knowledge, concepts, definitions, theories, principles)"
|
||||
)
|
||||
|
||||
@field_validator('aliases', mode='before')
|
||||
@classmethod
|
||||
def validate_aliases_field(cls, v): # 字段验证器 自动清理和验证 aliases 字段
|
||||
|
||||
@@ -38,10 +38,20 @@ class Entity(BaseModel):
|
||||
name_embedding: Optional[List[float]] = Field(None, description="Embedding vector for the entity name")
|
||||
type: str = Field(..., description="Type/category of the entity")
|
||||
description: str = Field(..., description="Description of the entity")
|
||||
example: str = Field(
|
||||
default="",
|
||||
description="A concise example (around 20 characters) to help understand the entity"
|
||||
)
|
||||
aliases: List[str] = Field(
|
||||
default_factory=list,
|
||||
description="Alternative names for this entity (abbreviations, full names, translations, etc.)"
|
||||
)
|
||||
|
||||
# Explicit Memory Classification
|
||||
is_explicit_memory: bool = Field(
|
||||
default=False,
|
||||
description="Whether this entity represents explicit/semantic memory (knowledge, concepts, definitions, theories, principles)"
|
||||
)
|
||||
|
||||
|
||||
class Triplet(BaseModel):
|
||||
|
||||
@@ -42,7 +42,6 @@ from app.core.memory.storage_services.extraction_engine.deduplication.two_stage_
|
||||
)
|
||||
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.embedding_generation import (
|
||||
embedding_generation,
|
||||
embedding_generation_all,
|
||||
generate_entity_embeddings_from_triplets,
|
||||
)
|
||||
|
||||
@@ -179,7 +178,7 @@ class ExtractionOrchestrator:
|
||||
for dialog in dialog_data_list:
|
||||
for chunk in dialog.chunks:
|
||||
all_statements_list.extend(chunk.statements)
|
||||
total_statements = len(all_statements_list)
|
||||
len(all_statements_list)
|
||||
|
||||
# 步骤 2: 并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成
|
||||
logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取、情绪提取和嵌入生成")
|
||||
@@ -201,9 +200,9 @@ class ExtractionOrchestrator:
|
||||
all_entities_list.extend(triplet_info.entities)
|
||||
all_triplets_list.extend(triplet_info.triplets)
|
||||
|
||||
total_entities = len(all_entities_list)
|
||||
total_triplets = len(all_triplets_list)
|
||||
total_temporal = sum(len(temporal_map) for temporal_map in temporal_maps)
|
||||
len(all_entities_list)
|
||||
len(all_triplets_list)
|
||||
sum(len(temporal_map) for temporal_map in temporal_maps)
|
||||
|
||||
# 步骤 3: 生成实体嵌入(依赖三元组提取结果)
|
||||
logger.info("步骤 3/6: 生成实体嵌入")
|
||||
@@ -385,7 +384,7 @@ class ExtractionOrchestrator:
|
||||
|
||||
# 用于跟踪已完成的陈述句数量
|
||||
completed_statements = 0
|
||||
total_statements = len(all_statements)
|
||||
len(all_statements)
|
||||
|
||||
# 全局并行处理所有陈述句
|
||||
async def extract_for_statement(stmt_data, stmt_index):
|
||||
@@ -497,7 +496,7 @@ class ExtractionOrchestrator:
|
||||
|
||||
# 用于跟踪已完成的时间提取数量
|
||||
completed_temporal = 0
|
||||
total_temporal_statements = len(all_statements)
|
||||
len(all_statements)
|
||||
|
||||
# 全局并行处理所有陈述句
|
||||
async def extract_for_statement(stmt_data, stmt_index):
|
||||
@@ -1082,10 +1081,12 @@ class ExtractionOrchestrator:
|
||||
statement_id=statement.id, # 添加必需的 statement_id 字段
|
||||
entity_type=getattr(entity, 'type', 'unknown'), # 使用 type 而不是 entity_type
|
||||
description=getattr(entity, 'description', ''), # 添加必需的 description 字段
|
||||
example=getattr(entity, 'example', ''), # 新增:传递示例字段
|
||||
fact_summary=getattr(entity, 'fact_summary', ''), # 添加必需的 fact_summary 字段
|
||||
connect_strength=entity_connect_strength if entity_connect_strength is not None else 'Strong', # 添加必需的 connect_strength 字段
|
||||
aliases=getattr(entity, 'aliases', []) or [], # 传递从三元组提取阶段获取的aliases
|
||||
name_embedding=getattr(entity, 'name_embedding', None),
|
||||
is_explicit_memory=getattr(entity, 'is_explicit_memory', False), # 新增:传递语义记忆标记
|
||||
group_id=dialog_data.group_id,
|
||||
user_id=dialog_data.user_id,
|
||||
apply_id=dialog_data.apply_id,
|
||||
|
||||
@@ -12,7 +12,34 @@ Extract entities and knowledge triplets from the given statement.
|
||||
===Guidelines===
|
||||
|
||||
**Entity Extraction:**
|
||||
- Extract entities with their types, context-independent descriptions, and aliases
|
||||
- Extract entities with their types, context-independent descriptions, **concise examples**, aliases, and semantic memory classification
|
||||
- **Semantic Memory Classification (is_explicit_memory):**
|
||||
* Set to `true` if the entity represents **explicit/semantic memory**:
|
||||
- **Concepts:** "Machine Learning", "Photosynthesis", "Democracy", "人工智能", "光合作用", "民主"
|
||||
- **Knowledge:** "Python Programming Language", "Theory of Relativity", "Python编程语言", "相对论"
|
||||
- **Definitions:** "API (Application Programming Interface)", "REST API", "应用程序接口"
|
||||
- **Principles:** "SOLID Principles", "First Law of Thermodynamics", "SOLID原则", "热力学第一定律"
|
||||
- **Theories:** "Evolution Theory", "Quantum Mechanics", "进化论", "量子力学"
|
||||
- **Methods/Techniques:** "Agile Development", "Machine Learning Algorithm", "敏捷开发", "机器学习算法"
|
||||
- **Technical Terms:** "Neural Network", "Database", "神经网络", "数据库"
|
||||
* Set to `false` for:
|
||||
- **People:** "John Smith", "Dr. Wang", "张明", "王博士"
|
||||
- **Organizations:** "Microsoft", "Harvard University", "微软", "哈佛大学"
|
||||
- **Locations:** "Beijing", "Central Park", "北京", "中央公园"
|
||||
- **Events:** "2024 Conference", "Project Meeting", "2024会议", "项目会议"
|
||||
- **Specific objects:** "iPhone 15", "Building A", "iPhone 15", "A栋"
|
||||
- **Example Generation (IMPORTANT for semantic memory entities):**
|
||||
* For entities where `is_explicit_memory=true`, generate a **concise example (around 20 characters)** to help understand the concept
|
||||
* The example should be:
|
||||
- **Specific and concrete**: Use real-world scenarios or applications
|
||||
- **Brief**: Around 20 characters (can be slightly longer if needed for clarity)
|
||||
- **In the same language as the entity name**
|
||||
* Examples:
|
||||
- Entity: "机器学习" → example: "如:用神经网络识别图片中的猫狗"
|
||||
- Entity: "SOLID Principles" → example: "e.g., Single Responsibility, Open-Closed"
|
||||
- Entity: "Photosynthesis" → example: "e.g., plants convert sunlight to energy"
|
||||
- Entity: "人工智能" → example: "如:智能客服、自动驾驶"
|
||||
* For non-semantic entities (`is_explicit_memory=false`), leave the example field as an empty string (`""`)
|
||||
- **Aliases Extraction (Important):**
|
||||
* **CRITICAL: Extract aliases ONLY in the SAME LANGUAGE as the input text**
|
||||
* **DO NOT translate or add aliases in different languages**
|
||||
@@ -84,21 +111,27 @@ Output:
|
||||
"name": "I",
|
||||
"type": "Person",
|
||||
"description": "The user",
|
||||
"aliases": []
|
||||
"example": "",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 1,
|
||||
"name": "Paris",
|
||||
"type": "Location",
|
||||
"description": "Capital city of France",
|
||||
"aliases": []
|
||||
"example": "",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 2,
|
||||
"name": "Louvre",
|
||||
"type": "Location",
|
||||
"description": "World-famous museum located in Paris",
|
||||
"aliases": ["Louvre Museum"]
|
||||
"example": "",
|
||||
"aliases": ["Louvre Museum"],
|
||||
"is_explicit_memory": false
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -130,21 +163,27 @@ Output:
|
||||
"name": "John Smith",
|
||||
"type": "Person",
|
||||
"description": "Individual person name",
|
||||
"aliases": []
|
||||
"example": "",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 1,
|
||||
"name": "Google",
|
||||
"type": "Organization",
|
||||
"description": "American technology company",
|
||||
"aliases": ["Google LLC", "Alphabet Inc."]
|
||||
"example": "",
|
||||
"aliases": ["Google LLC", "Alphabet Inc."],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 2,
|
||||
"name": "AI product development",
|
||||
"type": "WorkRole",
|
||||
"type": "Concept",
|
||||
"description": "Artificial intelligence product development work",
|
||||
"aliases": []
|
||||
"example": "e.g., developing chatbots, recommendation systems",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -176,21 +215,27 @@ Output:
|
||||
"name": "我",
|
||||
"type": "Person",
|
||||
"description": "用户本人",
|
||||
"aliases": []
|
||||
"example": "",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 1,
|
||||
"name": "巴黎",
|
||||
"type": "Location",
|
||||
"description": "法国首都城市",
|
||||
"aliases": []
|
||||
"example": "",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 2,
|
||||
"name": "卢浮宫",
|
||||
"type": "Location",
|
||||
"description": "位于巴黎的世界著名博物馆",
|
||||
"aliases": []
|
||||
"example": "",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": false
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -222,21 +267,27 @@ Output:
|
||||
"name": "张明",
|
||||
"type": "Person",
|
||||
"description": "个人姓名",
|
||||
"aliases": []
|
||||
"example": "",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 1,
|
||||
"name": "腾讯",
|
||||
"type": "Organization",
|
||||
"description": "中国科技公司",
|
||||
"aliases": ["腾讯控股", "腾讯公司"]
|
||||
"example": "",
|
||||
"aliases": ["腾讯控股", "腾讯公司"],
|
||||
"is_explicit_memory": false
|
||||
},
|
||||
{
|
||||
"entity_idx": 2,
|
||||
"name": "AI产品开发",
|
||||
"type": "WorkRole",
|
||||
"type": "Concept",
|
||||
"description": "人工智能产品研发工作",
|
||||
"aliases": []
|
||||
"example": "如:开发智能客服机器人、推荐系统",
|
||||
"aliases": [],
|
||||
"is_explicit_memory": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -251,7 +302,9 @@ Output:
|
||||
"name": "Tripod",
|
||||
"type": "Equipment",
|
||||
"description": "Photography equipment accessory",
|
||||
"aliases": ["Camera Tripod"]
|
||||
"example": "",
|
||||
"aliases": ["Camera Tripod"],
|
||||
"is_explicit_memory": false
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -266,7 +319,9 @@ Output:
|
||||
"name": "三脚架",
|
||||
"type": "Equipment",
|
||||
"description": "摄影器材配件",
|
||||
"aliases": ["相机三脚架"]
|
||||
"example": "",
|
||||
"aliases": ["相机三脚架"],
|
||||
"is_explicit_memory": false
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user