refactor(memory): enhance extraction ontology and add assistant pruning graph support
- Expand entity type ontology with detailed definitions, examples, and notes (merged types: 地点设施, 物品设备, 产品服务, 软件平台, 角色职业, 知识能力, 偏好习惯目标, 称呼别名, 智能体)
- Add relation ontology taxonomy with 15 predicate categories and usage rules
- Strengthen reference resolution rules: resolve pronouns before extraction, skip unresolvable references entirely
- Add guidelines to avoid extracting abstract propositions, emotions, and low-value entities (effort/reward/success patterns)
- Add 7 new extraction examples covering edge cases
- Add AssistantOriginal/AssistantPruned node models and graph persistence (PRUNED_TO and BELONGS_TO_DIALOG edges, Neo4j indexes and constraints)
- Add graph_build_step.py for building graph nodes/edges from DialogData
- Update write_pipeline.py to pass assistant pruning nodes/edges to graph saver
- Update data_pruning.py with related preprocessing changes
This commit is contained in:
@@ -24,6 +24,10 @@ from app.core.memory.models.graph_models import (
|
||||
EntityEntityEdge,
|
||||
PerceptualNode,
|
||||
PerceptualEdge,
|
||||
AssistantOriginalNode,
|
||||
AssistantPrunedNode,
|
||||
AssistantPrunedEdge,
|
||||
AssistantDialogEdge,
|
||||
)
|
||||
import logging
|
||||
|
||||
@@ -166,6 +170,10 @@ async def save_dialog_and_statements_to_neo4j(
|
||||
statement_entity_edges: List[StatementEntityEdge],
|
||||
perceptual_edges: List[PerceptualEdge],
|
||||
connector: Neo4jConnector,
|
||||
assistant_original_nodes: Optional[List[AssistantOriginalNode]] = None,
|
||||
assistant_pruned_nodes: Optional[List[AssistantPrunedNode]] = None,
|
||||
assistant_pruned_edges: Optional[List[AssistantPrunedEdge]] = None,
|
||||
assistant_dialog_edges: Optional[List[AssistantDialogEdge]] = None,
|
||||
) -> bool:
|
||||
"""Save dialogue nodes, chunk nodes, statement nodes, entities, and all relationships to Neo4j using graph models.
|
||||
|
||||
@@ -368,6 +376,55 @@ async def save_dialog_and_statements_to_neo4j(
|
||||
results['perceptual_chunk_edges'] = perceptual_edges_uuids
|
||||
logger.info(f"Successfully saved {len(perceptual_edges_uuids)} perceptual-chunk edges to Neo4j")
|
||||
|
||||
# 8. Save assistant original nodes
|
||||
if assistant_original_nodes:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_ORIGINAL_NODE_SAVE
|
||||
original_data = [node.model_dump() for node in assistant_original_nodes]
|
||||
result = await tx.run(ASSISTANT_ORIGINAL_NODE_SAVE, originals=original_data)
|
||||
original_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_originals'] = original_uuids
|
||||
logger.info(f"Successfully saved {len(original_uuids)} assistant original nodes to Neo4j")
|
||||
|
||||
# 9. Save assistant pruned nodes
|
||||
if assistant_pruned_nodes:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_PRUNED_NODE_SAVE
|
||||
pruned_data = [node.model_dump() for node in assistant_pruned_nodes]
|
||||
result = await tx.run(ASSISTANT_PRUNED_NODE_SAVE, pruneds=pruned_data)
|
||||
pruned_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_pruneds'] = pruned_uuids
|
||||
logger.info(f"Successfully saved {len(pruned_uuids)} assistant pruned nodes to Neo4j")
|
||||
|
||||
# 10. Save PRUNED_TO edges (Original → Pruned)
|
||||
if assistant_pruned_edges:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_PRUNED_EDGE_SAVE
|
||||
edge_data = [{
|
||||
"source": edge.source,
|
||||
"target": edge.target,
|
||||
"pair_id": edge.pair_id,
|
||||
"end_user_id": edge.end_user_id,
|
||||
"run_id": edge.run_id,
|
||||
"created_at": edge.created_at.isoformat() if edge.created_at else None,
|
||||
} for edge in assistant_pruned_edges]
|
||||
result = await tx.run(ASSISTANT_PRUNED_EDGE_SAVE, edges=edge_data)
|
||||
pruned_edge_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_pruned_edges'] = pruned_edge_uuids
|
||||
logger.info(f"Successfully saved {len(pruned_edge_uuids)} PRUNED_TO edges to Neo4j")
|
||||
|
||||
# 11. Save BELONGS_TO_DIALOG edges (Original → Dialogue)
|
||||
if assistant_dialog_edges:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_DIALOG_EDGE_SAVE
|
||||
edge_data = [{
|
||||
"source": edge.source,
|
||||
"target": edge.target,
|
||||
"end_user_id": edge.end_user_id,
|
||||
"run_id": edge.run_id,
|
||||
"created_at": edge.created_at.isoformat() if edge.created_at else None,
|
||||
} for edge in assistant_dialog_edges]
|
||||
result = await tx.run(ASSISTANT_DIALOG_EDGE_SAVE, edges=edge_data)
|
||||
dialog_edge_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_dialog_edges'] = dialog_edge_uuids
|
||||
logger.info(f"Successfully saved {len(dialog_edge_uuids)} BELONGS_TO_DIALOG edges to Neo4j")
|
||||
|
||||
return results
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user