feat(memory): propagate temporal validity fields through extraction pipeline

- Add valid_at/invalid_at passthrough in triplet extraction prompt (both zh/en)
- Propagate temporal_validity to EntityEntityEdge in ExtractionOrchestrator
- Use coalesce() for valid_at/invalid_at in Neo4j Cypher queries so NULL values are stored as empty strings
- Fix workspace_id/config_id UUID parsing in read_memory config resolution
- Downgrade verbose extraction pipeline logs from info to debug
- Remove UUID and short API key patterns from sensitive filter to reduce false positives
- Standardize log message format (consistent spacing around "=", use the end_user_id label)
- Fix misindented TODO comment in write_pipeline.py
This commit is contained in:
lanceyq
2026-04-28 21:26:32 +08:00
parent 1f0c88a5f0
commit 4af9b02815
22 changed files with 229 additions and 192 deletions

View File

@@ -33,8 +33,8 @@ SET s += {
temporal_info: statement.temporal_info,
created_at: statement.created_at,
expired_at: statement.expired_at,
valid_at: statement.valid_at,
invalid_at: statement.invalid_at,
valid_at: coalesce(statement.valid_at, ""),
invalid_at: coalesce(statement.invalid_at, ""),
statement_embedding: statement.statement_embedding,
relevence_info: statement.relevence_info,
importance_score: statement.importance_score,
@@ -152,8 +152,8 @@ SET r.predicate = rel.predicate,
r.statement_id = rel.statement_id,
r.value = rel.value,
r.statement = rel.statement,
r.valid_at = rel.valid_at,
r.invalid_at = rel.invalid_at,
r.valid_at = coalesce(rel.valid_at, ""),
r.invalid_at = coalesce(rel.invalid_at, ""),
r.created_at = rel.created_at,
r.expired_at = rel.expired_at,
r.run_id = rel.run_id,

View File

@@ -260,7 +260,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(DIALOGUE_NODE_SAVE, dialogues=dialogue_data)
dialogue_uuids = [record["uuid"] async for record in result]
results['dialogues'] = dialogue_uuids
logger.info(f"Dialogues saved to Neo4j with UUIDs: {dialogue_uuids}")
logger.debug(f"Dialogues saved to Neo4j with UUIDs: {dialogue_uuids}")
# 2. Save all chunk nodes in batch
if chunk_nodes:
@@ -269,7 +269,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(CHUNK_NODE_SAVE, chunks=chunk_data)
chunk_uuids = [record["uuid"] async for record in result]
results['chunks'] = chunk_uuids
logger.info(f"Successfully saved {len(chunk_uuids)} chunk nodes to Neo4j")
logger.debug(f"Successfully saved {len(chunk_uuids)} chunk nodes to Neo4j")
if perceptual_nodes:
from app.repositories.neo4j.cypher_queries import PERCEPTUAL_NODE_SAVE
@@ -277,7 +277,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(PERCEPTUAL_NODE_SAVE, perceptuals=perceptual_data)
perceptual_uuids = [record["uuid"] async for record in result]
results["perceptuals"] = perceptual_uuids
logger.info(f"Successfully saved {len(perceptual_uuids)} perceptual nodes to Neo4j")
logger.debug(f"Successfully saved {len(perceptual_uuids)} perceptual nodes to Neo4j")
# 3. Save all statement nodes in batch
if statement_nodes:
@@ -286,7 +286,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(STATEMENT_NODE_SAVE, statements=statement_data)
statement_uuids = [record["uuid"] async for record in result]
results['statements'] = statement_uuids
logger.info(f"Successfully saved {len(statement_uuids)} statement nodes to Neo4j")
logger.debug(f"Successfully saved {len(statement_uuids)} statement nodes to Neo4j")
# 4. Save entities
if entity_nodes:
@@ -295,7 +295,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(EXTRACTED_ENTITY_NODE_SAVE, entities=entity_data)
entity_uuids = [record["uuid"] async for record in result]
results['entities'] = entity_uuids
logger.info(f"Successfully saved {len(entity_uuids)} entity nodes to Neo4j")
logger.debug(f"Successfully saved {len(entity_uuids)} entity nodes to Neo4j")
# 5. Create entity relationships
if entity_edges:
@@ -320,7 +320,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(ENTITY_RELATIONSHIP_SAVE, relationships=relationship_data)
rel_uuids = [record["uuid"] async for record in result]
results['entity_relationships'] = rel_uuids
logger.info(f"Successfully saved {len(rel_uuids)} entity relationships to Neo4j")
logger.debug(f"Successfully saved {len(rel_uuids)} entity relationships to Neo4j")
# 6. Save statement-chunk edges
if statement_chunk_edges:
@@ -339,7 +339,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(CHUNK_STATEMENT_EDGE_SAVE, chunk_statement_edges=sc_edge_data)
sc_uuids = [record["uuid"] async for record in result]
results['statement_chunk_edges'] = sc_uuids
logger.info(f"Successfully saved {len(sc_uuids)} statement-chunk edges to Neo4j")
logger.debug(f"Successfully saved {len(sc_uuids)} statement-chunk edges to Neo4j")
# 7. Save statement-entity edges
if statement_entity_edges:
@@ -358,7 +358,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(STATEMENT_ENTITY_EDGE_SAVE, relationships=se_edge_data)
se_uuids = [record["uuid"] async for record in result]
results['statement_entity_edges'] = se_uuids
logger.info(f"Successfully saved {len(se_uuids)} statement-entity edges to Neo4j")
logger.debug(f"Successfully saved {len(se_uuids)} statement-entity edges to Neo4j")
if perceptual_edges:
from app.repositories.neo4j.cypher_queries import PERCEPTUAL_CHUNK_EDGE_SAVE
@@ -374,7 +374,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(PERCEPTUAL_CHUNK_EDGE_SAVE, edges=perceptual_edge_data)
perceptual_edges_uuids = [record["uuid"] async for record in result]
results['perceptual_chunk_edges'] = perceptual_edges_uuids
logger.info(f"Successfully saved {len(perceptual_edges_uuids)} perceptual-chunk edges to Neo4j")
logger.debug(f"Successfully saved {len(perceptual_edges_uuids)} perceptual-chunk edges to Neo4j")
# 8. Save assistant original nodes
if assistant_original_nodes:
@@ -383,7 +383,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(ASSISTANT_ORIGINAL_NODE_SAVE, originals=original_data)
original_uuids = [record["uuid"] async for record in result]
results['assistant_originals'] = original_uuids
logger.info(f"Successfully saved {len(original_uuids)} assistant original nodes to Neo4j")
logger.debug(f"Successfully saved {len(original_uuids)} assistant original nodes to Neo4j")
# 9. Save assistant pruned nodes
if assistant_pruned_nodes:
@@ -392,7 +392,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(ASSISTANT_PRUNED_NODE_SAVE, pruneds=pruned_data)
pruned_uuids = [record["uuid"] async for record in result]
results['assistant_pruneds'] = pruned_uuids
logger.info(f"Successfully saved {len(pruned_uuids)} assistant pruned nodes to Neo4j")
logger.debug(f"Successfully saved {len(pruned_uuids)} assistant pruned nodes to Neo4j")
# 10. Save PRUNED_TO edges (Original → Pruned)
if assistant_pruned_edges:
@@ -408,7 +408,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(ASSISTANT_PRUNED_EDGE_SAVE, edges=edge_data)
pruned_edge_uuids = [record["uuid"] async for record in result]
results['assistant_pruned_edges'] = pruned_edge_uuids
logger.info(f"Successfully saved {len(pruned_edge_uuids)} PRUNED_TO edges to Neo4j")
logger.debug(f"Successfully saved {len(pruned_edge_uuids)} PRUNED_TO edges to Neo4j")
# 11. Save BELONGS_TO_DIALOG edges (Original → Dialogue)
if assistant_dialog_edges:
@@ -423,7 +423,7 @@ async def save_dialog_and_statements_to_neo4j(
result = await tx.run(ASSISTANT_DIALOG_EDGE_SAVE, edges=edge_data)
dialog_edge_uuids = [record["uuid"] async for record in result]
results['assistant_dialog_edges'] = dialog_edge_uuids
logger.info(f"Successfully saved {len(dialog_edge_uuids)} BELONGS_TO_DIALOG edges to Neo4j")
logger.debug(f"Successfully saved {len(dialog_edge_uuids)} BELONGS_TO_DIALOG edges to Neo4j")
return results

View File

@@ -227,7 +227,7 @@ class OntologyClassRepository:
).all()
logger.info(
f"Found {len(classes)} ontology classes in scene {scene_id}"
f"Found {len(classes)} ontology classes in scene_id: {scene_id}"
)
return classes