Remove the deprecated expired_at field from all graph models, Neo4j Cypher queries, repositories, and pipeline code. Replace with dialog_at on StatementNode to track the original dialog timestamp. - Strip expired_at from DialogueNode, ChunkNode, StatementNode, ExtractedEntityNode, edges, and all Cypher queries - Add dialog_at to MessageItem schema and propagate through extraction and graph build steps - Extract emotion/metadata async submission from WritePipeline into a generic _submit_celery_task helper - Add post_store_dedup_and_alias_merge Celery task for async alias merging and second-layer dedup after Neo4j write - Switch pytest async backend from anyio to asyncio_mode=auto
1737 lines
60 KiB
Python
1737 lines
60 KiB
Python
from app.core.memory.enums import Neo4jNodeType
|
||
|
||
DIALOGUE_NODE_SAVE = """
|
||
UNWIND $dialogues AS dialogue
|
||
MERGE (n:Dialogue {id: dialogue.id})
|
||
SET n.uuid = coalesce(n.uuid, dialogue.id),
|
||
n.end_user_id = dialogue.end_user_id,
|
||
n.run_id = dialogue.run_id,
|
||
n.ref_id = dialogue.ref_id,
|
||
n.created_at = dialogue.created_at,
|
||
n.content = dialogue.content,
|
||
n.dialog_embedding = dialogue.dialog_embedding
|
||
RETURN n.id AS uuid
|
||
"""
|
||
|
||
STATEMENT_NODE_SAVE = """
|
||
UNWIND $statements AS statement
|
||
MERGE (s:Statement {id: statement.id})
|
||
SET s += {
|
||
id: statement.id,
|
||
run_id: statement.run_id,
|
||
chunk_id: statement.chunk_id,
|
||
end_user_id: statement.end_user_id,
|
||
stmt_type: statement.stmt_type,
|
||
statement: statement.statement,
|
||
speaker: statement.speaker,
|
||
emotion_intensity: statement.emotion_intensity,
|
||
emotion_target: statement.emotion_target,
|
||
emotion_subject: statement.emotion_subject,
|
||
emotion_type: statement.emotion_type,
|
||
emotion_keywords: statement.emotion_keywords,
|
||
temporal_info: statement.temporal_info,
|
||
created_at: statement.created_at,
|
||
valid_at: coalesce(statement.valid_at, ""),
|
||
invalid_at: coalesce(statement.invalid_at, ""),
|
||
statement_embedding: statement.statement_embedding,
|
||
relevence_info: statement.relevence_info,
|
||
importance_score: statement.importance_score,
|
||
activation_value: statement.activation_value,
|
||
access_history: statement.access_history,
|
||
last_access_time: statement.last_access_time,
|
||
access_count: statement.access_count,
|
||
dialog_at: statement.dialog_at
|
||
}
|
||
RETURN s.id AS uuid
|
||
"""
|
||
|
||
STATEMENT_EMOTION_UPDATE = """
|
||
UNWIND $items AS item
|
||
MATCH (s:Statement {id: item.statement_id})
|
||
SET s.emotion_type = item.emotion_type,
|
||
s.emotion_intensity = item.emotion_intensity,
|
||
s.emotion_keywords = item.emotion_keywords
|
||
RETURN s.id AS uuid
|
||
"""
|
||
|
||
CHUNK_NODE_SAVE = """
|
||
UNWIND $chunks AS chunk
|
||
MERGE (c:Chunk {id: chunk.id})
|
||
SET c += {
|
||
id: chunk.id,
|
||
name: chunk.name,
|
||
end_user_id: chunk.end_user_id,
|
||
run_id: chunk.run_id,
|
||
created_at: chunk.created_at,
|
||
dialog_id: chunk.dialog_id,
|
||
content: chunk.content,
|
||
speaker: chunk.speaker,
|
||
chunk_embedding: chunk.chunk_embedding,
|
||
sequence_number: chunk.sequence_number,
|
||
start_index: chunk.start_index,
|
||
end_index: chunk.end_index
|
||
}
|
||
RETURN c.id AS uuid
|
||
"""
|
||
# bug修改点
|
||
|
||
EXTRACTED_ENTITY_NODE_SAVE = """
|
||
// Upsert entity nodes safely: preserve existing non-empty fields when incoming is empty
|
||
UNWIND $entities AS entity
|
||
MERGE (e:ExtractedEntity {id: entity.id})
|
||
SET e.name = CASE WHEN entity.name IS NOT NULL AND entity.name <> '' THEN entity.name ELSE e.name END,
|
||
e.end_user_id = CASE WHEN entity.end_user_id IS NOT NULL AND entity.end_user_id <> '' THEN entity.end_user_id ELSE e.end_user_id END,
|
||
e.run_id = CASE WHEN entity.run_id IS NOT NULL AND entity.run_id <> '' THEN entity.run_id ELSE e.run_id END,
|
||
e.created_at = CASE
|
||
WHEN entity.created_at IS NOT NULL AND (e.created_at IS NULL OR entity.created_at < e.created_at)
|
||
THEN entity.created_at ELSE e.created_at END,
|
||
e.entity_idx = CASE WHEN e.entity_idx IS NULL OR e.entity_idx = 0 THEN entity.entity_idx ELSE e.entity_idx END,
|
||
e.entity_type = CASE WHEN entity.entity_type IS NOT NULL AND entity.entity_type <> '' THEN entity.entity_type ELSE e.entity_type END,
|
||
e.type_description = CASE WHEN entity.type_description IS NOT NULL AND entity.type_description <> '' THEN entity.type_description ELSE coalesce(e.type_description, '') END,
|
||
e.description = CASE
|
||
WHEN entity.description IS NOT NULL AND entity.description <> ''
|
||
AND (e.description IS NULL OR size(e.description) = 0 OR size(entity.description) > size(e.description))
|
||
THEN entity.description ELSE e.description END,
|
||
e.example = CASE
|
||
WHEN entity.example IS NOT NULL AND entity.example <> ''
|
||
THEN entity.example
|
||
ELSE coalesce(e.example, '')
|
||
END,
|
||
e.statement_id = CASE WHEN entity.statement_id IS NOT NULL AND entity.statement_id <> '' THEN entity.statement_id ELSE e.statement_id END,
|
||
e.aliases = CASE
|
||
// 用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源,知识抽取完全不写入
|
||
WHEN entity.name IN ['用户', '我', 'User', 'I'] THEN e.aliases
|
||
WHEN entity.aliases IS NOT NULL AND size(entity.aliases) > 0
|
||
THEN CASE
|
||
WHEN e.aliases IS NULL THEN entity.aliases
|
||
ELSE reduce(acc = [], alias IN (e.aliases + entity.aliases) |
|
||
CASE WHEN alias IN acc THEN acc ELSE acc + alias END)
|
||
END
|
||
ELSE e.aliases END,
|
||
e.name_embedding = CASE
|
||
WHEN entity.name_embedding IS NOT NULL AND size(entity.name_embedding) > 0 THEN entity.name_embedding
|
||
ELSE e.name_embedding END,
|
||
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||
// e.fact_summary = CASE
|
||
// WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> ''
|
||
// AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary))
|
||
// THEN entity.fact_summary ELSE e.fact_summary END,
|
||
e.connect_strength = CASE
|
||
WHEN entity.connect_strength IS NULL OR entity.connect_strength = '' THEN e.connect_strength
|
||
ELSE CASE
|
||
WHEN e.connect_strength = 'strong' AND entity.connect_strength = 'weak' THEN 'both'
|
||
WHEN e.connect_strength = 'weak' AND entity.connect_strength = 'strong' THEN 'both'
|
||
WHEN e.connect_strength IS NULL OR e.connect_strength = '' THEN entity.connect_strength
|
||
ELSE e.connect_strength
|
||
END
|
||
END,
|
||
e.importance_score = CASE WHEN entity.importance_score IS NOT NULL THEN entity.importance_score ELSE coalesce(e.importance_score, 0.5) END,
|
||
e.activation_value = CASE WHEN entity.activation_value IS NOT NULL THEN entity.activation_value ELSE e.activation_value END,
|
||
e.access_history = CASE WHEN entity.access_history IS NOT NULL THEN entity.access_history ELSE coalesce(e.access_history, []) END,
|
||
e.last_access_time = CASE WHEN entity.last_access_time IS NOT NULL THEN entity.last_access_time ELSE e.last_access_time END,
|
||
e.access_count = CASE WHEN entity.access_count IS NOT NULL THEN entity.access_count ELSE coalesce(e.access_count, 0) END,
|
||
e.is_explicit_memory = CASE WHEN entity.is_explicit_memory IS NOT NULL THEN entity.is_explicit_memory ELSE coalesce(e.is_explicit_memory, false) END
|
||
RETURN e.id AS uuid
|
||
"""
|
||
|
||
# ── 元数据增量回写:将 LLM 提取的元数据追加到用户实体节点 ──
|
||
ENTITY_METADATA_UPDATE = """
|
||
MATCH (e:ExtractedEntity {id: $entity_id})
|
||
SET e.core_facts = CASE
|
||
WHEN $core_facts IS NOT NULL AND size($core_facts) > 0
|
||
THEN reduce(acc = coalesce(e.core_facts, []), item IN $core_facts |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.core_facts, []) END,
|
||
e.traits = CASE
|
||
WHEN $traits IS NOT NULL AND size($traits) > 0
|
||
THEN reduce(acc = coalesce(e.traits, []), item IN $traits |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.traits, []) END,
|
||
e.relations = CASE
|
||
WHEN $relations IS NOT NULL AND size($relations) > 0
|
||
THEN reduce(acc = coalesce(e.relations, []), item IN $relations |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.relations, []) END,
|
||
e.goals = CASE
|
||
WHEN $goals IS NOT NULL AND size($goals) > 0
|
||
THEN reduce(acc = coalesce(e.goals, []), item IN $goals |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.goals, []) END,
|
||
e.interests = CASE
|
||
WHEN $interests IS NOT NULL AND size($interests) > 0
|
||
THEN reduce(acc = coalesce(e.interests, []), item IN $interests |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.interests, []) END,
|
||
e.beliefs_or_stances = CASE
|
||
WHEN $beliefs_or_stances IS NOT NULL AND size($beliefs_or_stances) > 0
|
||
THEN reduce(acc = coalesce(e.beliefs_or_stances, []), item IN $beliefs_or_stances |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.beliefs_or_stances, []) END,
|
||
e.anchors = CASE
|
||
WHEN $anchors IS NOT NULL AND size($anchors) > 0
|
||
THEN reduce(acc = coalesce(e.anchors, []), item IN $anchors |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.anchors, []) END,
|
||
e.events = CASE
|
||
WHEN $events IS NOT NULL AND size($events) > 0
|
||
THEN reduce(acc = coalesce(e.events, []), item IN $events |
|
||
CASE WHEN item IN acc THEN acc ELSE acc + item END)
|
||
ELSE coalesce(e.events, []) END
|
||
RETURN e.id AS uuid
|
||
"""
|
||
|
||
# ── 查询用户实体已有的元数据(供增量提取时去重) ──
|
||
ENTITY_METADATA_QUERY = """
|
||
MATCH (e:ExtractedEntity {id: $entity_id})
|
||
RETURN e.core_facts AS core_facts,
|
||
e.traits AS traits,
|
||
e.relations AS relations,
|
||
e.goals AS goals,
|
||
e.interests AS interests,
|
||
e.beliefs_or_stances AS beliefs_or_stances,
|
||
e.anchors AS anchors,
|
||
e.events AS events
|
||
"""
|
||
|
||
# Add back ENTITY_RELATIONSHIP_SAVE to be used by graph_saver.save_entities_and_relationships
|
||
ENTITY_RELATIONSHIP_SAVE = """
|
||
UNWIND $relationships AS rel
|
||
// Match entities by stable id within end_user_id, do not constrain by run_id
|
||
MATCH (subject:ExtractedEntity {id: rel.source_id, end_user_id: rel.end_user_id})
|
||
MATCH (object:ExtractedEntity {id: rel.target_id, end_user_id: rel.end_user_id})
|
||
// Avoid duplicate edges across runs for the same endpoints
|
||
MERGE (subject)-[r:EXTRACTED_RELATIONSHIP]->(object)
|
||
SET r.predicate = rel.predicate,
|
||
r.predicate_description = rel.predicate_description,
|
||
r.statement_id = rel.statement_id,
|
||
r.value = rel.value,
|
||
r.statement = rel.statement,
|
||
r.valid_at = coalesce(rel.valid_at, ""),
|
||
r.invalid_at = coalesce(rel.invalid_at, ""),
|
||
r.created_at = rel.created_at,
|
||
r.run_id = rel.run_id,
|
||
r.end_user_id = rel.end_user_id
|
||
RETURN elementId(r) AS uuid
|
||
"""
|
||
|
||
# 在 Neo4j 5及后续版本中,id() 函数已被标记为弃用,用elementId() 函数替代
|
||
|
||
# 保存弱关系实体,设置 e.is_weak = true;不维护 e.relations 聚合字段
|
||
WEAK_ENTITY_NODE_SAVE = """
|
||
UNWIND $weak_entities AS entity
|
||
MERGE (e:ExtractedEntity {id: entity.id, run_id: entity.run_id})
|
||
SET e += {
|
||
name: entity.name,
|
||
end_user_id: entity.end_user_id,
|
||
run_id: entity.run_id,
|
||
description: entity.description,
|
||
chunk_id: entity.chunk_id,
|
||
dialog_id: entity.dialog_id
|
||
}
|
||
// Independent weak flag,仅标记弱关系,不再维护 relations 聚合字段
|
||
SET e.is_weak = true
|
||
RETURN e.id AS id
|
||
"""
|
||
|
||
# 为强关系三元组中的主语和宾语创建/更新实体节点,仅设置 e.is_strong = true,不维护 e.relations 字段
|
||
SAVE_STRONG_TRIPLE_ENTITIES = """
|
||
UNWIND $items AS item
|
||
MERGE (s:ExtractedEntity {id: item.source_id, run_id: item.run_id})
|
||
SET s += {name: item.subject, end_user_id: item.end_user_id, run_id: item.run_id}
|
||
// Independent strong flag
|
||
SET s.is_strong = true
|
||
MERGE (o:ExtractedEntity {id: item.target_id, run_id: item.run_id})
|
||
SET o += {name: item.object, end_user_id: item.end_user_id, run_id: item.run_id}
|
||
// Independent strong flag
|
||
SET o.is_strong = true
|
||
"""
|
||
|
||
|
||
DIALOGUE_STATEMENT_EDGE_SAVE = """
|
||
UNWIND $dialogue_statement_edges AS edge
|
||
// 支持按 uuid 或 ref_id 连接到 Dialogue,避免因来源 ID 不一致而断链
|
||
MATCH (dialogue:Dialogue)
|
||
WHERE dialogue.uuid = edge.source OR dialogue.ref_id = edge.source
|
||
MATCH (statement:Statement {id: edge.target})
|
||
// 仅按端点去重,关系属性可更新
|
||
MERGE (dialogue)-[e:MENTIONS]->(statement)
|
||
SET e.uuid = edge.id,
|
||
e.end_user_id = edge.end_user_id,
|
||
e.created_at = edge.created_at
|
||
RETURN e.uuid AS uuid
|
||
"""
|
||
|
||
# 在 Neo4j 5及后续版本中,id() 函数已被标记为弃用,用elementId() 函数替代
|
||
|
||
|
||
CHUNK_STATEMENT_EDGE_SAVE = """
|
||
UNWIND $chunk_statement_edges AS edge
|
||
MATCH (statement:Statement {id: edge.source, run_id: edge.run_id})
|
||
MATCH (chunk:Chunk {id: edge.target, run_id: edge.run_id})
|
||
MERGE (chunk)-[e:CONTAINS {id: edge.id}]->(statement)
|
||
SET e.end_user_id = edge.end_user_id,
|
||
e.run_id = edge.run_id,
|
||
e.created_at = edge.created_at
|
||
RETURN e.id AS uuid
|
||
"""
|
||
|
||
STATEMENT_ENTITY_EDGE_SAVE = """
|
||
UNWIND $relationships AS rel
|
||
// Statement nodes are per-run; keep run_id constraint on statements
|
||
MATCH (statement:Statement {id: rel.source, run_id: rel.run_id})
|
||
// Entities are shared across runs within end_user_id; do not constrain by run_id
|
||
MATCH (entity:ExtractedEntity {id: rel.target, end_user_id: rel.end_user_id})
|
||
// Avoid duplicate edges across runs for same endpoints
|
||
MERGE (statement)-[r:REFERENCES_ENTITY]->(entity)
|
||
SET r.end_user_id = rel.end_user_id,
|
||
r.run_id = rel.run_id,
|
||
r.created_at = rel.created_at,
|
||
r.connect_strength = rel.connect_strength
|
||
RETURN elementId(r) AS uuid
|
||
"""
|
||
|
||
ENTITY_EMBEDDING_SEARCH = """
|
||
CALL db.index.vector.queryNodes('entity_embedding_index', $limit * 100, $embedding)
|
||
YIELD node AS e, score
|
||
WHERE e.name_embedding IS NOT NULL
|
||
AND ($end_user_id IS NULL OR e.end_user_id = $end_user_id)
|
||
RETURN e.id AS id,
|
||
e.name AS name,
|
||
e.end_user_id AS end_user_id,
|
||
e.entity_type AS entity_type,
|
||
COALESCE(e.activation_value, e.importance_score, 0.5) AS activation_value,
|
||
COALESCE(e.importance_score, 0.5) AS importance_score,
|
||
e.last_access_time AS last_access_time,
|
||
COALESCE(e.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
# Embedding-based search: cosine similarity on Statement.statement_embedding
|
||
STATEMENT_EMBEDDING_SEARCH = """
|
||
CALL db.index.vector.queryNodes('statement_embedding_index', $limit * 100, $embedding)
|
||
YIELD node AS s, score
|
||
WHERE s.statement_embedding IS NOT NULL
|
||
AND ($end_user_id IS NULL OR s.end_user_id = $end_user_id)
|
||
RETURN s.id AS id,
|
||
s.statement AS statement,
|
||
s.end_user_id AS end_user_id,
|
||
s.chunk_id AS chunk_id,
|
||
s.created_at AS created_at,
|
||
s.valid_at AS valid_at,
|
||
s.invalid_at AS invalid_at,
|
||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||
s.last_access_time AS last_access_time,
|
||
COALESCE(s.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
# Embedding-based search: cosine similarity on Chunk.chunk_embedding
|
||
CHUNK_EMBEDDING_SEARCH = """
|
||
CALL db.index.vector.queryNodes('chunk_embedding_index', $limit * 100, $embedding)
|
||
YIELD node AS c, score
|
||
WHERE c.chunk_embedding IS NOT NULL
|
||
AND ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||
RETURN c.id AS chunk_id,
|
||
c.end_user_id AS end_user_id,
|
||
c.content AS content,
|
||
c.dialog_id AS dialog_id,
|
||
COALESCE(c.activation_value, 0.5) AS activation_value,
|
||
c.last_access_time AS last_access_time,
|
||
COALESCE(c.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_BY_KEYWORD = """
|
||
CALL db.index.fulltext.queryNodes("statementsFulltext", $query) YIELD node AS s, score
|
||
WHERE ($end_user_id IS NULL OR s.end_user_id = $end_user_id)
|
||
OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
|
||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity)
|
||
RETURN s.id AS id,
|
||
s.statement AS statement,
|
||
s.end_user_id AS end_user_id,
|
||
s.chunk_id AS chunk_id,
|
||
s.created_at AS created_at,
|
||
s.valid_at AS valid_at,
|
||
s.invalid_at AS invalid_at,
|
||
c.id AS chunk_id_from_rel,
|
||
collect(DISTINCT e.id) AS entity_ids,
|
||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||
s.last_access_time AS last_access_time,
|
||
COALESCE(s.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
# 查询实体名称包含指定字符串的实体
|
||
SEARCH_ENTITIES_BY_NAME = """
|
||
CALL db.index.fulltext.queryNodes("entitiesFulltext", $query) YIELD node AS e, score
|
||
WHERE ($end_user_id IS NULL OR e.end_user_id = $end_user_id)
|
||
OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||
OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
|
||
RETURN e.id AS id,
|
||
e.name AS name,
|
||
e.end_user_id AS end_user_id,
|
||
e.entity_type AS entity_type,
|
||
e.created_at AS created_at,
|
||
e.entity_idx AS entity_idx,
|
||
e.statement_id AS statement_id,
|
||
e.description AS description,
|
||
e.aliases AS aliases,
|
||
e.name_embedding AS name_embedding,
|
||
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||
// COALESCE(e.fact_summary, '') AS fact_summary,
|
||
e.connect_strength AS connect_strength,
|
||
collect(DISTINCT s.id) AS statement_ids,
|
||
collect(DISTINCT c.id) AS chunk_ids,
|
||
COALESCE(e.activation_value, e.importance_score, 0.5) AS activation_value,
|
||
COALESCE(e.importance_score, 0.5) AS importance_score,
|
||
e.last_access_time AS last_access_time,
|
||
COALESCE(e.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_ENTITIES_BY_NAME_OR_ALIAS = """
|
||
CALL db.index.fulltext.queryNodes("entitiesFulltext", $query) YIELD node AS e, score
|
||
WHERE ($end_user_id IS NULL OR e.end_user_id = $end_user_id)
|
||
WITH e, score
|
||
With collect({entity: e, score: score}) AS fulltextResults
|
||
|
||
OPTIONAL MATCH (ae:ExtractedEntity)
|
||
WHERE ($end_user_id IS NULL OR ae.end_user_id = $end_user_id)
|
||
AND ae.aliases IS NOT NULL
|
||
AND ANY(alias IN ae.aliases WHERE toLower(alias) CONTAINS toLower($query))
|
||
WITH fulltextResults, collect(ae) AS aliasEntities
|
||
|
||
UNWIND (fulltextResults + [x IN aliasEntities | {entity: x, score:
|
||
CASE
|
||
WHEN ANY(alias IN x.aliases WHERE toLower(alias) = toLower($query)) THEN 1.0
|
||
WHEN ANY(alias IN x.aliases WHERE toLower(alias) STARTS WITH toLower($query)) THEN 0.9
|
||
ELSE 0.8
|
||
END
|
||
}]) AS row
|
||
WITH row.entity AS e, row.score AS score
|
||
WITH DISTINCT e, MAX(score) AS score
|
||
OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||
OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
|
||
RETURN e.id AS id,
|
||
e.name AS name,
|
||
e.end_user_id AS end_user_id,
|
||
e.entity_type AS entity_type,
|
||
e.created_at AS created_at,
|
||
e.entity_idx AS entity_idx,
|
||
e.statement_id AS statement_id,
|
||
e.description AS description,
|
||
e.aliases AS aliases,
|
||
e.name_embedding AS name_embedding,
|
||
e.connect_strength AS connect_strength,
|
||
collect(DISTINCT s.id) AS statement_ids,
|
||
collect(DISTINCT c.id) AS chunk_ids,
|
||
COALESCE(e.activation_value, e.importance_score, 0.5) AS activation_value,
|
||
COALESCE(e.importance_score, 0.5) AS importance_score,
|
||
e.last_access_time AS last_access_time,
|
||
COALESCE(e.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
|
||
SEARCH_CHUNKS_BY_CONTENT = """
|
||
CALL db.index.fulltext.queryNodes("chunksFulltext", $query) YIELD node AS c, score
|
||
WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||
OPTIONAL MATCH (c)-[:CONTAINS]->(s:Statement)
|
||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity)
|
||
RETURN c.id AS chunk_id,
|
||
c.end_user_id AS end_user_id,
|
||
c.content AS content,
|
||
c.dialog_id AS dialog_id,
|
||
c.sequence_number AS sequence_number,
|
||
collect(DISTINCT s.id) AS statement_ids,
|
||
collect(DISTINCT e.id) AS entity_ids,
|
||
COALESCE(c.activation_value, 0.5) AS activation_value,
|
||
c.last_access_time AS last_access_time,
|
||
COALESCE(c.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
# 以下是关于第二层去重消歧与数据库进行检索的语句,在最近的规划中不再使用
|
||
|
||
# # 同组group_id下按“精确名字或别名+可选类型一致”来检索
|
||
# SECOND_LAYER_CANDIDATE_MATCH_BATCH = """
|
||
# UNWIND $rows AS row
|
||
# MATCH (e:ExtractedEntity)
|
||
# WHERE e.group_id = row.group_id
|
||
# AND (toLower(e.name) = toLower(row.name) OR any(a IN e.aliases WHERE toLower(a) = toLower(row.name)))
|
||
# AND (row.entity_type IS NULL OR e.entity_type = row.entity_type)
|
||
# RETURN row.id AS incoming_id,
|
||
# e.id AS id,
|
||
# e.name AS name,
|
||
# e.group_id AS group_id,
|
||
# e.entity_idx AS entity_idx,
|
||
# e.entity_type AS entity_type,
|
||
# e.description AS description,
|
||
# e.statement_id AS statement_id,
|
||
# e.aliases AS aliases,
|
||
# e.name_embedding AS name_embedding,
|
||
# e.fact_summary AS fact_summary,
|
||
# e.connect_strength AS connect_strength,
|
||
# e.created_at AS created_at,
|
||
# e.expired_at AS expired_at
|
||
# """
|
||
# # 同组group_id下按name contains召回补充
|
||
# SECOND_LAYER_CANDIDATE_CONTAINS_BATCH = """
|
||
# UNWIND $rows AS row
|
||
# MATCH (e:ExtractedEntity)
|
||
# WHERE e.group_id = row.group_id
|
||
# AND toLower(e.name) CONTAINS toLower(row.name)
|
||
# RETURN row.id AS incoming_id,
|
||
# e.id AS id,
|
||
# e.name AS name,
|
||
# e.group_id AS group_id,
|
||
# e.entity_idx AS entity_idx,
|
||
# e.entity_type AS entity_type,
|
||
# e.description AS description,
|
||
# e.statement_id AS statement_id,
|
||
# e.aliases AS aliases,
|
||
# e.name_embedding AS name_embedding,
|
||
# e.fact_summary AS fact_summary,
|
||
# e.connect_strength AS connect_strength,
|
||
# e.created_at AS created_at,
|
||
# e.expired_at AS expired_at
|
||
# """
|
||
|
||
SEARCH_DIALOGUE_BY_DIALOG_ID = """
|
||
MATCH (d:Dialogue)
|
||
WHERE ($end_user_id IS NULL OR d.end_user_id = $end_user_id)
|
||
AND d.id = $dialog_id
|
||
RETURN d.id AS dialog_id,
|
||
d.end_user_id AS end_user_id,
|
||
d.content AS content,
|
||
d.created_at AS created_at
|
||
ORDER BY d.created_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_CHUNK_BY_CHUNK_ID = """
|
||
MATCH (c:Chunk)
|
||
WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||
AND c.id = $chunk_id
|
||
RETURN c.id AS chunk_id,
|
||
c.end_user_id AS end_user_id,
|
||
c.content AS content,
|
||
c.dialog_id AS dialog_id,
|
||
c.created_at AS created_at,
|
||
c.sequence_number AS sequence_number
|
||
ORDER BY c.created_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_BY_TEMPORAL = """
|
||
MATCH (s:Statement)
|
||
WHERE ($end_user_id IS NULL OR s.end_user_id = $end_user_id)
|
||
AND ((($start_date IS NULL OR datetime(s.created_at) >= datetime($start_date))
|
||
AND ($end_date IS NULL OR datetime(s.created_at) <= datetime($end_date)))
|
||
OR (($valid_date IS NULL OR (s.valid_at IS NOT NULL AND datetime(s.valid_at) >= datetime($valid_date)))
|
||
AND ($invalid_date IS NULL OR (s.invalid_at IS NOT NULL AND datetime(s.invalid_at) <= datetime($invalid_date)))))
|
||
RETURN s.id AS id,
|
||
s.statement AS statement,
|
||
s.end_user_id AS end_user_id,
|
||
s.chunk_id AS chunk_id,
|
||
s.created_at AS created_at,
|
||
s.valid_at AS valid_at,
|
||
s.invalid_at AS invalid_at,
|
||
collect(DISTINCT s.id) AS statement_ids,
|
||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||
s.last_access_time AS last_access_time,
|
||
COALESCE(s.access_count, 0) AS access_count
|
||
ORDER BY datetime(s.created_at) DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_BY_KEYWORD_TEMPORAL = """
|
||
CALL db.index.fulltext.queryNodes("statementsFulltext", $query) YIELD node AS s, score
|
||
WHERE ($end_user_id IS NULL OR s.end_user_id = $end_user_id)
|
||
AND ((($start_date IS NULL OR (s.created_at IS NOT NULL AND datetime(s.created_at) >= datetime($start_date)))
|
||
AND ($end_date IS NULL OR (s.created_at IS NOT NULL AND datetime(s.created_at) <= datetime($end_date))))
|
||
OR (($valid_date IS NULL OR (s.valid_at IS NOT NULL AND datetime(s.valid_at) >= datetime($valid_date)))
|
||
AND ($invalid_date IS NULL OR (s.invalid_at IS NOT NULL AND datetime(s.invalid_at) <= datetime($invalid_date)))))
|
||
OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
|
||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity)
|
||
RETURN s.id AS id,
|
||
s.statement AS statement,
|
||
s.end_user_id AS end_user_id,
|
||
s.chunk_id AS chunk_id,
|
||
s.created_at AS created_at,
|
||
s.valid_at AS valid_at,
|
||
s.invalid_at AS invalid_at,
|
||
c.id AS chunk_id_from_rel,
|
||
collect(DISTINCT e.id) AS entity_ids,
|
||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||
s.last_access_time AS last_access_time,
|
||
COALESCE(s.access_count, 0) AS access_count,
|
||
score
|
||
ORDER BY s.created_at DESC, score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_BY_CREATED_AT = """
|
||
MATCH (n:Statement)
|
||
WHERE ($end_user_id IS NULL OR n.end_user_id = $end_user_id)
|
||
AND ($created_at IS NOT NULL AND date(substring(n.created_at, 0, 10)) = date($created_at))
|
||
RETURN n.id AS id,
|
||
n.statement AS statement,
|
||
n.end_user_id AS end_user_id,
|
||
n.chunk_id AS chunk_id,
|
||
n.created_at AS created_at,
|
||
n.valid_at AS valid_at,
|
||
n.invalid_at AS invalid_at,
|
||
collect(DISTINCT n.id) AS statement_ids
|
||
ORDER BY n.created_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_BY_VALID_AT = """
|
||
MATCH (n:Statement)
|
||
WHERE ($end_user_id IS NULL OR n.end_user_id = $end_user_id)
|
||
AND ($valid_at IS NOT NULL AND date(substring(n.valid_at, 0, 10)) = date($valid_at))
|
||
RETURN n.id AS id,
|
||
n.statement AS statement,
|
||
n.end_user_id AS end_user_id,
|
||
n.chunk_id AS chunk_id,
|
||
n.created_at AS created_at,
|
||
n.valid_at AS valid_at,
|
||
n.invalid_at AS invalid_at,
|
||
collect(DISTINCT n.id) AS statement_ids
|
||
ORDER BY n.valid_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_G_CREATED_AT = """
|
||
MATCH (n:Statement)
|
||
WHERE ($end_user_id IS NULL OR n.end_user_id = $end_user_id)
|
||
AND ($created_at IS NOT NULL AND date(substring(n.created_at, 0, 19)) = date($created_at))
|
||
RETURN n.id AS id,
|
||
n.statement AS statement,
|
||
n.end_user_id AS end_user_id,
|
||
n.chunk_id AS chunk_id,
|
||
n.created_at AS created_at,
|
||
n.valid_at AS valid_at,
|
||
n.invalid_at AS invalid_at,
|
||
collect(DISTINCT n.id) AS statement_ids
|
||
ORDER BY n.created_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_L_CREATED_AT = """
|
||
MATCH (n:Statement)
|
||
WHERE ($end_user_id IS NULL OR n.end_user_id = $end_user_id)
|
||
AND ($created_at IS NOT NULL AND date(substring(n.created_at, 0, 19)) < date($created_at))
|
||
RETURN n.id AS id,
|
||
n.statement AS statement,
|
||
n.end_user_id AS end_user_id,
|
||
n.chunk_id AS chunk_id,
|
||
n.created_at AS created_at,
|
||
n.valid_at AS valid_at,
|
||
n.invalid_at AS invalid_at,
|
||
collect(DISTINCT n.id) AS statement_ids
|
||
ORDER BY n.created_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_G_VALID_AT = """
|
||
MATCH (n:Statement)
|
||
WHERE ($end_user_id IS NULL OR n.end_user_id = $end_user_id)
|
||
AND ($valid_at IS NOT NULL AND date(substring(n.valid_at, 0, 10)) > date($valid_at))
|
||
RETURN n.id AS id,
|
||
n.statement AS statement,
|
||
n.end_user_id AS end_user_id,
|
||
n.chunk_id AS chunk_id,
|
||
n.created_at AS created_at,
|
||
n.valid_at AS valid_at,
|
||
n.invalid_at AS invalid_at,
|
||
collect(DISTINCT n.id) AS statement_ids
|
||
ORDER BY n.valid_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_L_VALID_AT = """
|
||
MATCH (n:Statement)
|
||
WHERE ($end_user_id IS NULL OR n.end_user_id = $end_user_id)
|
||
AND ($valid_at IS NOT NULL AND date(substring(n.valid_at, 0, 10)) < date($valid_at))
|
||
RETURN n.id AS id,
|
||
n.statement AS statement,
|
||
n.end_user_id AS end_user_id,
|
||
n.chunk_id AS chunk_id,
|
||
n.created_at AS created_at,
|
||
n.valid_at AS valid_at,
|
||
n.invalid_at AS invalid_at,
|
||
collect(DISTINCT n.id) AS statement_ids
|
||
ORDER BY n.valid_at DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
# 以下是关于第二层去重消歧与数据库进行检索的语句,在最近的规划中不再使用
|
||
|
||
# # 同组group_id下按“精确名字或别名+可选类型一致”来检索
|
||
# SECOND_LAYER_CANDIDATE_MATCH_BATCH = """
|
||
# UNWIND $rows AS row
|
||
# MATCH (e:ExtractedEntity)
|
||
# WHERE e.group_id = row.group_id
|
||
# AND (toLower(e.name) = toLower(row.name) OR any(a IN e.aliases WHERE toLower(a) = toLower(row.name)))
|
||
# AND (row.entity_type IS NULL OR e.entity_type = row.entity_type)
|
||
# RETURN row.id AS incoming_id,
|
||
# e.id AS id,
|
||
# e.name AS name,
|
||
# e.group_id AS group_id,
|
||
# e.entity_idx AS entity_idx,
|
||
# e.entity_type AS entity_type,
|
||
# e.description AS description,
|
||
# e.statement_id AS statement_id,
|
||
# e.aliases AS aliases,
|
||
# e.name_embedding AS name_embedding,
|
||
# e.fact_summary AS fact_summary,
|
||
# e.connect_strength AS connect_strength,
|
||
# e.created_at AS created_at,
|
||
# e.expired_at AS expired_at
|
||
# """
|
||
# # 同组group_id下按name contains召回补充
|
||
# SECOND_LAYER_CANDIDATE_CONTAINS_BATCH = """
|
||
# UNWIND $rows AS row
|
||
# MATCH (e:ExtractedEntity)
|
||
# WHERE e.group_id = row.group_id
|
||
# AND toLower(e.name) CONTAINS toLower(row.name)
|
||
# RETURN row.id AS incoming_id,
|
||
# e.id AS id,
|
||
# e.name AS name,
|
||
# e.group_id AS group_id,
|
||
# e.entity_idx AS entity_idx,
|
||
# e.entity_type AS entity_type,
|
||
# e.description AS description,
|
||
# e.statement_id AS statement_id,
|
||
# e.aliases AS aliases,
|
||
# e.name_embedding AS name_embedding,
|
||
# e.fact_summary AS fact_summary,
|
||
# e.connect_strength AS connect_strength,
|
||
# e.created_at AS created_at,
|
||
# e.expired_at AS expired_at
|
||
# """
|
||
|
||
# 根据id修改句子的invalid_at的值
|
||
UPDATE_STATEMENT_INVALID_AT = """
|
||
MATCH (n:Statement {end_user_id: $end_user_id, id: $id})
|
||
SET n.invalid_at = $new_invalid_at
|
||
"""
|
||
|
||
MEMORY_SUMMARY_NODE_SAVE = """
|
||
UNWIND $summaries AS summary
|
||
MERGE (m:MemorySummary {id: summary.id})
|
||
SET m += {
|
||
id: summary.id,
|
||
name: summary.name,
|
||
end_user_id: summary.end_user_id,
|
||
run_id: summary.run_id,
|
||
created_at: summary.created_at,
|
||
dialog_id: summary.dialog_id,
|
||
chunk_ids: summary.chunk_ids,
|
||
content: summary.content,
|
||
memory_type: summary.memory_type,
|
||
summary_embedding: summary.summary_embedding,
|
||
config_id: summary.config_id,
|
||
importance_score: CASE WHEN summary.importance_score IS NOT NULL THEN summary.importance_score ELSE coalesce(m.importance_score, 0.5) END,
|
||
activation_value: CASE WHEN summary.activation_value IS NOT NULL THEN summary.activation_value ELSE m.activation_value END,
|
||
access_history: CASE WHEN summary.access_history IS NOT NULL THEN summary.access_history ELSE coalesce(m.access_history, []) END,
|
||
last_access_time: CASE WHEN summary.last_access_time IS NOT NULL THEN summary.last_access_time ELSE m.last_access_time END,
|
||
access_count: CASE WHEN summary.access_count IS NOT NULL THEN summary.access_count ELSE coalesce(m.access_count, 0) END
|
||
}
|
||
RETURN m.id AS uuid
|
||
"""
|
||
|
||
MEMORY_SUMMARY_STATEMENT_EDGE_SAVE = """
|
||
UNWIND $edges AS e
|
||
MATCH (ms:MemorySummary {id: e.summary_id, run_id: e.run_id})
|
||
MATCH (c:Chunk {id: e.chunk_id, run_id: e.run_id})
|
||
MATCH (c)-[:CONTAINS]->(s:Statement {run_id: e.run_id})
|
||
MERGE (ms)-[r:DERIVED_FROM_STATEMENT]->(s)
|
||
SET r.end_user_id = e.end_user_id,
|
||
r.run_id = e.run_id,
|
||
r.created_at = e.created_at
|
||
RETURN elementId(r) AS uuid
|
||
"""
|
||
|
||
# Entity Merge Query
|
||
MERGE_ENTITIES = """
|
||
MATCH (canonical:ExtractedEntity {id: $canonical_id})
|
||
MATCH (losing:ExtractedEntity {id: $losing_id})
|
||
|
||
// 更新canonical实体的aliases
|
||
SET canonical.aliases = $merged_aliases
|
||
|
||
// 转移所有从losing出发的关系到canonical
|
||
WITH canonical, losing
|
||
OPTIONAL MATCH (losing)-[r]->(target)
|
||
WHERE NOT (canonical)-[:RELATES_TO]->(target)
|
||
FOREACH (rel IN CASE WHEN r IS NOT NULL THEN [r] ELSE [] END |
|
||
CREATE (canonical)-[:RELATES_TO {
|
||
id: rel.id,
|
||
relation_type: rel.relation_type,
|
||
relation_value: rel.relation_value,
|
||
statement: rel.statement,
|
||
source_statement_id: rel.source_statement_id,
|
||
valid_at: rel.valid_at,
|
||
invalid_at: rel.invalid_at,
|
||
end_user_id: rel.end_user_id,
|
||
user_id: rel.user_id,
|
||
apply_id: rel.apply_id,
|
||
run_id: rel.run_id,
|
||
created_at: rel.created_at
|
||
}]->(target)
|
||
)
|
||
|
||
// 转移所有指向losing的关系到canonical
|
||
WITH canonical, losing
|
||
OPTIONAL MATCH (source)-[r]->(losing)
|
||
WHERE NOT (source)-[:RELATES_TO]->(canonical)
|
||
FOREACH (rel IN CASE WHEN r IS NOT NULL THEN [r] ELSE [] END |
|
||
CREATE (source)-[:RELATES_TO {
|
||
id: rel.id,
|
||
relation_type: rel.relation_type,
|
||
relation_value: rel.relation_value,
|
||
statement: rel.statement,
|
||
source_statement_id: rel.source_statement_id,
|
||
valid_at: rel.valid_at,
|
||
invalid_at: rel.invalid_at,
|
||
end_user_id: rel.end_user_id,
|
||
user_id: rel.user_id,
|
||
apply_id: rel.apply_id,
|
||
run_id: rel.run_id,
|
||
created_at: rel.created_at
|
||
}]->(canonical)
|
||
)
|
||
|
||
// 删除losing实体及其所有关系
|
||
WITH losing
|
||
DETACH DELETE losing
|
||
|
||
RETURN count(losing) as deleted
|
||
"""
|
||
|
||
neo4j_statement_part = '''
|
||
MATCH (n:Statement)
|
||
WHERE n.end_user_id = "{}"
|
||
AND datetime(n.created_at) >= datetime() - duration('P3D')
|
||
RETURN
|
||
n.statement as statement_name,
|
||
n.id as statement_id,
|
||
n.created_at as statement_created_at
|
||
|
||
'''
|
||
neo4j_statement_all = '''
|
||
MATCH (n:Statement)
|
||
WHERE n.end_user_id = "{}"
|
||
RETURN
|
||
n.statement as statement_name,
|
||
n.id as statement_id
|
||
|
||
'''
|
||
neo4j_query_part = """
|
||
MATCH (n)-[r]-(m:ExtractedEntity)
|
||
WHERE n.end_user_id = "{}"
|
||
AND datetime(n.created_at) >= datetime() - duration('P3D')
|
||
WITH DISTINCT m
|
||
OPTIONAL MATCH (m)-[rel]-(other:ExtractedEntity)
|
||
RETURN
|
||
elementId(m) as id,
|
||
m.name as entity1_name,
|
||
m.description as description,
|
||
m.statement_id as statement_id,
|
||
m.created_at as created_at,
|
||
CASE WHEN rel IS NULL THEN "NO_RELATIONSHIP" ELSE type(rel) END as relationship_type,
|
||
elementId(rel) as rel_id,
|
||
rel.predicate as predicate,
|
||
rel.statement as relationship,
|
||
rel.statement_id as relationship_statement_id,
|
||
CASE WHEN other IS NULL THEN "ISOLATED_NODE" ELSE other.name END as entity2_name,
|
||
other as entity2
|
||
"""
|
||
neo4j_query_all = """
|
||
MATCH (n)-[r]-(m:ExtractedEntity)
|
||
WHERE n.end_user_id = "{}"
|
||
WITH DISTINCT m
|
||
OPTIONAL MATCH (m)-[rel]-(other:ExtractedEntity)
|
||
RETURN
|
||
elementId(m) as id,
|
||
m.name as entity1_name,
|
||
m.description as description,
|
||
m.statement_id as statement_id,
|
||
m.created_at as created_at,
|
||
CASE WHEN rel IS NULL THEN "NO_RELATIONSHIP" ELSE type(rel) END as relationship_type,
|
||
elementId(rel) as rel_id,
|
||
rel.predicate as predicate,
|
||
rel.statement as relationship,
|
||
rel.statement_id as relationship_statement_id,
|
||
CASE WHEN other IS NULL THEN "ISOLATED_NODE" ELSE other.name END as entity2_name,
|
||
other as entity2
|
||
"""
|
||
|
||
'''针对当前节点下扩长的句子,实体和总结'''
|
||
Memory_Timeline_ExtractedEntity = """
|
||
MATCH (n)-[r1]-(e)-[r2]-(ms)
|
||
WHERE elementId(n) = $id
|
||
AND (ms:ExtractedEntity OR ms:MemorySummary)
|
||
|
||
RETURN
|
||
collect(
|
||
DISTINCT
|
||
CASE
|
||
WHEN ms:ExtractedEntity THEN {
|
||
text: ms.name,
|
||
created_at: ms.created_at,
|
||
type: "情景记忆"
|
||
}
|
||
END
|
||
) AS ExtractedEntity,
|
||
|
||
collect(
|
||
DISTINCT
|
||
CASE
|
||
WHEN ms:MemorySummary THEN {
|
||
text: ms.content,
|
||
created_at: ms.created_at,
|
||
type: "长期沉淀"
|
||
}
|
||
END
|
||
) AS MemorySummary,
|
||
|
||
collect(
|
||
DISTINCT {
|
||
text: e.statement,
|
||
created_at: e.created_at,
|
||
type: "情绪记忆"
|
||
}
|
||
) AS statement;
|
||
|
||
|
||
"""
|
||
Memory_Timeline_MemorySummary = """
|
||
MATCH (n)-[r1]-(e)-[r2]-(ms)
|
||
WHERE elementId(n) =$id
|
||
AND (ms:MemorySummary OR ms:ExtractedEntity)
|
||
RETURN
|
||
collect(
|
||
DISTINCT
|
||
CASE
|
||
WHEN ms:ExtractedEntity THEN {
|
||
text: ms.name,
|
||
created_at: ms.created_at,
|
||
type: "情景记忆"
|
||
}
|
||
END
|
||
) AS ExtractedEntity,
|
||
|
||
collect(
|
||
DISTINCT
|
||
CASE
|
||
WHEN n:MemorySummary THEN {
|
||
text: n.content,
|
||
created_at: n.created_at,
|
||
type: "长期沉淀"
|
||
}
|
||
END
|
||
) AS MemorySummary,
|
||
|
||
collect(
|
||
DISTINCT {
|
||
text: e.statement,
|
||
created_at: e.created_at,
|
||
type: "情绪记忆"
|
||
}
|
||
) AS statement;
|
||
"""
|
||
Memory_Timeline_Statement = """
|
||
MATCH (n)
|
||
WHERE elementId(n) = $id
|
||
|
||
CALL {
|
||
WITH n
|
||
MATCH (n)-[]-(m:ExtractedEntity)
|
||
WHERE NOT m:MemorySummary AND NOT m:Chunk
|
||
RETURN collect(
|
||
DISTINCT {
|
||
text: m.name,
|
||
created_at: m.created_at,
|
||
type: "情景记忆"
|
||
}
|
||
) AS ExtractedEntity
|
||
}
|
||
|
||
CALL {
|
||
WITH n
|
||
MATCH (n)-[]-(m:MemorySummary)
|
||
WHERE NOT m:Chunk
|
||
RETURN collect(
|
||
DISTINCT {
|
||
text: m.content,
|
||
created_at: m.created_at,
|
||
type: "长期沉淀"
|
||
}
|
||
) AS MemorySummary
|
||
}
|
||
|
||
RETURN
|
||
ExtractedEntity,
|
||
MemorySummary,
|
||
{
|
||
text: n.statement,
|
||
created_at: n.created_at,
|
||
type: "情绪记忆"
|
||
} AS statement;
|
||
|
||
|
||
"""
|
||
|
||
'''针对当前节点,主要获取更加完整的句子节点'''
|
||
Memory_Space_Emotion_Statement = """
|
||
MATCH (n)
|
||
WHERE elementId(n) = $id
|
||
RETURN
|
||
n.emotion_intensity AS emotion_intensity,
|
||
n.created_at AS created_at,
|
||
n.emotion_type AS emotion_type,
|
||
n.statement AS statement;
|
||
|
||
"""
|
||
Memory_Space_Emotion_MemorySummary = """
|
||
MATCH (n)-[]-(e)
|
||
WHERE elementId(n) = $id
|
||
AND EXISTS {
|
||
MATCH (e)-[]-(ms)
|
||
WHERE ms:MemorySummary OR ms:ExtractedEntity
|
||
}
|
||
RETURN DISTINCT
|
||
e.emotion_intensity AS emotion_intensity,
|
||
e.created_at AS created_at,
|
||
e.emotion_type AS emotion_type,
|
||
e.statement AS statement;
|
||
"""
|
||
Memory_Space_Emotion_ExtractedEntity = """
|
||
MATCH (n)-[]-(e)
|
||
WHERE elementId(n) = $id
|
||
AND EXISTS {
|
||
MATCH (e)-[]-(ms:ExtractedEntity)
|
||
}
|
||
RETURN DISTINCT
|
||
e.emotion_intensity AS emotion_intensity,
|
||
e.created_at AS created_at,
|
||
e.emotion_type AS emotion_type,
|
||
e.statement AS statement;
|
||
"""
|
||
|
||
Memory_Space_User = """
|
||
MATCH (n)-[r]->(m)
|
||
WHERE n.end_user_id = $end_user_id AND m.name="用户"
|
||
return DISTINCT elementId(m) as id
|
||
"""
|
||
Memory_Space_Entity = """
|
||
MATCH (n)-[]-(m)
|
||
WHERE elementId(m) = $id AND m.entity_type = "Person"
|
||
RETURN
|
||
DISTINCT m.name as name,m.end_user_id as end_user_id
|
||
"""
|
||
Memory_Space_Associative = """
|
||
MATCH (u)-[]-(x)-[]-(h)
|
||
WHERE elementId(u) = $user_id
|
||
AND elementId(h) = $id
|
||
RETURN DISTINCT
|
||
x.statement as statement,x.created_at as created_at
|
||
"""
|
||
|
||
Graph_Node_query = """
|
||
MATCH (n:MemorySummary)
|
||
WHERE n.end_user_id = $end_user_id
|
||
RETURN
|
||
elementId(n) AS id,
|
||
labels(n) AS labels,
|
||
properties(n) AS properties,
|
||
0 AS priority
|
||
LIMIT $limit
|
||
|
||
UNION ALL
|
||
|
||
MATCH (n:Dialogue)
|
||
WHERE n.end_user_id = $end_user_id
|
||
RETURN
|
||
elementId(n) AS id,
|
||
labels(n) AS labels,
|
||
properties(n) AS properties,
|
||
1 AS priority
|
||
LIMIT 1
|
||
|
||
UNION ALL
|
||
|
||
MATCH (n:Statement)
|
||
WHERE n.end_user_id = $end_user_id
|
||
RETURN
|
||
elementId(n) AS id,
|
||
labels(n) AS labels,
|
||
properties(n) AS properties,
|
||
1 AS priority
|
||
LIMIT $limit
|
||
|
||
UNION ALL
|
||
|
||
MATCH (n:ExtractedEntity)
|
||
WHERE n.end_user_id = $end_user_id
|
||
RETURN
|
||
elementId(n) AS id,
|
||
labels(n) AS labels,
|
||
properties(n) AS properties,
|
||
2 AS priority
|
||
LIMIT $limit
|
||
|
||
UNION ALL
|
||
|
||
MATCH (n:Chunk)
|
||
WHERE n.end_user_id = $end_user_id
|
||
RETURN
|
||
elementId(n) AS id,
|
||
labels(n) AS labels,
|
||
properties(n) AS properties,
|
||
3 AS priority
|
||
LIMIT $limit
|
||
|
||
UNION ALL
|
||
MATCH (n:Perceptual)
|
||
WHERE n.end_user_id = $end_user_id
|
||
RETURN
|
||
elementId(n) AS id,
|
||
labels(n) AS labels,
|
||
properties(n) AS properties,
|
||
4 AS priority
|
||
|
||
"""
|
||
|
||
# ============================================================
|
||
# Community 节点 & BELONGS_TO_COMMUNITY 边
|
||
# ============================================================
|
||
|
||
# ─── Community 聚类相关 Cypher 模板 ───────────────────────────────────────────
|
||
|
||
COMMUNITY_NODE_UPSERT = """
|
||
MERGE (c:Community {community_id: $community_id})
|
||
ON CREATE SET c.id = $community_id
|
||
SET c.end_user_id = $end_user_id,
|
||
c.member_count = $member_count,
|
||
c.updated_at = datetime()
|
||
RETURN c.community_id AS community_id
|
||
"""
|
||
|
||
ENTITY_JOIN_COMMUNITY = """
|
||
MATCH (e:ExtractedEntity {id: $entity_id, end_user_id: $end_user_id})
|
||
MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
|
||
MERGE (e)-[:BELONGS_TO_COMMUNITY]->(c)
|
||
SET c.updated_at = datetime()
|
||
RETURN e.id AS entity_id, c.community_id AS community_id
|
||
"""
|
||
|
||
ENTITY_LEAVE_ALL_COMMUNITIES = """
|
||
MATCH (e:ExtractedEntity {id: $entity_id, end_user_id: $end_user_id})
|
||
MATCH (e)-[r:BELONGS_TO_COMMUNITY]->(:Community)
|
||
DELETE r
|
||
"""
|
||
|
||
GET_ENTITY_NEIGHBORS = """
|
||
MATCH (e:ExtractedEntity {id: $entity_id, end_user_id: $end_user_id})
|
||
|
||
// 来源一:直接关系邻居(EXTRACTED_RELATIONSHIP 边)
|
||
OPTIONAL MATCH (e)-[:EXTRACTED_RELATIONSHIP]-(nb1:ExtractedEntity {end_user_id: $end_user_id})
|
||
|
||
// 来源二:同 Statement 共现邻居(REFERENCES_ENTITY 边)
|
||
OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(nb2:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE nb2.id <> e.id
|
||
|
||
WITH collect(DISTINCT nb1) + collect(DISTINCT nb2) AS all_neighbors
|
||
UNWIND all_neighbors AS nb
|
||
WITH nb WHERE nb IS NOT NULL
|
||
OPTIONAL MATCH (nb)-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||
RETURN DISTINCT
|
||
nb.id AS id,
|
||
nb.name AS name,
|
||
nb.name_embedding AS name_embedding,
|
||
nb.activation_value AS activation_value,
|
||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||
"""
|
||
|
||
GET_ALL_ENTITIES_FOR_USER = """
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||
OPTIONAL MATCH (e)-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||
RETURN e.id AS id,
|
||
e.name AS name,
|
||
e.name_embedding AS name_embedding,
|
||
e.activation_value AS activation_value,
|
||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||
"""
|
||
|
||
GET_ENTITY_COUNT_FOR_USER = """
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||
RETURN count(e) AS entity_count
|
||
"""
|
||
|
||
GET_ALL_ENTITY_IDS_FOR_USER = """
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||
RETURN e.id AS id
|
||
"""
|
||
|
||
GET_COMMUNITY_MEMBERS = """
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community {community_id: $community_id})
|
||
RETURN e.id AS id, e.name AS name, e.entity_type AS entity_type,
|
||
e.importance_score AS importance_score, e.activation_value AS activation_value,
|
||
e.name_embedding AS name_embedding,
|
||
e.aliases AS aliases, e.description AS description,
|
||
e.example AS example
|
||
ORDER BY coalesce(e.activation_value, 0) DESC
|
||
"""
|
||
|
||
GET_COMMUNITY_RELATIONSHIPS = """
|
||
MATCH (e1:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community {community_id: $community_id})
|
||
MATCH (e2:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c)
|
||
MATCH (e1)-[r:EXTRACTED_RELATIONSHIP]->(e2)
|
||
RETURN e1.name AS subject, r.predicate AS predicate, e2.name AS object
|
||
ORDER BY e1.name, r.predicate, e2.name
|
||
LIMIT 20
|
||
"""
|
||
|
||
GET_ALL_COMMUNITY_MEMBERS_BATCH = """
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||
RETURN c.community_id AS community_id,
|
||
e.id AS id, e.name AS name, e.entity_type AS entity_type,
|
||
e.importance_score AS importance_score, e.activation_value AS activation_value,
|
||
e.name_embedding AS name_embedding,
|
||
e.aliases AS aliases, e.description AS description
|
||
ORDER BY c.community_id, coalesce(e.activation_value, 0) DESC
|
||
"""
|
||
|
||
CHECK_USER_HAS_COMMUNITIES = """
|
||
MATCH (c:Community {end_user_id: $end_user_id})
|
||
RETURN count(c) AS community_count
|
||
"""
|
||
|
||
UPDATE_COMMUNITY_MEMBER_COUNT = """
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community {community_id: $community_id})
|
||
WITH c, count(e) AS cnt
|
||
SET c.member_count = cnt
|
||
RETURN c.community_id AS community_id, cnt AS member_count
|
||
"""
|
||
|
||
UPDATE_COMMUNITY_METADATA = """
|
||
MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
|
||
SET c.id = coalesce(c.id, $community_id),
|
||
c.name = $name,
|
||
c.summary = $summary,
|
||
c.core_entities = $core_entities,
|
||
c.summary_embedding = $summary_embedding,
|
||
c.updated_at = datetime()
|
||
RETURN c.community_id AS community_id
|
||
"""
|
||
|
||
BATCH_UPDATE_COMMUNITY_METADATA = """
|
||
UNWIND $communities AS row
|
||
MATCH (c:Community {community_id: row.community_id, end_user_id: row.end_user_id})
|
||
SET c.id = coalesce(c.id, row.community_id),
|
||
c.name = row.name,
|
||
c.summary = row.summary,
|
||
c.core_entities = row.core_entities,
|
||
c.summary_embedding = row.summary_embedding,
|
||
c.updated_at = datetime()
|
||
RETURN c.community_id AS community_id
|
||
"""
|
||
|
||
GET_ENTITIES_PAGE = """
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||
OPTIONAL MATCH (e)-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||
RETURN e.id AS id,
|
||
e.name AS name,
|
||
e.name_embedding AS name_embedding,
|
||
e.activation_value AS activation_value,
|
||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||
ORDER BY e.id
|
||
SKIP $skip LIMIT $limit
|
||
"""
|
||
|
||
GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS = """
|
||
// 批量拉取指定实体列表的邻居(用于分批全量聚类)
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE e.id IN $entity_ids
|
||
OPTIONAL MATCH (e)-[:EXTRACTED_RELATIONSHIP]-(nb1:ExtractedEntity {end_user_id: $end_user_id})
|
||
OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(nb2:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE nb2.id <> e.id
|
||
WITH e, collect(DISTINCT nb1) + collect(DISTINCT nb2) AS all_neighbors
|
||
UNWIND all_neighbors AS nb
|
||
WITH e, nb WHERE nb IS NOT NULL
|
||
OPTIONAL MATCH (nb)-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||
RETURN DISTINCT
|
||
e.id AS entity_id,
|
||
nb.id AS id,
|
||
nb.name AS name,
|
||
nb.name_embedding AS name_embedding,
|
||
nb.activation_value AS activation_value,
|
||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||
"""
|
||
|
||
GET_ALL_ENTITY_NEIGHBORS_BATCH = """
|
||
// 批量拉取某用户下所有实体的邻居(用于全量聚类预加载)
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||
|
||
// 来源一:直接关系邻居
|
||
OPTIONAL MATCH (e)-[:EXTRACTED_RELATIONSHIP]-(nb1:ExtractedEntity {end_user_id: $end_user_id})
|
||
|
||
// 来源二:同 Statement 共现邻居
|
||
OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(nb2:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE nb2.id <> e.id
|
||
|
||
WITH e, collect(DISTINCT nb1) + collect(DISTINCT nb2) AS all_neighbors
|
||
UNWIND all_neighbors AS nb
|
||
WITH e, nb WHERE nb IS NOT NULL
|
||
OPTIONAL MATCH (nb)-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||
RETURN DISTINCT
|
||
e.id AS entity_id,
|
||
nb.id AS id,
|
||
nb.name AS name,
|
||
nb.name_embedding AS name_embedding,
|
||
nb.activation_value AS activation_value,
|
||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||
"""
|
||
|
||
GET_COMMUNITY_GRAPH_DATA = """
|
||
MATCH (c:Community {end_user_id: $end_user_id})
|
||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[b:BELONGS_TO_COMMUNITY]->(c)
|
||
OPTIONAL MATCH (e)-[r:EXTRACTED_RELATIONSHIP]-(e2:ExtractedEntity {end_user_id: $end_user_id})
|
||
RETURN
|
||
elementId(c) AS c_id,
|
||
properties(c) AS c_props,
|
||
elementId(e) AS e_id,
|
||
properties(e) AS e_props,
|
||
elementId(b) AS b_id,
|
||
elementId(e2) AS e2_id,
|
||
properties(e2) AS e2_props,
|
||
elementId(r) AS r_id,
|
||
type(r) AS r_type,
|
||
properties(r) AS r_props,
|
||
startNode(r) = e AS r_from_e
|
||
"""
|
||
|
||
CHECK_COMMUNITY_IS_COMPLETE = """
|
||
MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
|
||
RETURN (
|
||
c.name IS NOT NULL AND c.name <> '' AND
|
||
c.summary IS NOT NULL AND c.summary <> '' AND
|
||
c.core_entities IS NOT NULL
|
||
) AS is_complete
|
||
"""
|
||
|
||
# 别名归并:将 predicate="别名属于" 的 EXTRACTED_RELATIONSHIP 边的 source.name
|
||
# 合并进 target.aliases(去重),并将 source.description 追加到 target.description(分号分隔)
|
||
MERGE_ALIAS_BELONGS_TO = """
|
||
MATCH (source:ExtractedEntity {end_user_id: $end_user_id})-[r:EXTRACTED_RELATIONSHIP]->(target:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE r.predicate = '别名属于'
|
||
WITH source, target,
|
||
coalesce(target.aliases, []) AS existing_aliases,
|
||
source.name AS source_name,
|
||
coalesce(source.description, '') AS src_desc,
|
||
coalesce(target.description, '') AS tgt_desc
|
||
|
||
// 1. 合并 aliases:将 source.name 追加到 target.aliases(去重)
|
||
WITH source, target, src_desc, tgt_desc,
|
||
CASE
|
||
WHEN source_name IS NOT NULL AND source_name <> '' AND NOT source_name IN existing_aliases
|
||
THEN existing_aliases + source_name
|
||
ELSE existing_aliases
|
||
END AS new_aliases
|
||
|
||
SET target.aliases = new_aliases,
|
||
target.description = CASE
|
||
WHEN src_desc <> '' AND NOT src_desc IN tgt_desc
|
||
THEN CASE WHEN tgt_desc = '' THEN src_desc ELSE tgt_desc + ';' + src_desc END
|
||
ELSE tgt_desc
|
||
END
|
||
|
||
RETURN source.name AS merged_alias, target.name AS target_name, new_aliases AS updated_aliases
|
||
"""
|
||
|
||
# 边重定向:将指向别名节点("别名属于"关系的 source)的所有其他边,重定向到用户节点(target)。
|
||
# 处理两类边:
|
||
# 1. EXTRACTED_RELATIONSHIP:其他实体 → 别名节点 或 别名节点 → 其他实体
|
||
# 2. STATEMENT_ENTITY:陈述句 → 别名节点
|
||
# 对于每条需要重定向的边,创建一条指向用户节点的新边(复制所有属性),然后删除旧边。
|
||
REDIRECT_ALIAS_EDGES = """
|
||
// 找到所有 别名→用户 的映射
|
||
MATCH (alias:ExtractedEntity {end_user_id: $end_user_id})-[ar:EXTRACTED_RELATIONSHIP]->(user:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE ar.predicate = '别名属于'
|
||
WITH collect({alias_id: elementId(alias), user_id: elementId(user), alias_eid: alias.id, user_eid: user.id}) AS mappings
|
||
|
||
// 1. 重定向 EXTRACTED_RELATIONSHIP 边:别名节点作为 target 的情况
|
||
UNWIND mappings AS m
|
||
MATCH (other)-[r:EXTRACTED_RELATIONSHIP]->(alias:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE alias.id = m.alias_eid
|
||
AND r.predicate <> '别名属于'
|
||
AND other.id <> m.user_eid
|
||
WITH m, other, r, alias
|
||
MATCH (user:ExtractedEntity {id: m.user_eid, end_user_id: $end_user_id})
|
||
CREATE (other)-[nr:EXTRACTED_RELATIONSHIP]->(user)
|
||
SET nr = properties(r)
|
||
DELETE r
|
||
WITH count(*) AS redirected_incoming
|
||
|
||
// 2. 重定向 EXTRACTED_RELATIONSHIP 边:别名节点作为 source 的情况
|
||
MATCH (alias:ExtractedEntity {end_user_id: $end_user_id})-[ar2:EXTRACTED_RELATIONSHIP]->(user2:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE ar2.predicate = '别名属于'
|
||
WITH alias, user2, redirected_incoming
|
||
MATCH (alias)-[r:EXTRACTED_RELATIONSHIP]->(other)
|
||
WHERE r.predicate <> '别名属于'
|
||
AND other.id <> user2.id
|
||
WITH user2, other, r, redirected_incoming
|
||
CREATE (user2)-[nr:EXTRACTED_RELATIONSHIP]->(other)
|
||
SET nr = properties(r)
|
||
DELETE r
|
||
WITH redirected_incoming, count(*) AS redirected_outgoing
|
||
|
||
// 3. 重定向 STATEMENT_ENTITY 边:陈述句 → 别名节点
|
||
MATCH (alias:ExtractedEntity {end_user_id: $end_user_id})-[ar3:EXTRACTED_RELATIONSHIP]->(user3:ExtractedEntity {end_user_id: $end_user_id})
|
||
WHERE ar3.predicate = '别名属于'
|
||
WITH alias, user3, redirected_incoming, redirected_outgoing
|
||
MATCH (stmt)-[r:STATEMENT_ENTITY]->(alias)
|
||
WITH user3, stmt, r, redirected_incoming, redirected_outgoing
|
||
CREATE (stmt)-[nr:STATEMENT_ENTITY]->(user3)
|
||
SET nr = properties(r)
|
||
DELETE r
|
||
|
||
RETURN redirected_incoming, redirected_outgoing, count(*) AS redirected_stmt
|
||
"""
|
||
|
||
CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING = """
|
||
MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
|
||
RETURN (
|
||
c.name IS NOT NULL AND c.name <> '' AND
|
||
c.summary IS NOT NULL AND c.summary <> '' AND
|
||
c.core_entities IS NOT NULL AND
|
||
c.summary_embedding IS NOT NULL
|
||
) AS is_complete
|
||
"""
|
||
|
||
GET_INCOMPLETE_COMMUNITIES = """
|
||
MATCH (c:Community {end_user_id: $end_user_id})
|
||
WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL
|
||
OR c.name = '' OR c.summary = ''
|
||
RETURN c.community_id AS community_id
|
||
"""
|
||
|
||
GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING = """
|
||
MATCH (c:Community {end_user_id: $end_user_id})
|
||
WHERE c.name IS NULL OR c.name = ''
|
||
OR c.summary IS NULL OR c.summary = ''
|
||
OR c.core_entities IS NULL
|
||
OR (c.summary_embedding IS NULL AND c.summary IS NOT NULL AND c.summary <> '(empty)')
|
||
RETURN c.community_id AS community_id
|
||
"""
|
||
|
||
# Community 向量检索 ──────────────────────────────────────────────────
|
||
# Community embedding-based search: cosine similarity on Community.summary_embedding
|
||
COMMUNITY_EMBEDDING_SEARCH = """
|
||
CALL db.index.vector.queryNodes('community_summary_embedding_index', $limit * 100, $embedding)
|
||
YIELD node AS c, score
|
||
WHERE c.summary_embedding IS NOT NULL
|
||
AND ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||
RETURN c.community_id AS id,
|
||
c.name AS name,
|
||
c.summary AS content,
|
||
c.core_entities AS core_entities,
|
||
c.member_count AS member_count,
|
||
c.end_user_id AS end_user_id,
|
||
c.updated_at AS updated_at,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
# Community 展开检索 ──────────────────────────────────────────────────
|
||
# 命中社区后,拉取该社区所有成员实体关联的 Statement 节点(主题→细节两级检索)
|
||
EXPAND_COMMUNITY_STATEMENTS = """
|
||
MATCH (c:Community {community_id: $community_id})
|
||
MATCH (e:ExtractedEntity)-[:BELONGS_TO_COMMUNITY]->(c)
|
||
MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||
WHERE s.end_user_id = $end_user_id
|
||
RETURN s.statement AS statement,
|
||
s.id AS id,
|
||
s.end_user_id AS end_user_id,
|
||
s.created_at AS created_at,
|
||
s.valid_at AS valid_at,
|
||
s.invalid_at AS invalid_at,
|
||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||
e.name AS source_entity,
|
||
c.name AS community_name
|
||
ORDER BY COALESCE(s.activation_value, 0) DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
# 感知记忆节点保存
|
||
PERCEPTUAL_NODE_SAVE = """
|
||
UNWIND $perceptuals AS p
|
||
MERGE (n:Perceptual {id: p.id})
|
||
SET n += {
|
||
id: p.id,
|
||
end_user_id: p.end_user_id,
|
||
perceptual_type: p.perceptual_type,
|
||
file_path: p.file_path,
|
||
file_name: p.file_name,
|
||
file_ext: p.file_ext,
|
||
summary: p.summary,
|
||
keywords: p.keywords,
|
||
topic: p.topic,
|
||
domain: p.domain,
|
||
created_at: p.created_at,
|
||
file_type: p.file_type,
|
||
summary_embedding: p.summary_embedding
|
||
}
|
||
RETURN n.id AS uuid
|
||
"""
|
||
|
||
# 感知记忆与对话的关联边
|
||
PERCEPTUAL_CHUNK_EDGE_SAVE = """
|
||
UNWIND $edges AS edge
|
||
MATCH (p:Perceptual {id: edge.perceptual_id, end_user_id: edge.end_user_id})
|
||
MATCH (c:Chunk {id: edge.chunk_id, end_user_id: edge.end_user_id})
|
||
MERGE (c)-[r:HAS_PERCEPTUAL]->(p)
|
||
ON CREATE SET r.end_user_id = edge.end_user_id,
|
||
r.created_at = edge.created_at
|
||
RETURN elementId(r) AS uuid
|
||
"""
|
||
|
||
# -------------------
|
||
# search by user id
|
||
# -------------------
|
||
SEARCH_PERCEPTUAL_BY_USER_ID = """
|
||
MATCH (p:Perceptual)
|
||
WHERE p.end_user_id = $end_user_id
|
||
RETURN p.id AS id,
|
||
p.summary_embedding AS embedding
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_BY_USER_ID = """
|
||
MATCH (s:Statement)
|
||
WHERE s.end_user_id = $end_user_id
|
||
RETURN s.id AS id,
|
||
s.statement_embedding AS embedding
|
||
"""
|
||
|
||
SEARCH_ENTITIES_BY_USER_ID = """
|
||
MATCH (e:ExtractedEntity)
|
||
WHERE e.end_user_id = $end_user_id
|
||
RETURN e.id AS id,
|
||
e.name_embedding AS embedding
|
||
"""
|
||
|
||
SEARCH_CHUNKS_BY_USER_ID = """
|
||
MATCH (c:Chunk)
|
||
WHERE c.end_user_id = $end_user_id
|
||
RETURN c.id AS id,
|
||
c.chunk_embedding AS embedding
|
||
"""
|
||
|
||
SEARCH_MEMORY_SUMMARIES_BY_USER_ID = """
|
||
MATCH (s:MemorySummary)
|
||
WHERE s.end_user_id = $end_user_id
|
||
RETURN s.id AS id,
|
||
s.summary_embedding AS embedding
|
||
"""
|
||
|
||
SEARCH_COMMUNITIES_BY_USER_ID = """
|
||
MATCH (c:Community)
|
||
WHERE c.end_user_id = $end_user_id
|
||
RETURN c.community_id AS id,
|
||
c.summary_embedding AS embedding
|
||
"""
|
||
|
||
# -------------------
|
||
# search by id
|
||
# -------------------
|
||
SEARCH_PERCEPTUAL_BY_IDS = """
|
||
MATCH (p:Perceptual)
|
||
WHERE p.id IN $ids
|
||
RETURN p.id AS id,
|
||
p.end_user_id AS end_user_id,
|
||
p.perceptual_type AS perceptual_type,
|
||
p.file_path AS file_path,
|
||
p.file_name AS file_name,
|
||
p.file_ext AS file_ext,
|
||
p.summary AS summary,
|
||
p.keywords AS keywords,
|
||
p.topic AS topic,
|
||
p.domain AS domain,
|
||
p.created_at AS created_at,
|
||
p.file_type AS file_type
|
||
"""
|
||
|
||
SEARCH_STATEMENTS_BY_IDS = """
|
||
MATCH (s:Statement)
|
||
WHERE s.id IN $ids
|
||
RETURN s.id AS id,
|
||
s.statement AS statement,
|
||
s.end_user_id AS end_user_id,
|
||
s.chunk_id AS chunk_id,
|
||
s.created_at AS created_at,
|
||
s.expired_at AS expired_at,
|
||
s.valid_at AS valid_at,
|
||
properties(s)['invalid_at'] AS invalid_at,
|
||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||
s.last_access_time AS last_access_time,
|
||
COALESCE(s.access_count, 0) AS access_count
|
||
"""
|
||
|
||
SEARCH_CHUNKS_BY_IDS = """
|
||
MATCH (c:Chunk)
|
||
WHERE c.id IN $ids
|
||
RETURN c.id AS id,
|
||
c.end_user_id AS end_user_id,
|
||
c.content AS content,
|
||
c.dialog_id AS dialog_id,
|
||
COALESCE(c.activation_value, 0.5) AS activation_value,
|
||
c.last_access_time AS last_access_time,
|
||
COALESCE(c.access_count, 0) AS access_count
|
||
"""
|
||
|
||
SEARCH_ENTITIES_BY_IDS = """
|
||
MATCH (e:ExtractedEntity)
|
||
WHERE e.id IN $ids
|
||
RETURN e.id AS id,
|
||
e.name AS name,
|
||
e.end_user_id AS end_user_id,
|
||
e.entity_type AS entity_type,
|
||
COALESCE(e.activation_value, e.importance_score, 0.5) AS activation_value,
|
||
COALESCE(e.importance_score, 0.5) AS importance_score,
|
||
e.last_access_time AS last_access_time,
|
||
COALESCE(e.access_count, 0) AS access_count
|
||
"""
|
||
|
||
SEARCH_MEMORY_SUMMARIES_BY_IDS = """
|
||
MATCH (m:MemorySummary)
|
||
WHERE m.id IN $ids
|
||
RETURN m.id AS id,
|
||
m.name AS name,
|
||
m.end_user_id AS end_user_id,
|
||
m.dialog_id AS dialog_id,
|
||
m.chunk_ids AS chunk_ids,
|
||
m.content AS content,
|
||
m.created_at AS created_at,
|
||
COALESCE(m.activation_value, m.importance_score, 0.5) AS activation_value,
|
||
COALESCE(m.importance_score, 0.5) AS importance_score,
|
||
m.last_access_time AS last_access_time,
|
||
COALESCE(m.access_count, 0) AS access_count
|
||
"""
|
||
|
||
SEARCH_COMMUNITIES_BY_IDS = """
|
||
MATCH (c:Community)
|
||
WHERE c.id IN $ids
|
||
RETURN c.id AS id,
|
||
c.name AS name,
|
||
c.summary AS content,
|
||
c.core_entities AS core_entities,
|
||
c.member_count AS member_count,
|
||
c.end_user_id AS end_user_id,
|
||
c.updated_at AS updated_at
|
||
"""
|
||
# -------------------
|
||
# search by fulltext
|
||
# -------------------
|
||
SEARCH_PERCEPTUALS_BY_KEYWORD = """
|
||
CALL db.index.fulltext.queryNodes("perceptualFulltext", $query) YIELD node AS p, score
|
||
WHERE p.end_user_id = $end_user_id
|
||
RETURN p.id AS id,
|
||
p.end_user_id AS end_user_id,
|
||
p.perceptual_type AS perceptual_type,
|
||
p.file_path AS file_path,
|
||
p.file_name AS file_name,
|
||
p.file_ext AS file_ext,
|
||
p.summary AS summary,
|
||
p.keywords AS keywords,
|
||
p.topic AS topic,
|
||
p.domain AS domain,
|
||
p.created_at AS created_at,
|
||
p.file_type AS file_type,
|
||
score
|
||
ORDER BY score DESC
|
||
LIMIT $limit
|
||
"""
|
||
|
||
|
||
# ── Assistant Pruning Nodes & Edges ──
|
||
|
||
ASSISTANT_ORIGINAL_NODE_SAVE = """
|
||
UNWIND $originals AS orig
|
||
MERGE (o:AssistantOriginal {id: orig.id})
|
||
SET o += {
|
||
end_user_id: orig.end_user_id,
|
||
run_id: orig.run_id,
|
||
dialog_id: orig.dialog_id,
|
||
pair_id: orig.pair_id,
|
||
text: orig.text,
|
||
created_at: orig.created_at
|
||
}
|
||
RETURN o.id AS uuid
|
||
"""
|
||
|
||
ASSISTANT_PRUNED_NODE_SAVE = """
|
||
UNWIND $pruneds AS p
|
||
MERGE (pr:AssistantPruned {id: p.id})
|
||
SET pr += {
|
||
end_user_id: p.end_user_id,
|
||
run_id: p.run_id,
|
||
dialog_id: p.dialog_id,
|
||
pair_id: p.pair_id,
|
||
text: p.text,
|
||
memory_type: p.memory_type,
|
||
text_embedding: p.text_embedding,
|
||
created_at: p.created_at
|
||
}
|
||
RETURN pr.id AS uuid
|
||
"""
|
||
|
||
ASSISTANT_PRUNED_EDGE_SAVE = """
|
||
UNWIND $edges AS edge
|
||
MATCH (o:AssistantOriginal {id: edge.source})
|
||
MATCH (p:AssistantPruned {id: edge.target})
|
||
MERGE (o)-[r:PRUNED_TO]->(p)
|
||
SET r.pair_id = edge.pair_id,
|
||
r.end_user_id = edge.end_user_id,
|
||
r.run_id = edge.run_id,
|
||
r.created_at = edge.created_at
|
||
RETURN elementId(r) AS uuid
|
||
"""
|
||
|
||
ASSISTANT_DIALOG_EDGE_SAVE = """
|
||
UNWIND $edges AS edge
|
||
MATCH (o:AssistantOriginal {id: edge.source})
|
||
MATCH (d:Dialogue {id: edge.target})
|
||
MERGE (o)-[r:BELONGS_TO_DIALOG]->(d)
|
||
SET r.end_user_id = edge.end_user_id,
|
||
r.run_id = edge.run_id,
|
||
r.created_at = edge.created_at
|
||
RETURN elementId(r) AS uuid
|
||
"""
|