feat(memory): add alias invalidation support for entity alias management

Introduce the `别名失效` predicate to handle cases where an alias is
explicitly no longer applicable to an entity.

Changes:
- write_pipeline.py: extend _merge_alias_in_memory to process
  `别名失效` edges — removes invalidated alias names from the target
  entity's in-memory aliases list before the Neo4j write
- cypher_queries.py: add REMOVE_INVALID_ALIASES and DELETE_ALIAS_NODES
  queries; update REDIRECT_ALIAS_EDGES to handle both `别名属于` and
  `别名失效` predicates
- tasks.py: add step 1.5 in post_store_dedup_and_alias_merge_task to
  execute REMOVE_INVALID_ALIASES and sync removals to PostgreSQL;
  add step 3 to delete alias nodes after edge redirection; add
  snapshot step 3.5 for post-merge entity state; pass snapshot_dir
  to the task
- end_user_info_repository.py: add remove_aliases() method to remove
  specified aliases from end_user_info.aliases (case-insensitive)
- write_snapshot_recorder.py: add save_alias_merge_result() static
  method to write stage 8 snapshot after alias merge and deletion
- extract_triplet.jinja2: document `别名失效` predicate with usage
  rules — only use when conversation explicitly negates an alias
This commit is contained in:
lanceyq
2026-05-07 20:07:53 +08:00
parent e3ab19dd4f
commit aa9eb66668
6 changed files with 255 additions and 23 deletions

View File

@@ -138,3 +138,41 @@ class EndUserInfoRepository:
f"aliases_count={len(end_user_info.aliases or [])}"
)
return end_user_info
def remove_aliases(
    self,
    end_user_id: uuid.UUID,
    aliases_to_remove: List[str],
) -> Optional["EndUserInfo"]:
    """Remove the given aliases from an end user's alias list.

    Matching ignores case and surrounding whitespace on BOTH the requested
    and the stored aliases. ``str.lower`` is used for case folding, the
    same lower-casing approach the graph-side alias removal query uses.
    Blank and non-string entries in ``aliases_to_remove`` are ignored
    instead of raising; non-string entries already stored are left as-is.

    Args:
        end_user_id: ID of the end user whose aliases are edited.
        aliases_to_remove: Alias names to drop.

    Returns:
        The ``EndUserInfo`` record (committed and refreshed only when at
        least one alias was actually removed), or ``None`` when no record
        exists for ``end_user_id``.
    """

    def _key(alias: object) -> Optional[str]:
        # Normalised comparison key; None marks values that cannot match.
        return alias.strip().lower() if isinstance(alias, str) else None

    end_user_info = self.get_by_end_user_id(end_user_id)
    if not aliases_to_remove:
        # Nothing requested: hand back whatever the lookup produced.
        return end_user_info
    if not end_user_info:
        logger.warning(f"[EndUserInfo] 记录不存在,跳过别名移除: end_user_id={end_user_id}")
        return None

    # Discard keys that must never match: non-strings (None) and blanks ("").
    remove_keys = {_key(a) for a in aliases_to_remove} - {None, ""}
    existing = list(end_user_info.aliases or [])
    new_aliases = [a for a in existing if _key(a) not in remove_keys]
    if len(new_aliases) == len(existing):
        # No stored alias matched; skip the write entirely.
        return end_user_info

    removed = [a for a in existing if _key(a) in remove_keys]
    # Assign a fresh list rather than mutating in place.
    end_user_info.aliases = new_aliases
    self.db.commit()
    self.db.refresh(end_user_info)
    logger.info(
        f"[EndUserInfo] 别名移除完成: end_user_id={end_user_id}, "
        f"removed={removed}, remaining={new_aliases}"
    )
    return end_user_info

View File

@@ -1379,16 +1379,16 @@ RETURN source.name AS merged_alias, target.name AS target_name, new_aliases AS u
# 2. STATEMENT_ENTITY陈述句 → 别名节点
# 对于每条需要重定向的边,创建一条指向用户节点的新边(复制所有属性),然后删除旧边。
REDIRECT_ALIAS_EDGES = """
// 找到所有 别名→用户 的映射
// 找到所有 别名→用户 的映射(包含 别名属于 和 别名失效 两种 predicate
MATCH (alias:ExtractedEntity {end_user_id: $end_user_id})-[ar:EXTRACTED_RELATIONSHIP]->(user:ExtractedEntity {end_user_id: $end_user_id})
WHERE ar.predicate = '别名属于'
WHERE ar.predicate IN ['别名属于', '别名失效']
WITH collect({alias_id: elementId(alias), user_id: elementId(user), alias_eid: alias.id, user_eid: user.id}) AS mappings
// 1. 重定向 EXTRACTED_RELATIONSHIP 边:别名节点作为 target 的情况
UNWIND mappings AS m
MATCH (other)-[r:EXTRACTED_RELATIONSHIP]->(alias:ExtractedEntity {end_user_id: $end_user_id})
WHERE alias.id = m.alias_eid
AND r.predicate <> '别名属于'
AND NOT (r.predicate IN ['别名属于', '别名失效'])
AND other.id <> m.user_eid
WITH m, other, r, alias
MATCH (user:ExtractedEntity {id: m.user_eid, end_user_id: $end_user_id})
@@ -1399,10 +1399,10 @@ WITH count(*) AS redirected_incoming
// 2. 重定向 EXTRACTED_RELATIONSHIP 边:别名节点作为 source 的情况
MATCH (alias:ExtractedEntity {end_user_id: $end_user_id})-[ar2:EXTRACTED_RELATIONSHIP]->(user2:ExtractedEntity {end_user_id: $end_user_id})
WHERE ar2.predicate = '别名属于'
WHERE ar2.predicate IN ['别名属于', '别名失效']
WITH alias, user2, redirected_incoming
MATCH (alias)-[r:EXTRACTED_RELATIONSHIP]->(other)
WHERE r.predicate <> '别名属于'
WHERE NOT (r.predicate IN ['别名属于', '别名失效'])
AND other.id <> user2.id
WITH user2, other, r, redirected_incoming
CREATE (user2)-[nr:EXTRACTED_RELATIONSHIP]->(other)
@@ -1412,7 +1412,7 @@ WITH redirected_incoming, count(*) AS redirected_outgoing
// 3. 重定向 STATEMENT_ENTITY 边:陈述句 → 别名节点
MATCH (alias:ExtractedEntity {end_user_id: $end_user_id})-[ar3:EXTRACTED_RELATIONSHIP]->(user3:ExtractedEntity {end_user_id: $end_user_id})
WHERE ar3.predicate = '别名属于'
WHERE ar3.predicate IN ['别名属于', '别名失效']
WITH alias, user3, redirected_incoming, redirected_outgoing
MATCH (stmt)-[r:STATEMENT_ENTITY]->(alias)
WITH user3, stmt, r, redirected_incoming, redirected_outgoing
@@ -1423,6 +1423,32 @@ DELETE r
RETURN redirected_incoming, redirected_outgoing, count(*) AS redirected_stmt
"""
# Delete alias nodes: removes every ExtractedEntity that is the SOURCE of an
# EXTRACTED_RELATIONSHIP whose predicate is '别名属于' or '别名失效' and whose
# target is an ExtractedEntity with the same end_user_id.
# Intended to run after alias merging and edge redirection; at that point the
# alias edge itself is expected to be the only relationship left on the node
# (presumably guaranteed by REDIRECT_ALIAS_EDGES — confirm against that query).
# DETACH DELETE removes the node together with any relationships still attached.
# NOTE(review): `rel_count` is never used; the WITH clause only groups rows so
# that each alias node appears once before deletion.
DELETE_ALIAS_NODES = """
MATCH (alias:ExtractedEntity {end_user_id: $end_user_id})-[r:EXTRACTED_RELATIONSHIP]->(user:ExtractedEntity {end_user_id: $end_user_id})
WHERE r.predicate IN ['别名属于', '别名失效']
WITH alias, count(r) AS rel_count
DETACH DELETE alias
RETURN count(alias) AS deleted_count
"""
# Invalidated-alias handling: for every edge with predicate '别名失效', remove
# source.name from target.aliases (case-insensitive via toLower).
# Run after MERGE_ALIAS_BELONGS_TO (which appends new aliases) and before
# DELETE_ALIAS_NODES (which deletes the alias nodes).
#
# Invalid names are collected per target and applied in a single SET, so
# several invalidations on the same entity are all honoured instead of each
# row rewriting the list from the same pre-update snapshot.
# Sources with a null name are skipped: comparing against null would make the
# list-comprehension predicate null for every element and empty the list.
# Output columns are unchanged: one row per removed alias name.
REMOVE_INVALID_ALIASES = """
MATCH (source:ExtractedEntity {end_user_id: $end_user_id})-[r:EXTRACTED_RELATIONSHIP]->(target:ExtractedEntity {end_user_id: $end_user_id})
WHERE r.predicate = '别名失效'
  AND source.name IS NOT NULL
WITH target, collect(source.name) AS invalid_names
WITH target, invalid_names,
     [n IN invalid_names | toLower(n)] AS lowered_names
SET target.aliases = [a IN coalesce(target.aliases, []) WHERE NOT (toLower(a) IN lowered_names)]
WITH target, invalid_names
UNWIND invalid_names AS removed_alias
RETURN removed_alias, target.name AS target_name
"""
CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING = """
MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
RETURN (