refactor(memory): unify user placeholder names and harden alias sync logic

- Replace hardcoded user placeholder name lists in write_tools and
user_memory_service with shared _USER_PLACEHOLDER_NAMES constant
- Filter user placeholder names during alias merging in _merge_attribute
  to prevent cross-role alias contamination on non-user entities
- Use toLower() in Cypher query for case-insensitive name matching
- Change PgSQL->Neo4j alias sync condition from 'if pg_aliases' to
  'if info is not None' so empty aliases correctly clear stale data
This commit is contained in:
lanceyq
2026-04-14 18:06:56 +08:00
parent 811193dd75
commit 49e0801d15
3 changed files with 14 additions and 6 deletions

View File

@@ -14,6 +14,7 @@ from dotenv import load_dotenv
from app.core.logging_config import get_agent_logger
from app.core.memory.agent.utils.get_dialogs import get_chunked_dialogs
from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
from app.core.memory.storage_services.extraction_engine.extraction_orchestrator import ExtractionOrchestrator
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.memory_summary import \
memory_summary_generation
@@ -201,14 +202,17 @@ async def write(
with get_db_context() as db_session:
info = EndUserInfoRepository(db_session).get_by_end_user_id(uuid.UUID(end_user_id))
pg_aliases = info.aliases if info and info.aliases else []
if pg_aliases:
if info is not None:
# 将 Python 侧占位名集合作为参数传入,避免 Cypher 硬编码
placeholder_names = list(_USER_PLACEHOLDER_NAMES)
await neo4j_connector.execute_query(
"""
MATCH (e:ExtractedEntity)
WHERE e.end_user_id = $end_user_id AND e.name IN ['用户', '', 'User', 'I']
WHERE e.end_user_id = $end_user_id AND toLower(e.name) IN $placeholder_names
SET e.aliases = $aliases
""",
end_user_id=end_user_id, aliases=pg_aliases,
placeholder_names=placeholder_names,
)
logger.info(f"[AliasSync] Neo4j 用户实体 aliases 已用 PgSQL 权威源覆盖: {pg_aliases}")
except Exception as sync_err:

View File

@@ -88,11 +88,14 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode):
if canonical_name.lower() not in _USER_PLACEHOLDER_NAMES:
incoming_name = (getattr(ent, "name", "") or "").strip()
# 收集所有需要合并的别名
# 收集所有需要合并的别名,过滤掉用户占位名避免污染非用户实体
all_aliases = list(getattr(canonical, "aliases", []) or [])
if incoming_name and incoming_name != canonical_name:
if incoming_name and incoming_name != canonical_name and incoming_name.lower() not in _USER_PLACEHOLDER_NAMES:
all_aliases.append(incoming_name)
all_aliases.extend(getattr(ent, "aliases", []) or [])
all_aliases.extend(
a for a in (getattr(ent, "aliases", []) or [])
if a and a.strip().lower() not in _USER_PLACEHOLDER_NAMES
)
try:
from app.core.memory.utils.alias_utils import normalize_aliases

View File

@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from app.core.logging_config import get_logger
from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
from app.db import get_db_context
from app.repositories.conversation_repository import ConversationRepository
@@ -473,7 +474,7 @@ class UserMemoryService:
allowed_fields = {'other_name', 'aliases', 'meta_data'}
# 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中
_user_placeholder_names = {'用户', '', 'User', 'I'}
_user_placeholder_names = _USER_PLACEHOLDER_NAMES
# 过滤 other_name不允许设置为占位名称
if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names: