diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index 5e51beba..3b0ea1ee 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -14,6 +14,7 @@ from dotenv import load_dotenv from app.core.logging_config import get_agent_logger from app.core.memory.agent.utils.get_dialogs import get_chunked_dialogs +from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES from app.core.memory.storage_services.extraction_engine.extraction_orchestrator import ExtractionOrchestrator from app.core.memory.storage_services.extraction_engine.knowledge_extraction.memory_summary import \ memory_summary_generation @@ -201,14 +202,17 @@ async def write( with get_db_context() as db_session: info = EndUserInfoRepository(db_session).get_by_end_user_id(uuid.UUID(end_user_id)) pg_aliases = info.aliases if info and info.aliases else [] - if pg_aliases: + if info is not None: + # 将 Python 侧占位名集合作为参数传入,避免 Cypher 硬编码 + placeholder_names = list(_USER_PLACEHOLDER_NAMES) await neo4j_connector.execute_query( """ MATCH (e:ExtractedEntity) - WHERE e.end_user_id = $end_user_id AND e.name IN ['用户', '我', 'User', 'I'] + WHERE e.end_user_id = $end_user_id AND toLower(e.name) IN $placeholder_names SET e.aliases = $aliases """, end_user_id=end_user_id, aliases=pg_aliases, + placeholder_names=placeholder_names, ) logger.info(f"[AliasSync] Neo4j 用户实体 aliases 已用 PgSQL 权威源覆盖: {pg_aliases}") except Exception as sync_err: diff --git a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py index 8f659a27..715f190c 100644 --- a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py +++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py @@ -88,11 +88,14 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode): if canonical_name.lower() not in _USER_PLACEHOLDER_NAMES: incoming_name = (getattr(ent, "name", "") or "").strip() - # 收集所有需要合并的别名 + # 收集所有需要合并的别名,过滤掉用户占位名避免污染非用户实体 all_aliases = list(getattr(canonical, "aliases", []) or []) - if incoming_name and incoming_name != canonical_name: + if incoming_name and incoming_name != canonical_name and incoming_name.lower() not in _USER_PLACEHOLDER_NAMES: all_aliases.append(incoming_name) - all_aliases.extend(getattr(ent, "aliases", []) or []) + all_aliases.extend( + a for a in (getattr(ent, "aliases", []) or []) + if a and a.strip().lower() not in _USER_PLACEHOLDER_NAMES + ) try: from app.core.memory.utils.alias_utils import normalize_aliases diff --git a/api/app/services/user_memory_service.py b/api/app/services/user_memory_service.py index cc18447e..9389ecfa 100644 --- a/api/app/services/user_memory_service.py +++ b/api/app/services/user_memory_service.py @@ -14,6 +14,7 @@ from pydantic import BaseModel, Field from sqlalchemy.orm import Session from app.core.logging_config import get_logger +from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES from app.core.memory.utils.llm.llm_utils import MemoryClientFactory from app.db import get_db_context from app.repositories.conversation_repository import ConversationRepository @@ -473,7 +474,7 @@ class UserMemoryService: allowed_fields = {'other_name', 'aliases', 'meta_data'} # 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中 - _user_placeholder_names = {'用户', '我', 'User', 'I'} + _user_placeholder_names = _USER_PLACEHOLDER_NAMES # 过滤 other_name:不允许设置为占位名称 if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names: