refactor(memory): unify user placeholder names and harden alias sync logic
- Replace the hardcoded user placeholder name lists in write_tools and user_memory_service with the shared _USER_PLACEHOLDER_NAMES constant
- Filter user placeholder names during alias merging in _merge_attribute to prevent cross-role alias contamination on non-user entities
- Use toLower() in the Cypher query for case-insensitive name matching
- Change the PgSQL->Neo4j alias sync condition from 'if pg_aliases' to 'if info is not None' so that empty aliases correctly clear stale data
This commit is contained in:
@@ -14,6 +14,7 @@ from dotenv import load_dotenv
|
|||||||
|
|
||||||
from app.core.logging_config import get_agent_logger
|
from app.core.logging_config import get_agent_logger
|
||||||
from app.core.memory.agent.utils.get_dialogs import get_chunked_dialogs
|
from app.core.memory.agent.utils.get_dialogs import get_chunked_dialogs
|
||||||
|
from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
|
||||||
from app.core.memory.storage_services.extraction_engine.extraction_orchestrator import ExtractionOrchestrator
|
from app.core.memory.storage_services.extraction_engine.extraction_orchestrator import ExtractionOrchestrator
|
||||||
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.memory_summary import \
|
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.memory_summary import \
|
||||||
memory_summary_generation
|
memory_summary_generation
|
||||||
@@ -201,14 +202,17 @@ async def write(
|
|||||||
with get_db_context() as db_session:
|
with get_db_context() as db_session:
|
||||||
info = EndUserInfoRepository(db_session).get_by_end_user_id(uuid.UUID(end_user_id))
|
info = EndUserInfoRepository(db_session).get_by_end_user_id(uuid.UUID(end_user_id))
|
||||||
pg_aliases = info.aliases if info and info.aliases else []
|
pg_aliases = info.aliases if info and info.aliases else []
|
||||||
if pg_aliases:
|
if info is not None:
|
||||||
|
# 将 Python 侧占位名集合作为参数传入,避免 Cypher 硬编码
|
||||||
|
placeholder_names = list(_USER_PLACEHOLDER_NAMES)
|
||||||
await neo4j_connector.execute_query(
|
await neo4j_connector.execute_query(
|
||||||
"""
|
"""
|
||||||
MATCH (e:ExtractedEntity)
|
MATCH (e:ExtractedEntity)
|
||||||
WHERE e.end_user_id = $end_user_id AND e.name IN ['用户', '我', 'User', 'I']
|
WHERE e.end_user_id = $end_user_id AND toLower(e.name) IN $placeholder_names
|
||||||
SET e.aliases = $aliases
|
SET e.aliases = $aliases
|
||||||
""",
|
""",
|
||||||
end_user_id=end_user_id, aliases=pg_aliases,
|
end_user_id=end_user_id, aliases=pg_aliases,
|
||||||
|
placeholder_names=placeholder_names,
|
||||||
)
|
)
|
||||||
logger.info(f"[AliasSync] Neo4j 用户实体 aliases 已用 PgSQL 权威源覆盖: {pg_aliases}")
|
logger.info(f"[AliasSync] Neo4j 用户实体 aliases 已用 PgSQL 权威源覆盖: {pg_aliases}")
|
||||||
except Exception as sync_err:
|
except Exception as sync_err:
|
||||||
|
|||||||
@@ -88,11 +88,14 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode):
|
|||||||
if canonical_name.lower() not in _USER_PLACEHOLDER_NAMES:
|
if canonical_name.lower() not in _USER_PLACEHOLDER_NAMES:
|
||||||
incoming_name = (getattr(ent, "name", "") or "").strip()
|
incoming_name = (getattr(ent, "name", "") or "").strip()
|
||||||
|
|
||||||
# 收集所有需要合并的别名
|
# 收集所有需要合并的别名,过滤掉用户占位名避免污染非用户实体
|
||||||
all_aliases = list(getattr(canonical, "aliases", []) or [])
|
all_aliases = list(getattr(canonical, "aliases", []) or [])
|
||||||
if incoming_name and incoming_name != canonical_name:
|
if incoming_name and incoming_name != canonical_name and incoming_name.lower() not in _USER_PLACEHOLDER_NAMES:
|
||||||
all_aliases.append(incoming_name)
|
all_aliases.append(incoming_name)
|
||||||
all_aliases.extend(getattr(ent, "aliases", []) or [])
|
all_aliases.extend(
|
||||||
|
a for a in (getattr(ent, "aliases", []) or [])
|
||||||
|
if a and a.strip().lower() not in _USER_PLACEHOLDER_NAMES
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from app.core.memory.utils.alias_utils import normalize_aliases
|
from app.core.memory.utils.alias_utils import normalize_aliases
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.core.logging_config import get_logger
|
from app.core.logging_config import get_logger
|
||||||
|
from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
|
||||||
from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
|
from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
|
||||||
from app.db import get_db_context
|
from app.db import get_db_context
|
||||||
from app.repositories.conversation_repository import ConversationRepository
|
from app.repositories.conversation_repository import ConversationRepository
|
||||||
@@ -473,7 +474,7 @@ class UserMemoryService:
|
|||||||
allowed_fields = {'other_name', 'aliases', 'meta_data'}
|
allowed_fields = {'other_name', 'aliases', 'meta_data'}
|
||||||
|
|
||||||
# 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中
|
# 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中
|
||||||
_user_placeholder_names = {'用户', '我', 'User', 'I'}
|
_user_placeholder_names = _USER_PLACEHOLDER_NAMES
|
||||||
|
|
||||||
# 过滤 other_name:不允许设置为占位名称
|
# 过滤 other_name:不允许设置为占位名称
|
||||||
if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names:
|
if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names:
|
||||||
|
|||||||
Reference in New Issue
Block a user