From dae7431075a2b369bb520de8d30813f8854c9903 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 30 Mar 2026 15:39:53 +0800 Subject: [PATCH 1/2] [fix] Keep user placeholder names ("User", "I") out of "other_name" and aliases --- .../extraction_orchestrator.py | 45 +++++++++++++------ .../prompt/prompts/extract_triplet.jinja2 | 6 +++ api/app/services/user_memory_service.py | 12 +++++ 3 files changed, 50 insertions(+), 13 deletions(-) diff --git a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py index f6a143cd..b20112a2 100644 --- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py +++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py @@ -1405,7 +1405,8 @@ class ExtractionOrchestrator: logger.info(f"同步 Neo4j aliases 到 end_user_info: {neo4j_aliases}") else: first_alias = current_aliases[0].strip() if current_aliases else "" - if first_alias: + # 确保 first_alias 不是占位名称 + if first_alias and first_alias not in self.USER_PLACEHOLDER_NAMES: db.add(EndUserInfo( end_user_id=end_user_uuid, other_name=first_alias, @@ -1421,29 +1422,33 @@ class ExtractionOrchestrator: + # 用户实体占位名称,不允许作为 other_name 或出现在 aliases 中 + USER_PLACEHOLDER_NAMES = {'用户', '我', 'User', 'I'} + def _extract_current_aliases(self, entity_nodes: List[ExtractedEntityNode]) -> List[str]: """从实体节点提取用户别名(保持 LLM 提取的原始顺序,不进行任何排序) - 这个方法直接返回 LLM 提取的别名列表,不做任何修改。 + 这个方法直接返回 LLM 提取的别名列表,并过滤掉占位名称("用户"、"我"、"User"、"I")。 第一个别名将被用作 other_name。 Args: entity_nodes: 实体节点列表 Returns: - 别名列表(保持 LLM 提取的原始顺序) + 别名列表(保持 LLM 提取的原始顺序,已过滤占位名称) """ - USER_NAMES = {'用户', '我', 'User', 'I'} for entity in entity_nodes: - if getattr(entity, 'name', '').strip() in USER_NAMES: + if getattr(entity, 'name', '').strip() in self.USER_PLACEHOLDER_NAMES: aliases = getattr(entity, 'aliases', []) or [] - logger.debug(f"提取到用户别名(原始顺序): {aliases}") - return aliases + # 过滤掉占位名称,防止 
"用户"/"我"/"User"/"I" 被存入 aliases 和 other_name + filtered = [a for a in aliases if a.strip() not in self.USER_PLACEHOLDER_NAMES] + logger.debug(f"提取到用户别名(原始顺序,已过滤占位名称): {filtered}") + return filtered return [] async def _fetch_neo4j_user_aliases(self, end_user_id: str) -> List[str]: - """从 Neo4j 查询用户实体的完整 aliases 列表""" + """从 Neo4j 查询用户实体的完整 aliases 列表(已过滤占位名称)""" cypher = """ MATCH (e:ExtractedEntity) WHERE e.end_user_id = $end_user_id AND e.name IN ['用户', '我', 'User', 'I'] @@ -1457,7 +1462,10 @@ class ExtractionOrchestrator: aliases = result[0].get('aliases') or [] if not aliases: logger.debug(f"Neo4j 用户实体 aliases 为空: end_user_id={end_user_id}") - return aliases + return [] + # 过滤掉占位名称,防止历史脏数据传播 + filtered = [a for a in aliases if a.strip() not in self.USER_PLACEHOLDER_NAMES] + return filtered def _resolve_other_name( self, @@ -1469,14 +1477,25 @@ class ExtractionOrchestrator: 决定 other_name 是否需要更新,返回新值;无需更新返回 None。 决策规则: - - 为空 → 用本次对话第一个别名 + - 为空或为占位名称 → 用本次对话第一个别名 - 不在 Neo4j aliases 中 → 用 Neo4j 第一个别名(说明已被删除) - 否则 → 保持不变(返回 None) + + 注意:返回值不允许是占位名称("用户"、"我"、"User"、"I") """ - if not current or not current.strip(): - return current_aliases[0].strip() if current_aliases else None + # 当前值为空或为占位名称时,需要更新 + if not current or not current.strip() or current.strip() in self.USER_PLACEHOLDER_NAMES: + candidate = current_aliases[0].strip() if current_aliases else None + # 确保候选值不是占位名称 + if candidate and candidate in self.USER_PLACEHOLDER_NAMES: + return None + return candidate if current not in neo4j_aliases: - return neo4j_aliases[0].strip() if neo4j_aliases else None + candidate = neo4j_aliases[0].strip() if neo4j_aliases else None + # 确保候选值不是占位名称 + if candidate and candidate in self.USER_PLACEHOLDER_NAMES: + return None + return candidate return None diff --git a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 index f9f2f45c..6605532d 100644 --- 
a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 @@ -105,13 +105,19 @@ Extract entities and knowledge triplets from the given statement. {% if language == "zh" %} - 用户实体的 name 字段:使用 "用户" 或 "我" - 用户的真实姓名:放入 aliases + - **🚨 禁止将 "用户"、"我" 放入 aliases 中,aliases 只能包含用户的真实姓名、昵称等** - 示例: * "我叫李明" → name="用户", aliases=["李明"] + * ❌ 错误:aliases=["用户", "李明"]("用户"不是真实姓名,禁止放入 aliases) + * ❌ 错误:aliases=["我", "李明"]("我"不是真实姓名,禁止放入 aliases) {% else %} - User entity name field: use "User" or "I" - User's real name: put in aliases + - **🚨 NEVER put "User" or "I" in aliases. Aliases must only contain real names, nicknames, etc.** - Examples: * "I'm John" → name="User", aliases=["John"] + * ❌ Wrong: aliases=["User", "John"] ("User" is not a real name, FORBIDDEN in aliases) + * ❌ Wrong: aliases=["I", "John"] ("I" is not a real name, FORBIDDEN in aliases) {% endif %} diff --git a/api/app/services/user_memory_service.py b/api/app/services/user_memory_service.py index 942e01a0..c6743ff2 100644 --- a/api/app/services/user_memory_service.py +++ b/api/app/services/user_memory_service.py @@ -472,6 +472,18 @@ class UserMemoryService: # 定义允许更新的字段白名单 allowed_fields = {'other_name', 'aliases', 'meta_data'} + # 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中 + _user_placeholder_names = {'用户', '我', 'User', 'I'} + + # 过滤 other_name:不允许设置为占位名称 + if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names: + logger.warning(f"拒绝将占位名称 '{update_data['other_name']}' 设置为 other_name") + del update_data['other_name'] + + # 过滤 aliases:移除占位名称 + if 'aliases' in update_data and update_data['aliases']: + update_data['aliases'] = [a for a in update_data['aliases'] if a.strip() not in _user_placeholder_names] + # 检查是否更新了 aliases 字段 aliases_updated = 'aliases' in update_data and update_data['aliases'] != end_user_info_record.aliases From 
c0cd2373c0be88a53c5544216e0c1601a8e36b36 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 30 Mar 2026 15:51:30 +0800 Subject: [PATCH 2/2] [fix] Added type checking with isinstance(a, str) and filtering out empty strings with a.strip() --- api/app/services/user_memory_service.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/api/app/services/user_memory_service.py b/api/app/services/user_memory_service.py index c6743ff2..ab51d922 100644 --- a/api/app/services/user_memory_service.py +++ b/api/app/services/user_memory_service.py @@ -480,9 +480,12 @@ class UserMemoryService: logger.warning(f"拒绝将占位名称 '{update_data['other_name']}' 设置为 other_name") del update_data['other_name'] - # 过滤 aliases:移除占位名称 + # 过滤 aliases:移除占位名称和非字符串值 if 'aliases' in update_data and update_data['aliases']: - update_data['aliases'] = [a for a in update_data['aliases'] if a.strip() not in _user_placeholder_names] + update_data['aliases'] = [ + a for a in update_data['aliases'] + if isinstance(a, str) and a.strip() and a.strip() not in _user_placeholder_names + ] # 检查是否更新了 aliases 字段 aliases_updated = 'aliases' in update_data and update_data['aliases'] != end_user_info_record.aliases