[changes] Simultaneously create the "end_user_info" data to ensure that the interface modification takes effect immediately.

This commit is contained in:
lanceyq
2026-03-25 16:26:41 +08:00
parent 7c0743eb8f
commit 38c6c7f053
5 changed files with 438 additions and 93 deletions

View File

@@ -176,6 +176,22 @@ async def write(
)
if success:
logger.info("Successfully saved all data to Neo4j")
# 同步用户别名到 PostgreSQL
try:
# 创建一个临时的 orchestrator 实例来调用同步方法
temp_orchestrator = ExtractionOrchestrator(
llm_client=llm_client,
embedder_client=embedder_client,
connector=neo4j_connector,
embedding_id=embedding_model_id
)
await temp_orchestrator._update_end_user_other_name(all_entity_nodes, chunked_dialogs)
logger.info("Successfully synced user aliases to PostgreSQL")
except Exception as sync_error:
logger.error(f"Failed to sync user aliases to PostgreSQL: {sync_error}", exc_info=True)
# 不影响主流程
# 写入成功后,同步等待聚类完成(避免与 Memory Summary 并发冲突)
await _trigger_clustering_sync(
all_entity_nodes,

View File

@@ -19,6 +19,7 @@
import asyncio
import logging
import os
import uuid
from datetime import datetime
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
@@ -62,6 +63,10 @@ from app.core.memory.storage_services.extraction_engine.pipeline_help import (
export_test_input_doc,
)
from app.core.memory.utils.data.ontology import TemporalInfo
from app.db import get_db_context
from app.models.end_user_info_model import EndUserInfo
from app.repositories.end_user_info_repository import EndUserInfoRepository
from app.repositories.end_user_repository import EndUserRepository
from app.repositories.neo4j.neo4j_connector import Neo4jConnector
# 配置日志
@@ -1325,6 +1330,152 @@ class ExtractionOrchestrator:
perceptual_edges
)
async def _update_end_user_other_name(
    self,
    entity_nodes: List[ExtractedEntityNode],
    dialog_data_list: List[DialogData]
) -> None:
    """
    Sync the user entity's aliases into the end_user and end_user_info tables.

    The ``other_name`` candidates come from the aliases extracted in the
    current conversation (extraction order is kept), while the stored
    ``aliases`` value is the list read from Neo4j. When Neo4j returns
    nothing, the aliases from the current conversation are used instead.

    Every exception raised inside this method is logged and swallowed.

    Args:
        entity_nodes: Entity nodes to search for the user entity.
        dialog_data_list: Dialog data; ``end_user_id`` is taken from the
            first element.
    """
    try:
        # Guard clauses: nothing to sync without a dialog or an end user id.
        if not dialog_data_list:
            logger.warning("dialog_data_list 为空,跳过用户别名同步")
            return

        end_user_id = dialog_data_list[0].end_user_id
        if not end_user_id:
            logger.warning("end_user_id 为空,跳过用户别名同步")
            return

        # Step 1: aliases from this conversation, in extraction order.
        current_aliases = self._extract_current_aliases(entity_nodes)

        # Step 2: full alias list from Neo4j, falling back to the aliases
        # of this conversation when Neo4j has none.
        neo4j_aliases = (await self._fetch_neo4j_user_aliases(end_user_id)) or current_aliases
        if not neo4j_aliases:
            logger.debug(f"aliases 为空,跳过同步: end_user_id={end_user_id}")
            return

        logger.info(f"本次对话提取的 aliases: {current_aliases}")
        logger.info(f"Neo4j 中的完整 aliases: {neo4j_aliases}")

        # Step 3: write both tables inside one database context.
        user_pk = uuid.UUID(end_user_id)
        with get_db_context() as db:
            # end_user table
            end_user = EndUserRepository(db).get_by_id(user_pk)
            if not end_user:
                logger.warning(f"未找到 end_user_id={end_user_id} 的用户记录")
                return

            new_name = self._resolve_other_name(end_user.other_name, current_aliases, neo4j_aliases)
            if new_name is None:
                logger.debug(f"end_user 表 other_name 保持不变: {end_user.other_name}")
            else:
                end_user.other_name = new_name
                logger.info(f"更新 end_user 表 other_name → {new_name}")

            # end_user_info table: create the row when missing, otherwise update it.
            info_rows = EndUserInfoRepository(db).get_by_end_user_id(user_pk)
            if not info_rows:
                first_alias = current_aliases[0].strip() if current_aliases else ""
                if first_alias:
                    new_info = EndUserInfo(
                        end_user_id=user_pk,
                        other_name=first_alias,
                        aliases=neo4j_aliases,
                        meta_data={}
                    )
                    db.add(new_info)
                    logger.info(f"创建 end_user_info 记录other_name={first_alias}, aliases={neo4j_aliases}")
            else:
                info = info_rows[0]
                new_name_info = self._resolve_other_name(info.other_name, current_aliases, neo4j_aliases)
                if new_name_info is not None:
                    info.other_name = new_name_info
                    logger.info(f"更新 end_user_info 表 other_name → {new_name_info}")
                if info.aliases != neo4j_aliases:
                    info.aliases = neo4j_aliases
                    logger.info(f"同步 Neo4j aliases 到 end_user_info: {neo4j_aliases}")

            db.commit()
    except Exception as e:
        logger.error(f"更新 end_user other_name 失败: {e}", exc_info=True)
def _extract_current_aliases(self, entity_nodes: List[ExtractedEntityNode]) -> List[str]:
    """Return the aliases of the first user entity found in ``entity_nodes``.

    The alias list is returned exactly as stored on the entity: no sorting,
    no de-duplication, no copy. The caller uses the first alias as
    ``other_name``.

    Args:
        entity_nodes: Entity nodes to scan; ``None`` is treated as empty.

    Returns:
        The ``aliases`` of the first entity whose stripped ``name`` is one of
        the user placeholder names, or ``[]`` when no such entity exists or
        its aliases are missing/empty.
    """
    # NOTE(review): the set contains an empty string, so an entity with a
    # blank name is treated as the user entity. Confirm this is intended
    # (the extraction prompt names the user entity "用户"/"我"/"User"/"I").
    USER_NAMES = {'用户', '', 'User', 'I'}

    for entity in entity_nodes or []:
        name = getattr(entity, 'name', '')
        if not isinstance(name, str):
            # A present-but-None (or otherwise non-string) name cannot be
            # stripped; skip the entity instead of raising AttributeError
            # and aborting the whole alias sync.
            continue
        if name.strip() in USER_NAMES:
            aliases = getattr(entity, 'aliases', []) or []
            logger.debug(f"提取到用户别名(原始顺序): {aliases}")
            return aliases
    return []
async def _fetch_neo4j_user_aliases(self, end_user_id: str) -> List[str]:
    """Read the ``aliases`` property of the user's entity node from Neo4j.

    The query matches an ``ExtractedEntity`` node with the given
    ``end_user_id`` whose name is one of the user placeholder names and
    returns at most one row.

    Args:
        end_user_id: Value matched against the node's ``end_user_id``.

    Returns:
        The ``aliases`` value of the first row, or ``[]`` when the query
        yields no rows or the value is falsy.
    """
    # NOTE(review): a fresh Neo4jConnector is created for every call and is
    # not closed in this method — confirm the connector manages its own
    # lifecycle.
    query_text = (
        "\n"
        "MATCH (e:ExtractedEntity)\n"
        "WHERE e.end_user_id = $end_user_id AND e.name IN ['用户', '', 'User', 'I']\n"
        "RETURN e.aliases AS aliases\n"
        "LIMIT 1\n"
    )
    rows = await Neo4jConnector().execute_query(query_text, end_user_id=end_user_id)

    if rows:
        stored = rows[0].get('aliases') or []
        if not stored:
            logger.debug(f"Neo4j 用户实体 aliases 为空: end_user_id={end_user_id}")
        return stored

    logger.debug(f"Neo4j 中未找到用户实体: end_user_id={end_user_id}")
    return []
def _resolve_other_name(
    self,
    current: Optional[str],
    current_aliases: List[str],
    neo4j_aliases: List[str]
) -> Optional[str]:
    """
    Decide whether ``other_name`` must change and return the new value.

    Rules:
    - ``current`` is empty/blank -> first usable alias of this conversation.
    - ``current`` is not among the Neo4j aliases (treated as removed)
      -> first usable Neo4j alias.
    - otherwise -> keep the current value.

    A "usable" alias is a string that is non-empty after stripping. Blank or
    non-string entries are skipped, so the result is never an empty string
    (which callers would otherwise write over an existing name, because they
    only test ``is not None``). Membership is checked on stripped values so a
    name that differs from a stored alias only by surrounding whitespace is
    not rewritten on every call.

    Args:
        current: The ``other_name`` currently stored.
        current_aliases: Aliases extracted from the current conversation.
        neo4j_aliases: Full alias list read from Neo4j.

    Returns:
        The stripped replacement name, or ``None`` when nothing should change
        or no usable alias is available.
    """
    def first_usable(aliases) -> Optional[str]:
        # First alias that is a non-blank string, stripped; None if absent.
        for alias in aliases or []:
            if isinstance(alias, str):
                cleaned = alias.strip()
                if cleaned:
                    return cleaned
        return None

    current_clean = current.strip() if isinstance(current, str) else ""
    if not current_clean:
        return first_usable(current_aliases)

    known = {alias.strip() for alias in (neo4j_aliases or []) if isinstance(alias, str)}
    if current_clean in known:
        return None
    return first_usable(neo4j_aliases)
async def _run_dedup_and_write_summary(
self,
dialogue_nodes: List[DialogueNode],

View File

@@ -5,6 +5,15 @@
===Task===
Extract entities and knowledge triplets from the given statement.
**⚠️ CRITICAL REQUIREMENTS:**
1. **ALIASES ORDER IS CRITICAL**: The FIRST alias in the array will be used as the user's primary display name (other_name). List aliases strictly in the order they first appear in the conversation, so the first-mentioned name comes FIRST.
2. **ALWAYS include aliases field**: Even if empty, you MUST include "aliases": [] in EVERY entity.
{# TODO: v0.2.10 - denied_aliases 功能暂时禁用,将通过 Cypher 查询实现
2. **DENIED_ALIASES**: When user explicitly denies a name (e.g., "我不叫X", "I'm not called X"), you MUST put X in denied_aliases field, NOT in aliases.
3. **ALWAYS include both fields**: Even if empty, you MUST include "aliases": [] and "denied_aliases": [] in EVERY entity.
#}
{% if language == "zh" %}
**重要:请使用中文生成实体名称(name)、描述(description)和示例(example)。**
{% else %}
@@ -18,34 +27,29 @@ Extract entities and knowledge triplets from the given statement.
{% if ontology_types %}
===Ontology Type Guidance===
**CRITICAL RULE: You MUST ONLY use the predefined ontology type names listed below for the entity "type" field. Do NOT use any other type names, even if they seem reasonable.**
**CRITICAL: Use ONLY predefined type names below. If no exact match, use CLOSEST type. NEVER invent new types.**
**If no predefined type fits an entity, use the CLOSEST matching predefined type. NEVER invent new type names.**
**Type Priority:**
1. [场景类型] Scene Types (domain-specific, prefer first)
2. [通用类型] General Types (standard ontologies)
3. [通用父类] Parent Types (hierarchy context)
**Type Priority (from highest to lowest):**
1. **[场景类型] Scene Types** - Domain-specific types, ALWAYS prefer these first
2. **[通用类型] General Types** - Common types from standard ontologies (DBpedia)
3. **[通用父类] Parent Types** - Provide type hierarchy context
**Rules:**
- Type MUST exactly match predefined names
- Do NOT modify, translate, or abbreviate type names
- Prefer scene types over general types
**Type Matching Rules:**
- Entity type MUST exactly match one of the predefined type names below
- Do NOT use types like "Equipment", "Component", "Concept", "Action", "Condition", "Data", "Duration" unless they appear in the predefined list
- Do NOT modify, translate, abbreviate, or create variations of type names
- Prefer scene types (marked [场景类型]) over general types when both could apply
- If uncertain, check the type description to find the best match
**Predefined Ontology Types:**
**Predefined Types:**
{{ ontology_types }}
{% if type_hierarchy_hints %}
**Type Hierarchy Reference:**
The following shows type inheritance relationships (Child → Parent → Grandparent):
**Hierarchy:**
{% for hint in type_hierarchy_hints %}
- {{ hint }}
{% endfor %}
{% endif %}
**ALLOWED Type Names (use EXACTLY one of these, no exceptions):**
**ALLOWED Names:**
{{ ontology_type_names | join(', ') }}
{% endif %}
@@ -62,75 +66,114 @@ The following shows type inheritance relationships (Child → Parent → Grandpa
- **Entity descriptions must be in English**
- **Examples must be in English**
{% endif %}
- **Semantic Memory Classification (is_explicit_memory):**
* Set to `true` if the entity represents **explicit/semantic memory**:
- **Concepts:** "Machine Learning", "Photosynthesis", "Democracy"
- **Knowledge:** "Python Programming Language", "Theory of Relativity"
- **Definitions:** "API (Application Programming Interface)", "REST API"
- **Principles:** "SOLID Principles", "First Law of Thermodynamics"
- **Theories:** "Evolution Theory", "Quantum Mechanics"
- **Methods/Techniques:** "Agile Development", "Machine Learning Algorithm"
- **Technical Terms:** "Neural Network", "Database"
* Set to `false` for:
- **People:** "John Smith", "Dr. Wang"
- **Organizations:** "Microsoft", "Harvard University"
- **Locations:** "Beijing", "Central Park"
- **Events:** "2024 Conference", "Project Meeting"
- **Specific objects:** "iPhone 15", "Building A"
- **Example Generation (IMPORTANT for semantic memory entities):**
* For entities where `is_explicit_memory=true`, generate a **concise example (around 20 characters)** to help understand the concept
* The example should be:
- **Specific and concrete**: Use real-world scenarios or applications
- **Brief**: Around 20 characters (can be slightly longer if needed for clarity)
{% if language == "zh" %}
- **使用中文**
{% else %}
- **In English**
{% endif %}
* For non-semantic entities (`is_explicit_memory=false`), the example field can be empty
- **Aliases Extraction:**
{% if language == "zh" %}
* 别名使用中文
{% else %}
* Aliases should be in English
{% endif %}
* Include common alternative names, abbreviations and full names
* If no aliases exist, use empty array: []
- **Semantic Memory (is_explicit_memory):**
* `true` for: Concepts, Knowledge, Definitions, Theories, Methods (e.g., "Machine Learning", "REST API")
* `false` for: People, Organizations, Locations, Events, Specific objects
* For `is_explicit_memory=true`, provide concise example (~20 chars{% if language == "zh" %},使用中文{% endif %})
**姓名别名识别规则Name Alias Recognition**
* 当前对话的用户实体 name 固定为"用户",不得使用用户透露的真实姓名作为 name
* 自我称呼模式:用户说"我的名字是X"、"我叫X" → X 加入 aliasesname 保持为"用户"
* 昵称/小名模式:识别"小名"、"昵称"、"英文名"、"网名"等关键词后的称呼 → 加入 aliases
* 他人称呼模式:识别"同事叫我X"、"朋友叫我X"、"大家叫我X" → X 加入 aliases
* 同一实体的多个称呼应合并到同一 Entity 的 aliases 列表中
* aliases 中不应包含与 name 完全相同的字符串
* **严禁将已加入某实体 aliases 的词再单独抽取为另一个独立实体**:若某个词已作为别名归属于"用户"实体,则不得再将该词作为独立 Entity 的 name 出现在 entities 列表中
- Exclude lengthy quotes, calendar dates, temporal ranges, and temporal expressions
- For numeric values: extract as separate entities (instance_of: 'Numeric', name: units, numeric_value: value)
Example: £30 → name: 'GBP', numeric_value: 30, instance_of: 'Numeric'
**🚨🚨🚨 ALIASES & DENIED_ALIASES - MANDATORY FIELDS 🚨🚨🚨**
**CRITICAL RULES (违反将导致提取失败):**
1. **EVERY entity MUST have BOTH fields:**
- `"aliases": [...]` - REQUIRED, even if empty `[]`
- `"denied_aliases": [...]` - REQUIRED, even if empty `[]`
2. **ALIASES - 别名提取规则:**
{% if language == "zh" %}
- 包含:昵称、全名、简称、别称、网名等
- 顺序:**第一个别名将作为用户的主显示名称(other_name),因此别名顺序至关重要**
- 提取顺序:严格按照对话中首次出现的顺序排列,最先出现的名字放在第一位
- 示例:
* "我叫张三,大家叫我小张" → aliases=["张三", "小张"](张三是第一个,将成为 other_name)
* "大家叫我小李,我全名叫李明" → aliases=["小李", "李明"](小李先出现,将成为 other_name)
- 空值:如果没有别名,使用 `[]`
- 重要:只提取本次对话中明确提到的别名,不要推测或添加未提及的名字
{% else %}
- Include: nicknames, full names, abbreviations, alternative names
- Order: **The FIRST alias will be used as the user's primary display name (other_name), so alias order matters**
- Extraction order: Strictly follow the order of first appearance in the conversation; the first-mentioned name comes FIRST
- Examples:
* "I'm John, people call me Johnny" → aliases=["John", "Johnny"] (John is first, will become other_name)
* "People call me Mike, my full name is Michael" → aliases=["Mike", "Michael"] (Mike appears first, will become other_name)
- Empty: If no aliases, use `[]`
- Important: Only extract aliases explicitly mentioned in current conversation, do not infer or add unmentioned names
{% endif %}
{# TODO: v0.2.10 - denied_aliases 功能暂时禁用,将通过 Cypher 查询实现
3. **DENIED_ALIASES - 否定别名规则:**
...
#}
4. **USER ENTITY SPECIAL HANDLING:**
{% if language == "zh" %}
- 用户实体的 name 字段:使用 "用户" 或 "我"
- 用户的真实姓名:放入 aliases
- 示例:
* "我叫李明" → name="用户", aliases=["李明"]
{% else %}
- User entity name field: use "User" or "I"
- User's real name: put in aliases
- Examples:
* "I'm John" → name="User", aliases=["John"]
{% endif %}
{# TODO: v0.2.10 - denied_aliases 功能暂时禁用
* "我不叫王强,我叫李明" → name="用户", aliases=["李明"], denied_aliases=["王强"]
* "I'm not Bob, I'm John" → name="User", aliases=["John"], denied_aliases=["Bob"]
#}
5. **CONFLICT RESOLUTION:**
{% if language == "zh" %}
- 顺序优先级:按出现顺序,先出现的在前
{% else %}
- Order priority: by appearance order, first mentioned comes first
{% endif %}
{# TODO: v0.2.10 - denied_aliases 功能暂时禁用
- 如果同一句话中既有肯定又有否定:
* "我不叫X,我叫Y" → aliases=["Y"], denied_aliases=["X"]
- If both affirmation and denial in same sentence:
* "I'm not X, I'm Y" → aliases=["Y"], denied_aliases=["X"]
#}
**EXAMPLES OF CORRECT EXTRACTION:**
{% if language == "zh" %}
- "我叫张三" → aliases=["张三"] (张三将成为 other_name)
- "大家叫我小明,我全名叫李明" → aliases=["小明", "李明"] (小明先出现,将成为 other_name)
- "我是李华,网名叫华仔" → aliases=["李华", "华仔"] (李华先出现,将成为 other_name)
{# TODO: v0.2.10 - denied_aliases 示例暂时禁用
- "我不叫王强" → aliases=[], denied_aliases=["王强"]
- "我不叫老张,我叫小张" → aliases=["小张"], denied_aliases=["老张"] (小张将成为 other_name)
- "我是李华,网名叫华仔,但我不是小李" → aliases=["李华", "华仔"], denied_aliases=["小李"] (李华先出现,将成为 other_name)
#}
{% else %}
- "I'm John" → aliases=["John"] (John will become other_name)
- "People call me Mike, my full name is Michael" → aliases=["Mike", "Michael"] (Mike appears first, will become other_name)
- "I'm John Smith, username JSmith" → aliases=["John Smith", "JSmith"] (John Smith appears first, will become other_name)
{# TODO: v0.2.10 - denied_aliases examples temporarily disabled
- "I'm not called Bob" → aliases=[], denied_aliases=["Bob"]
- "I'm not Bob, I'm John" → aliases=["John"], denied_aliases=["Bob"] (John will become other_name)
- "I'm John Smith, username JSmith, but not Johnny" → aliases=["John Smith", "JSmith"], denied_aliases=["Johnny"] (John Smith appears first, will become other_name)
#}
{% endif %}
- Exclude lengthy quotes, dates, temporal expressions
- Numeric values: extract as entities (instance_of: 'Numeric', name: units, numeric_value: value)
**Triplet Extraction:**
- Extract (subject, predicate, object) triplets where:
- Subject: main entity performing the action or being described
- Predicate: relationship between entities (e.g., 'is', 'works at', 'believes')
- Object: entity, value, or concept affected by the predicate
- Extract (subject, predicate, object) where subject/object are entities, predicate is relationship
{% if language == "zh" %}
- subject_name 和 object_name 必须使用中文
- subject_name 和 object_name 使用中文
{% else %}
- subject_name and object_name must be in English (translate if original is in another language)
- subject_name and object_name in English
{% endif %}
- Exclude all temporal expressions from every field
- Use ONLY the predicates listed in "Predicate Instructions" (uppercase English tokens)
- Do NOT translate predicate tokens
- Do NOT include `statement_id` field (assigned automatically)
**When NOT to extract triplets:**
- Non-propositional utterances (emotions, fillers, onomatopoeia)
- No clear predicate from the given definitions applies
- Standalone noun phrases or checklist items → extract as entities only
- Do NOT invent generic predicates (e.g., "IS_DOING", "FEELS", "MENTIONS")
**If no valid triplet exists:** Return triplets: [], extract entities if present, otherwise both arrays empty.
- Use ONLY predicates from "Predicate Instructions" (uppercase tokens)
- Exclude temporal expressions, do NOT include `statement_id`
- **When NOT to extract:** emotions, fillers, no clear predicate, standalone nouns
- **If no valid triplet:** Return triplets: []
{%- if predicate_instructions -%}
**Predicate Instructions:**
@@ -217,34 +260,91 @@ Output:
]
}
**Example 4 (姓名别名识别 - Chinese):** "我的名字是乐力齐,我的小名是齐齐,同事们都叫我小乐"
**Example 4 (别名 - Chinese):** "我的名字是乐力齐,我的小名是齐齐,同事们都叫我小乐"
Output:
{
"triplets": [],
"entities": [
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人,有多个称呼", "example": "", "aliases": ["乐力齐", "齐齐", "小乐"], "is_explicit_memory": false}
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人", "example": "", "aliases": ["乐力齐", "齐齐", "小乐"], "is_explicit_memory": false}
]
}
**Example 5 (别名顺序 - Chinese):** "我叫陈思远。对了,我的网名叫「远山」"
Output:
{
"triplets": [],
"entities": [
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人", "example": "", "aliases": ["陈思远", "远山"], "is_explicit_memory": false}
]
}
**Example 6 (否定别名 - Chinese):** "我不叫陈思远,我其实叫小小张"
Output:
{
"triplets": [],
"entities": [
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人", "example": "", "aliases": ["小小张"], "denied_aliases": ["陈思远"], "is_explicit_memory": false}
]
}
**Example 7 (否定别名 - Chinese):** "我不叫远山"
Output:
{
"triplets": [],
"entities": [
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人", "example": "", "aliases": [], "denied_aliases": ["远山"], "is_explicit_memory": false}
]
}
**Example 8 (复杂场景 - Chinese):** "大家都叫我明明,我的全名是小明,但我不是小红"
Output:
{
"triplets": [],
"entities": [
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人", "example": "", "aliases": ["明明", "小明"], "denied_aliases": ["小红"], "is_explicit_memory": false}
]
}
**Example 9 (纠正错误 - Chinese):** "我搞错了,我的网名不叫做远山,网名叫做大山"
Output:
{
"triplets": [],
"entities": [
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人", "example": "", "aliases": ["大山"], "denied_aliases": ["远山"], "is_explicit_memory": false}
]
}
**Example 10 (多重纠正 - Chinese):** "其实我不是老张,也不叫小张,我叫张三"
Output:
{
"triplets": [],
"entities": [
{"entity_idx": 0, "name": "用户", "type": "Person", "description": "用户本人", "example": "", "aliases": ["张三"], "denied_aliases": ["老张", "小张"], "is_explicit_memory": false}
]
}
{% endif %}
===End of Examples===
{% if ontology_types %}
**⚠️ REMINDER: The examples above use generic type names for illustration only. You MUST use ONLY the predefined ontology type names from the "ALLOWED Type Names" list above. For example, use "PredictiveMaintenance" instead of "Concept", use "ProductionLine" instead of "Equipment", etc. Map each entity to the closest matching predefined type.**
**⚠️ REMINDER: Examples use generic types for illustration. You MUST use predefined types from "ALLOWED Names" above.**
{% endif %}
===Output Format===
**JSON Requirements:**
- Use only ASCII double quotes (") for JSON structure
- Never use Chinese quotation marks ("") or Unicode quotes
- Escape quotation marks in text with backslashes (\")
- Ensure proper string closure and comma separation
- No line breaks within JSON string values
- Use ASCII double quotes ("), escape with \"
- No Chinese quotes (""), no line breaks in strings
{% if language == "zh" %}
- **语言要求实体名称name、描述description)、示例(example、subject_name、object_name 必须使用中文**
- **语言:name、description、example、subject_name、object_name 使用中文**
{% else %}
- **Language Requirement: Entity names, descriptions, examples, subject_name, object_name must be in English**
- **If the original text is in Chinese, translate all names to English**
- **Language: names, descriptions, examples in English (translate if needed)**
{% endif %}
- **⚠️ ALIASES ORDER: preserve temporal order of appearance**
- **🚨 MANDATORY FIELD: EVERY entity MUST include "aliases" field, even if empty array []**
{# TODO: v0.2.10 - denied_aliases 要求暂时禁用
- **🚨 DENIED_ALIASES: "我不叫X" → X to denied_aliases (mutually exclusive with aliases)**
- **🚨 MANDATORY FIELDS: EVERY entity MUST include both "aliases" and "denied_aliases" fields, even if empty arrays []**
#}
{{ json_schema }}

View File

@@ -7,6 +7,7 @@ from sqlalchemy.orm import Session
from app.core.logging_config import get_db_logger
from app.models.app_model import App
from app.models.end_user_model import EndUser
from app.models.end_user_info_model import EndUserInfo
from app.models.workspace_model import Workspace
# 获取数据库专用日志器
@@ -70,7 +71,8 @@ class EndUserRepository:
app_id: uuid.UUID,
workspace_id: uuid.UUID,
other_id: str,
original_user_id: Optional[str] = None
original_user_id: Optional[str] = None,
other_name: Optional[str] = None
) -> EndUser:
"""获取或创建终端用户
@@ -79,6 +81,7 @@ class EndUserRepository:
workspace_id: 工作空间ID
other_id: 第三方ID
original_user_id: 原始用户ID (存储到 other_id)
other_name: 用户名称(用于创建 EndUserInfo
"""
try:
# 尝试查找现有用户
@@ -106,10 +109,22 @@ class EndUserRepository:
other_id=other_id
)
self.db.add(end_user)
self.db.flush() # 刷新以获取 end_user.id但不提交事务
# 创建对应的 EndUserInfo 记录
end_user_info = EndUserInfo(
end_user_id=end_user.id,
other_name=other_name or "", # 如果没有提供 other_name使用空字符串
aliases={}, # 空字典而不是 None
meta_data={} # 空字典而不是 None
)
self.db.add(end_user_info)
# 一起提交
self.db.commit()
self.db.refresh(end_user)
db_logger.info(f"创建新终端用户: (other_id: {other_id}) for workspace {workspace_id}")
db_logger.info(f"创建新终端用户及其信息: (other_id: {other_id}) for workspace {workspace_id}")
return end_user
except Exception as e:

View File

@@ -454,6 +454,7 @@ class UserMemoryService:
"""
try:
from app.models.end_user_info_model import EndUserInfo
from app.models.end_user_model import EndUser
from app.core.api_key_utils import datetime_to_timestamp
# 转换为UUID并查询
@@ -471,6 +472,12 @@ class UserMemoryService:
# 定义允许更新的字段白名单
allowed_fields = {'other_name', 'aliases', 'meta_data'}
# 检查是否更新了 aliases 字段
aliases_updated = 'aliases' in update_data and update_data['aliases'] != end_user_info_record.aliases
# 检查是否更新了 other_name 字段
other_name_updated = 'other_name' in update_data and update_data['other_name'] != end_user_info_record.other_name
# 更新字段(仅允许白名单中的字段)
for field, value in update_data.items():
if field in allowed_fields:
@@ -479,10 +486,30 @@ class UserMemoryService:
# 更新时间戳
end_user_info_record.updated_at = datetime.now()
# 如果 other_name 被更新,同步更新 end_user 表
if other_name_updated:
end_user_record = db.query(EndUser).filter(EndUser.id == user_uuid).first()
if end_user_record:
end_user_record.other_name = update_data['other_name']
end_user_record.updated_at = datetime.now()
logger.info(f"同步更新 end_user 表的 other_name: end_user_id={end_user_id}, other_name={update_data['other_name']}")
else:
logger.warning(f"未找到对应的 end_user 记录: end_user_id={end_user_id}")
# 提交更改
db.commit()
db.refresh(end_user_info_record)
# 如果 aliases 被更新,同步到 Neo4j
if aliases_updated:
try:
import asyncio
asyncio.create_task(self._sync_aliases_to_neo4j(end_user_id, update_data['aliases']))
logger.info(f"已触发 aliases 同步到 Neo4j: end_user_id={end_user_id}, aliases={update_data['aliases']}")
except Exception as sync_error:
logger.error(f"触发同步 aliases 到 Neo4j 失败: {sync_error}", exc_info=True)
# 不影响主流程,只记录错误
# 构建响应数据(转换时间为毫秒时间戳)
response_data = {
"end_user_info_id": str(end_user_info_record.id),
@@ -518,6 +545,42 @@ class UserMemoryService:
"error": str(e)
}
async def _sync_aliases_to_neo4j(self, end_user_id: str, aliases: List[str]) -> None:
    """
    Overwrite the ``aliases`` property of the user's entity nodes in Neo4j.

    Every ``ExtractedEntity`` node with the given ``end_user_id`` whose name
    is one of the user placeholder names gets its ``aliases`` set to the
    supplied list.

    Args:
        end_user_id: Value matched against the node's ``end_user_id``.
        aliases: Alias list written to the matching nodes.

    Raises:
        Exception: Whatever the query execution raises; it is logged and
            re-raised.
    """
    from app.repositories.neo4j.neo4j_connector import Neo4jConnector

    # Cypher statement that replaces the aliases on the user entity nodes.
    cypher_query = (
        "\n"
        "MATCH (e:ExtractedEntity)\n"
        "WHERE e.end_user_id = $end_user_id\n"
        "AND e.name IN ['用户', '', 'User', 'I']\n"
        "SET e.aliases = $aliases\n"
        "RETURN e.id AS entity_id, e.name AS entity_name, e.aliases AS updated_aliases\n"
    )

    # NOTE(review): the connector is created per call and not closed here —
    # confirm it manages its own lifecycle.
    neo4j = Neo4jConnector()
    try:
        query_params = {"end_user_id": end_user_id, "aliases": aliases}
        result = await neo4j.execute_query(cypher_query, **query_params)

        if not result:
            logger.warning(f"未找到需要更新的用户实体节点: end_user_id={end_user_id}")
        else:
            logger.info(f"成功同步 aliases 到 Neo4j: end_user_id={end_user_id}, 更新了 {len(result)} 个实体节点")
    except Exception as e:
        logger.error(f"同步 aliases 到 Neo4j 失败: {e}", exc_info=True)
        raise
async def get_cached_memory_insight(
self,
db: Session,