Merge branch 'develop' into refactor/memory_search

# Conflicts:
#	api/app/core/memory/storage_services/search/__init__.py
This commit is contained in:
Eternity
2026-04-20 17:49:29 +08:00
202 changed files with 6621 additions and 1690 deletions

View File

@@ -14,6 +14,7 @@ from dotenv import load_dotenv
from app.core.logging_config import get_agent_logger
from app.core.memory.agent.utils.get_dialogs import get_chunked_dialogs
from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
from app.core.memory.storage_services.extraction_engine.extraction_orchestrator import ExtractionOrchestrator
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.memory_summary import \
memory_summary_generation
@@ -191,15 +192,37 @@ async def write(
if success:
logger.info("Successfully saved all data to Neo4j")
# 使用 Celery 异步任务触发聚类(不阻塞主流程)
if all_entity_nodes:
end_user_id = all_entity_nodes[0].end_user_id
# Neo4j 写入完成后,用 PgSQL 权威 aliases 覆盖 Neo4j 用户实体
try:
from app.repositories.end_user_info_repository import EndUserInfoRepository
if end_user_id:
with get_db_context() as db_session:
info = EndUserInfoRepository(db_session).get_by_end_user_id(uuid.UUID(end_user_id))
pg_aliases = info.aliases if info and info.aliases else []
if info is not None:
# 将 Python 侧占位名集合作为参数传入,避免 Cypher 硬编码
placeholder_names = list(_USER_PLACEHOLDER_NAMES)
await neo4j_connector.execute_query(
"""
MATCH (e:ExtractedEntity)
WHERE e.end_user_id = $end_user_id AND toLower(e.name) IN $placeholder_names
SET e.aliases = $aliases
""",
end_user_id=end_user_id, aliases=pg_aliases,
placeholder_names=placeholder_names,
)
logger.info(f"[AliasSync] Neo4j 用户实体 aliases 已用 PgSQL 权威源覆盖: {pg_aliases}")
except Exception as sync_err:
logger.warning(f"[AliasSync] PgSQL→Neo4j aliases 同步失败(不影响主流程): {sync_err}")
# 使用 Celery 异步任务触发聚类(不阻塞主流程)
try:
from app.tasks import run_incremental_clustering
end_user_id = all_entity_nodes[0].end_user_id
new_entity_ids = [e.id for e in all_entity_nodes]
# 异步提交 Celery 任务
task = run_incremental_clustering.apply_async(
kwargs={
"end_user_id": end_user_id,
@@ -207,7 +230,6 @@ async def write(
"llm_model_id": str(memory_config.llm_model_id) if memory_config.llm_model_id else None,
"embedding_model_id": str(memory_config.embedding_model_id) if memory_config.embedding_model_id else None,
},
# 设置任务优先级(低优先级,不影响主业务)
priority=3,
)
logger.info(
@@ -215,7 +237,6 @@ async def write(
f"task_id={task.id}, end_user_id={end_user_id}, entity_count={len(new_entity_ids)}"
)
except Exception as e:
# 聚类任务提交失败不影响主流程
logger.error(f"[Clustering] 提交聚类任务失败(不影响主流程): {e}", exc_info=True)
break

View File

@@ -61,9 +61,9 @@ from app.core.memory.models.triplet_models import (
# User metadata models
from app.core.memory.models.metadata_models import (
UserMetadata,
UserMetadataBehavioralHints,
UserMetadataProfile,
MetadataExtractionResponse,
MetadataFieldChange,
)
# Ontology scenario models (LLM extracted from scenarios)
@@ -133,9 +133,9 @@ __all__ = [
"Triplet",
"TripletExtractionResponse",
"UserMetadata",
"UserMetadataBehavioralHints",
"UserMetadataProfile",
"MetadataExtractionResponse",
"MetadataFieldChange",
# Ontology models
"OntologyClass",
"OntologyExtractionResponse",

View File

@@ -4,7 +4,7 @@ Independent from triplet_models.py - these models are used by the
standalone metadata extraction pipeline (post-dedup async Celery task).
"""
from typing import List
from typing import List, Literal, Optional
from pydantic import BaseModel, ConfigDict, Field
@@ -13,8 +13,8 @@ class UserMetadataProfile(BaseModel):
"""用户画像信息"""
model_config = ConfigDict(extra="ignore")
role: str = Field(default="", description="用户职业或角色")
domain: str = Field(default="", description="用户所在领域")
role: List[str] = Field(default_factory=list, description="用户职业或角色")
domain: List[str] = Field(default_factory=list, description="用户所在领域")
expertise: List[str] = Field(
default_factory=list, description="用户擅长的技能或工具"
)
@@ -23,31 +23,37 @@ class UserMetadataProfile(BaseModel):
)
class UserMetadataBehavioralHints(BaseModel):
"""行为偏好"""
model_config = ConfigDict(extra="ignore")
learning_stage: str = Field(default="", description="学习阶段")
preferred_depth: str = Field(default="", description="偏好深度")
tone_preference: str = Field(default="", description="语气偏好")
class UserMetadata(BaseModel):
"""用户元数据顶层结构"""
model_config = ConfigDict(extra="ignore")
profile: UserMetadataProfile = Field(default_factory=UserMetadataProfile)
behavioral_hints: UserMetadataBehavioralHints = Field(
default_factory=UserMetadataBehavioralHints
class MetadataFieldChange(BaseModel):
"""单个元数据字段的变更操作"""
model_config = ConfigDict(extra="ignore")
field_path: str = Field(
description="字段路径,用点号分隔,如 'profile.role''profile.expertise'"
)
action: Literal["set", "remove"] = Field(
description="操作类型:'set' 表示新增或修改,'remove' 表示移除"
)
value: Optional[str] = Field(
default=None,
description="字段的新值action='set' 时必填)。标量字段直接填值,列表字段填单个要新增的元素"
)
knowledge_tags: List[str] = Field(default_factory=list, description="知识标签")
class MetadataExtractionResponse(BaseModel):
"""元数据提取 LLM 响应结构"""
"""元数据提取 LLM 响应结构(增量模式)"""
model_config = ConfigDict(extra="ignore")
user_metadata: UserMetadata = Field(default_factory=UserMetadata)
metadata_changes: List[MetadataFieldChange] = Field(
default_factory=list,
description="元数据的增量变更列表,每项描述一个字段的新增、修改或移除操作",
)
aliases_to_add: List[str] = Field(
default_factory=list,
description="本次新发现的用户别名(用户自我介绍或他人对用户的称呼)",

View File

@@ -82,51 +82,38 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode):
canonical.connect_strength = next(iter(pair))
# 别名合并(去重保序,使用标准化工具)
# 用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源,去重合并时不修改
try:
canonical_name = (getattr(canonical, "name", "") or "").strip()
incoming_name = (getattr(ent, "name", "") or "").strip()
# 收集所有需要合并的别名
all_aliases = []
# 1. 添加canonical现有的别名
existing = getattr(canonical, "aliases", []) or []
all_aliases.extend(existing)
# 2. 添加incoming实体的名称如果不同于canonical的名称
if incoming_name and incoming_name != canonical_name:
all_aliases.append(incoming_name)
# 3. 添加incoming实体的所有别名
incoming = getattr(ent, "aliases", []) or []
all_aliases.extend(incoming)
# 4. 标准化并去重优先使用alias_utils工具函数
try:
from app.core.memory.utils.alias_utils import normalize_aliases
canonical.aliases = normalize_aliases(canonical_name, all_aliases)
except Exception:
# 如果导入失败,使用增强的去重逻辑
seen_normalized = set()
unique_aliases = []
if canonical_name.lower() not in _USER_PLACEHOLDER_NAMES:
incoming_name = (getattr(ent, "name", "") or "").strip()
for alias in all_aliases:
if not alias:
continue
alias_stripped = str(alias).strip()
if not alias_stripped or alias_stripped == canonical_name:
continue
# 标准化:转小写用于去重判断
alias_normalized = alias_stripped.lower()
if alias_normalized not in seen_normalized:
seen_normalized.add(alias_normalized)
unique_aliases.append(alias_stripped)
# 收集所有需要合并的别名,过滤掉用户占位名避免污染非用户实体
all_aliases = list(getattr(canonical, "aliases", []) or [])
if incoming_name and incoming_name != canonical_name and incoming_name.lower() not in _USER_PLACEHOLDER_NAMES:
all_aliases.append(incoming_name)
all_aliases.extend(
a for a in (getattr(ent, "aliases", []) or [])
if a and a.strip().lower() not in _USER_PLACEHOLDER_NAMES
)
# 排序并赋值
canonical.aliases = sorted(unique_aliases)
try:
from app.core.memory.utils.alias_utils import normalize_aliases
canonical.aliases = normalize_aliases(canonical_name, all_aliases)
except Exception:
seen_normalized = set()
unique_aliases = []
for alias in all_aliases:
if not alias:
continue
alias_stripped = str(alias).strip()
if not alias_stripped or alias_stripped == canonical_name:
continue
alias_normalized = alias_stripped.lower()
if alias_normalized not in seen_normalized:
seen_normalized.add(alias_normalized)
unique_aliases.append(alias_stripped)
canonical.aliases = sorted(unique_aliases)
except Exception:
pass
@@ -733,66 +720,37 @@ def fuzzy_match(
def _merge_entities_with_aliases(canonical: ExtractedEntityNode, losing: ExtractedEntityNode):
""" 模糊匹配中的实体合并。
"""模糊匹配中的实体合并(别名部分)
合并策略:
1. 保留canonical的主名称不变
2. 将losing的主名称添加为alias如果不同
3. 合并两个实体的所有aliases
4. 自动去重case-insensitive并排序
Args:
canonical: 规范实体(保留)
losing: 被合并实体(删除)
Note:
使用alias_utils.normalize_aliases进行标准化去重
用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源,跳过合并。
"""
# 获取规范实体的名称
canonical_name = (getattr(canonical, "name", "") or "").strip()
if canonical_name.lower() in _USER_PLACEHOLDER_NAMES:
return
losing_name = (getattr(losing, "name", "") or "").strip()
# 收集所有需要合并的别名
all_aliases = []
# 1. 添加canonical现有的别名
current_aliases = getattr(canonical, "aliases", []) or []
all_aliases.extend(current_aliases)
# 2. 添加losing实体的名称如果不同于canonical的名称
all_aliases = list(getattr(canonical, "aliases", []) or [])
if losing_name and losing_name != canonical_name:
all_aliases.append(losing_name)
all_aliases.extend(getattr(losing, "aliases", []) or [])
# 3. 添加losing实体的所有别名
losing_aliases = getattr(losing, "aliases", []) or []
all_aliases.extend(losing_aliases)
# 4. 标准化并去重(使用标准化后的字符串进行去重)
try:
from app.core.memory.utils.alias_utils import normalize_aliases
canonical.aliases = normalize_aliases(canonical_name, all_aliases)
except Exception:
# 如果导入失败,使用增强的去重逻辑
# 使用标准化后的字符串作为key进行去重
seen_normalized = set()
unique_aliases = []
for alias in all_aliases:
if not alias:
continue
alias_stripped = str(alias).strip()
if not alias_stripped or alias_stripped == canonical_name:
continue
# 标准化:转小写用于去重判断
alias_normalized = alias_stripped.lower()
if alias_normalized not in seen_normalized:
seen_normalized.add(alias_normalized)
unique_aliases.append(alias_stripped)
# 排序并赋值
canonical.aliases = sorted(unique_aliases)
# ========== 主循环:遍历所有实体对进行模糊匹配 ==========

View File

@@ -1391,18 +1391,18 @@ class ExtractionOrchestrator:
"""
将本轮提取的用户别名同步到 end_user 和 end_user_info 表。
注意:此方法在 Neo4j 写入之前调用,因此不能依赖 Neo4j 作为别名的权威数据源。
改为直接使用内存中去重后的 entity_nodes 的 aliases与 PgSQL 已有的 aliases 合并。
PgSQL end_user_info.aliases 是用户别名的唯一权威源。
此方法仅将本轮 LLM 从对话中新提取的别名增量追加到 PgSQL
不再从 Neo4j 二层去重合并历史别名,避免脏数据反向污染 PgSQL。
策略:
1. 从内存中的 entity_nodes 提取本轮用户别名current_aliases
2. 从去重后的 entity_nodes 中提取完整别名(含 Neo4j 二层去重合并的历史别名
3. 从 PgSQL end_user_info 读取已有的 aliasesdb_aliases
4. 合并 db_aliases + deduped_aliases + current_aliases去重保序
5. 写回 PgSQL
1. 从本轮对话原始发言中提取用户别名current_aliases
2. 从 PgSQL end_user_info 读取已有的 aliasesdb_aliases
3. 合并 db_aliases + current_aliases,去重保序
4. 写回 PgSQL
Args:
entity_nodes: 去重后的实体节点列表(内存中,含二层去重合并结果
entity_nodes: 去重后的实体节点列表(内存中)
dialog_data_list: 对话数据列表
"""
try:
@@ -1418,11 +1418,6 @@ class ExtractionOrchestrator:
# 1. 提取本轮对话的用户别名(保持 LLM 提取的原始顺序,不排序)
current_aliases = self._extract_current_aliases(entity_nodes, dialog_data_list)
# 1.5 从去重后的 entity_nodes 中提取完整别名
# 二层去重会将 Neo4j 中已有的历史别名合并到 entity_nodes 中,
# 这里提取出来确保 PgSQL 与 Neo4j 的别名保持同步
deduped_aliases = self._extract_deduped_entity_aliases(entity_nodes)
# 1.6 从 Neo4j 查询已有的 AI 助手别名,作为额外的排除源
# (防止 LLM 未提取出 AI 助手实体时AI 别名泄漏到用户别名中)
neo4j_assistant_aliases = await self._fetch_neo4j_assistant_aliases(end_user_id)
@@ -1434,19 +1429,12 @@ class ExtractionOrchestrator:
]
if len(current_aliases) < before_count:
logger.info(f"通过 Neo4j AI 助手别名排除了 {before_count - len(current_aliases)} 个误归属别名")
# 同样过滤 deduped_aliases
deduped_aliases = [
a for a in deduped_aliases
if a.strip().lower() not in neo4j_assistant_aliases
]
if not current_aliases and not deduped_aliases:
if not current_aliases:
logger.debug(f"本轮未提取到用户别名,跳过同步: end_user_id={end_user_id}")
return
logger.info(f"本轮对话提取的 aliases: {current_aliases}")
if deduped_aliases:
logger.info(f"去重后实体的完整 aliases含历史: {deduped_aliases}")
# 2. 同步到数据库
end_user_uuid = uuid.UUID(end_user_id)
@@ -1457,21 +1445,15 @@ class ExtractionOrchestrator:
logger.warning(f"未找到 end_user_id={end_user_id} 的用户记录")
return
# 3. 从 PgSQL 读取已有 aliases 并与本轮合并
# 3. 从 PgSQL 读取已有 aliases 并与本轮新增合并
info = EndUserInfoRepository(db).get_by_end_user_id(end_user_uuid)
db_aliases = (info.aliases if info and info.aliases else [])
# 过滤掉占位名称
db_aliases = [a for a in db_aliases if a.strip().lower() not in self.USER_PLACEHOLDER_NAMES]
# 合并:已有 + 去重后完整别名 + 本轮新增,去重保序
# 合并:PgSQL 已有 + 本轮新增,去重保序(不再合并 Neo4j 历史别名)
merged_aliases = list(db_aliases)
seen_lower = {a.strip().lower() for a in merged_aliases}
# 先合并去重后实体的完整别名(含 Neo4j 历史别名)
for alias in deduped_aliases:
if alias.strip().lower() not in seen_lower:
merged_aliases.append(alias)
seen_lower.add(alias.strip().lower())
# 再合并本轮新提取的别名
for alias in current_aliases:
if alias.strip().lower() not in seen_lower:
merged_aliases.append(alias)
@@ -1505,9 +1487,7 @@ class ExtractionOrchestrator:
info.aliases = merged_aliases
logger.info(f"同步合并后 aliases 到 end_user_info: {merged_aliases}")
else:
first_alias = current_aliases[0].strip() if current_aliases else (
deduped_aliases[0].strip() if deduped_aliases else ""
)
first_alias = current_aliases[0].strip() if current_aliases else ""
# 确保 first_alias 不是占位名称
if first_alias and first_alias.lower() not in self.USER_PLACEHOLDER_NAMES:
db.add(EndUserInfo(

View File

@@ -118,7 +118,7 @@ class MetadataExtractor:
existing_aliases: Optional[List[str]] = None,
) -> Optional[tuple]:
"""
对筛选后的 statement 列表调用 LLM 提取元数据和用户别名。
对筛选后的 statement 列表调用 LLM 提取元数据增量变更和用户别名。
Args:
statements: 用户发言的 statement 文本列表
@@ -126,7 +126,8 @@ class MetadataExtractor:
existing_aliases: 数据库已有的用户别名列表(可选)
Returns:
(UserMetadata, List[str], List[str]) tuple: (metadata, aliases_to_add, aliases_to_remove) on success, None on failure
(List[MetadataFieldChange], List[str], List[str]) tuple:
(metadata_changes, aliases_to_add, aliases_to_remove) on success, None on failure
"""
if not statements:
return None
@@ -160,12 +161,12 @@ class MetadataExtractor:
)
if response:
metadata = response.user_metadata if response.user_metadata else None
changes = response.metadata_changes if response.metadata_changes else []
to_add = response.aliases_to_add if response.aliases_to_add else []
to_remove = (
response.aliases_to_remove if response.aliases_to_remove else []
)
return metadata, to_add, to_remove
return changes, to_add, to_remove
logger.warning("LLM 返回的响应为空")
return None

View File

@@ -1,5 +1,5 @@
===Task===
Extract user metadata from the following conversation statements spoken by the user.
Extract user metadata changes from the following conversation statements spoken by the user.
{% if language == "zh" %}
**"三度原则"判断标准:**
@@ -10,28 +10,36 @@ Extract user metadata from the following conversation statements spoken by the u
**提取规则:**
- **只提取关于"用户本人"的画像信息**,忽略用户提到的第三方人物(如朋友、同事、家人)的信息
- 仅提取文本中明确提到的信息,不要推测
- 如果文本中没有可提取的用户画像信息,返回空的 user_metadata 对象
- **输出语言必须与输入文本的语言一致**(输入中文则输出中文值,输入英文则输出英文值)
**增量模式(重要):**
你只需要输出**本次对话引起的变更操作**,不要输出完整的元数据。每个变更是一个对象,包含:
- `field_path`:字段路径,用点号分隔(如 `profile.role`、`profile.expertise`
- `action`:操作类型
* `set`:新增或修改一个字段的值
* `remove`:移除一个字段的值
- `value`:字段的新值(`action="set"` 时必填,`action="remove"` 时填要移除的元素值)
* 所有字段均为列表类型,每个元素一条变更记录
**判断规则:**
- 用户提到新信息 → `action="set"`,填入新值
- 用户明确否定已有信息(如"我不再做老师了"、"我已经不学Python了")→ `action="remove"``value` 填要移除的元素值
- 如果本次对话没有任何可提取的变更,返回空的 `metadata_changes` 数组 `[]`
- **不要为未被提及的字段生成任何变更操作**
{% if existing_metadata %}
**重要:合并已有元数据**
下方提供了数据库中已有的用户元数据。请结合用户最新发言,输出**合并后的完整元数据**
- 如果用户明确否定了已有信息(如"我不再教高中物理了"),在输出中**移除**该信息
- 如果用户提到了新信息,**添加**到对应字段中
- 如果已有信息未被用户否定,**保留**在输出中
- 标量字段(如 role、domain如果用户提到了新值用新值替换否则保留已有值
- 最终输出应该是完整的、合并后的元数据,不是增量
**已有元数据(仅供参考,用于判断是否需要变更):**
请对比已有数据和用户最新发言,输出差异部分的变更操作。
- 如果用户说的信息和已有数据一致,不需要输出变更
- 如果用户否定了已有数据中的某个值,输出 `remove` 操作
- 如果用户提到了新信息,输出 `set` 操作
{% endif %}
**字段说明:**
- profile.role用户的职业或角色如 教师、医生、后端工程师
- profile.domain用户所在领域如 教育、医疗、软件开发
- profile.expertise用户擅长的技能或工具通用,不限于编程),如 Python、心理咨询、高中物理
- profile.interests用户主动表达兴趣的话题或领域标签
- behavioral_hints.learning_stage学习阶段初学者/中级/高级)
- behavioral_hints.preferred_depth偏好深度概览/技术细节/深入探讨)
- behavioral_hints.tone_preference语气偏好轻松随意/专业简洁/学术严谨)
- knowledge_tags用户涉及的知识领域标签
- profile.role用户的职业或角色(列表),如 教师、医生、后端工程师,一个人可以有多个角色
- profile.domain用户所在领域(列表),如 教育、医疗、软件开发,一个人可以涉及多个领域
- profile.expertise用户擅长的技能或工具列表),如 Python、心理咨询、高中物理
- profile.interests用户主动表达兴趣的话题或领域标签(列表)
**用户别名变更(增量模式):**
- **aliases_to_add**:本次新发现的用户别名,包括:
@@ -43,7 +51,6 @@ Extract user metadata from the following conversation statements spoken by the u
- **aliases_to_remove**:用户明确否认的别名,包括:
* 用户说"我不叫XX了"、"别叫我XX"、"我改名了不叫XX" → 将 XX 放入此数组
* **严格限制**:只将用户原文中**逐字提到**的被否认名字放入,不要推断关联的其他别名
* 例如:用户说"我不叫陈小刀了" → 只移除"陈小刀",不要移除"陈哥"、"老陈"等未被提及的别名
* 如果没有要移除的别名,返回空数组 `[]`
{% if existing_aliases %}
- 已有别名:{{ existing_aliases | tojson }}(仅供参考,不需要在输出中重复)
@@ -57,28 +64,36 @@ Extract user metadata from the following conversation statements spoken by the u
**Extraction rules:**
- **Only extract profile information about the user themselves**, ignore information about third parties (friends, colleagues, family) mentioned by the user
- Only extract information explicitly mentioned in the text, do not speculate
- If no user profile information can be extracted, return an empty user_metadata object
- **Output language must match the input text language**
**Incremental mode (important):**
You should only output **the change operations caused by this conversation**, not the complete metadata. Each change is an object containing:
- `field_path`: Field path separated by dots (e.g. `profile.role`, `profile.expertise`)
- `action`: Operation type
* `set`: Add or update a field value
* `remove`: Remove a field value
- `value`: The new value for the field (required when `action="set"`, for `action="remove"` fill in the element value to remove)
* All fields are list types, one change record per element
**Decision rules:**
- User mentions new information → `action="set"`, fill in the new value
- User explicitly negates existing info (e.g. "I'm no longer a teacher", "I stopped learning Python") → `action="remove"`, `value` is the element to remove
- If this conversation has no extractable changes, return an empty `metadata_changes` array `[]`
- **Do NOT generate any change operations for fields not mentioned in the conversation**
{% if existing_metadata %}
**Important: Merge with existing metadata**
Existing user metadata from the database is provided below. Combine with the user's latest statements to output the **complete merged metadata**:
- If the user explicitly negates existing info (e.g. "I no longer teach high school physics"), **remove** it from output
- If the user mentions new info, **add** it to the corresponding field
- If existing info is not negated by the user, **keep** it in the output
- Scalar fields (e.g. role, domain): replace with new value if user mentions one; otherwise keep existing
- The final output should be the complete, merged metadata — not an incremental update
**Existing metadata (for reference only, to determine if changes are needed):**
Compare existing data with the user's latest statements, and only output change operations for the differences.
- If the user's statement matches existing data, no change is needed
- If the user negates a value in existing data, output a `remove` operation
- If the user mentions new information, output a `set` operation
{% endif %}
**Field descriptions:**
- profile.role: User's occupation or role, e.g. teacher, doctor, software engineer
- profile.domain: User's domain, e.g. education, healthcare, software development
- profile.expertise: User's skills or tools (general, not limited to programming)
- profile.interests: Topics or domain tags the user actively expressed interest in
- behavioral_hints.learning_stage: Learning stage (beginner/intermediate/advanced)
- behavioral_hints.preferred_depth: Preferred depth (overview/detailed/deep dive)
- behavioral_hints.tone_preference: Tone preference (casual/professional/academic)
- knowledge_tags: Knowledge domain tags related to the user
- profile.role: User's occupation or role (list), e.g. teacher, doctor, software engineer. A person can have multiple roles
- profile.domain: User's domain (list), e.g. education, healthcare, software development. A person can span multiple domains
- profile.expertise: User's skills or tools (list), e.g. Python, counseling, physics
- profile.interests: Topics or domain tags the user actively expressed interest in (list)
**User alias changes (incremental mode):**
- **aliases_to_add**: Newly discovered user aliases from this conversation, including:
@@ -90,7 +105,6 @@ Existing user metadata from the database is provided below. Combine with the use
- **aliases_to_remove**: Aliases the user explicitly denies, including:
* User says "Don't call me XX anymore", "I'm not called XX", "I changed my name from XX" → put XX in this array
* **Strict rule**: Only include the exact name the user **verbatim mentions** as denied. Do NOT infer or remove related aliases
* Example: User says "I'm not called John anymore" → only remove "John", do NOT remove "Johnny", "J" or other related aliases not mentioned
* If no aliases to remove, return empty array `[]`
{% if existing_aliases %}
- Existing aliases: {{ existing_aliases | tojson }} (for reference only, do not repeat in output)
@@ -113,20 +127,11 @@ Existing user metadata from the database is provided below. Combine with the use
Return a JSON object with the following structure:
```json
{
"user_metadata": {
"profile": {
"role": "",
"domain": "",
"expertise": [],
"interests": []
},
"behavioral_hints": {
"learning_stage": "",
"preferred_depth": "",
"tone_preference": ""
},
"knowledge_tags": []
},
"metadata_changes": [
{"field_path": "profile.role", "action": "set", "value": "后端工程师"},
{"field_path": "profile.expertise", "action": "set", "value": "Python"},
{"field_path": "profile.expertise", "action": "remove", "value": "Java"}
],
"aliases_to_add": [],
"aliases_to_remove": []
}