refactor(memory): enhance extraction ontology and add assistant pruning graph support

- Expand entity type ontology with detailed definitions, examples, and notes (merged types: 地点设施, 物品设备, 产品服务, 软件平台, 角色职业, 知识能力, 偏好习惯目标, 称呼别名, 智能体)
- Add relation ontology taxonomy with 15 predicate categories and usage rules
- Strengthen reference resolution rules: resolve pronouns before extraction, skip unresolvable references entirely
- Add guidelines to avoid extracting abstract propositions, emotions, and low-value entities (effort/reward/success patterns)
- Add 7 new extraction examples covering edge cases
- Add AssistantOriginal/AssistantPruned node models and graph persistence (PRUNED_TO and BELONGS_TO_DIALOG edges, Neo4j indexes and constraints)
- Add graph_build_step.py for building graph nodes/edges from DialogData
- Update write_pipeline.py to pass assistant pruning nodes/edges to graph saver
- Update data_pruning.py with related preprocessing changes
2026-04-28 13:32:29 +08:00
parent 2355536b44
commit 7747ed7ac1
11 changed files with 917 additions and 421 deletions
--- a/api/app/core/memory/models/graph_models.py
+++ b/api/app/core/memory/models/graph_models.py
@@ -578,3 +578,47 @@ class PerceptualNode(Node):
    domain: str
    file_type: str
    summary_embedding: list[float] | None
+
+
+class AssistantOriginalNode(Node):
+    """Node storing the original text of an Assistant message before pruning.
+
+    Attributes:
+        pair_id: ID shared with the AssistantPrunedNode derived from this message, when one is created
+        dialog_id: ID of the parent dialogue this message belongs to
+        text: The full original Assistant response text
+    """
+    pair_id: str = Field(..., description="Shared pairing ID with the corresponding pruned node")
+    dialog_id: str = Field(..., description="ID of the parent dialogue")
+    text: str = Field(..., description="Original Assistant message text")
+
+
+class AssistantPrunedNode(Node):
+    """Node storing the pruned (compressed) text of an Assistant message.
+
+    Attributes:
+        pair_id: Shared ID with the corresponding AssistantOriginalNode for pairing
+        dialog_id: ID of the parent dialogue this message belongs to
+        text: The pruned memory hint text (or "NULL" if no memory value)
+        memory_type: Type of the memory hint (comfort|suggestion|recommendation|warning|instruction|NULL)
+        text_embedding: Embedding vector for semantic search on pruned text; None when not computed
+    """
+    pair_id: str = Field(..., description="Shared pairing ID with the corresponding original node")
+    dialog_id: str = Field(..., description="ID of the parent dialogue")
+    text: str = Field(..., description="Pruned assistant memory hint text")
+    memory_type: str = Field(..., description="Memory type: comfort|suggestion|recommendation|warning|instruction|NULL")
+    text_embedding: list[float] | None = Field(None, description="Embedding vector for semantic search")
+
+
+class AssistantPrunedEdge(Edge):
+    """Edge from an AssistantOriginal node (source) to its AssistantPruned node (target) (PRUNED_TO).
+
+    Attributes:
+        pair_id: Pairing ID shared by the two connected nodes, kept for traceability
+    """
+    pair_id: str = Field(..., description="Shared pairing ID for traceability")
+
+
+class AssistantDialogEdge(Edge):
+    """Edge from an AssistantOriginal node (source) to its parent Dialogue node (target) (BELONGS_TO_DIALOG)."""
+    pass
--- a/api/app/core/memory/pipelines/write_pipeline.py
+++ b/api/app/core/memory/pipelines/write_pipeline.py
@@ -77,6 +77,10 @@ class ExtractionResult(BaseModel):
    stmt_entity_edges: List[StatementEntityEdge]
    entity_entity_edges: List[EntityEntityEdge]
    perceptual_edges: List[PerceptualEdge]
+    assistant_original_nodes: List[Any] = Field(default_factory=list)
+    assistant_pruned_nodes: List[Any] = Field(default_factory=list)
+    assistant_pruned_edges: List[Any] = Field(default_factory=list)
+    assistant_dialog_edges: List[Any] = Field(default_factory=list)
    dialog_data_list: List[Any] = Field(
        default_factory=list,
        description="原始 DialogData 列表，类型为 Any 以避免循环依赖",
@@ -482,6 +486,10 @@ class WritePipeline:
            stmt_entity_edges=dedup_result.statement_entity_edges,
            entity_entity_edges=dedup_result.entity_entity_edges,
            perceptual_edges=graph.perceptual_edges,
+            assistant_original_nodes=graph.assistant_original_nodes,
+            assistant_pruned_nodes=graph.assistant_pruned_nodes,
+            assistant_pruned_edges=graph.assistant_pruned_edges,
+            assistant_dialog_edges=graph.assistant_dialog_edges,
            dialog_data_list=dialog_data_list,
        )

@@ -523,6 +531,10 @@ class WritePipeline:
                    entity_edges=result.entity_entity_edges,
                    perceptual_edges=result.perceptual_edges,
                    connector=self._neo4j_connector,
+                    assistant_original_nodes=result.assistant_original_nodes,
+                    assistant_pruned_nodes=result.assistant_pruned_nodes,
+                    assistant_pruned_edges=result.assistant_pruned_edges,
+                    assistant_dialog_edges=result.assistant_dialog_edges,
                )
                if success:
                    logger.info("Successfully saved all data to Neo4j")
--- a/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py
+++ b/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py
@@ -15,7 +15,9 @@ import hashlib
 import json
 import logging
 from collections import OrderedDict
+from datetime import datetime
 from typing import List, Optional, Dict
+from uuid import uuid4

 from pydantic import BaseModel, Field

@@ -39,6 +41,16 @@ def message_has_files(message: "ConversationMessage") -> bool:
    return message.files and len(message.files) > 0


+class AssistantPruningRecord(BaseModel):
+    """Pruning record for a single User-Assistant message pair, used later when writing to Neo4j."""
+
+    pair_id: str = Field(..., description="唯一配对 ID，Original 和 Pruned 节点共享")
+    original_text: str = Field(..., description="Assistant 原始回复全文")
+    pruned_text: str = Field(..., description="剪枝后文本（assistant_memory_hint），或 'NULL'")
+    memory_type: str = Field(..., description="comfort|suggestion|recommendation|warning|instruction|NULL")
+    created_at: str = Field(..., description="ISO 时间戳")
+
+
 class AssistantPruningResponse(BaseModel):
    """LLM 对单个 User-Assistant 消息对的剪枝结果。

@@ -95,6 +107,9 @@ class SemanticPruner:
        # Snapshot 数据收集：每个消息对的 input + gold
        self._snapshot_records: List[Dict] = []

+        # 剪枝记录：用于后续写入 Neo4j（AssistantOriginal + AssistantPruned 节点）
+        self.pruning_records: List[AssistantPruningRecord] = []
+
        # 运行日志
        self.run_logs: List[str] = []

@@ -246,6 +261,15 @@ class SemanticPruner:
                    },
                })

+                # 收集剪枝记录（用于后续写入 Neo4j）
+                self.pruning_records.append(AssistantPruningRecord(
+                    pair_id=uuid4().hex,
+                    original_text=asst_msg.msg,
+                    pruned_text=result.assistant_memory_hint,
+                    memory_type=result.assistant_memory_type,
+                    created_at=datetime.now().isoformat(),
+                ))
+
                if result.assistant_memory_hint == "NULL":
                    self._log(
                        f"  [{label}] 索引{asst_idx} → NULL，删除 "
--- a/api/app/core/memory/storage_services/extraction_engine/steps/extraction_pipeline_orchestrator.py
+++ b/api/app/core/memory/storage_services/extraction_engine/steps/extraction_pipeline_orchestrator.py
@@ -855,6 +855,7 @@ class NewExtractionOrchestrator:
                                entity_idx=e.entity_idx,
                                name=e.name,
                                type=e.type,
+                                type_description=getattr(e, "type_description", ""),
                                description=e.description,
                                is_explicit_memory=e.is_explicit_memory,
                            )
@@ -865,6 +866,7 @@ class NewExtractionOrchestrator:
                                subject_name=t.subject_name,
                                subject_id=t.subject_id,
                                predicate=t.predicate,
+                                predicate_description=getattr(t, "predicate_description", ""),
                                object_name=t.object_name,
                                object_id=t.object_id,
                            )
--- a/api/app/core/memory/storage_services/extraction_engine/steps/graph_build_step.py
+++ b/api/app/core/memory/storage_services/extraction_engine/steps/graph_build_step.py
@@ -28,6 +28,10 @@ from app.core.memory.models.graph_models import (
    StatementChunkEdge,
    StatementEntityEdge,
    StatementNode,
+    AssistantOriginalNode,
+    AssistantPrunedNode,
+    AssistantPrunedEdge,
+    AssistantDialogEdge,
 )
 from app.core.memory.models.message_models import DialogData, TemporalInfo

@@ -47,6 +51,10 @@ class GraphBuildResult:
        "stmt_entity_edges",
        "entity_entity_edges",
        "perceptual_edges",
+        "assistant_original_nodes",
+        "assistant_pruned_nodes",
+        "assistant_pruned_edges",
+        "assistant_dialog_edges",
    )

    def __init__(
@@ -60,6 +68,10 @@ class GraphBuildResult:
        stmt_entity_edges: List[StatementEntityEdge],
        entity_entity_edges: List[EntityEntityEdge],
        perceptual_edges: List[PerceptualEdge],
+        assistant_original_nodes: Optional[List[AssistantOriginalNode]] = None,
+        assistant_pruned_nodes: Optional[List[AssistantPrunedNode]] = None,
+        assistant_pruned_edges: Optional[List[AssistantPrunedEdge]] = None,
+        assistant_dialog_edges: Optional[List[AssistantDialogEdge]] = None,
    ):
        self.dialogue_nodes = dialogue_nodes
        self.chunk_nodes = chunk_nodes
@@ -70,6 +82,10 @@ class GraphBuildResult:
        self.stmt_entity_edges = stmt_entity_edges
        self.entity_entity_edges = entity_entity_edges
        self.perceptual_edges = perceptual_edges
+        self.assistant_original_nodes = assistant_original_nodes or []
+        self.assistant_pruned_nodes = assistant_pruned_nodes or []
+        self.assistant_pruned_edges = assistant_pruned_edges or []
+        self.assistant_dialog_edges = assistant_dialog_edges or []


 async def build_graph_nodes_and_edges(
@@ -343,6 +359,77 @@ async def build_graph_nodes_and_edges(
        f"实体-实体边: {len(entity_entity_edges)}"
    )

+    # ── Assistant 剪枝节点和边 ──
+    assistant_original_nodes: List[AssistantOriginalNode] = []
+    assistant_pruned_nodes: List[AssistantPrunedNode] = []
+    assistant_pruned_edges: List[AssistantPrunedEdge] = []
+    assistant_dialog_edges: List[AssistantDialogEdge] = []
+
+    for dialog_data in dialog_data_list:
+        pruning_records = dialog_data.metadata.get("assistant_pruning_records", [])
+        for record in pruning_records:
+            pair_id = record["pair_id"]
+            original_id = f"ao_{pair_id}"
+            pruned_id = f"ap_{pair_id}"
+
+            # AssistantOriginal 始终创建（记录原始对话）
+            original_node = AssistantOriginalNode(
+                id=original_id,
+                name=f"AssistantOriginal_{pair_id[:8]}",
+                end_user_id=dialog_data.end_user_id,
+                run_id=dialog_data.run_id,
+                created_at=dialog_data.created_at,
+                expired_at=dialog_data.expired_at,
+                pair_id=pair_id,
+                dialog_id=dialog_data.id,
+                text=record["original_text"],
+            )
+            assistant_original_nodes.append(original_node)
+
+            # BELONGS_TO_DIALOG: Original → Dialogue
+            assistant_dialog_edges.append(AssistantDialogEdge(
+                source=original_id,
+                target=dialog_data.id,
+                end_user_id=dialog_data.end_user_id,
+                run_id=dialog_data.run_id,
+                created_at=dialog_data.created_at,
+            ))
+
+            # pruned_text 为 NULL 时不创建 AssistantPruned 节点和 PRUNED_TO 边
+            if record["pruned_text"] == "NULL":
+                continue
+
+            pruned_node = AssistantPrunedNode(
+                id=pruned_id,
+                name=f"AssistantPruned_{pair_id[:8]}",
+                end_user_id=dialog_data.end_user_id,
+                run_id=dialog_data.run_id,
+                created_at=dialog_data.created_at,
+                expired_at=dialog_data.expired_at,
+                pair_id=pair_id,
+                dialog_id=dialog_data.id,
+                text=record["pruned_text"],
+                memory_type=record["memory_type"],
+            )
+            assistant_pruned_nodes.append(pruned_node)
+
+            # PRUNED_TO: Original → Pruned
+            assistant_pruned_edges.append(AssistantPrunedEdge(
+                source=original_id,
+                target=pruned_id,
+                end_user_id=dialog_data.end_user_id,
+                run_id=dialog_data.run_id,
+                created_at=dialog_data.created_at,
+                pair_id=pair_id,
+            ))
+
+    if assistant_original_nodes:
+        logger.info(
+            f"Assistant 剪枝节点创建完成 - "
+            f"原始节点: {len(assistant_original_nodes)}, "
+            f"剪枝节点: {len(assistant_pruned_nodes)}"
+        )
+
    if progress_callback:
        nodes_edges_stats = {
            "dialogue_nodes_count": len(dialogue_nodes),
@@ -365,4 +452,8 @@ async def build_graph_nodes_and_edges(
        stmt_entity_edges=stmt_entity_edges,
        entity_entity_edges=entity_entity_edges,
        perceptual_edges=perceptual_edges,
+        assistant_original_nodes=assistant_original_nodes,
+        assistant_pruned_nodes=assistant_pruned_nodes,
+        assistant_pruned_edges=assistant_pruned_edges,
+        assistant_dialog_edges=assistant_dialog_edges,
    )
--- a/api/app/core/memory/utils/prompt/prompts/extracat_Pruning.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/extracat_Pruning.jinja2
@@ -1,199 +1,130 @@
-{#
-  对话级抽取与相关性判定模板（用于剪枝加速）
-  输入：pruning_scene, ontology_class_infos, dialog_text, language
-    - ontology_class_infos: List[{class_name: str, class_description: str}]
-  输出：严格 JSON（不要包含任何多余文本），字段：
-    - is_related: bool，是否与所选场景相关
-    - times: [string]，从对话中抽取的时间相关文本（日期、时间、时间段、有效期等）
-    - ids: [string]，编号/ID/订单号/申请号/账号等
-    - amounts: [string]，金额/费用/价格相关（带单位或货币符号）
-    - contacts: [string]，联系方式（电话/手机号/邮箱/微信/QQ等）
-    - addresses: [string]，地址/地点相关文本
-    - keywords: [string]，其它有助于保留的重要关键词（与场景强相关的术语）
-    - preserve_keywords: [string]，必须保留的情绪/兴趣/爱好/个人偏好相关词或短语片段
+你是一个面向记忆存储的 Assistant 辅助信息提取器。

-  要求：
-  - 必须只输出上述 JSON，且键名一致；不得输出解释、前后缀；不得包含注释。
-  - times/ids/amounts/contacts/addresses/keywords/preserve_keywords 仅抽取原文片段或规范化后的简单字符串。
-  - 仅输出上述键；避免多余解释或字段。
-#}
+任务：

-{# ── 确定场景说明 ── #}
-{% if ontology_class_infos and ontology_class_infos | length > 0 %}
-  {% if language == 'en' %}
-    {% set instruction = 'Scene "' ~ pruning_scene ~ '": The dialogue is relevant if it involves any of the following entity types.' %}
-  {% else %}
-    {% set instruction = '场景「' ~ pruning_scene ~ '」：对话涉及以下任意实体类型时视为相关。' %}
-  {% endif %}
-{% else %}
-  {% if language == 'en' %}
-    {% set instruction = 'Scene "' ~ pruning_scene ~ '": Determine whether the dialogue content is relevant to this scene based on overall context.' %}
-  {% else %}
-    {% set instruction = '场景「' ~ pruning_scene ~ '」：根据对话整体内容判断是否与该场景相关。' %}
-  {% endif %}
-{% endif %}
+- 输入是一个 JSON，对话放在 `msgs` 数组里，且数组中只有两条消息：第一条是 `User`，第二条是 `Assistant`。
+- 你只处理第二条消息里的 `Assistant.msg`。
+- 第一条消息里的 `User.msg` 只用于理解上下文，不允许出现在输出里。
+- 你的输出必须包含两个字段：
+  1. `assistant_memory_hint`
+  2. `assistant_memory_type`

-{% if language == "zh" %}
-你是一个对话内容分析助手。请对下方对话全文进行一次性分析，完成两项任务：
-1. 判断对话是否与指定场景相关；
-2. 从对话中抽取所有需要保留的重要信息片段。
+目标：

-场景说明：{{ instruction }}
+- 从 `Assistant.msg` 中提取一条适合后续检索的极短辅助摘要。
+- 删除冗长解释、寒暄、礼貌话术、重复复述和空泛铺垫。
+- 允许做摘要式改写，但只能保留原消息中已经出现的建议、推荐、提醒、安慰、步骤或其他对后续记忆有帮助的核心内容。
+- 如果没有值得保留的信息，`assistant_memory_hint` 输出 `"NULL"`，`assistant_memory_type` 也输出 `"NULL"`。

-{% if ontology_class_infos and ontology_class_infos | length > 0 %}
-【本场景实体类型定义】
-以下实体类型定义了本场景中哪些内容是重要的。
-凡是与以下任意类型相关的内容，都必须保留，并将关键词/短语提取到 keywords 字段：
+硬约束：

-{% for info in ontology_class_infos %}
- {{ info.class_name }}：{{ info.class_description }}
-{% endfor %}
+- 不得改写、复述或输出 `User.msg`。
+- 不得捏造新事实、新建议、新步骤、新材料。
+- 不得改变 `Assistant` 原始语义和立场。
+- 可以压缩、合并、重写 `Assistant.msg`，但必须忠于原内容。
+- `assistant_memory_type` 只能从以下枚举中选择：
+  `comfort | suggestion | recommendation | warning | instruction | NULL`
+- 只输出严格 JSON，不要输出解释。

-重要提示：只要对话中出现与上述任意实体类型相关的内容，即判定为相关（is_related=true）。
-{% endif %}
+压缩原则：

---
-【必须保留的内容（不可删除）】
-以下类型的内容无论是否与场景直接相关，都必须保留，请将其关键词/短语抽取到对应字段：
- 时间信息：日期、时间点、时间段、有效期 → times 字段
- 编号信息：学号、工号、订单号、申请号、账号、ID → ids 字段
- 金额信息：价格、费用、金额（含货币符号或单位，如"100元"、"¥200"）→ amounts 字段（注意：考试分数、成绩分数不属于金额，不要放入此字段）
- 联系方式：电话、手机号、邮箱、微信、QQ → contacts 字段
- 地址信息：地点、地址、位置 → addresses 字段
- 场景关键词：与**当前场景**强相关的专业术语、事件名称 → keywords 字段（注意：只放与当前场景直接相关的词，跨场景的内容不要放入此字段）
- **情绪与情感**：喜悦、悲伤、愤怒、焦虑、开心、难过、委屈、兴奋、害怕、担心、压力、感动等情绪表达 → preserve_keywords 字段
- **兴趣与爱好**：喜欢、热爱、爱好、擅长、享受、沉迷、着迷、讨厌某事物等个人偏好表达 → preserve_keywords 字段
- **个人情感态度**：对人际关系、情感状态的明确表达（如"我跟室友闹矛盾了"、"我都快抑郁了"）→ preserve_keywords 字段
- 注意：学业目标（如"我想考研"）、成绩（如"87分"）、学科偏好（如"喜欢数学"）属于学业信息，不属于情绪/情感，不要放入 preserve_keywords 字段
+- 优先保留具体建议、推荐、提醒、操作步骤、风险提示、安慰动作。
+- 优先删除长背景解释、寒暄、礼貌收尾、对用户原话的重复复述。
+- 如果原文是长说明、长步骤、长菜谱，输出更短的概要版本，但不要丢掉核心意图。
+- 优先保留最短但仍有信息密度的版本。
+- `assistant_memory_hint` 尽量写成完整句，不要只写零散词组或标签。
+- 优先使用显式主语来写结果，例如：
+  `安慰了用户……`
+  `建议用户……`
+  `推荐用户……`
+  `提醒用户……`

-【场景无关内容标记】
-请从对话中识别出与当前场景（{{ pruning_scene }}）**既不相关、也无语义关联**的消息片段，将其原文（或关键片段）提取到 scene_unrelated_snippets 字段。
-判断标准：
- 与场景实体类型完全无关
- 与场景话题没有因果/时间/情境上的关联（例如：不是"因为上课所以累"这种关联）
- 纯粹是另一个话题的内容（如在教育场景中讨论购物、娱乐等）
-注意：有情绪/感受表达的消息即使话题不同，也可能有语义关联，请谨慎标记。
-
-**重要：scene_unrelated_snippets 必须认真填写，不能为空数组。**
-如果对话中存在与场景无关的内容，必须将其原文片段提取出来。
-
-示例（场景=在线教育）：
- "我最近心情很差，跟室友闹矛盾了" → 与教育场景无关，加入 scene_unrelated_snippets
- "她总是很晚回来吵到我睡觉" → 与教育场景无关，加入 scene_unrelated_snippets
- "对，我都快抑郁了" → 与教育场景无关，加入 scene_unrelated_snippets
- "期末考试12月25日" → 与教育场景相关，不加入 scene_unrelated_snippets
- "我上次高数作业87分" → 与教育场景相关，不加入 scene_unrelated_snippets
- "我的目标是考研" → 与教育场景相关，不加入 scene_unrelated_snippets
-
-示例（场景=情感陪伴）：
- "我最近心情很差，跟室友闹矛盾了" → 与情感陪伴场景相关（情绪+关系），不加入 scene_unrelated_snippets
- "对，我都快抑郁了" → 与情感陪伴场景相关（情绪），不加入 scene_unrelated_snippets
- "期末考试12月25日，3号教学楼201室" → 与情感陪伴场景无关（教育信息），加入 scene_unrelated_snippets
- "我上次高数作业87分，这次能考好吗" → 与情感陪伴场景无关（学业信息），加入 scene_unrelated_snippets
- "我的目标是考研，想读应用数学" → 与情感陪伴场景无关（学业目标），加入 scene_unrelated_snippets
-
-【可以删除的内容】
-以下类型的内容属于低价值信息，可以在剪枝时删除：
- 纯寒暄问候：如"你好"、"在吗"、"拜拜"、"嗯"、"好的"、"哦"等无实质内容的短语
- 纯表情/符号：如"[微笑]"、"😊"、"哈哈"等
- 重复确认：如"对对对"、"是的是的"、"嗯嗯嗯"等无新增信息的重复
- 无意义填充：如"啊"、"呢"、"嘛"等语气词单独成句
-
-**注意：即使消息很短，只要包含情绪、兴趣、爱好、个人观点等有价值信息，就必须保留，不得删除。**
-例如：
- "我好开心呀" → 包含情绪（开心），必须保留，preserve_keywords 中加入"开心"
- "好喜欢打羽毛球呀" → 包含兴趣爱好（喜欢打羽毛球），必须保留，preserve_keywords 中加入"喜欢打羽毛球"
- "我好难过" → 包含情绪（难过），必须保留，preserve_keywords 中加入"难过"
- "太好啦！看到你开心，我也跟着心情亮起来" → 包含情绪，必须保留，preserve_keywords 中加入"开心"
-
---
-对话全文：
-"""
-{{ dialog_text }}
-"""
-
-只输出严格 JSON（键固定、顺序不限）：
+Few-shot 示例 1
+输入：
 {
-  "is_related": <true 或 false>,
-  "times": [<string>...],
-  "ids": [<string>...],
-  "amounts": [<string>...],
-  "contacts": [<string>...],
-  "addresses": [<string>...],
-  "keywords": [<string>...],
-  "preserve_keywords": [<string>...],
-  "scene_unrelated_snippets": [<string>...]
+  "msgs": [
+    {
+      "role": "User",
+      "msg": "我室友小雯这学期一直在准备毕业论文，这两周都在改答辩 PPT。她下周三答辩，我有点担心她会紧张。"
+    },
+    {
+      "role": "Assistant",
+      "msg": "听起来你很关心小雯，也希望她答辩顺利。她现在紧张其实很正常，很多人在答辩前都会这样。"
+    }
+  ]
 }
-{% else %}
-You are a dialogue content analysis assistant. Please analyze the full dialogue below in one pass and complete two tasks:
-1. Determine whether the dialogue is relevant to the specified scene;
-2. Extract all important information fragments that must be preserved.
-
-Scenario Description: {{ instruction }}
-
-{% if ontology_class_infos and ontology_class_infos | length > 0 %}
-[Scene Entity Type Definitions]
-The following entity types define what content is important in this scene.
-Content related to ANY of these types must be preserved and extracted into the keywords field:
-
-{% for info in ontology_class_infos %}
- {{ info.class_name }}: {{ info.class_description }}
-{% endfor %}
-
-Important: If the dialogue contains content related to any of the entity types above, mark it as relevant (is_related=true).
-{% endif %}
-
---
-[MUST PRESERVE (cannot be deleted)]
-The following types of content must always be preserved regardless of scene relevance. Extract their keywords/phrases into the corresponding fields:
- Time information: dates, time points, durations, expiry dates → times field
- ID information: student IDs, employee IDs, order numbers, application numbers, account IDs → ids field
- Amount information: prices, fees, amounts (with currency symbols or units, e.g., "$100", "¥200") → amounts field (Note: exam scores and grades are NOT amounts, do not put them here)
- Contact information: phone numbers, emails, WeChat, QQ → contacts field
- Address information: locations, addresses, places → addresses field
- Scene keywords: professional terms and event names strongly related to **the current scene** → keywords field (Note: only put terms directly related to the current scene; cross-scene content should not be placed here)
- **Emotions and feelings**: joy, sadness, anger, anxiety, happiness, sadness, excitement, fear, worry, stress, being moved, etc. → preserve_keywords field
- **Interests and hobbies**: likes, loves, hobbies, good at, enjoys, obsessed with, hates something, personal preferences → preserve_keywords field
- **Personal emotional attitudes**: clear expressions about interpersonal relationships or emotional states (e.g., "I had a fight with my roommate", "I'm almost depressed") → preserve_keywords field
- Note: Academic goals (e.g., "I want to pursue a master's degree"), grades (e.g., "87 points"), and subject preferences (e.g., "I like math") are academic information, NOT emotions/feelings — do not put them in preserve_keywords
-
-[Scene-Unrelated Content Marking]
-Please identify message snippets in the dialogue that are **neither relevant to nor semantically associated with** the current scene ({{ pruning_scene }}), and extract their original text (or key fragments) into the scene_unrelated_snippets field.
-Criteria:
- Completely unrelated to the scene's entity types
- No causal/temporal/contextual association with the scene topic (e.g., "feeling tired because of class" IS associated)
- Purely belongs to a different topic (e.g., discussing shopping or entertainment in an education scene)
-Note: Messages with emotional/feeling expressions may still have semantic association even if the topic differs — mark carefully.
-
-[CAN BE DELETED]
-The following types of content are low-value and can be removed during pruning:
- Pure greetings: e.g., "hello", "are you there", "bye", "ok", "yeah" — short phrases with no substantive content
- Pure emojis/symbols: e.g., "[smile]", "😊", "haha"
- Repetitive confirmations: e.g., "yes yes yes", "right right", "uh huh" — repetitions with no new information
- Meaningless fillers: standalone interjections like "ah", "well", "hmm"
-
-**Note: Even if a message is short, if it contains emotions, interests, hobbies, or personal opinions, it MUST be preserved.**
-Examples:
- "I'm so happy!" → contains emotion (happy), must preserve; add "happy" to preserve_keywords
- "I love playing badminton!" → contains interest (love playing badminton), must preserve; add "love playing badminton" to preserve_keywords
- "I feel so sad" → contains emotion (sad), must preserve; add "sad" to preserve_keywords
-
---
-Full Dialogue:
-"""
-{{ dialog_text }}
-"""
-
-Output strict JSON only (fixed keys, order doesn't matter):
+输出：
 {
-  "is_related": <true or false>,
-  "times": [<string>...],
-  "ids": [<string>...],
-  "amounts": [<string>...],
-  "contacts": [<string>...],
-  "addresses": [<string>...],
-  "keywords": [<string>...],
-  "preserve_keywords": [<string>...],
-  "scene_unrelated_snippets": [<string>...]
+  "assistant_memory_hint": "安慰了用户对室友答辩状态的担忧。",
+  "assistant_memory_type": "comfort"
 }
-{% endif %}
+
+Few-shot 示例 2
+输入：
+{
+  "msgs": [
+    {
+      "role": "User",
+      "msg": "我最近总失眠，已经两周了，想先自己调一调。"
+    },
+    {
+      "role": "Assistant",
+      "msg": "如果你想先自己调整，可以先减少咖啡因摄入，尤其下午和晚上尽量不要再喝咖啡或浓茶，同时把睡前刷手机的时间压缩一些，尽量固定上床时间，先连续观察几天。"
+    }
+  ]
+}
+输出：
+{
+  "assistant_memory_hint": "建议用户减少咖啡因摄入、减少睡前刷手机时间并固定上床时间。",
+  "assistant_memory_type": "suggestion"
+}
+
+Few-shot 示例 3
+输入：
+{
+  "msgs": [
+    {
+      "role": "User",
+      "msg": "我晚上想做个简单点的减脂餐，最好二十分钟左右能搞定。"
+    },
+    {
+      "role": "Assistant",
+      "msg": "你可以做一个鸡胸肉沙拉碗，主要用鸡胸肉、生菜、黄瓜和圣女果。鸡胸肉简单煎熟切块后和蔬菜拌在一起，调味尽量用橄榄油加一点醋，不要放太多沙拉酱。"
+    }
+  ]
+}
+输出：
+{
+  "assistant_memory_hint": "推荐用户做鸡胸肉沙拉碗，并提醒用户调味时少放沙拉酱。",
+  "assistant_memory_type": "recommendation"
+}
+
+Few-shot 示例 4
+输入：
+{
+  "msgs": [
+    {
+      "role": "User",
+      "msg": "剪枝引擎和萃取引擎我都想先做，但是估计都会比较花时间。"
+    },
+    {
+      "role": "Assistant",
+      "msg": "这两个模块都涉及比较多的设计和实现细节。如果你想先推进，我建议先拆需求，再分别评估开发量。"
+    }
+  ]
+}
+输出：
+{
+  "assistant_memory_hint": "建议用户先拆需求，再分别评估两个模块的开发量。",
+  "assistant_memory_type": "suggestion"
+}
+
+现在处理下面这个输入。
+输入：
+{{ dialog_text }}
+
+只输出严格 JSON：
+{
+  "assistant_memory_hint": "<string or NULL>",
+  "assistant_memory_type": "comfort | suggestion | recommendation | warning | instruction | NULL"
+}
--- a/api/app/core/memory/utils/prompt/prompts/extract_statement.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/extract_statement.jinja2
@@ -2,7 +2,7 @@
 {{ input_json }}
 {%- endmacro %}

-===Tasks===
+=== Tasks ===

 {% if language == "zh" %}
 你的任务是从提供的目标文本中识别并提取陈述句，并为每条陈述句标注以下信息：
@@ -11,11 +11,12 @@
 - statement_text
 - statement_type
 - temporal_type
+- has_emotional_state
 - has_unsolved_reference
 - valid_at
 - invalid_at

-每条输出都应是一个结构化的记忆候选陈述句。
+每条输出都应是一个结构化的候选记忆陈述句。
 {% else %}
 Your task is to identify and extract declarative statements from the provided target text, and annotate each extracted statement with:

@@ -23,6 +24,7 @@ Your task is to identify and extract declarative statements from the provided ta
 - statement_text
 - statement_type
 - temporal_type
+- has_emotional_state
 - has_unsolved_reference
 - valid_at
 - invalid_at
@@ -30,7 +32,7 @@ Your task is to identify and extract declarative statements from the provided ta
 Each output item should be a structured candidate memory statement.
 {% endif %}

-===Inputs===
+=== Inputs ===
 {% if language == "zh" %}

 - chunk_id: chunk 唯一 ID
@@ -48,7 +50,7 @@ Each output item should be a structured candidate memory statement.
 - supporting_context.msgs: ordered contextual messages, which may include User and Assistant messages
  {% endif %}

-===Scope===
+=== Scope ===
 {% if language == "zh" %}

 - 只从 `target_content` 中提取陈述句。
@@ -66,12 +68,12 @@ Each output item should be a structured candidate memory statement.
 - Every output statement must be directly grounded in wording from `target_content`.
  {% endif %}

-===Extraction Rules===
+=== Extraction Rules ===
 {% if language == "zh" %}
 拆分规则：

 - 以“一个完整意思”为单位提取陈述句，通常对应一个完整句子或一个自然语义片段。
- 默认保留句子级结构；只有当一个句子内部包含两个及以上彼此独立、拆开后明显更清楚的重要信息时，才拆成多条。
+- 默认保留句子级结构；只有当一个句子内部包含两个及以上彼此独立、拆开后明显更清晰的重要信息时，才拆成多条。
 - 宁可多提取，也不要漏掉 `target_content` 中能独立成立、且语义稳定的 statement。
 - 但不要为了提高覆盖率而引入原文没有的信息，或输出语义不成立的 statement。

@@ -82,6 +84,9 @@ Each output item should be a structured candidate memory statement.

 共指消解：

+- 先完成最终的 `statement_text` 改写，再判断 `has_unsolved_reference`。
+- `has_unsolved_reference` 必须基于最终输出的 `statement_text` 判断，而不是基于原始 `target_content` 里是否出现过代词来判断。
+- 如果最终 `statement_text` 已经把引用改写成具体实体名，例如“助理恭喜用户”“小李点了一杯美式咖啡”，则 `has_unsolved_reference` 必须是 `false`。
 - 如果可以解析到具体实体名，优先输出具体实体名，并将 `has_unsolved_reference` 设为 `false`。
 - 如果不能解析到具体实体名，但可以解析到最小必要描述，则输出该最小必要描述，并将 `has_unsolved_reference` 设为 `true`。
 - 如果既不能解析到具体实体名，也不能稳定解析到最小必要描述，则保留最小必要原始表达，并将 `has_unsolved_reference` 设为 `true`。
@@ -117,6 +122,15 @@ statement_type：
 - 如果没有明确时间，不要编造时间。
 - 对于点状事件（例如某天发生的一次考试、一次见面、一次提交），`valid_at` 和 `invalid_at` 都应填写为该事件的起止边界；不要只填 `valid_at`。

+情感状态判断：
+
+- `has_emotional_state` 只用于判断当前 statement 是否反映了用户的情感状态。
+- 如果根据当前 statement 和 supporting_context，可以判断用户当前存在某种情感状态，则输出 `true`。
+- 该字段不是情绪分类字段，不要求输出具体情绪类型。
+- 明确情绪表达例如“开心”“难过”“紧张”“有压力”通常应标为 `true`。
+- 即使没有明确情绪词，只要语义足以表明用户当前具有情感状态，也可以标为 `true`，例如“我很好”。
+- 如果只是客观事实、动作描述或安排，且无法从当前上下文稳定判断用户情感状态，则输出 `false`。
+
 temporal_type：

 - `STATIC`：相对稳定、持续性的状态、身份、属性、长期偏好、长期关系、长期职业或长期居住状态；若带起始时间，可填 `valid_at`，`invalid_at` 必须为 `"NULL"`。
@@ -129,7 +143,7 @@ temporal_type：
 - 允许为解决代词、省略和时间歧义做最小必要改写。
 - 不要引入原文未明确表达的新事实、额外推断或风格化概括。
  {% else %}
-  Granularity:
+  Splitting rules:
 - Extract statements at the level of one complete thought, usually one full sentence or one natural semantic unit.
 - Preserve sentence-level structure by default; split only when a sentence contains two or more independent and important pieces of information that become clearly easier to understand when separated.
 - Prefer higher recall: do not miss independently valid and semantically stable statements in `target_content`.
@@ -149,6 +163,9 @@ Coreference resolution:

 Clear vs unresolved reference:

+- First produce the final rewritten `statement_text`, then decide `has_unsolved_reference`.
+- `has_unsolved_reference` must be judged from the final `statement_text`, not from whether the original `target_content` once contained a pronoun.
+- If the final `statement_text` already resolves the reference to a concrete named entity, such as “The assistant congratulates the user” or “Xiao Li ordered an Americano,” then `has_unsolved_reference` must be `false`.
 - A reference is fully resolved only if the current `supporting_context` can map it to a concrete named entity.
 - `Zhang San`, `Old Zhang` when clearly resolved to Zhang San, `Professor Li`, and `Teacher Wang` are clear references.
 - `the user's friend`, `the user's coworker`, `a teacher`, and `an interviewer` are allowed outputs but still count as unresolved.
@@ -177,6 +194,15 @@ Temporal rules:
 - If no explicit time is available, do not invent one.
 - For point-in-time events such as a single exam, a meeting, or a submission on one day, populate both `valid_at` and `invalid_at`; do not fill only `valid_at`.

+Emotional-state detection:
+
+- `has_emotional_state` is used only to judge whether the current statement reflects the user's emotional state.
+- If the current statement plus supporting context is sufficient to infer that the user currently has some emotional state, output `true`.
+- This field is not an emotion category field. Do not infer or output a specific emotion label here.
+- Explicit emotion wording such as “happy”, “sad”, “nervous”, or “under pressure” should usually be marked `true`.
+- Statements without explicit emotion words may still be `true` if the user's emotional state is reasonably inferable, such as “I am fine.”
+- If the statement is only an objective fact or action description and the user's emotional state cannot be stably inferred from the current context, output `false`.
+
 temporal_type:

 - `STATIC`: relatively stable, ongoing states, identities, attributes, long-term preferences, long-term relationships, occupations, or residence states.
@@ -190,7 +216,7 @@ Rewrite boundary:
 - Do not introduce unsupported facts, extra inference, or stylistic summarization.
  {% endif %}

-===Examples===
+=== Examples ===
 {% if language == "zh" %}
 示例 1:
 示例输入: {
@@ -219,6 +245,7 @@ Rewrite boundary:
      "statement_text": "李教授这学期要求很严。",
      "statement_type": "OPINION",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2023-09-04T18:00:00",
      "invalid_at": "NULL"
@@ -228,17 +255,19 @@ Rewrite boundary:
      "statement_text": "李教授讲课清晰透彻。",
      "statement_type": "OPINION",
      "temporal_type": "ATEMPORAL",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "NULL",
      "invalid_at": "NULL"
    },
    {
      "statement_id": "stmt_m1n2o3p4",
-      "statement_text": "李教授的气场很吓人。",
+      "statement_text": "用户每次被李教授点名都有点发怵。",
      "statement_type": "OPINION",
-      "temporal_type": "ATEMPORAL",
+      "temporal_type": "DYNAMIC",
+      "has_emotional_state": true,
      "has_unsolved_reference": false,
-      "valid_at": "NULL",
+      "valid_at": "2023-09-04T18:00:00",
      "invalid_at": "NULL"
    }
  ]
@@ -248,13 +277,13 @@ Rewrite boundary:
 示例输入: {
  "chunk_id": "chunk_b2c3d4e5",
  "end_user_id": "eu_12345678",
-  "target_content": "我最近在学 Python，每天晚上都会练一个小时。这周还打算先把基础语法和函数部分过一遍。",
+  "target_content": "我最近在学Python，每天晚上都会练一个小时。这周还打算先把基础语法和函数部分过一遍。",
  "target_message_date": "2026-04-01T00:00:00",
  "supporting_context": {
    "msgs": [
      {
        "role": "User",
-        "msg": "我最近在学 Python。"
+        "msg": "我最近在学Python。"
      },
      {
        "role": "Assistant",
@@ -268,27 +297,30 @@ Rewrite boundary:
  "statements": [
    {
      "statement_id": "stmt_m3n4o5p6",
-      "statement_text": "用户最近在学 Python。",
+      "statement_text": "用户最近在学Python。",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
    },
    {
      "statement_id": "stmt_q7r8s9t0",
-      "statement_text": "用户最近每天晚上都会练一个小时 Python。",
+      "statement_text": "用户最近每晚都会练一个小时Python。",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
    },
    {
      "statement_id": "stmt_u1v2w3x4",
-      "statement_text": "用户这周打算先复习 Python 的基础语法和函数部分。",
+      "statement_text": "用户这周打算先复习Python的基础语法和函数部分。",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -323,6 +355,7 @@ Rewrite boundary:
      "statement_text": "用户觉得那两个有点难。",
      "statement_type": "OPINION",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": true,
      "has_unsolved_reference": true,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -332,6 +365,7 @@ Rewrite boundary:
      "statement_text": "用户昨晚看了半天那两个还是没太搞明白。",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": true,
      "valid_at": "2026-03-31T00:00:00",
      "invalid_at": "2026-03-31T23:59:59"
@@ -341,6 +375,7 @@ Rewrite boundary:
      "statement_text": "如果周末还弄不出来，用户可能会去问助教。",
      "statement_type": "OTHER",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": true,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -375,6 +410,7 @@ Example Output: {
      "statement_text": "Professor Li is very strict this semester.",
      "statement_type": "OPINION",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2023-09-04T18:00:00",
      "invalid_at": "NULL"
@@ -384,17 +420,19 @@ Example Output: {
      "statement_text": "Professor Li explains things clearly.",
      "statement_type": "OPINION",
      "temporal_type": "ATEMPORAL",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "NULL",
      "invalid_at": "NULL"
    },
    {
      "statement_id": "stmt_m1n2o3p4",
-      "statement_text": "Professor Li's presence is intimidating.",
+      "statement_text": "The user gets nervous every time Professor Li calls on the user.",
      "statement_type": "OPINION",
-      "temporal_type": "ATEMPORAL",
+      "temporal_type": "DYNAMIC",
+      "has_emotional_state": true,
      "has_unsolved_reference": false,
-      "valid_at": "NULL",
+      "valid_at": "2023-09-04T18:00:00",
      "invalid_at": "NULL"
    }
  ]
@@ -427,6 +465,7 @@ Example Output: {
      "statement_text": "The user has been learning Python recently.",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -436,6 +475,7 @@ Example Output: {
      "statement_text": "The user has recently been practicing Python for an hour every night.",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -445,6 +485,7 @@ Example Output: {
      "statement_text": "The user plans to review Python basic syntax and functions first this week.",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": false,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -479,6 +520,7 @@ Example Output: {
      "statement_text": "The user thinks those two things are difficult.",
      "statement_type": "OPINION",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": true,
      "has_unsolved_reference": true,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -488,6 +530,7 @@ Example Output: {
      "statement_text": "The user spent a long time last night looking at those two things but still did not really understand them.",
      "statement_type": "FACT",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": true,
      "valid_at": "2026-03-31T00:00:00",
      "invalid_at": "2026-03-31T23:59:59"
@@ -497,6 +540,7 @@ Example Output: {
      "statement_text": "If the user still cannot finish them by the weekend, the user may ask the TA.",
      "statement_type": "OTHER",
      "temporal_type": "DYNAMIC",
+      "has_emotional_state": false,
      "has_unsolved_reference": true,
      "valid_at": "2026-04-01T00:00:00",
      "invalid_at": "NULL"
@@ -504,7 +548,7 @@ Example Output: {
  ]
 }
 {% endif %}
-===End of Examples===
+=== End of Examples ===

 {% if language == "zh" %}
 最终输出前检查：
@@ -512,7 +556,9 @@ Example Output: {
 - 是否只保留 `target_content` 中可直接支持的陈述句
 - 如果主语是用户，是否统一写“用户”
 - 非用户主体是否尽量写成具体名称；若无法做到，是否已正确标记 `has_unsolved_reference = true`
+- 如果最终 `statement_text` 已经落到具体实体名，`has_unsolved_reference` 是否已经改为 `false`
 - statement_type 是否合法，且没有把一般事实机械标成 `OPINION`
+- `has_emotional_state` 是否仅用于判断是否存在情感状态，而没有被当作情绪分类字段
 - temporal_type 是否与 valid_at / invalid_at 一致
 - 输出是否严格符合 JSON schema
  {% else %}
@@ -520,7 +566,9 @@ Example Output: {
 - Keep only statements directly supported by `target_content`
 - If the subject is the user, render it as “the user”
 - Render non-user subjects as concrete names when possible; otherwise mark `has_unsolved_reference = true`
+- If the final `statement_text` already resolves the reference to a concrete named entity, ensure `has_unsolved_reference = false`
 - Ensure statement_type is valid and do not mechanically label ordinary facts as `OPINION`
+- Ensure `has_emotional_state` is used only for emotional-state presence detection, not emotion classification
 - Ensure temporal_type is consistent with valid_at and invalid_at
 - Ensure the output strictly matches the JSON schema
  {% endif %}
@@ -555,8 +603,7 @@ Example Output: {
 - Preserve the original language and do not translate.
  {% endif %}

-现在处理下面这个输入：
-{{ render_input() }}
+现在处理下面这个输入：{{ render_input() }}

 Return only a JSON object matching the schema below:
 {
@@ -566,6 +613,7 @@ Return only a JSON object matching the schema below:
      "statement_text": "string",
      "statement_type": "FACT | OPINION | OTHER",
      "temporal_type": "STATIC | DYNAMIC | ATEMPORAL",
+      "has_emotional_state": "boolean",
      "has_unsolved_reference": "boolean",
      "valid_at": "string | NULL",
      "invalid_at": "string | NULL"
--- a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2
@@ -5,13 +5,21 @@ Extract entities and knowledge triplets from the given statement.
 重要：

 - `name`、`subject_name`、`object_name` 默认保持原文中的表面形式，不要翻译。
- 但对用户自指表达，如“我”“我的”“我自己”，统一规范为 `用户`。
+- 但在抽取前，必须先做指代解析。
+- 用户自指表达，如“我”“我的”“我自己”，一律规范为 `用户`。
+- 非用户自指代词或指示表达，如“他”“她”“它”“这个”“那个”“这家”“那家”“这里”“那里”，如果能从 `supporting_context` 中稳定解析出具体指代，则必须替换为具体指代实体名。
+- 如果上述代词或指示表达不能稳定解析，则整条跳过。
+- 命名关系中新出现的称呼、别名、昵称、产品名保持原样，不做替换。
 - `description` 使用中文。
 - `type`、`predicate`、`type_description`、`predicate_description` 一律使用中文。
  {% else %}
  Important:
- Keep `name`, `subject_name`, and `object_name` in their original surface form from the source text. Do not translate them.
- Exception: normalize user self-reference such as "I", "me", and "myself" to `用户`.
+- Keep `name`, `subject_name`, and `object_name` in their original surface form from the source text by default.
+- But you MUST resolve references before extraction.
+- Normalize user self-reference such as "I", "me", "my", and "myself" to `用户`.
+- For non-user pronouns or demonstratives such as "he", "she", "it", "this", "that", "this company", "that place", "here", or "there", if a stable referent can be resolved from `supporting_context`, replace them with the resolved entity name.
+- If such references cannot be resolved stably, skip the entire statement.
+- Newly introduced names in naming or alias expressions must stay in their original form.
 - Generate `description` in English.
 - Always generate `type`, `predicate`, `type_description`, and `predicate_description` in Chinese.
  {% endif %}
@@ -69,11 +77,13 @@ Primary statement to analyze:
 开始抽取前，先检查 `has_unsolved_reference`。

 - 如果 `has_unsolved_reference` 是 `true`，不要抽取任何内容。
- 此时必须返回：
+- 如果 `statement_text` 中仍存在无法稳定解析的代词、指示词或省略主体，也应视为 unresolved reference。
+- 这两种情况下都必须返回：
  {% else %}
  Before any extraction, check `has_unsolved_reference`.
 - If `has_unsolved_reference` is `true`, do not extract anything.
- In that case, return exactly:
+- If unresolved pronouns, demonstratives, or omitted subjects still remain in `statement_text`, treat the statement as unresolved as well.
+- In either case, return exactly:
  {% endif %}

 ```json
@@ -86,8 +96,10 @@ Primary statement to analyze:
 {% if language == "zh" %}

 - 不要在引用未解析时尝试部分抽取。
+- 不要保留“他”“这个”“那个”这类原代词继续输出实体或关系。
  {% else %}
 - Do not attempt partial extraction when the reference is unresolved.
+- Do not keep unresolved forms such as "he", "this", or "that" as extracted entities or relation arguments.
  {% endif %}

 ===Input Boundary===
@@ -100,6 +112,8 @@ Primary statement to analyze:
 - 如果 `supporting_context.msgs` 中的 Assistant 消息包含总结、猜测、解释或改写，这些内容只能作为理解辅助，不能直接作为抽取来源。
 - `statement_type`、`temporal_type`、`valid_at`、`invalid_at` 是辅助理解字段，不是抽取目标。
 - 对 `statement_text` 中的用户自指表达，要统一规范成实体 `用户`。
+- 对其他可稳定解析的代词或指示表达，要替换为具体指代实体名后再抽取。
+- 对命名关系中新出现的称呼、别名、昵称、产品名，不要因为上下文可推断其所指而直接改写，它们应保持原样作为实体名。
  {% else %}
 - Treat `statement_text` as the only direct extraction target.
 - Use `supporting_context.msgs` only to interpret references, ellipsis, subject identity, and necessary background in `statement_text`.
@@ -108,41 +122,253 @@ Primary statement to analyze:
 - If Assistant messages in `supporting_context.msgs` contain summary, guess, interpretation, or rephrasing, use them only as interpretive support and never as a direct extraction source.
 - Treat `statement_type`, `temporal_type`, `valid_at`, and `invalid_at` as auxiliary context, not extraction targets.
 - Normalize user self-reference in `statement_text` to the entity `用户`.
+- Replace other resolvable pronouns or demonstratives with their resolved entity names before extraction.
+- For newly introduced names in naming or alias expressions, do not rewrite them even if the context reveals who they refer to; keep them as entity names.
  {% endif %}

 ===预定义实体类型===
 只能使用以下中文实体类型。如果没有完全匹配的类型，请选择最接近的一项，不要发明新类型。

- `人物`: 现实中的具体个人
- `组织`: 公司、机构、团队、社群等组织性主体
- `群体`: 未具名或泛指的一组人
- `地点`: 具有地理或空间意义的位置
- `设施`: 建筑、场馆、房间、实验室等功能性空间
- `地址`: 具体地址或位置描述
- `物品`: 一般具体物体
- `设备`: 具有明确用途的工具或器材
- `产品`: 可被制造、购买、使用的产品
- `交通工具`: 用于出行或运输的工具
- `文档`: 文章、报告、表格、说明等文档
- `媒体`: 图片、音频、视频等媒体对象
- `网站`: 网站、网页或互联网平台
- `软件`: 软件、应用、系统或数字服务
- `账号`: 账号、账户、用户档案
- `标识符`: ID、编号、用户名、工号等标识
- `联系方式`: 电话、邮箱、社交账号等联系方式
- `角色`: 某实体承担的社会或功能角色
- `职业`: 工作或职业身份
- `技能`: 可学习或掌握的能力
- `知识主题`: 主题、领域、方法、理论或知识概念
- `目标`: 希望达成的结果
- `偏好`: 稳定的喜欢、倾向或偏爱
- `习惯`: 重复出现的行为模式
- `语言`: 自然语言或编程语言
- `金额`: 金额或货币数值
- `数量`: 带或不带单位的数量值
- `货币`: 货币单位
- `组织部门`: 组织内部的部门或业务单元
- `称呼`: 用于指代或称呼实体的名字
+- `人物`
+  - definition: 可稳定指向、可被当作具体个体区分和归并的个人实体。
+  - positive_examples: `用户`、`张三`、`王教授`、`小林`
+  - negative_examples: `老师`、`导师`、`学生`、`他们`
+  - notes: 强调“这个人是谁”，不强调他承担的社会身份；用户自指统一归为 `用户`。
+
+- `组织`
+  - definition: 公司、机构、学校、实验室、团队、社群等组织性主体。
+  - positive_examples: `腾讯`、`清华大学`、`机器人公司`、`实验室`
+  - negative_examples: `办公室`、`同事们`、`程序员`
+  - notes: 如果表达的是组织内部单元，当前一级仍优先并入 `组织`，除非后续单独扩展子类。
+
+- `群体`
+  - definition: 边界相对稳定、可被当作整体引用的一组人。
+  - positive_examples: `我的朋友`、`同事们`、`实验室成员`
+  - negative_examples: `他们`、`一些人`、`一个朋友`
+  - notes: 只用于边界相对稳定的人群；边界不稳或 unresolved 的表达不要归入 `群体`。
+
+- `智能体`
+  - definition: 具有行动、交互或执行能力的非人主体，如机器人、AI 或其他智慧体。
+  - positive_examples: `机器人查票员`、`家务机器人`、`智能助手`
+  - negative_examples: `手机`、`电脑`、`机器人公司`
+  - notes: 如果对象只是普通设备，不归入 `智能体`；只有在叙述中被当作主体行动或交互时才使用。
+
+- `角色职业`
+  - definition: 人物承担的社会角色、功能身份或职业身份。
+  - positive_examples: `导师`、`老师`、`学生`、`医生`、`程序员`
+  - negative_examples: `张三`、`王教授`、`我的朋友`
+  - notes: 强调“这个人是什么身份”，不强调“这个人是谁”；如果文本落到具体个人，优先用 `人物`。
+
+- `地点设施`
+  - definition: 具有地理意义或功能性空间意义的位置与场所。
+  - positive_examples: `北京`、`巴黎`、`图书馆`、`办公室`、`教室`
+  - negative_examples: `这里`、`那里`、`朝这边`、`明天去的地方`
+  - notes: 地理地点和功能场所当前一级合并；未稳定解析的位置指代表达不要抽取。
+
+- `物品设备`
+  - definition: 可被持有、使用、携带的具体物体、设备、工具或交通工具。
+  - positive_examples: `手机`、`电脑`、`相机`、`自行车`
+  - negative_examples: `微信`、`GitHub`、`会员服务`
+  - notes: 交通工具当前并入此类；数字服务不归入本类。
+
+- `产品服务`
+  - definition: 可被购买、使用、消费或订阅的产品或服务。
+  - positive_examples: `iPhone`、`健身课`、`会员服务`
+  - negative_examples: `微信`、`GitHub`、`手机`
+  - notes: 具体商品和服务当前一级合并；纯软件平台优先归入 `软件平台`。
+
+- `软件平台`
+  - definition: 软件、应用、网站、在线平台或数字服务系统。
+  - positive_examples: `微信`、`GitHub`、`ChatGPT`、`飞书`
+  - negative_examples: `iPhone`、`会员服务`、`手机号`
+  - notes: 软件、网站、平台当前一级合并；如果语境强调的是账号本身，改用 `账号`。
+
+- `账号`
+  - definition: 账户、账号、用户档案类实体。
+  - positive_examples: `GitHub账号`、`微信号`
+  - negative_examples: `用户名`、`工号`、`邮箱`
+  - notes: 与 `标识符`、`联系方式` 分开；账号是主体可持有的账户对象。
+
+- `标识符`
+  - definition: 用于识别实体的编号、ID、用户名、学号、工号等标识。
+  - positive_examples: `学号`、`工号`、`用户名`
+  - negative_examples: `GitHub账号`、`手机号`
+  - notes: 当前允许保留，但通常只有在存在明确识别关系时才值得抽取。
+
+- `联系方式`
+  - definition: 可用于联系实体的电话、邮箱、社交联系地址。
+  - positive_examples: `手机号`、`邮箱`、`微信联系方式`
+  - negative_examples: `用户名`、`GitHub账号`
+  - notes: 当前允许保留，但通常只有在存在明确联系关系时才值得抽取。
+
+- `文档媒体`
+  - definition: 文章、报告、表格、图片、音频、视频等内容载体。
+  - positive_examples: `简历`、`论文`、`照片`、`录音`
+  - negative_examples: `微积分`、`微信`、`学号`
+  - notes: 文档与媒体当前一级合并；如果只是内容主题，不归入本类。
+
+- `知识能力`
+  - definition: 可学习、掌握、使用或讨论的知识主题、技能、学科或语言。
+  - positive_examples: `微积分`、`机器学习`、`写作`、`Python`、`中文`
+  - negative_examples: `紧张`、`成功`、`意义`
+  - notes: 不包含情绪、心理状态、抽象结果或价值判断；这些应写入 `description`。
+
+- `偏好习惯目标`
+  - definition: 用户稳定的偏好、重复习惯，以及具体、明确、用户特异且值得长期保留的目标。
+  - positive_examples: `喜欢安静环境`、`晨跑`、`通过雅思`
+  - negative_examples: `紧张`、`开心`、`成功`、`回报`
+  - notes: 这是高风险类型；只允许稳定偏好、重复习惯、具体目标，不允许抽象愿望或情绪状态。
+
+- `称呼别名`
+  - definition: 用于指代或称呼实体的名字。
+  - positive_examples: `山哥`、`老张`、`X1`
+  - negative_examples: `导师`、`程序员`、`好人`
+  - notes: 只用于名字性表达，不用于角色、职业、评价词。
+
+实体类型总规则：
+
+- unresolved 或边界不稳的表达，不因“看起来像名词”就创建实体。
+- 情绪、心理状态、金额、数量、普通时间、一次性动作短语，默认不作为独立实体类型抽取。
+- 抽象命题片段、泛化结果、价值判断，默认不创建实体；如有保留价值，应写入相关高价值实体的 `description`。
+
+实体类型选择原则：
+
+- 优先保留对用户画像、偏好、长期身份、稳定关系或持续兴趣有记忆价值的实体类型。
+- 对于“努力”“回报”“意义”“成功”这类泛化概念、抽象命题片段或价值判断，默认不要仅因句中出现就创建实体。
+- `群体` 只用于边界相对稳定、可被当作整体引用的人群；像“他们”“一些人”“一个朋友”这类边界不稳或 unresolved 的表达不要归入 `群体`。
+- `偏好习惯目标` 只能用于稳定偏好、重复习惯或具体明确的用户目标，不能把抽象结果、泛因果终点、空泛愿望或情绪状态强行归入其中。
+- 当前阶段不抽取情绪状态实体；像“紧张”“开心”“难过”“焦虑”“放松”这类情绪或心理状态，不要归入 `知识能力`、`偏好习惯目标` 或其他现有类型。
+
+===关系本体大类===
+以下大类是当前 `predicate` 本体树的第一层，用于帮助理解和约束后面的具体关系白名单。输出具体 `predicate` 时仍然必须使用后文列出的细关系，而不是直接输出这些大类名称。
+
+- `命名关系`
+  - definition: 表达实体名称、别名、称呼之间的对应或使用关系。
+  - covered_predicates: `别名属于`、`使用称呼`
+  - positive_examples: `山哥 -> 别名属于 -> 用户`、`我的朋友 -> 使用称呼 -> 山哥`
+  - negative_examples: `导师 -> 别名属于 -> 用户`、`好人 -> 使用称呼 -> 用户`
+  - notes: 只处理名字性表达，不处理角色、职业、评价词。
+  - status: `enabled`
+
+- `类型归属关系`
+  - definition: 表达实体属于某种类别，或主体承担某种角色/职业身份的关系。
+  - covered_predicates: `属于类型`、`担任角色`、`从事职业`
+  - positive_examples: `王教授 -> 担任角色 -> 导师`、`张三 -> 从事职业 -> 程序员`
+  - negative_examples: `张三 -> 担任角色 -> 山哥`、`用户 -> 从事职业 -> 紧张`
+  - notes: 用于“是什么”，不用于“叫什么”。
+  - status: `enabled`
+
+- `成员隶属关系`
+  - definition: 表达主体属于某个组织、群体或集合的成员归属关系。
+  - covered_predicates: `成员属于`
+  - positive_examples: `张三 -> 成员属于 -> 实验室成员`、`用户 -> 成员属于 -> 社群`
+  - negative_examples: `他们 -> 成员属于 -> 学校`、`一个朋友 -> 成员属于 -> 班级`
+  - notes: 前提是主体和归属对象都足够稳定；边界不稳的人群不要硬抽。
+  - status: `enabled`
+
+- `任职服务关系`
+  - definition: 表达人物或主体在组织中的工作、任职或服务关系。
+  - covered_predicates: `任职于`
+  - positive_examples: `张明 -> 任职于 -> 腾讯`、`王教授 -> 任职于 -> 清华大学`
+  - negative_examples: `张明 -> 任职于 -> 导师`、`用户 -> 任职于 -> 明天的面试`
+  - notes: 优先用于人物到组织的稳定供职关系。
+  - status: `enabled`
+
+- `空间位置关系`
+  - definition: 表达实体与地点、场所、空间位置之间的稳定位置关系。
+  - covered_predicates: `位于`、`拥有位置`、`居住于`
+  - positive_examples: `用户 -> 居住于 -> 巴黎`、`办公室 -> 位于 -> 北京`
+  - negative_examples: `用户 -> 位于 -> 明天下午三点`、`这里 -> 位于 -> 学校`
+  - notes: 普通时间表达和未解析位置指代不进入此类。
+  - status: `enabled`
+
+- `前往到访关系`
+  - definition: 表达主体前往、到访某地点、场所、组织、课程或活动对象的关系。
+  - covered_predicates: `前往`
+  - positive_examples: `用户 -> 前往 -> 图书馆`、`用户 -> 前往 -> 公司`
+  - negative_examples: `用户 -> 前往 -> 明天下午三点`、`用户 -> 前往 -> 复习微积分任务`
+  - notes: 当前应优先用于稳定倾向或有记忆价值的到访对象，不鼓励因一次性日程而过抽。
+  - status: `enabled`
+
+- `组成包含关系`
+  - definition: 表达部分与整体、包含与被包含之间的结构关系。
+  - covered_predicates: `组成部分`、`包含部分`
+  - positive_examples: `教研组 -> 组成部分 -> 学院`、`学院 -> 包含部分 -> 教研组`
+  - negative_examples: `用户 -> 组成部分 -> 图书馆`、`微积分 -> 包含部分 -> 用户`
+  - notes: 只用于结构性组成关系，不用于临时搭配或抽象联系。
+  - status: `enabled`
+
+- `拥有持有关系`
+  - definition: 表达主体拥有、持有、配有某对象、账号、联系方式或标识的关系。
+  - covered_predicates: `拥有`、`拥有账号`、`拥有联系方式`、`标识为`
+  - positive_examples: `用户 -> 拥有账号 -> GitHub账号`、`用户 -> 拥有联系方式 -> 邮箱`、`用户 -> 标识为 -> 学号`
+  - negative_examples: `用户 -> 拥有 -> 紧张`、`努力 -> 拥有 -> 回报`
+  - notes: 不用于抽象命题、情绪状态或口号式表达。
+  - status: `enabled`
+
+- `使用采用关系`
+  - definition: 表达主体使用、采用某工具、产品、平台、语言或资源的关系。
+  - covered_predicates: `使用`、`使用语言`
+  - positive_examples: `用户 -> 使用 -> 微信`、`用户 -> 使用语言 -> 中文`
+  - negative_examples: `用户 -> 使用 -> 成功`、`用户 -> 使用语言 -> 紧张`
+  - notes: 以后若扩展“采用方法”，也可挂在本大类下。
+  - status: `enabled`
+
+- `创建生产关系`
+  - definition: 表达主体创建、撰写、生产某对象或结果的关系。
+  - covered_predicates: `创建了`、`由…创建`、`撰写了`
+  - positive_examples: `用户 -> 撰写了 -> 简历`、`简历 -> 由…创建 -> 用户`
+  - negative_examples: `用户 -> 创建了 -> 明天下午三点`、`努力 -> 由…创建 -> 用户`
+  - notes: 只用于明确的生产、创作、撰写关系。
+  - status: `enabled`
+
+- `知识学习关系`
+  - definition: 表达主体与知识、技能、学科、语言等知识能力对象之间的认知、学习或兴趣关系。
+  - covered_predicates: `了解`、`学习`、`感兴趣于`
+  - positive_examples: `用户 -> 学习 -> 微积分`、`用户 -> 了解 -> 机器学习`、`用户 -> 感兴趣于 -> 心理学`
+  - negative_examples: `用户 -> 学习 -> 紧张`、`用户 -> 感兴趣于 -> 成功`
+  - notes: 关系对象应是 `知识能力` 类，而不是情绪、价值判断或抽象结果。
+  - status: `enabled`
+
+- `偏好目标关系`
+  - definition: 表达主体对对象的稳定偏好、厌恶，或对具体明确目标的指向关系。
+  - covered_predicates: `偏好`、`不喜欢`、`想要`
+  - positive_examples: `用户 -> 偏好 -> 安静环境`、`用户 -> 不喜欢 -> 辛辣食物`、`用户 -> 想要 -> 通过雅思`
+  - negative_examples: `用户 -> 想要 -> 成功`、`用户 -> 偏好 -> 紧张`、`用户 -> 不喜欢 -> 努力就会有回报`
+  - notes: 这是高风险大类；`想要` 只用于具体、明确、用户特异的目标，不用于抽象愿望。
+  - status: `enabled`
+
+- `职责责任关系`
+  - definition: 表达主体负责某项工作、职责、事务或领域的关系。
+  - covered_predicates: `负责`
+  - positive_examples: `张三 -> 负责 -> 招聘工作`、`王教授 -> 负责 -> 实验室项目`
+  - negative_examples: `张三 -> 负责 -> 紧张`、`用户 -> 负责 -> 成功`
+  - notes: 关系对象应是具体职责或事务，不应是情绪或抽象结果。
+  - status: `enabled`
+
+- `沟通交互关系`
+  - definition: 表达两个主体之间发生沟通、交流或交互的关系。
+  - covered_predicates: `沟通于`
+  - positive_examples: `用户 -> 沟通于 -> 张三`、`导师 -> 沟通于 -> 学生`
+  - negative_examples: `用户 -> 沟通于 -> 紧张`、`图书馆 -> 沟通于 -> 微积分`
+  - notes: 两端通常都应是可作为交互主体的实体。
+  - status: `enabled`
+
+- `提及关系`
+  - definition: 表达主体或文本明确提到某实体的关系。
+  - covered_predicates: `提到`
+  - positive_examples: `用户 -> 提到 -> 腾讯`、`文档 -> 提到 -> 张三`
+  - negative_examples: `用户 -> 提到 -> 努力`、`用户 -> 提到 -> 回报`、`用户 -> 提到 -> 紧张`
+  - notes: 受限大类；不用于保留泛化概念、抽象命题片段、情绪状态或仅在句面上出现但没有记忆价值的对象。
+  - status: `restricted`
+
+- `一般关联关系`
+  - definition: 表达两个实体之间存在明确、稳定、值得保留，但当前无更精确谓词可用的关联关系。
+  - covered_predicates: `关联于`、`相关于`
+  - positive_examples: `项目 -> 关联于 -> 实验室`、`账号 -> 相关于 -> 平台`
+  - negative_examples: `努力 -> 相关于 -> 回报`、`用户 -> 关联于 -> 紧张`、`成功 -> 相关于 -> 意义`
+  - notes: 受限大类；不能作为失败兜底关系，不能用来连接抽象概念、口号式表达或无法成立的关系。
+  - status: `restricted`

 ===预定义关系类型===
 只能使用以下中文关系类型。如果没有完全匹配的关系，请选择最接近的一项，不要发明新关系。
@@ -172,60 +398,90 @@ Primary statement to analyze:
 - `感兴趣于`: 主体对某主题感兴趣
 - `偏好`: 主体偏好某对象、方式或主题
 - `不喜欢`: 主体不喜欢某对象、方式或主题
- `想要`: 主体想获得、达成或拥有某对象或结果
+- `想要`: 主体想获得、达成或拥有具体、明确、用户特异且值得保留的对象或目标，不用于抽象结果、泛化愿望或口号式表达
 - `负责`: 主体负责某项工作、职责或领域
 - `沟通于`: 两个实体之间发生沟通或交流
 - `拥有联系方式`: 实体具有某联系方式
 - `拥有账号`: 实体具有某账号
 - `标识为`: 实体由某标识符标识
 - `使用语言`: 主体使用某语言
- `相关于`: 当存在明确联系但无更精确关系时使用的弱关系
+- `相关于`: 当存在明确、稳定且具有记忆价值的联系，但无更精确关系时使用的弱关系；不得用于泛化概念、抽象命题片段、口号式表达或仅为补全结构的联系

 ===Extraction Order===
 {% if language == "zh" %}
 按以下顺序执行：

 0. 先检查 `has_unsolved_reference`；如果为 `true`，直接返回空结果。
-1. 识别 `statement_text` 中值得抽取的稳定实体。
-2. 判断这些实体之间是否存在可由预定义关系类型表达的有效关系。
-3. 最后补充实体字段和关系字段。
+1. 先做指代解析：用户自指统一替换为 `用户`；其他可稳定解析的代词或指示表达替换为具体指代实体名。
+2. 如果仍存在无法稳定解析的代词、指示词或省略主体，直接返回空结果。
+3. 识别 `statement_text` 中值得抽取的稳定实体。
+4. 判断这些实体之间是否存在可由预定义关系类型表达的有效关系。
+5. 最后补充实体字段和关系字段。

 不要让附加字段主导整个抽取过程。
 {% else %}
 Follow this order:

 0. First check `has_unsolved_reference`; if it is `true`, immediately return the empty result.
-1. Identify stable entities worth extracting from `statement_text`.
-2. Determine whether any valid relations between those entities can be expressed using the predefined Chinese predicates.
-3. Finally fill auxiliary entity and predicate fields.
+1. Resolve references first: normalize user self-reference to `用户`; replace other stably resolvable pronouns or demonstratives with their resolved entity names.
+2. If unresolved pronouns, demonstratives, or omitted subjects still remain, immediately return the empty result.
+3. Identify stable entities worth extracting from `statement_text`.
+4. Determine whether any valid relations between those entities can be expressed using the predefined Chinese predicates.
+5. Finally fill auxiliary entity and predicate fields.

 Do not let auxiliary fields drive the extraction process.
 {% endif %}

 ===Guidelines===

+**Reference Resolution:**
+{% if language == "zh" %}
+
+- 指代解析优先于实体抽取和关系抽取。
+- 所有用户自指表达都必须规范成 `用户`，包括“我”“我的”“我自己”等。
+- 对“他”“她”“它”“这个”“那个”“这家”“那家”“这里”“那里”等非用户自指表达，若上下文可稳定解析，则必须用解析后的具体实体名替换。
+- 若非用户自指表达无法稳定解析，则整条跳过，不输出部分结果。
+- 新出现的称呼、别名、昵称、产品名不是待消解代词，应保持原样。
+  {% else %}
+- Reference resolution happens before entity or relation extraction.
+- All user self-reference must be normalized to `用户`, including forms such as "I", "me", "my", and "myself".
+- For non-user references such as "he", "she", "it", "this", "that", "this company", "that place", "here", or "there", if the context supports a stable resolution, replace them with the resolved entity name.
+- If a non-user reference cannot be resolved stably, skip the entire statement and do not output partial results.
+- Newly introduced names, aliases, nicknames, and product names are not pronouns to be resolved; keep them in their original form.
+  {% endif %}
+
 **Entity Extraction:**
 {% if language == "zh" %}

 - 只有当某个名字、概念、对象、群体或地点在当前陈述中承担明确语义角色，或是理解有效关系所必需时，才创建实体。
 - 不要因为表面上出现了名词、修饰词或短语，就机械地创建实体。
+- 不要把完整命题、因果链、价值判断或口号式表达拆成多个低价值实体；例如“努力就会有回报”默认不应抽取出“努力”或“回报”作为实体。
 - 普通时间表达默认不抽取为实体，包括日期、时刻、明天、下周、今晚八点等。
 - 一次性动作短语默认不抽取为实体，例如“复习微积分”“去图书馆学习”“参观卢浮宫”。
 - 不要为了表达一句带时间或地点的行动，而额外创造“任务”“计划”“事件”实体。
 - 但如果动作明确把主体和某个稳定实体连接起来，可以保留该稳定实体，并抽取轻关系。例如“我去图书馆”“我去公司开会”“我去上课”“我去看演唱会”可以抽取 `前往`。
+- 当句子只是在讨论一般道理、抽象规律、空泛结果或非个体化概念，而这些概念本身不构成可复用记忆时，不要创建实体。
+- 如果句子表达的是用户的观点、信念、判断、愿望或目标倾向，但其中抽象对象不值得作为独立实体保留，则只保留相关高价值实体，不要再创建这些低价值对象实体，并把未抽取的抽象内容压缩写入相关实体的 `description`；例如“用户认为努力就会有回报”应只保留 `用户`，并在 `description` 中体现“用户认为努力就会有回报”。
+- 对于未抽取的抽象实体、抽象命题片段或泛化结果，只要它们对理解该高价值实体有帮助，就应优先写入该实体的 `description`，而不是改用宽泛关系或补造弱实体。
+- 当前阶段同样不要把情绪或心理状态抽成实体；如果句子里出现“紧张”“开心”“难过”“焦虑”“放松”等，应写入相关高价值实体的 `description`，而不是把它们标成 `知识能力`、`偏好习惯目标` 或其他近似类型。
 - 如果陈述里有值得保留的实体信息，但没有有效关系，可以只返回 `entities`，并把 `triplets` 设为 `[]`。
- `name` 默认保持原文中的表面形式，不要翻译；但用户自指要统一写成 `用户`。
+- `name` 默认保持原文中的表面形式，但用户自指必须写成 `用户`，可稳定解析的其他代词必须替换为具体指代实体名。
 - `description` 必须使用中文。
 - `type` 和 `type_description` 必须使用上方预定义的中文标签与中文定义。
  {% else %}
 - Extract entities only when they play a clear semantic role in the statement or are necessary for understanding a valid relation.
 - Do not mechanically create entities for every noun, modifier, or surface mention.
+- Do not split generic propositions, causal slogans, or value judgments into low-value abstract entities. For example, "effort brings reward" should not create standalone entities for "effort" or "reward" by default.
 - Do not extract ordinary time expressions as entities, including dates, timestamps, "tomorrow", "next week", or "8 PM tonight".
 - Do not extract one-off action phrases as entities, such as "review calculus", "study in the library", or "visit the Louvre".
 - Do not create extra "task", "plan", or "event" entities just to represent an action with time or location modifiers.
 - But if an action clearly connects the subject to a stable entity, keep that stable entity and use a light relation. For example, statements like "I go to the library", "I go to the office", "I go to class", or "I go to a concert" can use `前往`.
+- If the sentence is only about a generic principle, abstract outcome, or non-personalized concept that is not worth remembering on its own, do not create an entity for it.
+- If a statement expresses the user's belief, judgment, opinion, wish, or goal tendency but the referenced abstract concepts are not worth keeping as standalone entities, keep only the relevant high-value entities, do not create those low-value concept entities, and compress the unextracted abstract content into the relevant entity `description`. For example, "the user believes effort brings reward" should keep only `用户` and reflect that belief in `description`.
+- For abstract entities, proposition fragments, or generic outcomes that are not extracted, prefer writing them into the relevant retained entity's `description` when they help preserve the memory, instead of switching to a broad relation or inventing a weak entity.
+- In the current stage, do not extract emotional or psychological states as entities. States such as nervousness, happiness, sadness, anxiety, or relief should be written into the relevant retained entity's `description` rather than mapped to `知识能力`, `偏好习惯目标`, or any other approximate type.
 - If the statement contains entity-worthy content but no valid relation, it is acceptable to return `entities` with `triplets: []`.
- Keep `name` in its original surface form from the source text; exception: normalize user self-reference to `用户`.
+- Keep `name` in its original surface form by default, but write user self-reference as `用户` and replace other stably resolvable references with their resolved entity names.
 - `description` must be in English.
 - `type` and `type_description` must use the predefined Chinese labels and Chinese definitions above.
  {% endif %}
@@ -233,11 +489,11 @@ Do not let auxiliary fields drive the extraction process.
 **Semantic Memory (`is_explicit_memory`):**
 {% if language == "zh" %}

- 只有当实体明显属于语义知识记忆中的抽象概念时，才设为 `true`，例如概念、定义、理论、方法和知识主题。
+- 只有当实体明显属于语义知识记忆中的抽象知识对象时，才设为 `true`，例如概念、定义、理论、方法以及 `知识能力` 中的知识类对象。
 - 对人、组织、地点、具体物体以及大多数实例级实体，一律设为 `false`。
 - 除非非常明确，否则默认设为 `false`。
  {% else %}
- Use `true` only for abstract conceptual entities that belong in semantic knowledge memory, such as concepts, definitions, theories, methods, and knowledge topics.
+- Use `true` only for abstract knowledge-oriented entities that belong in semantic knowledge memory, such as concepts, definitions, theories, methods, and knowledge-oriented members of `知识能力`.
 - Use `false` for people, organizations, locations, concrete objects, and most instance-level entities.
 - Default to `false` unless the entity is clearly an abstract knowledge concept.
  {% endif %}
@@ -269,8 +525,14 @@ Do not let auxiliary fields drive the extraction process.
 - 如果没有任何预定义关系适用，返回 `triplets: []`。
 - 排除语气词、模糊情绪、孤立名词和缺乏明确关系结构的片段。
 - 如果陈述不支持有效关系，不要强行构造 triplet。
+- 不要为了保留一句抽象判断或泛因果命题，而强行构造“用户-拥有-努力”“努力-导致-回报”这类低价值 triplet。
+- `提到` 不用于保留泛化概念、抽象命题片段、口号式表达或仅在句面上出现但无记忆价值的对象。
+- `相关于` 不用于补救无法成立的关系，也不用于连接“努力”“回报”“成功”“意义”这类抽象概念。
+- `想要` 只用于具体、明确、用户特异且值得保留的对象或目标；如果想要的内容过于抽象或空泛，不要抽取 `想要`，应改写进相关实体的 `description`。
+- 不要为了保留情绪或心理状态而创建实体或弱关系；像“紧张”“开心”“难过”“焦虑”默认应写入相关实体的 `description`。
+- 对于表达用户观点、信念或判断的抽象命题句（如“用户认为努力就会有回报”），如果相关概念本身不值得保留，也不要只为了补全结构而额外创建对应实体；允许输出仅包含 `用户` 的 `entities` 和空的 `triplets`。
 - 如果 `has_unsolved_reference` 是 `true`，不要抽取实体或 triplets。
- `subject_name` 和 `object_name` 默认保持原文中的表面形式，不要翻译；但用户自指要统一写成 `用户`。
+- `subject_name` 和 `object_name` 默认保持原文中的表面形式，但用户自指必须写成 `用户`，可稳定解析的其他代词必须替换为具体指代实体名。
 - `predicate_description` 必须直接复用对应 `predicate` 的中文定义。
 - 不要把普通时间表达作为 triplet 的宾语。
 - 不要为了表达一次性计划、安排、日程而强行构造关系。
@@ -282,8 +544,14 @@ Do not let auxiliary fields drive the extraction process.
 - If no predefined relation fits, return `triplets: []`.
 - Exclude fillers, vague emotions, standalone nouns, and fragments without a clear relational structure.
 - If the statement does not support a valid relation, do not force a triplet.
+- Do not force low-value triplets such as "user-has-effort" or "effort-causes-reward" just to preserve a generic causal belief or slogan-like proposition.
+- Do not use `提到` to preserve generic concepts, proposition fragments, slogan-like expressions, or surface mentions that have no memory value.
+- Do not use `相关于` as a rescue relation when no real relation exists, and do not connect abstract concepts such as "effort", "reward", "success", or "meaning" with it.
+- Use `想要` only for concrete, specific, user-grounded objects or goals worth retaining; if the desired content is too abstract or generic, do not extract `想要` and instead rewrite it into the relevant entity `description`.
+- Do not create entities or weak relations just to preserve emotional or psychological states; states such as nervousness, happiness, sadness, or anxiety should normally be written into the relevant retained entity `description`.
+- For opinion statements about generic or abstract propositions (for example, "the user believes effort brings reward"), if the referenced concepts are not worth keeping, do not create extra entities just to complete a structure; it is valid to return only the `用户` entity with empty `triplets`.
 - If `has_unsolved_reference` is `true`, do not extract entities or triplets.
- Keep `subject_name` and `object_name` in their original surface form; exception: normalize user self-reference to `用户`.
+- Keep `subject_name` and `object_name` in their original surface form by default, but write user self-reference as `用户` and replace other stably resolvable references with their resolved entity names.
 - `predicate_description` must directly reuse the corresponding Chinese definition of `predicate`.
 - Do not use ordinary time expressions as triplet objects.
 - Do not force relations just to encode one-off plans, schedules, or actions.
@@ -320,6 +588,7 @@ Do not let auxiliary fields drive the extraction process.
  1. `alias -> 别名属于 -> canonical entity`
  2. `caller -> 使用称呼 -> alias`
 - 如果施称方在句中明确出现且对语义重要，不要省略它。
+- 在命名关系中，新出现的称呼、别名、昵称、产品名必须保持原样，不要被替换成其所指实体名。
  {% else %}
 - Distinguish between a naming fact and a naming act when the statement expresses both.
 - If the statement says that some entity or group calls or addresses another entity by a name, and the caller is explicitly mentioned in `statement_text`, extract the caller as an entity.
@@ -328,6 +597,7 @@ Do not let auxiliary fields drive the extraction process.
  1. `alias -> 别名属于 -> canonical entity`
  2. `caller -> 使用称呼 -> alias`
 - Do not drop the caller entity if it is explicitly stated and semantically important to the naming relation.
+- In naming relations, newly introduced names, aliases, nicknames, or product names must stay in their original form rather than being replaced by their referent.
  {% endif %}

 **subject_name / object_name Consistency:**
@@ -352,29 +622,28 @@ Output:
    {"subject_name": "用户", "subject_id": 0, "predicate": "居住于", "predicate_description": "人物居住在某地点", "object_name": "巴黎", "object_id": 1}
  ],
  "entities": [
-    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "现实中的具体个人", "description": "居住在巴黎的说话者", "is_explicit_memory": false},
-    {"entity_idx": 1, "name": "巴黎", "type": "地点", "type_description": "具有地理或空间意义的位置", "description": "用户居住的城市", "is_explicit_memory": false}
+    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "居住在巴黎的说话者", "is_explicit_memory": false},
+    {"entity_idx": 1, "name": "巴黎", "type": "地点设施", "type_description": "具有地理意义或功能性空间意义的位置与场所。", "description": "用户居住的城市", "is_explicit_memory": false}
  ]
 }

 **示例 2**
-Statement: "张明在腾讯工作，负责 AI 产品开发。"
+Statement: "他在腾讯工作。"
+Input condition: supporting context has already made it clear that “他” refers to “张明”.

 Output:
 {
  "triplets": [
-    {"subject_name": "张明", "subject_id": 0, "predicate": "任职于", "predicate_description": "主体在某组织中工作或任职", "object_name": "腾讯", "object_id": 1},
-    {"subject_name": "张明", "subject_id": 0, "predicate": "负责", "predicate_description": "主体负责某项工作、职责或领域", "object_name": "AI 产品开发", "object_id": 2}
+    {"subject_name": "张明", "subject_id": 0, "predicate": "任职于", "predicate_description": "主体在某组织中工作或任职", "object_name": "腾讯", "object_id": 1}
  ],
  "entities": [
-    {"entity_idx": 0, "name": "张明", "type": "人物", "type_description": "现实中的具体个人", "description": "在腾讯负责 AI 产品开发的人员", "is_explicit_memory": false},
-    {"entity_idx": 1, "name": "腾讯", "type": "组织", "type_description": "公司、机构、团队、社群等组织性主体", "description": "张明任职的公司", "is_explicit_memory": false},
-    {"entity_idx": 2, "name": "AI 产品开发", "type": "知识主题", "type_description": "主题、领域、方法、理论或知识概念", "description": "张明负责的工作方向", "is_explicit_memory": true}
+    {"entity_idx": 0, "name": "张明", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "在腾讯工作的人员", "is_explicit_memory": false},
+    {"entity_idx": 1, "name": "腾讯", "type": "组织", "type_description": "公司、机构、学校、实验室、团队、社群等组织性主体。", "description": "张明任职的公司", "is_explicit_memory": false}
  ]
 }

 **示例 3**
-Statement: "我明天下午三点去图书馆复习微积分。"
+Statement: "我常去图书馆学微积分。"

 Output:
 {
@@ -383,9 +652,9 @@ Output:
    {"subject_name": "用户", "subject_id": 0, "predicate": "学习", "predicate_description": "主体正在学习某知识主题或技能", "object_name": "微积分", "object_id": 2}
  ],
  "entities": [
-    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "现实中的具体个人", "description": "提到自己安排的说话者", "is_explicit_memory": false},
-    {"entity_idx": 1, "name": "图书馆", "type": "设施", "type_description": "建筑、场馆、房间、实验室等功能性空间", "description": "用户提到要去的地点", "is_explicit_memory": false},
-    {"entity_idx": 2, "name": "微积分", "type": "知识主题", "type_description": "主题、领域、方法、理论或知识概念", "description": "用户提到的学习主题", "is_explicit_memory": true}
+    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "经常去图书馆学习微积分的说话者", "is_explicit_memory": false},
+    {"entity_idx": 1, "name": "图书馆", "type": "地点设施", "type_description": "具有地理意义或功能性空间意义的位置与场所。", "description": "用户经常前往学习的地点", "is_explicit_memory": false},
+    {"entity_idx": 2, "name": "微积分", "type": "知识能力", "type_description": "可学习、掌握、使用或讨论的知识主题、技能、学科或语言。", "description": "用户经常学习的主题", "is_explicit_memory": true}
  ]
 }

@@ -409,9 +678,86 @@ Output:
    {"subject_name": "我的朋友", "subject_id": 1, "predicate": "使用称呼", "predicate_description": "主体使用某个名字来称呼另一实体", "object_name": "山哥", "object_id": 2}
  ],
  "entities": [
-    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "现实中的具体个人", "description": "被朋友称作山哥的说话者", "is_explicit_memory": false},
-    {"entity_idx": 1, "name": "我的朋友", "type": "群体", "type_description": "未具名或泛指的一组人", "description": "使用山哥这一称呼的人群", "is_explicit_memory": false},
-    {"entity_idx": 2, "name": "山哥", "type": "称呼", "type_description": "用于指代或称呼实体的名字", "description": "朋友用来称呼用户的昵称", "is_explicit_memory": false}
+    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "被朋友称作山哥的说话者", "is_explicit_memory": false},
+    {"entity_idx": 1, "name": "我的朋友", "type": "群体", "type_description": "边界相对稳定、可被当作整体引用的一组人。", "description": "使用山哥这一称呼的人群", "is_explicit_memory": false},
+    {"entity_idx": 2, "name": "山哥", "type": "称呼别名", "type_description": "用于指代或称呼实体的名字。", "description": "朋友用来称呼用户的昵称", "is_explicit_memory": false}
+  ]
+}
+
+**示例 6**
+Statement: "我认为努力就会有回报。"
+
+Output:
+{
+  "triplets": [],
+  "entities": [
+    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "认为努力就会有回报的说话者", "is_explicit_memory": false}
+  ]
+}
+
+**示例 7**
+Statement: "我想要成功。"
+
+Output:
+{
+  "triplets": [],
+  "entities": [
+    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "想要成功的说话者", "is_explicit_memory": false}
+  ]
+}
+
+**示例 8**
+Statement: "我最近有点紧张，不过这很正常。"
+
+Output:
+{
+  "triplets": [],
+  "entities": [
+    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "最近有些紧张并认为这很正常的说话者", "is_explicit_memory": false}
+  ]
+}
+
+**示例 9**
+Statement: "王教授是导师。"
+
+Output:
+{
+  "triplets": [
+    {"subject_name": "王教授", "subject_id": 0, "predicate": "担任角色", "predicate_description": "主体承担某个角色", "object_name": "导师", "object_id": 1}
+  ],
+  "entities": [
+    {"entity_idx": 0, "name": "王教授", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "承担导师角色的具体个人", "is_explicit_memory": false},
+    {"entity_idx": 1, "name": "导师", "type": "角色职业", "type_description": "人物承担的社会角色、功能身份或职业身份。", "description": "王教授承担的角色身份", "is_explicit_memory": false}
+  ]
+}
+
+**示例 10**
+Statement: "我的GitHub账号用户名是chen4。"
+
+Output:
+{
+  "triplets": [
+    {"subject_name": "用户", "subject_id": 0, "predicate": "拥有账号", "predicate_description": "实体具有某账号", "object_name": "GitHub账号", "object_id": 1},
+    {"subject_name": "GitHub账号", "subject_id": 1, "predicate": "标识为", "predicate_description": "实体由某标识符标识", "object_name": "chen4", "object_id": 2}
+  ],
+  "entities": [
+    {"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "拥有该 GitHub 账号的说话者", "is_explicit_memory": false},
+    {"entity_idx": 1, "name": "GitHub账号", "type": "账号", "type_description": "账户、账号、用户档案类实体。", "description": "用户拥有的 GitHub 账号", "is_explicit_memory": false},
+    {"entity_idx": 2, "name": "chen4", "type": "标识符", "type_description": "用于识别实体的编号、ID、用户名、学号、工号等标识。", "description": "该 GitHub 账号对应的用户名标识", "is_explicit_memory": false}
+  ]
+}
+
+**示例 11**
+Statement: "机器人查票员和我沟通。"
+
+Output:
+{
+  "triplets": [
+    {"subject_name": "机器人查票员", "subject_id": 0, "predicate": "沟通于", "predicate_description": "两个实体之间发生沟通或交流", "object_name": "用户", "object_id": 1}
+  ],
+  "entities": [
+    {"entity_idx": 0, "name": "机器人查票员", "type": "智能体", "type_description": "具有行动、交互或执行能力的非人主体，如机器人、AI 或其他智慧体。", "description": "与用户发生沟通的机器人主体", "is_explicit_memory": false},
+    {"entity_idx": 1, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "与机器人查票员沟通的说话者", "is_explicit_memory": false}
  ]
 }
 ===End of Examples===
@@ -424,10 +770,11 @@ JSON 要求：
 - 字符串内部引号必须转义为 `\"`
 - 不要使用中文引号
 - 字符串值中不要换行
- `name`、`subject_name`、`object_name` 默认保持原文中的表面形式，不要翻译；但用户自指必须规范成 `用户`
+- `name`、`subject_name`、`object_name` 默认保持原文中的表面形式，但用户自指必须规范成 `用户`，可稳定解析的其他代词必须替换为具体指代实体名
 - `description` 必须使用中文
 - `type`、`predicate`、`type_description`、`predicate_description` 必须使用上方预定义的中文标签和中文说明
 - 如果 `has_unsolved_reference` 是 `true`，输出必须是 `{"entities": [], "triplets": []}`
+- 如果存在无法稳定解析的代词或指示表达，输出也必须是 `{"entities": [], "triplets": []}`
 - 如果没有有效 triplet，返回 `"triplets": []`
  {% else %}
  JSON Requirements:
@@ -435,10 +782,11 @@ JSON 要求：
 - Escape internal quotes using `\"`
 - No Chinese quotation marks
 - No line breaks inside string values
- `name`, `subject_name`, and `object_name` must keep the original surface form from the source text, except user self-reference which must be normalized to `用户`
+- `name`, `subject_name`, and `object_name` keep their original surface forms by default, but user self-reference must be normalized to `用户` and other stably resolvable references must be replaced by their resolved entity names
 - `description` must be in English
 - `type`, `predicate`, `type_description`, and `predicate_description` must use the predefined Chinese labels and Chinese definitions above
 - If `has_unsolved_reference` is `true`, the output must be `{"entities": [], "triplets": []}`
+- If the statement still contains a pronoun or demonstrative expression that cannot be stably resolved, the output must also be `{"entities": [], "triplets": []}`
 - If no valid triplet exists, return `"triplets": []`
  {% endif %}

--- a/api/app/repositories/neo4j/create_indexes.py
+++ b/api/app/repositories/neo4j/create_indexes.py
@@ -46,6 +46,12 @@ async def create_fulltext_indexes():
            OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
        """)

+        # 创建 AssistantPruned 剪枝文本全文索引
+        await connector.execute_query("""
+            CREATE FULLTEXT INDEX assistantPrunedFulltext IF NOT EXISTS FOR (p:AssistantPruned) ON EACH [p.text]
+            OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
+        """)
+
    finally:
        await connector.close()

@@ -135,6 +141,17 @@ async def create_vector_indexes():
              `vector.similarity_function`: 'cosine'
            }}
        """)
+
+        # AssistantPruned text embedding index (optional, for semantic search on pruned hints)
+        await connector.execute_query("""
+            CREATE VECTOR INDEX assistant_pruned_embedding_index IF NOT EXISTS
+            FOR (p:AssistantPruned)
+            ON p.text_embedding
+            OPTIONS {indexConfig: {
+              `vector.dimensions`: 1024,
+              `vector.similarity_function`: 'cosine'
+            }}
+        """)
    finally:
        await connector.close()

@@ -179,6 +196,22 @@ async def create_unique_constraints():
            """
        )

+        # AssistantOriginal.id unique
+        await connector.execute_query(
+            """
+            CREATE CONSTRAINT assistant_original_id_unique IF NOT EXISTS
+            FOR (o:AssistantOriginal) REQUIRE o.id IS UNIQUE
+            """
+        )
+
+        # AssistantPruned.id unique
+        await connector.execute_query(
+            """
+            CREATE CONSTRAINT assistant_pruned_id_unique IF NOT EXISTS
+            FOR (p:AssistantPruned) REQUIRE p.id IS UNIQUE
+            """
+        )
+
    finally:
        await connector.close()

--- a/api/app/repositories/neo4j/cypher_queries.py
+++ b/api/app/repositories/neo4j/cypher_queries.py
@@ -1363,154 +1363,60 @@ ORDER BY score DESC
 LIMIT $limit
 """

-SEARCH_STATEMENTS_BY_KEYWORD = """
-CALL db.index.fulltext.queryNodes("statementsFulltext", $query) YIELD node AS s, score
-WHERE ($end_user_id IS NULL OR s.end_user_id = $end_user_id)
-OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
-OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity)
-RETURN s.id AS id,
-       s.statement AS statement,
-       s.end_user_id AS end_user_id,
-       s.chunk_id AS chunk_id,
-       s.created_at AS created_at,
-       s.expired_at AS expired_at,
-       s.valid_at AS valid_at,
-       properties(s)['invalid_at'] AS invalid_at,
-       c.id AS chunk_id_from_rel,
-       collect(DISTINCT e.id) AS entity_ids,
-       COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
-       COALESCE(s.importance_score, 0.5) AS importance_score,
-       s.last_access_time AS last_access_time,
-       COALESCE(s.access_count, 0) AS access_count,
-       score
-ORDER BY score DESC
-LIMIT $limit
-"""

-SEARCH_ENTITIES_BY_NAME_OR_ALIAS = """
-CALL db.index.fulltext.queryNodes("entitiesFulltext", $query) YIELD node AS e, score
-WHERE ($end_user_id IS NULL OR e.end_user_id = $end_user_id)
-WITH e, score
-With collect({entity: e, score: score}) AS fulltextResults
+# ── Assistant Pruning Nodes & Edges ──

-OPTIONAL MATCH (ae:ExtractedEntity)
-WHERE ($end_user_id IS NULL OR ae.end_user_id = $end_user_id)
-  AND ae.aliases IS NOT NULL
-  AND ANY(alias IN ae.aliases WHERE toLower(alias) CONTAINS toLower($query))
-WITH fulltextResults, collect(ae) AS aliasEntities
-
-UNWIND (fulltextResults + [x IN aliasEntities | {entity: x, score:
-     CASE 
-       WHEN ANY(alias IN x.aliases WHERE toLower(alias) = toLower($query)) THEN 1.0
-       WHEN ANY(alias IN x.aliases WHERE toLower(alias) STARTS WITH toLower($query)) THEN 0.9
-       ELSE 0.8
-     END
-}]) AS row
-WITH row.entity AS e, row.score AS score
-WITH DISTINCT e, MAX(score) AS score
-OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
-OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
-RETURN e.id AS id,
-       e.name AS name,
-       e.end_user_id AS end_user_id,
-       e.entity_type AS entity_type,
-       e.created_at AS created_at,
-       e.expired_at AS expired_at,
-       e.entity_idx AS entity_idx,
-       e.statement_id AS statement_id,
-       e.description AS description,
-       e.aliases AS aliases,
-       e.name_embedding AS name_embedding,
-       e.connect_strength AS connect_strength,
-       collect(DISTINCT s.id) AS statement_ids,
-       collect(DISTINCT c.id) AS chunk_ids,
-       COALESCE(e.activation_value, e.importance_score, 0.5) AS activation_value,
-       COALESCE(e.importance_score, 0.5) AS importance_score,
-       e.last_access_time AS last_access_time,
-       COALESCE(e.access_count, 0) AS access_count,
-       score
-ORDER BY score DESC
-LIMIT $limit
-"""
-
-SEARCH_CHUNKS_BY_CONTENT = """
-CALL db.index.fulltext.queryNodes("chunksFulltext", $query) YIELD node AS c, score
-WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
-OPTIONAL MATCH (c)-[:CONTAINS]->(s:Statement)
-OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity)
-RETURN c.id AS id,
-       c.end_user_id AS end_user_id,
-       c.content AS content,
-       c.dialog_id AS dialog_id,
-       c.sequence_number AS sequence_number,
-       collect(DISTINCT s.id) AS statement_ids,
-       collect(DISTINCT e.id) AS entity_ids,
-       COALESCE(c.activation_value, 0.5) AS activation_value,
-       c.last_access_time AS last_access_time,
-       COALESCE(c.access_count, 0) AS access_count,
-       score
-ORDER BY score DESC
-LIMIT $limit
-"""
-
-# MemorySummary keyword search using fulltext index
-SEARCH_MEMORY_SUMMARIES_BY_KEYWORD = """
-CALL db.index.fulltext.queryNodes("summariesFulltext", $query) YIELD node AS m, score
-WHERE ($end_user_id IS NULL OR m.end_user_id = $end_user_id)
-OPTIONAL MATCH (m)-[:DERIVED_FROM_STATEMENT]->(s:Statement)
-RETURN m.id AS id,
-       m.name AS name,
-       m.end_user_id AS end_user_id,
-       m.dialog_id AS dialog_id,
-       m.chunk_ids AS chunk_ids,
-       m.content AS content,
-       m.created_at AS created_at,
-       COALESCE(m.activation_value, m.importance_score, 0.5) AS activation_value,
-       COALESCE(m.importance_score, 0.5) AS importance_score,
-       m.last_access_time AS last_access_time,
-       COALESCE(m.access_count, 0) AS access_count,
-       score
-ORDER BY score DESC
-LIMIT $limit
-"""
-
-# Community keyword search: matches name or summary via fulltext index
-SEARCH_COMMUNITIES_BY_KEYWORD = """
-CALL db.index.fulltext.queryNodes("communitiesFulltext", $query) YIELD node AS c, score
-WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
-RETURN c.community_id AS id,
-       c.name AS name,
-       c.summary AS content,
-       c.core_entities AS core_entities,
-       c.member_count AS member_count,
-       c.end_user_id AS end_user_id,
-       c.updated_at AS updated_at,
-       score
-ORDER BY score DESC
-LIMIT $limit
-"""
-
-FULLTEXT_QUERY_CYPHER_MAPPING = {
-    Neo4jNodeType.STATEMENT: SEARCH_STATEMENTS_BY_KEYWORD,
-    Neo4jNodeType.EXTRACTEDENTITY: SEARCH_ENTITIES_BY_NAME_OR_ALIAS,
-    Neo4jNodeType.CHUNK: SEARCH_CHUNKS_BY_CONTENT,
-    Neo4jNodeType.MEMORYSUMMARY: SEARCH_MEMORY_SUMMARIES_BY_KEYWORD,
-    Neo4jNodeType.COMMUNITY: SEARCH_COMMUNITIES_BY_KEYWORD,
-    Neo4jNodeType.PERCEPTUAL: SEARCH_PERCEPTUALS_BY_KEYWORD
+ASSISTANT_ORIGINAL_NODE_SAVE = """
+UNWIND $originals AS orig
+MERGE (o:AssistantOriginal {id: orig.id})
+SET o += {
+    end_user_id: orig.end_user_id,
+    run_id: orig.run_id,
+    dialog_id: orig.dialog_id,
+    pair_id: orig.pair_id,
+    text: orig.text,
+    created_at: orig.created_at,
+    expired_at: orig.expired_at
 }
-USER_ID_QUERY_CYPHER_MAPPING = {
-    Neo4jNodeType.STATEMENT: SEARCH_STATEMENTS_BY_USER_ID,
-    Neo4jNodeType.EXTRACTEDENTITY: SEARCH_ENTITIES_BY_USER_ID,
-    Neo4jNodeType.CHUNK: SEARCH_CHUNKS_BY_USER_ID,
-    Neo4jNodeType.MEMORYSUMMARY: SEARCH_MEMORY_SUMMARIES_BY_USER_ID,
-    Neo4jNodeType.COMMUNITY: SEARCH_COMMUNITIES_BY_USER_ID,
-    Neo4jNodeType.PERCEPTUAL: SEARCH_PERCEPTUAL_BY_USER_ID
-}
-NODE_ID_QUERY_CYPHER_MAPPING = {
-    Neo4jNodeType.STATEMENT: SEARCH_STATEMENTS_BY_IDS,
-    Neo4jNodeType.EXTRACTEDENTITY: SEARCH_ENTITIES_BY_IDS,
-    Neo4jNodeType.CHUNK: SEARCH_CHUNKS_BY_IDS,
-    Neo4jNodeType.MEMORYSUMMARY: SEARCH_MEMORY_SUMMARIES_BY_IDS,
-    Neo4jNodeType.COMMUNITY: SEARCH_COMMUNITIES_BY_IDS,
-    Neo4jNodeType.PERCEPTUAL: SEARCH_PERCEPTUAL_BY_IDS
+RETURN o.id AS uuid
+"""
+
+ASSISTANT_PRUNED_NODE_SAVE = """
+UNWIND $pruneds AS p
+MERGE (pr:AssistantPruned {id: p.id})
+SET pr += {
+    end_user_id: p.end_user_id,
+    run_id: p.run_id,
+    dialog_id: p.dialog_id,
+    pair_id: p.pair_id,
+    text: p.text,
+    memory_type: p.memory_type,
+    text_embedding: p.text_embedding,
+    created_at: p.created_at,
+    expired_at: p.expired_at
 }
+RETURN pr.id AS uuid
+"""
+
+ASSISTANT_PRUNED_EDGE_SAVE = """
+UNWIND $edges AS edge
+MATCH (o:AssistantOriginal {id: edge.source})
+MATCH (p:AssistantPruned {id: edge.target})
+MERGE (o)-[r:PRUNED_TO]->(p)
+SET r.pair_id = edge.pair_id,
+    r.end_user_id = edge.end_user_id,
+    r.run_id = edge.run_id,
+    r.created_at = edge.created_at
+RETURN elementId(r) AS uuid
+"""
+
+ASSISTANT_DIALOG_EDGE_SAVE = """
+UNWIND $edges AS edge
+MATCH (o:AssistantOriginal {id: edge.source})
+MATCH (d:Dialogue {id: edge.target})
+MERGE (o)-[r:BELONGS_TO_DIALOG]->(d)
+SET r.end_user_id = edge.end_user_id,
+    r.run_id = edge.run_id,
+    r.created_at = edge.created_at
+RETURN elementId(r) AS uuid
+"""
--- a/api/app/repositories/neo4j/graph_saver.py
+++ b/api/app/repositories/neo4j/graph_saver.py
@@ -24,6 +24,10 @@ from app.core.memory.models.graph_models import (
    EntityEntityEdge,
    PerceptualNode,
    PerceptualEdge,
+    AssistantOriginalNode,
+    AssistantPrunedNode,
+    AssistantPrunedEdge,
+    AssistantDialogEdge,
 )
 import logging

@@ -166,6 +170,10 @@ async def save_dialog_and_statements_to_neo4j(
        statement_entity_edges: List[StatementEntityEdge],
        perceptual_edges: List[PerceptualEdge],
        connector: Neo4jConnector,
+        assistant_original_nodes: Optional[List[AssistantOriginalNode]] = None,
+        assistant_pruned_nodes: Optional[List[AssistantPrunedNode]] = None,
+        assistant_pruned_edges: Optional[List[AssistantPrunedEdge]] = None,
+        assistant_dialog_edges: Optional[List[AssistantDialogEdge]] = None,
 ) -> bool:
    """Save dialogue nodes, chunk nodes, statement nodes, entities, and all relationships to Neo4j using graph models.

@@ -368,6 +376,55 @@ async def save_dialog_and_statements_to_neo4j(
            results['perceptual_chunk_edges'] = perceptual_edges_uuids
            logger.info(f"Successfully saved {len(perceptual_edges_uuids)} perceptual-chunk edges to Neo4j")

+        # 8. Save assistant original nodes
+        if assistant_original_nodes:
+            from app.repositories.neo4j.cypher_queries import ASSISTANT_ORIGINAL_NODE_SAVE
+            original_data = [node.model_dump() for node in assistant_original_nodes]
+            result = await tx.run(ASSISTANT_ORIGINAL_NODE_SAVE, originals=original_data)
+            original_uuids = [record["uuid"] async for record in result]
+            results['assistant_originals'] = original_uuids
+            logger.info(f"Successfully saved {len(original_uuids)} assistant original nodes to Neo4j")
+
+        # 9. Save assistant pruned nodes
+        if assistant_pruned_nodes:
+            from app.repositories.neo4j.cypher_queries import ASSISTANT_PRUNED_NODE_SAVE
+            pruned_data = [node.model_dump() for node in assistant_pruned_nodes]
+            result = await tx.run(ASSISTANT_PRUNED_NODE_SAVE, pruneds=pruned_data)
+            pruned_uuids = [record["uuid"] async for record in result]
+            results['assistant_pruneds'] = pruned_uuids
+            logger.info(f"Successfully saved {len(pruned_uuids)} assistant pruned nodes to Neo4j")
+
+        # 10. Save PRUNED_TO edges (Original → Pruned)
+        if assistant_pruned_edges:
+            from app.repositories.neo4j.cypher_queries import ASSISTANT_PRUNED_EDGE_SAVE
+            edge_data = [{
+                "source": edge.source,
+                "target": edge.target,
+                "pair_id": edge.pair_id,
+                "end_user_id": edge.end_user_id,
+                "run_id": edge.run_id,
+                "created_at": edge.created_at.isoformat() if edge.created_at else None,
+            } for edge in assistant_pruned_edges]
+            result = await tx.run(ASSISTANT_PRUNED_EDGE_SAVE, edges=edge_data)
+            pruned_edge_uuids = [record["uuid"] async for record in result]
+            results['assistant_pruned_edges'] = pruned_edge_uuids
+            logger.info(f"Successfully saved {len(pruned_edge_uuids)} PRUNED_TO edges to Neo4j")
+
+        # 11. Save BELONGS_TO_DIALOG edges (Original → Dialogue)
+        if assistant_dialog_edges:
+            from app.repositories.neo4j.cypher_queries import ASSISTANT_DIALOG_EDGE_SAVE
+            edge_data = [{
+                "source": edge.source,
+                "target": edge.target,
+                "end_user_id": edge.end_user_id,
+                "run_id": edge.run_id,
+                "created_at": edge.created_at.isoformat() if edge.created_at else None,
+            } for edge in assistant_dialog_edges]
+            result = await tx.run(ASSISTANT_DIALOG_EDGE_SAVE, edges=edge_data)
+            dialog_edge_uuids = [record["uuid"] async for record in result]
+            results['assistant_dialog_edges'] = dialog_edge_uuids
+            logger.info(f"Successfully saved {len(dialog_edge_uuids)} BELONGS_TO_DIALOG edges to Neo4j")
+
        return results

    try: