refactor(memory): enhance extraction ontology and add assistant pruning graph support
- Expand entity type ontology with detailed definitions, examples, and notes (merged types: 地点设施, 物品设备, 产品服务, 软件平台, 角色职业, 知识能力, 偏好习惯目标, 称呼别名, 智能体) - Add relation ontology taxonomy with 15 predicate categories and usage rules - Strengthen reference resolution rules: resolve pronouns before extraction, skip unresolvable references entirely - Add guidelines to avoid extracting abstract propositions, emotions, and low-value entities (effort/reward/success patterns) - Add 7 new extraction examples covering edge cases - Add AssistantOriginal/AssistantPruned node models and graph persistence (PRUNED_TO and BELONGS_TO_DIALOG edges, Neo4j indexes and constraints) - Add graph_build_step.py for building graph nodes/edges from DialogData - Update write_pipeline.py to pass assistant pruning nodes/edges to graph saver - Update data_pruning.py with related preprocessing changes
This commit is contained in:
@@ -578,3 +578,47 @@ class PerceptualNode(Node):
|
||||
domain: str
|
||||
file_type: str
|
||||
summary_embedding: list[float] | None
|
||||
|
||||
|
||||
class AssistantOriginalNode(Node):
    """Graph node holding an Assistant message exactly as it was before pruning.

    Attributes:
        pair_id: Identifier shared with the matching AssistantPrunedNode so
            the two nodes can be paired.
        dialog_id: Identifier of the dialogue that contains this message.
        text: Complete, unmodified Assistant response text.
    """

    pair_id: str = Field(
        ...,
        description="Shared pairing ID with the corresponding pruned node",
    )
    dialog_id: str = Field(
        ...,
        description="ID of the parent dialogue",
    )
    text: str = Field(
        ...,
        description="Original Assistant message text",
    )
|
||||
|
||||
|
||||
class AssistantPrunedNode(Node):
    """Node storing the pruned (compressed) text of an Assistant message.

    Attributes:
        pair_id: Shared ID with the corresponding AssistantOriginalNode for pairing.
        dialog_id: ID of the parent dialogue this message belongs to.
        text: The pruned memory hint text (or "NULL" if no memory value).
        memory_type: Type of the memory hint; one of
            comfort | suggestion | recommendation | warning | instruction | NULL.
            The value is stored as a plain string and is not validated against
            that set by this model.
        text_embedding: Optional embedding vector for semantic search on the
            pruned text; defaults to None when no embedding is supplied.
    """

    pair_id: str = Field(..., description="Shared pairing ID with the corresponding original node")
    dialog_id: str = Field(..., description="ID of the parent dialogue")
    text: str = Field(..., description="Pruned assistant memory hint text")
    memory_type: str = Field(..., description="Memory type: comfort|suggestion|recommendation|warning|instruction|NULL")
    # Builtin generic + PEP 604 union, matching the annotation style already
    # used by PerceptualNode.summary_embedding in this module.
    text_embedding: list[float] | None = Field(None, description="Embedding vector for semantic search")
|
||||
|
||||
|
||||
class AssistantPrunedEdge(Edge):
    """PRUNED_TO edge linking an AssistantOriginal node to its AssistantPruned node.

    Attributes:
        pair_id: Pairing identifier shared by both endpoint nodes, kept on the
            edge for traceability.
    """

    pair_id: str = Field(
        ...,
        description="Shared pairing ID for traceability",
    )
|
||||
|
||||
|
||||
class AssistantDialogEdge(Edge):
    """BELONGS_TO_DIALOG edge linking an AssistantOriginal node to its parent Dialogue node.

    Declares no fields of its own beyond those inherited from Edge.
    """
|
||||
|
||||
@@ -77,6 +77,10 @@ class ExtractionResult(BaseModel):
|
||||
stmt_entity_edges: List[StatementEntityEdge]
|
||||
entity_entity_edges: List[EntityEntityEdge]
|
||||
perceptual_edges: List[PerceptualEdge]
|
||||
assistant_original_nodes: List[Any] = Field(default_factory=list)
|
||||
assistant_pruned_nodes: List[Any] = Field(default_factory=list)
|
||||
assistant_pruned_edges: List[Any] = Field(default_factory=list)
|
||||
assistant_dialog_edges: List[Any] = Field(default_factory=list)
|
||||
dialog_data_list: List[Any] = Field(
|
||||
default_factory=list,
|
||||
description="原始 DialogData 列表,类型为 Any 以避免循环依赖",
|
||||
@@ -482,6 +486,10 @@ class WritePipeline:
|
||||
stmt_entity_edges=dedup_result.statement_entity_edges,
|
||||
entity_entity_edges=dedup_result.entity_entity_edges,
|
||||
perceptual_edges=graph.perceptual_edges,
|
||||
assistant_original_nodes=graph.assistant_original_nodes,
|
||||
assistant_pruned_nodes=graph.assistant_pruned_nodes,
|
||||
assistant_pruned_edges=graph.assistant_pruned_edges,
|
||||
assistant_dialog_edges=graph.assistant_dialog_edges,
|
||||
dialog_data_list=dialog_data_list,
|
||||
)
|
||||
|
||||
@@ -523,6 +531,10 @@ class WritePipeline:
|
||||
entity_edges=result.entity_entity_edges,
|
||||
perceptual_edges=result.perceptual_edges,
|
||||
connector=self._neo4j_connector,
|
||||
assistant_original_nodes=result.assistant_original_nodes,
|
||||
assistant_pruned_nodes=result.assistant_pruned_nodes,
|
||||
assistant_pruned_edges=result.assistant_pruned_edges,
|
||||
assistant_dialog_edges=result.assistant_dialog_edges,
|
||||
)
|
||||
if success:
|
||||
logger.info("Successfully saved all data to Neo4j")
|
||||
|
||||
@@ -15,7 +15,9 @@ import hashlib
|
||||
import json
|
||||
import logging
|
||||
from collections import OrderedDict
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Dict
|
||||
from uuid import uuid4
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@@ -39,6 +41,16 @@ def message_has_files(message: "ConversationMessage") -> bool:
|
||||
return message.files and len(message.files) > 0
|
||||
|
||||
|
||||
class AssistantPruningRecord(BaseModel):
    """Pruning record for a single User-Assistant message pair, kept for a later Neo4j write.

    Attributes:
        pair_id: Unique pairing ID shared by the Original and Pruned nodes.
        original_text: Full text of the original Assistant reply.
        pruned_text: Text after pruning (the assistant_memory_hint), or 'NULL'.
        memory_type: One of
            comfort | suggestion | recommendation | warning | instruction | NULL.
        created_at: ISO-format timestamp string.
    """

    pair_id: str = Field(
        ...,
        description="唯一配对 ID,Original 和 Pruned 节点共享",
    )
    original_text: str = Field(
        ...,
        description="Assistant 原始回复全文",
    )
    pruned_text: str = Field(
        ...,
        description="剪枝后文本(assistant_memory_hint),或 'NULL'",
    )
    memory_type: str = Field(
        ...,
        description="comfort|suggestion|recommendation|warning|instruction|NULL",
    )
    created_at: str = Field(
        ...,
        description="ISO 时间戳",
    )
|
||||
|
||||
|
||||
class AssistantPruningResponse(BaseModel):
|
||||
"""LLM 对单个 User-Assistant 消息对的剪枝结果。
|
||||
|
||||
@@ -95,6 +107,9 @@ class SemanticPruner:
|
||||
# Snapshot 数据收集:每个消息对的 input + gold
|
||||
self._snapshot_records: List[Dict] = []
|
||||
|
||||
# 剪枝记录:用于后续写入 Neo4j(AssistantOriginal + AssistantPruned 节点)
|
||||
self.pruning_records: List[AssistantPruningRecord] = []
|
||||
|
||||
# 运行日志
|
||||
self.run_logs: List[str] = []
|
||||
|
||||
@@ -246,6 +261,15 @@ class SemanticPruner:
|
||||
},
|
||||
})
|
||||
|
||||
# 收集剪枝记录(用于后续写入 Neo4j)
|
||||
self.pruning_records.append(AssistantPruningRecord(
|
||||
pair_id=uuid4().hex,
|
||||
original_text=asst_msg.msg,
|
||||
pruned_text=result.assistant_memory_hint,
|
||||
memory_type=result.assistant_memory_type,
|
||||
created_at=datetime.now().isoformat(),
|
||||
))
|
||||
|
||||
if result.assistant_memory_hint == "NULL":
|
||||
self._log(
|
||||
f" [{label}] 索引{asst_idx} → NULL,删除 "
|
||||
|
||||
@@ -855,6 +855,7 @@ class NewExtractionOrchestrator:
|
||||
entity_idx=e.entity_idx,
|
||||
name=e.name,
|
||||
type=e.type,
|
||||
type_description=getattr(e, "type_description", ""),
|
||||
description=e.description,
|
||||
is_explicit_memory=e.is_explicit_memory,
|
||||
)
|
||||
@@ -865,6 +866,7 @@ class NewExtractionOrchestrator:
|
||||
subject_name=t.subject_name,
|
||||
subject_id=t.subject_id,
|
||||
predicate=t.predicate,
|
||||
predicate_description=getattr(t, "predicate_description", ""),
|
||||
object_name=t.object_name,
|
||||
object_id=t.object_id,
|
||||
)
|
||||
|
||||
@@ -28,6 +28,10 @@ from app.core.memory.models.graph_models import (
|
||||
StatementChunkEdge,
|
||||
StatementEntityEdge,
|
||||
StatementNode,
|
||||
AssistantOriginalNode,
|
||||
AssistantPrunedNode,
|
||||
AssistantPrunedEdge,
|
||||
AssistantDialogEdge,
|
||||
)
|
||||
from app.core.memory.models.message_models import DialogData, TemporalInfo
|
||||
|
||||
@@ -47,6 +51,10 @@ class GraphBuildResult:
|
||||
"stmt_entity_edges",
|
||||
"entity_entity_edges",
|
||||
"perceptual_edges",
|
||||
"assistant_original_nodes",
|
||||
"assistant_pruned_nodes",
|
||||
"assistant_pruned_edges",
|
||||
"assistant_dialog_edges",
|
||||
)
|
||||
|
||||
def __init__(
|
||||
@@ -60,6 +68,10 @@ class GraphBuildResult:
|
||||
stmt_entity_edges: List[StatementEntityEdge],
|
||||
entity_entity_edges: List[EntityEntityEdge],
|
||||
perceptual_edges: List[PerceptualEdge],
|
||||
assistant_original_nodes: Optional[List[AssistantOriginalNode]] = None,
|
||||
assistant_pruned_nodes: Optional[List[AssistantPrunedNode]] = None,
|
||||
assistant_pruned_edges: Optional[List[AssistantPrunedEdge]] = None,
|
||||
assistant_dialog_edges: Optional[List[AssistantDialogEdge]] = None,
|
||||
):
|
||||
self.dialogue_nodes = dialogue_nodes
|
||||
self.chunk_nodes = chunk_nodes
|
||||
@@ -70,6 +82,10 @@ class GraphBuildResult:
|
||||
self.stmt_entity_edges = stmt_entity_edges
|
||||
self.entity_entity_edges = entity_entity_edges
|
||||
self.perceptual_edges = perceptual_edges
|
||||
self.assistant_original_nodes = assistant_original_nodes or []
|
||||
self.assistant_pruned_nodes = assistant_pruned_nodes or []
|
||||
self.assistant_pruned_edges = assistant_pruned_edges or []
|
||||
self.assistant_dialog_edges = assistant_dialog_edges or []
|
||||
|
||||
|
||||
async def build_graph_nodes_and_edges(
|
||||
@@ -343,6 +359,77 @@ async def build_graph_nodes_and_edges(
|
||||
f"实体-实体边: {len(entity_entity_edges)}"
|
||||
)
|
||||
|
||||
# ── Assistant 剪枝节点和边 ──
|
||||
assistant_original_nodes: List[AssistantOriginalNode] = []
|
||||
assistant_pruned_nodes: List[AssistantPrunedNode] = []
|
||||
assistant_pruned_edges: List[AssistantPrunedEdge] = []
|
||||
assistant_dialog_edges: List[AssistantDialogEdge] = []
|
||||
|
||||
for dialog_data in dialog_data_list:
|
||||
pruning_records = dialog_data.metadata.get("assistant_pruning_records", [])
|
||||
for record in pruning_records:
|
||||
pair_id = record["pair_id"]
|
||||
original_id = f"ao_{pair_id}"
|
||||
pruned_id = f"ap_{pair_id}"
|
||||
|
||||
# AssistantOriginal 始终创建(记录原始对话)
|
||||
original_node = AssistantOriginalNode(
|
||||
id=original_id,
|
||||
name=f"AssistantOriginal_{pair_id[:8]}",
|
||||
end_user_id=dialog_data.end_user_id,
|
||||
run_id=dialog_data.run_id,
|
||||
created_at=dialog_data.created_at,
|
||||
expired_at=dialog_data.expired_at,
|
||||
pair_id=pair_id,
|
||||
dialog_id=dialog_data.id,
|
||||
text=record["original_text"],
|
||||
)
|
||||
assistant_original_nodes.append(original_node)
|
||||
|
||||
# BELONGS_TO_DIALOG: Original → Dialogue
|
||||
assistant_dialog_edges.append(AssistantDialogEdge(
|
||||
source=original_id,
|
||||
target=dialog_data.id,
|
||||
end_user_id=dialog_data.end_user_id,
|
||||
run_id=dialog_data.run_id,
|
||||
created_at=dialog_data.created_at,
|
||||
))
|
||||
|
||||
# pruned_text 为 NULL 时不创建 AssistantPruned 节点和 PRUNED_TO 边
|
||||
if record["pruned_text"] == "NULL":
|
||||
continue
|
||||
|
||||
pruned_node = AssistantPrunedNode(
|
||||
id=pruned_id,
|
||||
name=f"AssistantPruned_{pair_id[:8]}",
|
||||
end_user_id=dialog_data.end_user_id,
|
||||
run_id=dialog_data.run_id,
|
||||
created_at=dialog_data.created_at,
|
||||
expired_at=dialog_data.expired_at,
|
||||
pair_id=pair_id,
|
||||
dialog_id=dialog_data.id,
|
||||
text=record["pruned_text"],
|
||||
memory_type=record["memory_type"],
|
||||
)
|
||||
assistant_pruned_nodes.append(pruned_node)
|
||||
|
||||
# PRUNED_TO: Original → Pruned
|
||||
assistant_pruned_edges.append(AssistantPrunedEdge(
|
||||
source=original_id,
|
||||
target=pruned_id,
|
||||
end_user_id=dialog_data.end_user_id,
|
||||
run_id=dialog_data.run_id,
|
||||
created_at=dialog_data.created_at,
|
||||
pair_id=pair_id,
|
||||
))
|
||||
|
||||
if assistant_original_nodes:
|
||||
logger.info(
|
||||
f"Assistant 剪枝节点创建完成 - "
|
||||
f"原始节点: {len(assistant_original_nodes)}, "
|
||||
f"剪枝节点: {len(assistant_pruned_nodes)}"
|
||||
)
|
||||
|
||||
if progress_callback:
|
||||
nodes_edges_stats = {
|
||||
"dialogue_nodes_count": len(dialogue_nodes),
|
||||
@@ -365,4 +452,8 @@ async def build_graph_nodes_and_edges(
|
||||
stmt_entity_edges=stmt_entity_edges,
|
||||
entity_entity_edges=entity_entity_edges,
|
||||
perceptual_edges=perceptual_edges,
|
||||
assistant_original_nodes=assistant_original_nodes,
|
||||
assistant_pruned_nodes=assistant_pruned_nodes,
|
||||
assistant_pruned_edges=assistant_pruned_edges,
|
||||
assistant_dialog_edges=assistant_dialog_edges,
|
||||
)
|
||||
|
||||
@@ -1,199 +1,130 @@
|
||||
{#
|
||||
对话级抽取与相关性判定模板(用于剪枝加速)
|
||||
输入:pruning_scene, ontology_class_infos, dialog_text, language
|
||||
- ontology_class_infos: List[{class_name: str, class_description: str}]
|
||||
输出:严格 JSON(不要包含任何多余文本),字段:
|
||||
- is_related: bool,是否与所选场景相关
|
||||
- times: [string],从对话中抽取的时间相关文本(日期、时间、时间段、有效期等)
|
||||
- ids: [string],编号/ID/订单号/申请号/账号等
|
||||
- amounts: [string],金额/费用/价格相关(带单位或货币符号)
|
||||
- contacts: [string],联系方式(电话/手机号/邮箱/微信/QQ等)
|
||||
- addresses: [string],地址/地点相关文本
|
||||
- keywords: [string],其它有助于保留的重要关键词(与场景强相关的术语)
|
||||
- preserve_keywords: [string],必须保留的情绪/兴趣/爱好/个人偏好相关词或短语片段
|
||||
你是一个面向记忆存储的 Assistant 辅助信息提取器。
|
||||
|
||||
要求:
|
||||
- 必须只输出上述 JSON,且键名一致;不得输出解释、前后缀;不得包含注释。
|
||||
- times/ids/amounts/contacts/addresses/keywords/preserve_keywords 仅抽取原文片段或规范化后的简单字符串。
|
||||
- 仅输出上述键;避免多余解释或字段。
|
||||
#}
|
||||
任务:
|
||||
|
||||
{# ── 确定场景说明 ── #}
|
||||
{% if ontology_class_infos and ontology_class_infos | length > 0 %}
|
||||
{% if language == 'en' %}
|
||||
{% set instruction = 'Scene "' ~ pruning_scene ~ '": The dialogue is relevant if it involves any of the following entity types.' %}
|
||||
{% else %}
|
||||
{% set instruction = '场景「' ~ pruning_scene ~ '」:对话涉及以下任意实体类型时视为相关。' %}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{% if language == 'en' %}
|
||||
{% set instruction = 'Scene "' ~ pruning_scene ~ '": Determine whether the dialogue content is relevant to this scene based on overall context.' %}
|
||||
{% else %}
|
||||
{% set instruction = '场景「' ~ pruning_scene ~ '」:根据对话整体内容判断是否与该场景相关。' %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
- 输入是一个 JSON,对话放在 `msgs` 数组里,且数组中只有两条消息:第一条是 `User`,第二条是 `Assistant`。
|
||||
- 你只处理第二条消息里的 `Assistant.msg`。
|
||||
- 第一条消息里的 `User.msg` 只用于理解上下文,不允许出现在输出里。
|
||||
- 你的输出必须包含两个字段:
|
||||
1. `assistant_memory_hint`
|
||||
2. `assistant_memory_type`
|
||||
|
||||
{% if language == "zh" %}
|
||||
你是一个对话内容分析助手。请对下方对话全文进行一次性分析,完成两项任务:
|
||||
1. 判断对话是否与指定场景相关;
|
||||
2. 从对话中抽取所有需要保留的重要信息片段。
|
||||
目标:
|
||||
|
||||
场景说明:{{ instruction }}
|
||||
- 从 `Assistant.msg` 中提取一条适合后续检索的极短辅助摘要。
|
||||
- 删除冗长解释、寒暄、礼貌话术、重复复述和空泛铺垫。
|
||||
- 允许做摘要式改写,但只能保留原消息中已经出现的建议、推荐、提醒、安慰、步骤或其他对后续记忆有帮助的核心内容。
|
||||
- 如果没有值得保留的信息,`assistant_memory_hint` 输出 `"NULL"`,`assistant_memory_type` 也输出 `"NULL"`。
|
||||
|
||||
{% if ontology_class_infos and ontology_class_infos | length > 0 %}
|
||||
【本场景实体类型定义】
|
||||
以下实体类型定义了本场景中哪些内容是重要的。
|
||||
凡是与以下任意类型相关的内容,都必须保留,并将关键词/短语提取到 keywords 字段:
|
||||
硬约束:
|
||||
|
||||
{% for info in ontology_class_infos %}
|
||||
- {{ info.class_name }}:{{ info.class_description }}
|
||||
{% endfor %}
|
||||
- 不得改写、复述或输出 `User.msg`。
|
||||
- 不得捏造新事实、新建议、新步骤、新材料。
|
||||
- 不得改变 `Assistant` 原始语义和立场。
|
||||
- 可以压缩、合并、重写 `Assistant.msg`,但必须忠于原内容。
|
||||
- `assistant_memory_type` 只能从以下枚举中选择:
|
||||
`comfort | suggestion | recommendation | warning | instruction | NULL`
|
||||
- 只输出严格 JSON,不要输出解释。
|
||||
|
||||
重要提示:只要对话中出现与上述任意实体类型相关的内容,即判定为相关(is_related=true)。
|
||||
{% endif %}
|
||||
压缩原则:
|
||||
|
||||
---
|
||||
【必须保留的内容(不可删除)】
|
||||
以下类型的内容无论是否与场景直接相关,都必须保留,请将其关键词/短语抽取到对应字段:
|
||||
- 时间信息:日期、时间点、时间段、有效期 → times 字段
|
||||
- 编号信息:学号、工号、订单号、申请号、账号、ID → ids 字段
|
||||
- 金额信息:价格、费用、金额(含货币符号或单位,如"100元"、"¥200")→ amounts 字段(注意:考试分数、成绩分数不属于金额,不要放入此字段)
|
||||
- 联系方式:电话、手机号、邮箱、微信、QQ → contacts 字段
|
||||
- 地址信息:地点、地址、位置 → addresses 字段
|
||||
- 场景关键词:与**当前场景**强相关的专业术语、事件名称 → keywords 字段(注意:只放与当前场景直接相关的词,跨场景的内容不要放入此字段)
|
||||
- **情绪与情感**:喜悦、悲伤、愤怒、焦虑、开心、难过、委屈、兴奋、害怕、担心、压力、感动等情绪表达 → preserve_keywords 字段
|
||||
- **兴趣与爱好**:喜欢、热爱、爱好、擅长、享受、沉迷、着迷、讨厌某事物等个人偏好表达 → preserve_keywords 字段
|
||||
- **个人情感态度**:对人际关系、情感状态的明确表达(如"我跟室友闹矛盾了"、"我都快抑郁了")→ preserve_keywords 字段
|
||||
- 注意:学业目标(如"我想考研")、成绩(如"87分")、学科偏好(如"喜欢数学")属于学业信息,不属于情绪/情感,不要放入 preserve_keywords 字段
|
||||
- 优先保留具体建议、推荐、提醒、操作步骤、风险提示、安慰动作。
|
||||
- 优先删除长背景解释、寒暄、礼貌收尾、对用户原话的重复复述。
|
||||
- 如果原文是长说明、长步骤、长菜谱,输出更短的概要版本,但不要丢掉核心意图。
|
||||
- 优先保留最短但仍有信息密度的版本。
|
||||
- `assistant_memory_hint` 尽量写成完整句,不要只写零散词组或标签。
|
||||
- 优先使用显式主语来写结果,例如:
|
||||
`安慰了用户……`
|
||||
`建议用户……`
|
||||
`推荐用户……`
|
||||
`提醒用户……`
|
||||
|
||||
【场景无关内容标记】
|
||||
请从对话中识别出与当前场景({{ pruning_scene }})**既不相关、也无语义关联**的消息片段,将其原文(或关键片段)提取到 scene_unrelated_snippets 字段。
|
||||
判断标准:
|
||||
- 与场景实体类型完全无关
|
||||
- 与场景话题没有因果/时间/情境上的关联(例如:不是"因为上课所以累"这种关联)
|
||||
- 纯粹是另一个话题的内容(如在教育场景中讨论购物、娱乐等)
|
||||
注意:有情绪/感受表达的消息即使话题不同,也可能有语义关联,请谨慎标记。
|
||||
|
||||
**重要:scene_unrelated_snippets 必须认真填写;只有当对话中确实不存在与场景无关的内容时,才可以输出空数组。**
|
||||
如果对话中存在与场景无关的内容,必须将其原文片段提取出来。
|
||||
|
||||
示例(场景=在线教育):
|
||||
- "我最近心情很差,跟室友闹矛盾了" → 与教育场景无关,加入 scene_unrelated_snippets
|
||||
- "她总是很晚回来吵到我睡觉" → 与教育场景无关,加入 scene_unrelated_snippets
|
||||
- "对,我都快抑郁了" → 与教育场景无关,加入 scene_unrelated_snippets
|
||||
- "期末考试12月25日" → 与教育场景相关,不加入 scene_unrelated_snippets
|
||||
- "我上次高数作业87分" → 与教育场景相关,不加入 scene_unrelated_snippets
|
||||
- "我的目标是考研" → 与教育场景相关,不加入 scene_unrelated_snippets
|
||||
|
||||
示例(场景=情感陪伴):
|
||||
- "我最近心情很差,跟室友闹矛盾了" → 与情感陪伴场景相关(情绪+关系),不加入 scene_unrelated_snippets
|
||||
- "对,我都快抑郁了" → 与情感陪伴场景相关(情绪),不加入 scene_unrelated_snippets
|
||||
- "期末考试12月25日,3号教学楼201室" → 与情感陪伴场景无关(教育信息),加入 scene_unrelated_snippets
|
||||
- "我上次高数作业87分,这次能考好吗" → 与情感陪伴场景无关(学业信息),加入 scene_unrelated_snippets
|
||||
- "我的目标是考研,想读应用数学" → 与情感陪伴场景无关(学业目标),加入 scene_unrelated_snippets
|
||||
|
||||
【可以删除的内容】
|
||||
以下类型的内容属于低价值信息,可以在剪枝时删除:
|
||||
- 纯寒暄问候:如"你好"、"在吗"、"拜拜"、"嗯"、"好的"、"哦"等无实质内容的短语
|
||||
- 纯表情/符号:如"[微笑]"、"😊"、"哈哈"等
|
||||
- 重复确认:如"对对对"、"是的是的"、"嗯嗯嗯"等无新增信息的重复
|
||||
- 无意义填充:如"啊"、"呢"、"嘛"等语气词单独成句
|
||||
|
||||
**注意:即使消息很短,只要包含情绪、兴趣、爱好、个人观点等有价值信息,就必须保留,不得删除。**
|
||||
例如:
|
||||
- "我好开心呀" → 包含情绪(开心),必须保留,preserve_keywords 中加入"开心"
|
||||
- "好喜欢打羽毛球呀" → 包含兴趣爱好(喜欢打羽毛球),必须保留,preserve_keywords 中加入"喜欢打羽毛球"
|
||||
- "我好难过" → 包含情绪(难过),必须保留,preserve_keywords 中加入"难过"
|
||||
- "太好啦!看到你开心,我也跟着心情亮起来" → 包含情绪,必须保留,preserve_keywords 中加入"开心"
|
||||
|
||||
---
|
||||
对话全文:
|
||||
"""
|
||||
{{ dialog_text }}
|
||||
"""
|
||||
|
||||
只输出严格 JSON(键固定、顺序不限):
|
||||
Few-shot 示例 1
|
||||
输入:
|
||||
{
|
||||
"is_related": <true 或 false>,
|
||||
"times": [<string>...],
|
||||
"ids": [<string>...],
|
||||
"amounts": [<string>...],
|
||||
"contacts": [<string>...],
|
||||
"addresses": [<string>...],
|
||||
"keywords": [<string>...],
|
||||
"preserve_keywords": [<string>...],
|
||||
"scene_unrelated_snippets": [<string>...]
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我室友小雯这学期一直在准备毕业论文,这两周都在改答辩 PPT。她下周三答辩,我有点担心她会紧张。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "听起来你很关心小雯,也希望她答辩顺利。她现在紧张其实很正常,很多人在答辩前都会这样。"
|
||||
}
|
||||
]
|
||||
}
|
||||
{% else %}
|
||||
You are a dialogue content analysis assistant. Please analyze the full dialogue below in one pass and complete two tasks:
|
||||
1. Determine whether the dialogue is relevant to the specified scene;
|
||||
2. Extract all important information fragments that must be preserved.
|
||||
|
||||
Scenario Description: {{ instruction }}
|
||||
|
||||
{% if ontology_class_infos and ontology_class_infos | length > 0 %}
|
||||
[Scene Entity Type Definitions]
|
||||
The following entity types define what content is important in this scene.
|
||||
Content related to ANY of these types must be preserved and extracted into the keywords field:
|
||||
|
||||
{% for info in ontology_class_infos %}
|
||||
- {{ info.class_name }}: {{ info.class_description }}
|
||||
{% endfor %}
|
||||
|
||||
Important: If the dialogue contains content related to any of the entity types above, mark it as relevant (is_related=true).
|
||||
{% endif %}
|
||||
|
||||
---
|
||||
[MUST PRESERVE (cannot be deleted)]
|
||||
The following types of content must always be preserved regardless of scene relevance. Extract their keywords/phrases into the corresponding fields:
|
||||
- Time information: dates, time points, durations, expiry dates → times field
|
||||
- ID information: student IDs, employee IDs, order numbers, application numbers, account IDs → ids field
|
||||
- Amount information: prices, fees, amounts (with currency symbols or units, e.g., "$100", "¥200") → amounts field (Note: exam scores and grades are NOT amounts, do not put them here)
|
||||
- Contact information: phone numbers, emails, WeChat, QQ → contacts field
|
||||
- Address information: locations, addresses, places → addresses field
|
||||
- Scene keywords: professional terms and event names strongly related to **the current scene** → keywords field (Note: only put terms directly related to the current scene; cross-scene content should not be placed here)
|
||||
- **Emotions and feelings**: joy, sadness, anger, anxiety, happiness, feeling down, feeling wronged, excitement, fear, worry, stress, being moved, etc. → preserve_keywords field
|
||||
- **Interests and hobbies**: likes, loves, hobbies, good at, enjoys, obsessed with, hates something, personal preferences → preserve_keywords field
|
||||
- **Personal emotional attitudes**: clear expressions about interpersonal relationships or emotional states (e.g., "I had a fight with my roommate", "I'm almost depressed") → preserve_keywords field
|
||||
- Note: Academic goals (e.g., "I want to pursue a master's degree"), grades (e.g., "87 points"), and subject preferences (e.g., "I like math") are academic information, NOT emotions/feelings — do not put them in preserve_keywords
|
||||
|
||||
[Scene-Unrelated Content Marking]
|
||||
Please identify message snippets in the dialogue that are **neither relevant to nor semantically associated with** the current scene ({{ pruning_scene }}), and extract their original text (or key fragments) into the scene_unrelated_snippets field.
|
||||
Criteria:
|
||||
- Completely unrelated to the scene's entity types
|
||||
- No causal/temporal/contextual association with the scene topic (e.g., "feeling tired because of class" IS associated)
|
||||
- Purely belongs to a different topic (e.g., discussing shopping or entertainment in an education scene)
|
||||
Note: Messages with emotional/feeling expressions may still have semantic association even if the topic differs — mark carefully.
|
||||
|
||||
[CAN BE DELETED]
|
||||
The following types of content are low-value and can be removed during pruning:
|
||||
- Pure greetings: e.g., "hello", "are you there", "bye", "ok", "yeah" — short phrases with no substantive content
|
||||
- Pure emojis/symbols: e.g., "[smile]", "😊", "haha"
|
||||
- Repetitive confirmations: e.g., "yes yes yes", "right right", "uh huh" — repetitions with no new information
|
||||
- Meaningless fillers: standalone interjections like "ah", "well", "hmm"
|
||||
|
||||
**Note: Even if a message is short, if it contains emotions, interests, hobbies, or personal opinions, it MUST be preserved.**
|
||||
Examples:
|
||||
- "I'm so happy!" → contains emotion (happy), must preserve; add "happy" to preserve_keywords
|
||||
- "I love playing badminton!" → contains interest (love playing badminton), must preserve; add "love playing badminton" to preserve_keywords
|
||||
- "I feel so sad" → contains emotion (sad), must preserve; add "sad" to preserve_keywords
|
||||
|
||||
---
|
||||
Full Dialogue:
|
||||
"""
|
||||
{{ dialog_text }}
|
||||
"""
|
||||
|
||||
Output strict JSON only (fixed keys, order doesn't matter):
|
||||
输出:
|
||||
{
|
||||
"is_related": <true or false>,
|
||||
"times": [<string>...],
|
||||
"ids": [<string>...],
|
||||
"amounts": [<string>...],
|
||||
"contacts": [<string>...],
|
||||
"addresses": [<string>...],
|
||||
"keywords": [<string>...],
|
||||
"preserve_keywords": [<string>...],
|
||||
"scene_unrelated_snippets": [<string>...]
|
||||
"assistant_memory_hint": "安慰了用户对室友答辩状态的担忧。",
|
||||
"assistant_memory_type": "comfort"
|
||||
}
|
||||
{% endif %}
|
||||
|
||||
Few-shot 示例 2
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我最近总失眠,已经两周了,想先自己调一调。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "如果你想先自己调整,可以先减少咖啡因摄入,尤其下午和晚上尽量不要再喝咖啡或浓茶,同时把睡前刷手机的时间压缩一些,尽量固定上床时间,先连续观察几天。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "建议用户减少咖啡因摄入、减少睡前刷手机时间并固定上床时间。",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
|
||||
Few-shot 示例 3
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我晚上想做个简单点的减脂餐,最好二十分钟左右能搞定。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "你可以做一个鸡胸肉沙拉碗,主要用鸡胸肉、生菜、黄瓜和圣女果。鸡胸肉简单煎熟切块后和蔬菜拌在一起,调味尽量用橄榄油加一点醋,不要放太多沙拉酱。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "推荐用户做鸡胸肉沙拉碗,并提醒用户调味时少放沙拉酱。",
|
||||
"assistant_memory_type": "recommendation"
|
||||
}
|
||||
|
||||
Few-shot 示例 4
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "剪枝引擎和萃取引擎我都想先做,但是估计都会比较花时间。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "这两个模块都涉及比较多的设计和实现细节。如果你想先推进,我建议先拆需求,再分别评估开发量。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "建议用户先拆需求,再分别评估两个模块的开发量。",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
|
||||
现在处理下面这个输入。
|
||||
输入:
|
||||
{{ dialog_text }}
|
||||
|
||||
只输出严格 JSON:
|
||||
{
|
||||
"assistant_memory_hint": "<string or NULL>",
|
||||
"assistant_memory_type": "comfort | suggestion | recommendation | warning | instruction | NULL"
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
{{ input_json }}
|
||||
{%- endmacro %}
|
||||
|
||||
===Tasks===
|
||||
=== Tasks ===
|
||||
|
||||
{% if language == "zh" %}
|
||||
你的任务是从提供的目标文本中识别并提取陈述句,并为每条陈述句标注以下信息:
|
||||
@@ -11,11 +11,12 @@
|
||||
- statement_text
|
||||
- statement_type
|
||||
- temporal_type
|
||||
- has_emotional_state
|
||||
- has_unsolved_reference
|
||||
- valid_at
|
||||
- invalid_at
|
||||
|
||||
每条输出都应是一个结构化的记忆候选陈述句。
|
||||
每条输出都应是一个结构化的候选记忆陈述句。
|
||||
{% else %}
|
||||
Your task is to identify and extract declarative statements from the provided target text, and annotate each extracted statement with:
|
||||
|
||||
@@ -23,6 +24,7 @@ Your task is to identify and extract declarative statements from the provided ta
|
||||
- statement_text
|
||||
- statement_type
|
||||
- temporal_type
|
||||
- has_emotional_state
|
||||
- has_unsolved_reference
|
||||
- valid_at
|
||||
- invalid_at
|
||||
@@ -30,7 +32,7 @@ Your task is to identify and extract declarative statements from the provided ta
|
||||
Each output item should be a structured candidate memory statement.
|
||||
{% endif %}
|
||||
|
||||
===Inputs===
|
||||
=== Inputs ===
|
||||
{% if language == "zh" %}
|
||||
|
||||
- chunk_id: chunk 唯一 ID
|
||||
@@ -48,7 +50,7 @@ Each output item should be a structured candidate memory statement.
|
||||
- supporting_context.msgs: ordered contextual messages, which may include User and Assistant messages
|
||||
{% endif %}
|
||||
|
||||
===Scope===
|
||||
=== Scope ===
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 只从 `target_content` 中提取陈述句。
|
||||
@@ -66,12 +68,12 @@ Each output item should be a structured candidate memory statement.
|
||||
- Every output statement must be directly grounded in wording from `target_content`.
|
||||
{% endif %}
|
||||
|
||||
===Extraction Rules===
|
||||
=== Extraction Rules ===
|
||||
{% if language == "zh" %}
|
||||
拆分规则:
|
||||
|
||||
- 以“一个完整意思”为单位提取陈述句,通常对应一个完整句子或一个自然语义片段。
|
||||
- 默认保留句子级结构;只有当一个句子内部包含两个及以上彼此独立、拆开后明显更清楚的重要信息时,才拆成多条。
|
||||
- 默认保留句子级结构;只有当一个句子内部包含两个及以上彼此独立、拆开后明显更清晰的重要信息时,才拆成多条。
|
||||
- 宁可多提取,也不要漏掉 `target_content` 中能独立成立、且语义稳定的 statement。
|
||||
- 但不要为了提高覆盖率而引入原文没有的信息,或输出语义不成立的 statement。
|
||||
|
||||
@@ -82,6 +84,9 @@ Each output item should be a structured candidate memory statement.
|
||||
|
||||
共指消解:
|
||||
|
||||
- 先完成最终的 `statement_text` 改写,再判断 `has_unsolved_reference`。
|
||||
- `has_unsolved_reference` 必须基于最终输出的 `statement_text` 判断,而不是基于原始 `target_content` 里是否出现过代词来判断。
|
||||
- 如果最终 `statement_text` 已经把引用改写成具体实体名,例如“助理恭喜用户”“小李点了一杯美式咖啡”,则 `has_unsolved_reference` 必须是 `false`。
|
||||
- 如果可以解析到具体实体名,优先输出具体实体名,并将 `has_unsolved_reference` 设为 `false`。
|
||||
- 如果不能解析到具体实体名,但可以解析到最小必要描述,则输出该最小必要描述,并将 `has_unsolved_reference` 设为 `true`。
|
||||
- 如果既不能解析到具体实体名,也不能稳定解析到最小必要描述,则保留最小必要原始表达,并将 `has_unsolved_reference` 设为 `true`。
|
||||
@@ -117,6 +122,15 @@ statement_type:
|
||||
- 如果没有明确时间,不要编造时间。
|
||||
- 对于点状事件(例如某天发生的一次考试、一次见面、一次提交),`valid_at` 和 `invalid_at` 都应填写为该事件的起止边界;不要只填 `valid_at`。
|
||||
|
||||
情感状态判断:
|
||||
|
||||
- `has_emotional_state` 只用于判断当前 statement 是否反映了用户的情感状态。
|
||||
- 如果根据当前 statement 和 supporting_context,可以判断用户当前存在某种情感状态,则输出 `true`。
|
||||
- 该字段不是情绪分类字段,不要求输出具体情绪类型。
|
||||
- 明确情绪表达例如“开心”“难过”“紧张”“有压力”通常应标为 `true`。
|
||||
- 即使没有明确情绪词,只要语义足以表明用户当前具有情感状态,也可以标为 `true`,例如“我很好”。
|
||||
- 如果只是客观事实、动作描述或安排,且无法从当前上下文稳定判断用户情感状态,则输出 `false`。
|
||||
|
||||
temporal_type:
|
||||
|
||||
- `STATIC`:相对稳定、持续性的状态、身份、属性、长期偏好、长期关系、长期职业或长期居住状态;若带起始时间,可填 `valid_at`,`invalid_at` 必须为 `"NULL"`。
|
||||
@@ -129,7 +143,7 @@ temporal_type:
|
||||
- 允许为解决代词、省略和时间歧义做最小必要改写。
|
||||
- 不要引入原文未明确表达的新事实、额外推断或风格化概括。
|
||||
{% else %}
|
||||
Granularity:
|
||||
Splitting rules:
|
||||
- Extract statements at the level of one complete thought, usually one full sentence or one natural semantic unit.
|
||||
- Preserve sentence-level structure by default; split only when a sentence contains two or more independent and important pieces of information that become clearly easier to understand when separated.
|
||||
- Prefer higher recall: do not miss independently valid and semantically stable statements in `target_content`.
|
||||
@@ -149,6 +163,9 @@ Coreference resolution:
|
||||
|
||||
Clear vs unresolved reference:
|
||||
|
||||
- First produce the final rewritten `statement_text`, then decide `has_unsolved_reference`.
|
||||
- `has_unsolved_reference` must be judged from the final `statement_text`, not from whether the original `target_content` once contained a pronoun.
|
||||
- If the final `statement_text` already resolves the reference to a concrete named entity, such as “The assistant congratulates the user” or “Xiao Li ordered an Americano,” then `has_unsolved_reference` must be `false`.
|
||||
- A reference is fully resolved only if the current `supporting_context` can map it to a concrete named entity.
|
||||
- `Zhang San`, `Old Zhang` when clearly resolved to Zhang San, `Professor Li`, and `Teacher Wang` are clear references.
|
||||
- `the user's friend`, `the user's coworker`, `a teacher`, and `an interviewer` are allowed outputs but still count as unresolved.
|
||||
@@ -177,6 +194,15 @@ Temporal rules:
|
||||
- If no explicit time is available, do not invent one.
|
||||
- For point-in-time events such as a single exam, a meeting, or a submission on one day, populate both `valid_at` and `invalid_at`; do not fill only `valid_at`.
|
||||
|
||||
Emotional-state detection:
|
||||
|
||||
- `has_emotional_state` is used only to judge whether the current statement reflects the user's emotional state.
|
||||
- If the current statement plus supporting context is sufficient to infer that the user currently has some emotional state, output `true`.
|
||||
- This field is not an emotion category field. Do not infer or output a specific emotion label here.
|
||||
- Explicit emotion wording such as “happy”, “sad”, “nervous”, or “under pressure” should usually be marked `true`.
|
||||
- Statements without explicit emotion words may still be `true` if the user's emotional state is reasonably inferable, such as “I am fine.”
|
||||
- If the statement is only an objective fact or action description and the user's emotional state cannot be stably inferred from the current context, output `false`.
|
||||
|
||||
temporal_type:
|
||||
|
||||
- `STATIC`: relatively stable, ongoing states, identities, attributes, long-term preferences, long-term relationships, occupations, or residence states.
|
||||
@@ -190,7 +216,7 @@ Rewrite boundary:
|
||||
- Do not introduce unsupported facts, extra inference, or stylistic summarization.
|
||||
{% endif %}
|
||||
|
||||
===Examples===
|
||||
=== Examples ===
|
||||
{% if language == "zh" %}
|
||||
示例 1:
|
||||
示例输入: {
|
||||
@@ -219,6 +245,7 @@ Rewrite boundary:
|
||||
"statement_text": "李教授这学期要求很严。",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2023-09-04T18:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -228,17 +255,19 @@ Rewrite boundary:
|
||||
"statement_text": "李教授讲课清晰透彻。",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "ATEMPORAL",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "NULL",
|
||||
"invalid_at": "NULL"
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_m1n2o3p4",
|
||||
"statement_text": "李教授的气场很吓人。",
|
||||
"statement_text": "用户每次被李教授点名都有点发怵。",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "ATEMPORAL",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": true,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "NULL",
|
||||
"valid_at": "2023-09-04T18:00:00",
|
||||
"invalid_at": "NULL"
|
||||
}
|
||||
]
|
||||
@@ -248,13 +277,13 @@ Rewrite boundary:
|
||||
示例输入: {
|
||||
"chunk_id": "chunk_b2c3d4e5",
|
||||
"end_user_id": "eu_12345678",
|
||||
"target_content": "我最近在学 Python,每天晚上都会练一个小时。这周还打算先把基础语法和函数部分过一遍。",
|
||||
"target_content": "我最近在学Python,每天晚上都会练一个小时。这周还打算先把基础语法和函数部分过一遍。",
|
||||
"target_message_date": "2026-04-01T00:00:00",
|
||||
"supporting_context": {
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我最近在学 Python。"
|
||||
"msg": "我最近在学Python。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
@@ -268,27 +297,30 @@ Rewrite boundary:
|
||||
"statements": [
|
||||
{
|
||||
"statement_id": "stmt_m3n4o5p6",
|
||||
"statement_text": "用户最近在学 Python。",
|
||||
"statement_text": "用户最近在学Python。",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_q7r8s9t0",
|
||||
"statement_text": "用户最近每天晚上都会练一个小时 Python。",
|
||||
"statement_text": "用户最近每晚都会练一个小时Python。",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_u1v2w3x4",
|
||||
"statement_text": "用户这周打算先复习 Python 的基础语法和函数部分。",
|
||||
"statement_text": "用户这周打算先复习Python的基础语法和函数部分。",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -323,6 +355,7 @@ Rewrite boundary:
|
||||
"statement_text": "用户觉得那两个有点难。",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": true,
|
||||
"has_unsolved_reference": true,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -332,6 +365,7 @@ Rewrite boundary:
|
||||
"statement_text": "用户昨晚看了半天那两个还是没太搞明白。",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": true,
|
||||
"valid_at": "2026-03-31T00:00:00",
|
||||
"invalid_at": "2026-03-31T23:59:59"
|
||||
@@ -341,6 +375,7 @@ Rewrite boundary:
|
||||
"statement_text": "如果周末还弄不出来,用户可能会去问助教。",
|
||||
"statement_type": "OTHER",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": true,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -375,6 +410,7 @@ Example Output: {
|
||||
"statement_text": "Professor Li is very strict this semester.",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2023-09-04T18:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -384,17 +420,19 @@ Example Output: {
|
||||
"statement_text": "Professor Li explains things clearly.",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "ATEMPORAL",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "NULL",
|
||||
"invalid_at": "NULL"
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_m1n2o3p4",
|
||||
"statement_text": "Professor Li's presence is intimidating.",
|
||||
"statement_text": "The user gets nervous every time Professor Li calls on the user.",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "ATEMPORAL",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": true,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "NULL",
|
||||
"valid_at": "2023-09-04T18:00:00",
|
||||
"invalid_at": "NULL"
|
||||
}
|
||||
]
|
||||
@@ -427,6 +465,7 @@ Example Output: {
|
||||
"statement_text": "The user has been learning Python recently.",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -436,6 +475,7 @@ Example Output: {
|
||||
"statement_text": "The user has recently been practicing Python for an hour every night.",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -445,6 +485,7 @@ Example Output: {
|
||||
"statement_text": "The user plans to review basic Python syntax and functions first this week.",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": false,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -479,6 +520,7 @@ Example Output: {
|
||||
"statement_text": "The user thinks those two things are difficult.",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": true,
|
||||
"has_unsolved_reference": true,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -488,6 +530,7 @@ Example Output: {
|
||||
"statement_text": "The user spent a long time last night looking at those two things but still did not really understand them.",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": true,
|
||||
"valid_at": "2026-03-31T00:00:00",
|
||||
"invalid_at": "2026-03-31T23:59:59"
|
||||
@@ -497,6 +540,7 @@ Example Output: {
|
||||
"statement_text": "If the user still cannot finish them by the weekend, the user may ask the TA.",
|
||||
"statement_type": "OTHER",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
"has_unsolved_reference": true,
|
||||
"valid_at": "2026-04-01T00:00:00",
|
||||
"invalid_at": "NULL"
|
||||
@@ -504,7 +548,7 @@ Example Output: {
|
||||
]
|
||||
}
|
||||
{% endif %}
|
||||
===End of Examples===
|
||||
=== End of Examples ===
|
||||
|
||||
{% if language == "zh" %}
|
||||
最终输出前检查:
|
||||
@@ -512,7 +556,9 @@ Example Output: {
|
||||
- 是否只保留 `target_content` 中可直接支持的陈述句
|
||||
- 如果主语是用户,是否统一写“用户”
|
||||
- 非用户主体是否尽量写成具体名称;若无法做到,是否已正确标记 `has_unsolved_reference = true`
|
||||
- 如果最终 `statement_text` 已经落到具体实体名,`has_unsolved_reference` 是否已经改为 `false`
|
||||
- statement_type 是否合法,且没有把一般事实机械标成 `OPINION`
|
||||
- `has_emotional_state` 是否仅用于判断是否存在情感状态,而没有被当作情绪分类字段
|
||||
- temporal_type 是否与 valid_at / invalid_at 一致
|
||||
- 输出是否严格符合 JSON schema
|
||||
{% else %}
|
||||
@@ -520,7 +566,9 @@ Example Output: {
|
||||
- Keep only statements directly supported by `target_content`
|
||||
- If the subject is the user, render it as “the user”
|
||||
- Render non-user subjects as concrete names when possible; otherwise mark `has_unsolved_reference = true`
|
||||
- If the final `statement_text` already resolves the reference to a concrete named entity, ensure `has_unsolved_reference = false`
|
||||
- Ensure statement_type is valid and do not mechanically label ordinary facts as `OPINION`
|
||||
- Ensure `has_emotional_state` is used only for emotional-state presence detection, not emotion classification
|
||||
- Ensure temporal_type is consistent with valid_at and invalid_at
|
||||
- Ensure the output strictly matches the JSON schema
|
||||
{% endif %}
|
||||
@@ -555,8 +603,7 @@ Example Output: {
|
||||
- Preserve the original language and do not translate.
|
||||
{% endif %}
|
||||
|
||||
现在处理下面这个输入:
|
||||
{{ render_input() }}
|
||||
现在处理下面这个输入:{{ render_input() }}
|
||||
|
||||
Return only a JSON object matching the schema below:
|
||||
{
|
||||
@@ -566,6 +613,7 @@ Return only a JSON object matching the schema below:
|
||||
"statement_text": "string",
|
||||
"statement_type": "FACT | OPINION | OTHER",
|
||||
"temporal_type": "STATIC | DYNAMIC | ATEMPORAL",
|
||||
"has_emotional_state": "boolean",
|
||||
"has_unsolved_reference": "boolean",
|
||||
"valid_at": "string | NULL",
|
||||
"invalid_at": "string | NULL"
|
||||
|
||||
@@ -5,13 +5,21 @@ Extract entities and knowledge triplets from the given statement.
|
||||
重要:
|
||||
|
||||
- `name`、`subject_name`、`object_name` 默认保持原文中的表面形式,不要翻译。
|
||||
- 但对用户自指表达,如“我”“我的”“我自己”,统一规范为 `用户`。
|
||||
- 但在抽取前,必须先做指代解析。
|
||||
- 用户自指表达,如“我”“我的”“我自己”,一律规范为 `用户`。
|
||||
- 非用户自指代词或指示表达,如“他”“她”“它”“这个”“那个”“这家”“那家”“这里”“那里”,如果能从 `supporting_context` 中稳定解析出具体指代,则必须替换为具体指代实体名。
|
||||
- 如果上述代词或指示表达不能稳定解析,则整条跳过。
|
||||
- 命名关系中新出现的称呼、别名、昵称、产品名保持原样,不做替换。
|
||||
- `description` 使用中文。
|
||||
- `type`、`predicate`、`type_description`、`predicate_description` 一律使用中文。
|
||||
{% else %}
|
||||
Important:
|
||||
- Keep `name`, `subject_name`, and `object_name` in their original surface form from the source text. Do not translate them.
|
||||
- Exception: normalize user self-reference such as "I", "me", and "myself" to `用户`.
|
||||
- Keep `name`, `subject_name`, and `object_name` in their original surface form from the source text by default.
|
||||
- But you MUST resolve references before extraction.
|
||||
- Normalize user self-reference such as "I", "me", "my", and "myself" to `用户`.
|
||||
- For non-user pronouns or demonstratives such as "he", "she", "it", "this", "that", "this company", "that place", "here", or "there", if a stable referent can be resolved from `supporting_context`, replace them with the resolved entity name.
|
||||
- If such references cannot be resolved stably, skip the entire statement.
|
||||
- Newly introduced names in naming or alias expressions must stay in their original form.
|
||||
- Generate `description` in English.
|
||||
- Always generate `type`, `predicate`, `type_description`, and `predicate_description` in Chinese.
|
||||
{% endif %}
|
||||
@@ -69,11 +77,13 @@ Primary statement to analyze:
|
||||
开始抽取前,先检查 `has_unsolved_reference`。
|
||||
|
||||
- 如果 `has_unsolved_reference` 是 `true`,不要抽取任何内容。
|
||||
- 此时必须返回:
|
||||
- 如果 `statement_text` 中仍存在无法稳定解析的代词、指示词或省略主体,也应视为 unresolved reference。
|
||||
- 这两种情况下都必须返回:
|
||||
{% else %}
|
||||
Before any extraction, check `has_unsolved_reference`.
|
||||
- If `has_unsolved_reference` is `true`, do not extract anything.
|
||||
- In that case, return exactly:
|
||||
- If unresolved pronouns, demonstratives, or omitted subjects still remain in `statement_text`, treat the statement as unresolved as well.
|
||||
- In either case, return exactly:
|
||||
{% endif %}
|
||||
|
||||
```json
|
||||
@@ -86,8 +96,10 @@ Primary statement to analyze:
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 不要在引用未解析时尝试部分抽取。
|
||||
- 不要保留“他”“这个”“那个”这类原代词继续输出实体或关系。
|
||||
{% else %}
|
||||
- Do not attempt partial extraction when the reference is unresolved.
|
||||
- Do not keep unresolved forms such as "he", "this", or "that" as extracted entities or relation arguments.
|
||||
{% endif %}
|
||||
|
||||
===Input Boundary===
|
||||
@@ -100,6 +112,8 @@ Primary statement to analyze:
|
||||
- 如果 `supporting_context.msgs` 中的 Assistant 消息包含总结、猜测、解释或改写,这些内容只能作为理解辅助,不能直接作为抽取来源。
|
||||
- `statement_type`、`temporal_type`、`valid_at`、`invalid_at` 是辅助理解字段,不是抽取目标。
|
||||
- 对 `statement_text` 中的用户自指表达,要统一规范成实体 `用户`。
|
||||
- 对其他可稳定解析的代词或指示表达,要替换为具体指代实体名后再抽取。
|
||||
- 对命名关系中新出现的称呼、别名、昵称、产品名,不要因为上下文可推断其所指而直接改写,它们应保持原样作为实体名。
|
||||
{% else %}
|
||||
- Treat `statement_text` as the only direct extraction target.
|
||||
- Use `supporting_context.msgs` only to interpret references, ellipsis, subject identity, and necessary background in `statement_text`.
|
||||
@@ -108,41 +122,253 @@ Primary statement to analyze:
|
||||
- If Assistant messages in `supporting_context.msgs` contain summary, guess, interpretation, or rephrasing, use them only as interpretive support and never as a direct extraction source.
|
||||
- Treat `statement_type`, `temporal_type`, `valid_at`, and `invalid_at` as auxiliary context, not extraction targets.
|
||||
- Normalize user self-reference in `statement_text` to the entity `用户`.
|
||||
- Replace other resolvable pronouns or demonstratives with their resolved entity names before extraction.
|
||||
- For newly introduced names in naming or alias expressions, do not rewrite them even if the context reveals who they refer to; keep them as entity names.
|
||||
{% endif %}
|
||||
|
||||
===预定义实体类型===
|
||||
只能使用以下中文实体类型。如果没有完全匹配的类型,请选择最接近的一项,不要发明新类型。
|
||||
|
||||
- `人物`: 现实中的具体个人
|
||||
- `组织`: 公司、机构、团队、社群等组织性主体
|
||||
- `群体`: 未具名或泛指的一组人
|
||||
- `地点`: 具有地理或空间意义的位置
|
||||
- `设施`: 建筑、场馆、房间、实验室等功能性空间
|
||||
- `地址`: 具体地址或位置描述
|
||||
- `物品`: 一般具体物体
|
||||
- `设备`: 具有明确用途的工具或器材
|
||||
- `产品`: 可被制造、购买、使用的产品
|
||||
- `交通工具`: 用于出行或运输的工具
|
||||
- `文档`: 文章、报告、表格、说明等文档
|
||||
- `媒体`: 图片、音频、视频等媒体对象
|
||||
- `网站`: 网站、网页或互联网平台
|
||||
- `软件`: 软件、应用、系统或数字服务
|
||||
- `账号`: 账号、账户、用户档案
|
||||
- `标识符`: ID、编号、用户名、工号等标识
|
||||
- `联系方式`: 电话、邮箱、社交账号等联系方式
|
||||
- `角色`: 某实体承担的社会或功能角色
|
||||
- `职业`: 工作或职业身份
|
||||
- `技能`: 可学习或掌握的能力
|
||||
- `知识主题`: 主题、领域、方法、理论或知识概念
|
||||
- `目标`: 希望达成的结果
|
||||
- `偏好`: 稳定的喜欢、倾向或偏爱
|
||||
- `习惯`: 重复出现的行为模式
|
||||
- `语言`: 自然语言或编程语言
|
||||
- `金额`: 金额或货币数值
|
||||
- `数量`: 带或不带单位的数量值
|
||||
- `货币`: 货币单位
|
||||
- `组织部门`: 组织内部的部门或业务单元
|
||||
- `称呼`: 用于指代或称呼实体的名字
|
||||
- `人物`
|
||||
- definition: 可稳定指向、可被当作具体个体区分和归并的个人实体。
|
||||
- positive_examples: `用户`、`张三`、`王教授`、`小林`
|
||||
- negative_examples: `老师`、`导师`、`学生`、`他们`
|
||||
- notes: 强调“这个人是谁”,不强调他承担的社会身份;用户自指统一归为 `用户`。
|
||||
|
||||
- `组织`
|
||||
- definition: 公司、机构、学校、实验室、团队、社群等组织性主体。
|
||||
- positive_examples: `腾讯`、`清华大学`、`机器人公司`、`实验室`
|
||||
- negative_examples: `办公室`、`同事们`、`导师`
|
||||
- notes: 如果表达的是组织内部单元,当前一级仍优先并入 `组织`,除非后续单独扩展子类。
|
||||
|
||||
- `群体`
|
||||
- definition: 边界相对稳定、可被当作整体引用的一组人。
|
||||
- positive_examples: `我的朋友`、`同事们`、`实验室成员`
|
||||
- negative_examples: `他们`、`一些人`、`一个朋友`
|
||||
- notes: 只用于边界相对稳定的人群;边界不稳或 unresolved 的表达不要归入 `群体`。
|
||||
|
||||
- `智能体`
|
||||
- definition: 具有行动、交互或执行能力的非人主体,如机器人、AI 或其他智能主体。
|
||||
- positive_examples: `机器人查票员`、`家务机器人`、`智能助手`
|
||||
- negative_examples: `手机`、`电脑`、`机器人公司`
|
||||
- notes: 如果对象只是普通设备,不归入 `智能体`;只有在叙述中被当作主体行动或交互时才使用。
|
||||
|
||||
- `角色职业`
|
||||
- definition: 人物承担的社会角色、功能身份或职业身份。
|
||||
- positive_examples: `导师`、`老师`、`学生`、`医生`、`程序员`
|
||||
- negative_examples: `张三`、`王教授`、`我的朋友`
|
||||
- notes: 强调“这个人是什么身份”,不强调“这个人是谁”;如果文本落到具体个人,优先用 `人物`。
|
||||
|
||||
- `地点设施`
|
||||
- definition: 具有地理意义或功能性空间意义的位置与场所。
|
||||
- positive_examples: `北京`、`巴黎`、`图书馆`、`办公室`、`教室`
|
||||
- negative_examples: `这里`、`那里`、`朝这边`、`明天去的地方`
|
||||
- notes: 地理地点和功能场所当前一级合并;未稳定解析的位置指代表达不要抽取。
|
||||
|
||||
- `物品设备`
|
||||
- definition: 可被持有、使用、携带的具体物体、设备、工具或交通工具。
|
||||
- positive_examples: `手机`、`电脑`、`相机`、`自行车`
|
||||
- negative_examples: `微信`、`GitHub`、`会员服务`
|
||||
- notes: 交通工具当前并入此类;数字服务不归入本类。
|
||||
|
||||
- `产品服务`
|
||||
- definition: 可被购买、使用、消费或订阅的产品或服务。
|
||||
- positive_examples: `iPhone`、`健身课`、`会员服务`
|
||||
- negative_examples: `微信`、`GitHub`、`手机`
|
||||
- notes: 具体商品和服务当前一级合并;纯软件平台优先归入 `软件平台`。
|
||||
|
||||
- `软件平台`
|
||||
- definition: 软件、应用、网站、在线平台或数字服务系统。
|
||||
- positive_examples: `微信`、`GitHub`、`ChatGPT`、`飞书`
|
||||
- negative_examples: `iPhone`、`会员服务`、`手机号`
|
||||
- notes: 软件、网站、平台当前一级合并;如果语境强调的是账号本身,改用 `账号`。
|
||||
|
||||
- `账号`
|
||||
- definition: 账户、账号、用户档案类实体。
|
||||
- positive_examples: `GitHub账号`、`微信号`
|
||||
- negative_examples: `用户名`、`工号`、`邮箱`
|
||||
- notes: 与 `标识符`、`联系方式` 分开;账号是主体可持有的账户对象。
|
||||
|
||||
- `标识符`
|
||||
- definition: 用于识别实体的编号、ID、用户名、学号、工号等标识。
|
||||
- positive_examples: `学号`、`工号`、`用户名`
|
||||
- negative_examples: `GitHub账号`、`手机号`
|
||||
- notes: 当前允许保留,但通常只有在存在明确识别关系时才值得抽取。
|
||||
|
||||
- `联系方式`
|
||||
- definition: 可用于联系实体的电话、邮箱、社交联系地址。
|
||||
- positive_examples: `手机号`、`邮箱`、`微信联系方式`
|
||||
- negative_examples: `用户名`、`GitHub账号`
|
||||
- notes: 当前允许保留,但通常只有在存在明确联系关系时才值得抽取。
|
||||
|
||||
- `文档媒体`
|
||||
- definition: 文章、报告、表格、图片、音频、视频等内容载体。
|
||||
- positive_examples: `简历`、`论文`、`照片`、`录音`
|
||||
- negative_examples: `微积分`、`微信`、`学号`
|
||||
- notes: 文档与媒体当前一级合并;如果只是内容主题,不归入本类。
|
||||
|
||||
- `知识能力`
|
||||
- definition: 可学习、掌握、使用或讨论的知识主题、技能、学科或语言。
|
||||
- positive_examples: `微积分`、`机器学习`、`写作`、`Python`、`中文`
|
||||
- negative_examples: `紧张`、`成功`、`意义`
|
||||
- notes: 不包含情绪、心理状态、抽象结果或价值判断;这些应写入 `description`。
|
||||
|
||||
- `偏好习惯目标`
|
||||
- definition: 用户稳定的偏好、重复习惯,以及具体、明确、用户特异且值得长期保留的目标。
|
||||
- positive_examples: `喜欢安静环境`、`晨跑`、`通过雅思`
|
||||
- negative_examples: `紧张`、`开心`、`成功`、`回报`
|
||||
- notes: 这是高风险类型;只允许稳定偏好、重复习惯、具体目标,不允许抽象愿望或情绪状态。
|
||||
|
||||
- `称呼别名`
|
||||
- definition: 用于指代或称呼实体的名字。
|
||||
- positive_examples: `山哥`、`老张`、`X1`
|
||||
- negative_examples: `导师`、`程序员`、`好人`
|
||||
- notes: 只用于名字性表达,不用于角色、职业、评价词。
|
||||
|
||||
实体类型总规则:
|
||||
|
||||
- unresolved 或边界不稳的表达,不因“看起来像名词”就创建实体。
|
||||
- 情绪、心理状态、金额、数量、普通时间、一次性动作短语,默认不作为独立实体类型抽取。
|
||||
- 抽象命题片段、泛化结果、价值判断,默认不创建实体;如有保留价值,应写入相关高价值实体的 `description`。
|
||||
|
||||
实体类型选择原则:
|
||||
|
||||
- 优先保留对用户画像、偏好、长期身份、稳定关系或持续兴趣有记忆价值的实体类型。
|
||||
- 对于“努力”“回报”“意义”“成功”这类泛化概念、抽象命题片段或价值判断,默认不要仅因句中出现就创建实体。
|
||||
- `群体` 只用于边界相对稳定、可被当作整体引用的人群;像“他们”“一些人”“一个朋友”这类边界不稳或 unresolved 的表达不要归入 `群体`。
|
||||
- `偏好习惯目标` 只能用于稳定偏好、重复习惯或具体明确的用户目标,不能把抽象结果、泛因果终点、空泛愿望或情绪状态强行归入其中。
|
||||
- 当前阶段不抽取情绪状态实体;像“紧张”“开心”“难过”“焦虑”“放松”这类情绪或心理状态,不要归入 `知识能力`、`偏好习惯目标` 或其他现有类型。
|
||||
|
||||
===关系本体大类===
|
||||
以下大类是当前 `predicate` 本体树的第一层,用于帮助理解和约束后面的具体关系白名单。输出具体 `predicate` 时仍然必须使用后文列出的细关系,而不是直接输出这些大类名称。
|
||||
|
||||
- `命名关系`
|
||||
- definition: 表达实体名称、别名、称呼之间的对应或使用关系。
|
||||
- covered_predicates: `别名属于`、`使用称呼`
|
||||
- positive_examples: `山哥 -> 别名属于 -> 用户`、`用户的朋友 -> 使用称呼 -> 山哥`
|
||||
- negative_examples: `导师 -> 别名属于 -> 用户`、`好人 -> 使用称呼 -> 用户`
|
||||
- notes: 只处理名字性表达,不处理角色、职业、评价词。
|
||||
- status: `enabled`
|
||||
|
||||
- `类型归属关系`
|
||||
- definition: 表达实体属于某种类别,或主体承担某种角色/职业身份的关系。
|
||||
- covered_predicates: `属于类型`、`担任角色`、`从事职业`
|
||||
- positive_examples: `王教授 -> 担任角色 -> 导师`、`张三 -> 从事职业 -> 程序员`
|
||||
- negative_examples: `张三 -> 担任角色 -> 山哥`、`用户 -> 从事职业 -> 紧张`
|
||||
- notes: 用于“是什么”,不用于“叫什么”。
|
||||
- status: `enabled`
|
||||
|
||||
- `成员隶属关系`
|
||||
- definition: 表达主体属于某个组织、群体或集合的成员归属关系。
|
||||
- covered_predicates: `成员属于`
|
||||
- positive_examples: `张三 -> 成员属于 -> 实验室成员`、`用户 -> 成员属于 -> 社群`
|
||||
- negative_examples: `他们 -> 成员属于 -> 学校`、`一个朋友 -> 成员属于 -> 班级`
|
||||
- notes: 前提是主体和归属对象都足够稳定;边界不稳的人群不要硬抽。
|
||||
- status: `enabled`
|
||||
|
||||
- `任职服务关系`
|
||||
- definition: 表达人物或主体在组织中的工作、任职或服务关系。
|
||||
- covered_predicates: `任职于`
|
||||
- positive_examples: `张明 -> 任职于 -> 腾讯`、`王教授 -> 任职于 -> 清华大学`
|
||||
- negative_examples: `张明 -> 任职于 -> 导师`、`用户 -> 任职于 -> 明天的面试`
|
||||
- notes: 优先用于人物到组织的稳定供职关系。
|
||||
- status: `enabled`
|
||||
|
||||
- `空间位置关系`
|
||||
- definition: 表达实体与地点、场所、空间位置之间的稳定位置关系。
|
||||
- covered_predicates: `位于`、`拥有位置`、`居住于`
|
||||
- positive_examples: `用户 -> 居住于 -> 巴黎`、`办公室 -> 位于 -> 北京`
|
||||
- negative_examples: `用户 -> 位于 -> 明天下午三点`、`这里 -> 位于 -> 学校`
|
||||
- notes: 普通时间表达和未解析位置指代不进入此类。
|
||||
- status: `enabled`
|
||||
|
||||
- `前往到访关系`
|
||||
- definition: 表达主体前往、到访某地点、场所、组织、课程或活动对象的关系。
|
||||
- covered_predicates: `前往`
|
||||
- positive_examples: `用户 -> 前往 -> 图书馆`、`用户 -> 前往 -> 公司`
|
||||
- negative_examples: `用户 -> 前往 -> 明天下午三点`、`用户 -> 前往 -> 复习微积分任务`
|
||||
- notes: 当前应优先用于稳定倾向或有记忆价值的到访对象,不鼓励因一次性日程而过抽。
|
||||
- status: `enabled`
|
||||
|
||||
- `组成包含关系`
|
||||
- definition: 表达部分与整体、包含与被包含之间的结构关系。
|
||||
- covered_predicates: `组成部分`、`包含部分`
|
||||
- positive_examples: `教研组 -> 组成部分 -> 学院`、`学院 -> 包含部分 -> 教研组`
|
||||
- negative_examples: `用户 -> 组成部分 -> 图书馆`、`微积分 -> 包含部分 -> 用户`
|
||||
- notes: 只用于结构性组成关系,不用于临时搭配或抽象联系。
|
||||
- status: `enabled`
|
||||
|
||||
- `拥有持有关系`
|
||||
- definition: 表达主体拥有、持有、配有某对象、账号、联系方式或标识的关系。
|
||||
- covered_predicates: `拥有`、`拥有账号`、`拥有联系方式`、`标识为`
|
||||
- positive_examples: `用户 -> 拥有账号 -> GitHub账号`、`用户 -> 拥有联系方式 -> 邮箱`、`用户 -> 标识为 -> 学号`
|
||||
- negative_examples: `用户 -> 拥有 -> 紧张`、`努力 -> 拥有 -> 回报`
|
||||
- notes: 不用于抽象命题、情绪状态或口号式表达。
|
||||
- status: `enabled`
|
||||
|
||||
- `使用采用关系`
|
||||
- definition: 表达主体使用、采用某工具、产品、平台、语言或资源的关系。
|
||||
- covered_predicates: `使用`、`使用语言`
|
||||
- positive_examples: `用户 -> 使用 -> 微信`、`用户 -> 使用语言 -> 中文`
|
||||
- negative_examples: `用户 -> 使用 -> 成功`、`用户 -> 使用语言 -> 紧张`
|
||||
- notes: 以后若扩展“采用方法”,也可挂在本大类下。
|
||||
- status: `enabled`
|
||||
|
||||
- `创建生产关系`
|
||||
- definition: 表达主体创建、撰写、生产某对象或结果的关系。
|
||||
- covered_predicates: `创建了`、`由…创建`、`撰写了`
|
||||
- positive_examples: `用户 -> 撰写了 -> 简历`、`简历 -> 由…创建 -> 用户`
|
||||
- negative_examples: `用户 -> 创建了 -> 明天下午三点`、`努力 -> 由…创建 -> 用户`
|
||||
- notes: 只用于明确的生产、创作、撰写关系。
|
||||
- status: `enabled`
|
||||
|
||||
- `知识学习关系`
|
||||
- definition: 表达主体与知识、技能、学科、语言等知识能力对象之间的认知、学习或兴趣关系。
|
||||
- covered_predicates: `了解`、`学习`、`感兴趣于`
|
||||
- positive_examples: `用户 -> 学习 -> 微积分`、`用户 -> 了解 -> 机器学习`、`用户 -> 感兴趣于 -> 心理学`
|
||||
- negative_examples: `用户 -> 学习 -> 紧张`、`用户 -> 感兴趣于 -> 成功`
|
||||
- notes: 关系对象应是 `知识能力` 类,而不是情绪、价值判断或抽象结果。
|
||||
- status: `enabled`
|
||||
|
||||
- `偏好目标关系`
|
||||
- definition: 表达主体对对象的稳定偏好、厌恶,或对具体明确目标的指向关系。
|
||||
- covered_predicates: `偏好`、`不喜欢`、`想要`
|
||||
- positive_examples: `用户 -> 偏好 -> 安静环境`、`用户 -> 不喜欢 -> 辛辣食物`、`用户 -> 想要 -> 通过雅思`
|
||||
- negative_examples: `用户 -> 想要 -> 成功`、`用户 -> 偏好 -> 紧张`、`用户 -> 不喜欢 -> 努力就会有回报`
|
||||
- notes: 这是高风险大类;`想要` 只用于具体、明确、用户特异的目标,不用于抽象愿望。
|
||||
- status: `enabled`
|
||||
|
||||
- `职责责任关系`
|
||||
- definition: 表达主体负责某项工作、职责、事务或领域的关系。
|
||||
- covered_predicates: `负责`
|
||||
- positive_examples: `张三 -> 负责 -> 招聘工作`、`王教授 -> 负责 -> 实验室项目`
|
||||
- negative_examples: `张三 -> 负责 -> 紧张`、`用户 -> 负责 -> 成功`
|
||||
- notes: 关系对象应是具体职责或事务,不应是情绪或抽象结果。
|
||||
- status: `enabled`
|
||||
|
||||
- `沟通交互关系`
|
||||
- definition: 表达两个主体之间发生沟通、交流或交互的关系。
|
||||
- covered_predicates: `沟通于`
|
||||
- positive_examples: `用户 -> 沟通于 -> 张三`、`导师 -> 沟通于 -> 学生`
|
||||
- negative_examples: `用户 -> 沟通于 -> 紧张`、`图书馆 -> 沟通于 -> 微积分`
|
||||
- notes: 两端通常都应是可作为交互主体的实体。
|
||||
- status: `enabled`
|
||||
|
||||
- `提及关系`
|
||||
- definition: 表达主体或文本明确提到某实体的关系。
|
||||
- covered_predicates: `提到`
|
||||
- positive_examples: `用户 -> 提到 -> 腾讯`、`文档 -> 提到 -> 张三`
|
||||
- negative_examples: `用户 -> 提到 -> 努力`、`用户 -> 提到 -> 回报`、`用户 -> 提到 -> 紧张`
|
||||
- notes: 受限大类;不用于保留泛化概念、抽象命题片段、情绪状态或仅在句面上出现但没有记忆价值的对象。
|
||||
- status: `restricted`
|
||||
|
||||
- `一般关联关系`
|
||||
- definition: 表达两个实体之间存在明确、稳定、值得保留,但当前无更精确谓词可用的关联关系。
|
||||
- covered_predicates: `关联于`、`相关于`
|
||||
- positive_examples: `项目 -> 关联于 -> 实验室`、`账号 -> 相关于 -> 平台`
|
||||
- negative_examples: `努力 -> 相关于 -> 回报`、`用户 -> 关联于 -> 紧张`、`成功 -> 相关于 -> 意义`
|
||||
- notes: 受限大类;不能作为失败兜底关系,不能用来连接抽象概念、口号式表达或无法成立的关系。
|
||||
- status: `restricted`
|
||||
|
||||
===预定义关系类型===
|
||||
只能使用以下中文关系类型。如果没有完全匹配的关系,请选择最接近的一项,不要发明新关系。
|
||||
@@ -172,60 +398,90 @@ Primary statement to analyze:
|
||||
- `感兴趣于`: 主体对某主题感兴趣
|
||||
- `偏好`: 主体偏好某对象、方式或主题
|
||||
- `不喜欢`: 主体不喜欢某对象、方式或主题
|
||||
- `想要`: 主体想获得、达成或拥有某对象或结果
|
||||
- `想要`: 主体想获得、达成或拥有具体、明确、用户特异且值得保留的对象或目标,不用于抽象结果、泛化愿望或口号式表达
|
||||
- `负责`: 主体负责某项工作、职责或领域
|
||||
- `沟通于`: 两个实体之间发生沟通或交流
|
||||
- `拥有联系方式`: 实体具有某联系方式
|
||||
- `拥有账号`: 实体具有某账号
|
||||
- `标识为`: 实体由某标识符标识
|
||||
- `使用语言`: 主体使用某语言
|
||||
- `相关于`: 当存在明确联系但无更精确关系时使用的弱关系
|
||||
- `相关于`: 当存在明确、稳定且具有记忆价值的联系,但无更精确关系时使用的弱关系;不得用于泛化概念、抽象命题片段、口号式表达或仅为补全结构的联系
|
||||
|
||||
===Extraction Order===
|
||||
{% if language == "zh" %}
|
||||
按以下顺序执行:
|
||||
|
||||
0. 先检查 `has_unsolved_reference`;如果为 `true`,直接返回空结果。
|
||||
1. 识别 `statement_text` 中值得抽取的稳定实体。
|
||||
2. 判断这些实体之间是否存在可由预定义关系类型表达的有效关系。
|
||||
3. 最后补充实体字段和关系字段。
|
||||
1. 先做指代解析:用户自指统一替换为 `用户`;其他可稳定解析的代词或指示表达替换为具体指代实体名。
|
||||
2. 如果仍存在无法稳定解析的代词、指示词或省略主体,直接返回空结果。
|
||||
3. 识别 `statement_text` 中值得抽取的稳定实体。
|
||||
4. 判断这些实体之间是否存在可由预定义关系类型表达的有效关系。
|
||||
5. 最后补充实体字段和关系字段。
|
||||
|
||||
不要让附加字段主导整个抽取过程。
|
||||
{% else %}
|
||||
Follow this order:
|
||||
|
||||
0. First check `has_unsolved_reference`; if it is `true`, immediately return the empty result.
|
||||
1. Identify stable entities worth extracting from `statement_text`.
|
||||
2. Determine whether any valid relations between those entities can be expressed using the predefined Chinese predicates.
|
||||
3. Finally fill auxiliary entity and predicate fields.
|
||||
1. Resolve references first: normalize user self-reference to `用户`; replace other stably resolvable pronouns or demonstratives with their resolved entity names.
|
||||
2. If unresolved pronouns, demonstratives, or omitted subjects still remain, immediately return the empty result.
|
||||
3. Identify stable entities worth extracting from `statement_text`.
|
||||
4. Determine whether any valid relations between those entities can be expressed using the predefined Chinese predicates.
|
||||
5. Finally fill auxiliary entity and predicate fields.
|
||||
|
||||
Do not let auxiliary fields drive the extraction process.
|
||||
{% endif %}
|
||||
|
||||
===Guidelines===
|
||||
|
||||
**Reference Resolution:**
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 指代解析优先于实体抽取和关系抽取。
|
||||
- 所有用户自指表达都必须规范成 `用户`,包括“我”“我的”“我自己”等。
|
||||
- 对“他”“她”“它”“这个”“那个”“这家”“那家”“这里”“那里”等非用户自指表达,若上下文可稳定解析,则必须用解析后的具体实体名替换。
|
||||
- 若非用户自指表达无法稳定解析,则整条跳过,不输出部分结果。
|
||||
- 新出现的称呼、别名、昵称、产品名不是待消解代词,应保持原样。
|
||||
{% else %}
|
||||
- Reference resolution happens before entity or relation extraction.
|
||||
- All user self-reference must be normalized to `用户`, including forms such as "I", "me", "my", and "myself".
|
||||
- For non-user references such as "he", "she", "it", "this", "that", "this company", "that place", "here", or "there", if the context supports a stable resolution, replace them with the resolved entity name.
|
||||
- If a non-user reference cannot be resolved stably, skip the entire statement and do not output partial results.
|
||||
- Newly introduced names, aliases, nicknames, and product names are not pronouns to be resolved; keep them in their original form.
|
||||
{% endif %}
|
||||
|
||||
**Entity Extraction:**
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 只有当某个名字、概念、对象、群体或地点在当前陈述中承担明确语义角色,或是理解有效关系所必需时,才创建实体。
|
||||
- 不要因为表面上出现了名词、修饰词或短语,就机械地创建实体。
|
||||
- 不要把完整命题、因果链、价值判断或口号式表达拆成多个低价值实体;例如“努力就会有回报”默认不应抽取出“努力”或“回报”作为实体。
|
||||
- 普通时间表达默认不抽取为实体,包括日期、时刻、明天、下周、今晚八点等。
|
||||
- 一次性动作短语默认不抽取为实体,例如“复习微积分”“去图书馆学习”“参观卢浮宫”。
|
||||
- 不要为了表达一句带时间或地点的行动,而额外创造“任务”“计划”“事件”实体。
|
||||
- 但如果动作明确把主体和某个稳定实体连接起来,可以保留该稳定实体,并抽取轻关系。例如“我去图书馆”“我去公司开会”“我去上课”“我去看演唱会”可以抽取 `前往`。
|
||||
- 当句子只是在讨论一般道理、抽象规律、空泛结果或非个体化概念,而这些概念本身不构成可复用记忆时,不要创建实体。
|
||||
- 如果句子表达的是用户的观点、信念、判断、愿望或目标倾向,但其中抽象对象不值得作为独立实体保留,则只保留相关高价值实体,不要再创建这些低价值对象实体,并把未抽取的抽象内容压缩写入相关实体的 `description`;例如“用户认为努力就会有回报”应只保留 `用户`,并在 `description` 中体现“用户认为努力就会有回报”。
|
||||
- 对于未抽取的抽象实体、抽象命题片段或泛化结果,只要它们对理解该高价值实体有帮助,就应优先写入该实体的 `description`,而不是改用宽泛关系或补造弱实体。
|
||||
- 当前阶段同样不要把情绪或心理状态抽成实体;如果句子里出现“紧张”“开心”“难过”“焦虑”“放松”等,应写入相关高价值实体的 `description`,而不是把它们标成 `知识能力`、`偏好习惯目标` 或其他近似类型。
|
||||
- 如果陈述里有值得保留的实体信息,但没有有效关系,可以只返回 `entities`,并把 `triplets` 设为 `[]`。
|
||||
- `name` 默认保持原文中的表面形式,不要翻译;但用户自指要统一写成 `用户`。
|
||||
- `name` 默认保持原文中的表面形式,但用户自指必须写成 `用户`,可稳定解析的其他代词必须替换为具体指代实体名。
|
||||
- `description` 必须使用中文。
|
||||
- `type` 和 `type_description` 必须使用上方预定义的中文标签与中文定义。
|
||||
{% else %}
|
||||
- Extract entities only when they play a clear semantic role in the statement or are necessary for understanding a valid relation.
|
||||
- Do not mechanically create entities for every noun, modifier, or surface mention.
|
||||
- Do not split generic propositions, causal slogans, or value judgments into low-value abstract entities. For example, "effort brings reward" should not create standalone entities for "effort" or "reward" by default.
|
||||
- Do not extract ordinary time expressions as entities, including dates, timestamps, "tomorrow", "next week", or "8 PM tonight".
|
||||
- Do not extract one-off action phrases as entities, such as "review calculus", "study in the library", or "visit the Louvre".
|
||||
- Do not create extra "task", "plan", or "event" entities just to represent an action with time or location modifiers.
|
||||
- But if an action clearly connects the subject to a stable entity, keep that stable entity and use a light relation. For example, statements like "I go to the library", "I go to the office", "I go to class", or "I go to a concert" can use `前往`.
|
||||
- If the sentence is only about a generic principle, abstract outcome, or non-personalized concept that is not worth remembering on its own, do not create an entity for it.
|
||||
- If a statement expresses the user's belief, judgment, opinion, wish, or goal tendency but the referenced abstract concepts are not worth keeping as standalone entities, keep only the relevant high-value entities, do not create those low-value concept entities, and compress the unextracted abstract content into the relevant entity `description`. For example, "the user believes effort brings reward" should keep only `用户` and reflect that belief in `description`.
|
||||
- For abstract entities, proposition fragments, or generic outcomes that are not extracted, prefer writing them into the relevant retained entity's `description` when they help preserve the memory, instead of switching to a broad relation or inventing a weak entity.
|
||||
- In the current stage, do not extract emotional or psychological states as entities. States such as nervousness, happiness, sadness, anxiety, or relief should be written into the relevant retained entity's `description` rather than mapped to `知识能力`, `偏好习惯目标`, or any other approximate type.
|
||||
- If the statement contains entity-worthy content but no valid relation, it is acceptable to return `entities` with `triplets: []`.
|
||||
- Keep `name` in its original surface form from the source text; exception: normalize user self-reference to `用户`.
|
||||
- Keep `name` in its original surface form by default, but write user self-reference as `用户` and replace other stably resolvable references with their resolved entity names.
|
||||
- `description` must be in English.
|
||||
- `type` and `type_description` must use the predefined Chinese labels and Chinese definitions above.
|
||||
{% endif %}
|
||||
@@ -233,11 +489,11 @@ Do not let auxiliary fields drive the extraction process.
|
||||
**Semantic Memory (`is_explicit_memory`):**
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 只有当实体明显属于语义知识记忆中的抽象概念时,才设为 `true`,例如概念、定义、理论、方法和知识主题。
|
||||
- 只有当实体明显属于语义知识记忆中的抽象知识对象时,才设为 `true`,例如概念、定义、理论、方法以及 `知识能力` 中的知识类对象。
|
||||
- 对人、组织、地点、具体物体以及大多数实例级实体,一律设为 `false`。
|
||||
- 除非非常明确,否则默认设为 `false`。
|
||||
{% else %}
|
||||
- Use `true` only for abstract conceptual entities that belong in semantic knowledge memory, such as concepts, definitions, theories, methods, and knowledge topics.
|
||||
- Use `true` only for abstract knowledge-oriented entities that belong in semantic knowledge memory, such as concepts, definitions, theories, methods, and knowledge-oriented members of `知识能力`.
|
||||
- Use `false` for people, organizations, locations, concrete objects, and most instance-level entities.
|
||||
- Default to `false` unless the entity is clearly an abstract knowledge concept.
|
||||
{% endif %}
|
||||
@@ -269,8 +525,14 @@ Do not let auxiliary fields drive the extraction process.
|
||||
- 如果没有任何预定义关系适用,返回 `triplets: []`。
|
||||
- 排除语气词、模糊情绪、孤立名词和缺乏明确关系结构的片段。
|
||||
- 如果陈述不支持有效关系,不要强行构造 triplet。
|
||||
- 不要为了保留一句抽象判断或泛因果命题,而强行构造“用户-拥有-努力”“努力-导致-回报”这类低价值 triplet。
|
||||
- `提到` 不用于保留泛化概念、抽象命题片段、口号式表达或仅在句面上出现但无记忆价值的对象。
|
||||
- `相关于` 不用于补救无法成立的关系,也不用于连接“努力”“回报”“成功”“意义”这类抽象概念。
|
||||
- `想要` 只用于具体、明确、用户特异且值得保留的对象或目标;如果想要的内容过于抽象或空泛,不要抽取 `想要`,应改写进相关实体的 `description`。
|
||||
- 不要为了保留情绪或心理状态而创建实体或弱关系;像“紧张”“开心”“难过”“焦虑”默认应写入相关实体的 `description`。
|
||||
- 对于这类观点句,如果相关概念本身不值得保留,也不要只为了补全结构而额外创建对应实体;允许输出仅包含 `用户` 的 `entities` 和空的 `triplets`。
|
||||
- 如果 `has_unsolved_reference` 是 `true`,不要抽取实体或 triplets。
|
||||
- `subject_name` 和 `object_name` 默认保持原文中的表面形式,不要翻译;但用户自指要统一写成 `用户`。
|
||||
- `subject_name` 和 `object_name` 默认保持原文中的表面形式,但用户自指必须写成 `用户`,可稳定解析的其他代词必须替换为具体指代实体名。
|
||||
- `predicate_description` 必须直接复用对应 `predicate` 的中文定义。
|
||||
- 不要把普通时间表达作为 triplet 的宾语。
|
||||
- 不要为了表达一次性计划、安排、日程而强行构造关系。
|
||||
@@ -282,8 +544,14 @@ Do not let auxiliary fields drive the extraction process.
|
||||
- If no predefined relation fits, return `triplets: []`.
|
||||
- Exclude fillers, vague emotions, standalone nouns, and fragments without a clear relational structure.
|
||||
- If the statement does not support a valid relation, do not force a triplet.
|
||||
- Do not force low-value triplets such as "user-has-effort" or "effort-causes-reward" just to preserve a generic causal belief or slogan-like proposition.
|
||||
- Do not use `提到` to preserve generic concepts, proposition fragments, slogan-like expressions, or surface mentions that have no memory value.
|
||||
- Do not use `相关于` as a rescue relation when no real relation exists, and do not connect abstract concepts such as "effort", "reward", "success", or "meaning" with it.
|
||||
- Use `想要` only for concrete, specific, user-grounded objects or goals worth retaining; if the desired content is too abstract or generic, do not extract `想要`; instead, write it into the relevant entity's `description`.
|
||||
- Do not create entities or weak relations just to preserve emotional or psychological states; states such as nervousness, happiness, sadness, or anxiety should normally be written into the relevant retained entity's `description`.
|
||||
- For opinion statements of this kind (generic beliefs, abstract wishes, or slogan-like propositions), if the referenced concepts are not worth keeping, do not create extra entities just to complete a structure; it is valid to return only the `用户` entity with empty `triplets`.
|
||||
- If `has_unsolved_reference` is `true`, do not extract entities or triplets.
|
||||
- Keep `subject_name` and `object_name` in their original surface form; exception: normalize user self-reference to `用户`.
|
||||
- Keep `subject_name` and `object_name` in their original surface form by default, but write user self-reference as `用户` and replace other stably resolvable references with their resolved entity names.
|
||||
- `predicate_description` must directly reuse the corresponding Chinese definition of `predicate`.
|
||||
- Do not use ordinary time expressions as triplet objects.
|
||||
- Do not force relations just to encode one-off plans, schedules, or actions.
|
||||
@@ -320,6 +588,7 @@ Do not let auxiliary fields drive the extraction process.
|
||||
1. `alias -> 别名属于 -> canonical entity`
|
||||
2. `caller -> 使用称呼 -> alias`
|
||||
- 如果施称方在句中明确出现且对语义重要,不要省略它。
|
||||
- 在命名关系中,新出现的称呼、别名、昵称、产品名必须保持原样,不要被替换成其所指实体名。
|
||||
{% else %}
|
||||
- Distinguish between a naming fact and a naming act when the statement expresses both.
|
||||
- If the statement says that some entity or group calls or addresses another entity by a name, and the caller is explicitly mentioned in `statement_text`, extract the caller as an entity.
|
||||
@@ -328,6 +597,7 @@ Do not let auxiliary fields drive the extraction process.
|
||||
1. `alias -> 别名属于 -> canonical entity`
|
||||
2. `caller -> 使用称呼 -> alias`
|
||||
- Do not drop the caller entity if it is explicitly stated and semantically important to the naming relation.
|
||||
- In naming relations, newly introduced names, aliases, nicknames, or product names must stay in their original form rather than being replaced by their referent.
|
||||
{% endif %}
|
||||
|
||||
**subject_name / object_name Consistency:**
|
||||
@@ -352,29 +622,28 @@ Output:
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "居住于", "predicate_description": "人物居住在某地点", "object_name": "巴黎", "object_id": 1}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "现实中的具体个人", "description": "居住在巴黎的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "巴黎", "type": "地点", "type_description": "具有地理或空间意义的位置", "description": "用户居住的城市", "is_explicit_memory": false}
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "居住在巴黎的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "巴黎", "type": "地点设施", "type_description": "具有地理意义或功能性空间意义的位置与场所。", "description": "用户居住的城市", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 2**
|
||||
Statement: "张明在腾讯工作,负责 AI 产品开发。"
|
||||
Statement: "他在腾讯工作。"
|
||||
Input condition: supporting context has already made it clear that “他” refers to “张明”.
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "张明", "subject_id": 0, "predicate": "任职于", "predicate_description": "主体在某组织中工作或任职", "object_name": "腾讯", "object_id": 1},
|
||||
{"subject_name": "张明", "subject_id": 0, "predicate": "负责", "predicate_description": "主体负责某项工作、职责或领域", "object_name": "AI 产品开发", "object_id": 2}
|
||||
{"subject_name": "张明", "subject_id": 0, "predicate": "任职于", "predicate_description": "主体在某组织中工作或任职", "object_name": "腾讯", "object_id": 1}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "张明", "type": "人物", "type_description": "现实中的具体个人", "description": "在腾讯负责 AI 产品开发的人员", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "腾讯", "type": "组织", "type_description": "公司、机构、团队、社群等组织性主体", "description": "张明任职的公司", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "AI 产品开发", "type": "知识主题", "type_description": "主题、领域、方法、理论或知识概念", "description": "张明负责的工作方向", "is_explicit_memory": true}
|
||||
{"entity_idx": 0, "name": "张明", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "在腾讯工作的人员", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "腾讯", "type": "组织", "type_description": "公司、机构、学校、实验室、团队、社群等组织性主体。", "description": "张明任职的公司", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 3**
|
||||
Statement: "我明天下午三点去图书馆复习微积分。"
|
||||
Statement: "我常去图书馆学微积分。"
|
||||
|
||||
Output:
|
||||
{
|
||||
@@ -383,9 +652,9 @@ Output:
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "学习", "predicate_description": "主体正在学习某知识主题或技能", "object_name": "微积分", "object_id": 2}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "现实中的具体个人", "description": "提到自己安排的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "图书馆", "type": "设施", "type_description": "建筑、场馆、房间、实验室等功能性空间", "description": "用户提到要去的地点", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "微积分", "type": "知识主题", "type_description": "主题、领域、方法、理论或知识概念", "description": "用户提到的学习主题", "is_explicit_memory": true}
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "经常去图书馆学习微积分的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "图书馆", "type": "地点设施", "type_description": "具有地理意义或功能性空间意义的位置与场所。", "description": "用户经常前往学习的地点", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "微积分", "type": "知识能力", "type_description": "可学习、掌握、使用或讨论的知识主题、技能、学科或语言。", "description": "用户经常学习的主题", "is_explicit_memory": true}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -409,9 +678,86 @@ Output:
|
||||
{"subject_name": "我的朋友", "subject_id": 1, "predicate": "使用称呼", "predicate_description": "主体使用某个名字来称呼另一实体", "object_name": "山哥", "object_id": 2}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "现实中的具体个人", "description": "被朋友称作山哥的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "我的朋友", "type": "群体", "type_description": "未具名或泛指的一组人", "description": "使用山哥这一称呼的人群", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "山哥", "type": "称呼", "type_description": "用于指代或称呼实体的名字", "description": "朋友用来称呼用户的昵称", "is_explicit_memory": false}
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "被朋友称作山哥的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "我的朋友", "type": "群体", "type_description": "边界相对稳定、可被当作整体引用的一组人。", "description": "使用山哥这一称呼的人群", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "山哥", "type": "称呼别名", "type_description": "用于指代或称呼实体的名字。", "description": "朋友用来称呼用户的昵称", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 6**
|
||||
Statement: "我认为努力就会有回报。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "认为努力就会有回报的说话者", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 7**
|
||||
Statement: "我想要成功。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "想要成功的说话者", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 8**
|
||||
Statement: "我最近有点紧张,不过这很正常。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "最近有些紧张并认为这很正常的说话者", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 9**
|
||||
Statement: "王教授是导师。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "王教授", "subject_id": 0, "predicate": "担任角色", "predicate_description": "主体承担某个角色", "object_name": "导师", "object_id": 1}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "王教授", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "承担导师角色的具体个人", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "导师", "type": "角色职业", "type_description": "人物承担的社会角色、功能身份或职业身份。", "description": "王教授承担的角色身份", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 10**
|
||||
Statement: "我的GitHub账号用户名是chen4。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "拥有账号", "predicate_description": "实体具有某账号", "object_name": "GitHub账号", "object_id": 1},
|
||||
{"subject_name": "GitHub账号", "subject_id": 1, "predicate": "标识为", "predicate_description": "实体由某标识符标识", "object_name": "chen4", "object_id": 2}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "拥有该 GitHub 账号的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "GitHub账号", "type": "账号", "type_description": "账户、账号、用户档案类实体。", "description": "用户拥有的 GitHub 账号", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "chen4", "type": "标识符", "type_description": "用于识别实体的编号、ID、用户名、学号、工号等标识。", "description": "该 GitHub 账号对应的用户名标识", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 11**
|
||||
Statement: "机器人查票员和我沟通。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "机器人查票员", "subject_id": 0, "predicate": "沟通于", "predicate_description": "两个实体之间发生沟通或交流", "object_name": "用户", "object_id": 1}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "机器人查票员", "type": "智能体", "type_description": "具有行动、交互或执行能力的非人主体,如机器人、AI 或其他智慧体。", "description": "与用户发生沟通的机器人主体", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "与机器人查票员沟通的说话者", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
===End of Examples===
|
||||
@@ -424,10 +770,11 @@ JSON 要求:
|
||||
- 字符串内部引号必须转义为 `\"`
|
||||
- 不要使用中文引号
|
||||
- 字符串值中不要换行
|
||||
- `name`、`subject_name`、`object_name` 默认保持原文中的表面形式,不要翻译;但用户自指必须规范成 `用户`
|
||||
- `name`、`subject_name`、`object_name` 默认保持原文中的表面形式,但用户自指必须规范成 `用户`,可稳定解析的其他代词必须替换为具体指代实体名
|
||||
- `description` 必须使用中文
|
||||
- `type`、`predicate`、`type_description`、`predicate_description` 必须使用上方预定义的中文标签和中文说明
|
||||
- 如果 `has_unsolved_reference` 是 `true`,输出必须是 `{"entities": [], "triplets": []}`
|
||||
- 如果存在无法稳定解析的代词或指示表达,输出也必须是 `{"entities": [], "triplets": []}`
|
||||
- 如果没有有效 triplet,返回 `"triplets": []`
|
||||
{% else %}
|
||||
JSON Requirements:
|
||||
@@ -435,10 +782,11 @@ JSON 要求:
|
||||
- Escape internal quotes using `\"`
|
||||
- No Chinese quotation marks
|
||||
- No line breaks inside string values
|
||||
- `name`, `subject_name`, and `object_name` must keep the original surface form from the source text, except user self-reference which must be normalized to `用户`
|
||||
- `name`, `subject_name`, and `object_name` keep their original surface forms by default, but user self-reference must be normalized to `用户` and other stably resolvable references must be replaced by their resolved entity names
|
||||
- `description` must be in English
|
||||
- `type`, `predicate`, `type_description`, and `predicate_description` must use the predefined Chinese labels and Chinese definitions above
|
||||
- If `has_unsolved_reference` is `true`, the output must be `{"entities": [], "triplets": []}`
|
||||
- If any pronoun or demonstrative expression still cannot be stably resolved, the output must also be `{"entities": [], "triplets": []}`
|
||||
- If no valid triplet exists, return `"triplets": []`
|
||||
{% endif %}
|
||||
|
||||
|
||||
@@ -46,6 +46,12 @@ async def create_fulltext_indexes():
|
||||
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
|
||||
""")
|
||||
|
||||
# 创建 AssistantPruned 剪枝文本全文索引
|
||||
await connector.execute_query("""
|
||||
CREATE FULLTEXT INDEX assistantPrunedFulltext IF NOT EXISTS FOR (p:AssistantPruned) ON EACH [p.text]
|
||||
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
|
||||
""")
|
||||
|
||||
finally:
|
||||
await connector.close()
|
||||
|
||||
@@ -135,6 +141,17 @@ async def create_vector_indexes():
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
""")
|
||||
|
||||
# AssistantPruned text embedding index (optional, for semantic search on pruned hints)
|
||||
await connector.execute_query("""
|
||||
CREATE VECTOR INDEX assistant_pruned_embedding_index IF NOT EXISTS
|
||||
FOR (p:AssistantPruned)
|
||||
ON p.text_embedding
|
||||
OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 1024,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
""")
|
||||
finally:
|
||||
await connector.close()
|
||||
|
||||
@@ -179,6 +196,22 @@ async def create_unique_constraints():
|
||||
"""
|
||||
)
|
||||
|
||||
# AssistantOriginal.id unique
|
||||
await connector.execute_query(
|
||||
"""
|
||||
CREATE CONSTRAINT assistant_original_id_unique IF NOT EXISTS
|
||||
FOR (o:AssistantOriginal) REQUIRE o.id IS UNIQUE
|
||||
"""
|
||||
)
|
||||
|
||||
# AssistantPruned.id unique
|
||||
await connector.execute_query(
|
||||
"""
|
||||
CREATE CONSTRAINT assistant_pruned_id_unique IF NOT EXISTS
|
||||
FOR (p:AssistantPruned) REQUIRE p.id IS UNIQUE
|
||||
"""
|
||||
)
|
||||
|
||||
finally:
|
||||
await connector.close()
|
||||
|
||||
|
||||
@@ -1363,154 +1363,60 @@ ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
SEARCH_STATEMENTS_BY_KEYWORD = """
|
||||
CALL db.index.fulltext.queryNodes("statementsFulltext", $query) YIELD node AS s, score
|
||||
WHERE ($end_user_id IS NULL OR s.end_user_id = $end_user_id)
|
||||
OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
|
||||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity)
|
||||
RETURN s.id AS id,
|
||||
s.statement AS statement,
|
||||
s.end_user_id AS end_user_id,
|
||||
s.chunk_id AS chunk_id,
|
||||
s.created_at AS created_at,
|
||||
s.expired_at AS expired_at,
|
||||
s.valid_at AS valid_at,
|
||||
properties(s)['invalid_at'] AS invalid_at,
|
||||
c.id AS chunk_id_from_rel,
|
||||
collect(DISTINCT e.id) AS entity_ids,
|
||||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||||
s.last_access_time AS last_access_time,
|
||||
COALESCE(s.access_count, 0) AS access_count,
|
||||
score
|
||||
ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
SEARCH_ENTITIES_BY_NAME_OR_ALIAS = """
|
||||
CALL db.index.fulltext.queryNodes("entitiesFulltext", $query) YIELD node AS e, score
|
||||
WHERE ($end_user_id IS NULL OR e.end_user_id = $end_user_id)
|
||||
WITH e, score
|
||||
With collect({entity: e, score: score}) AS fulltextResults
|
||||
# ── Assistant Pruning Nodes & Edges ──
|
||||
|
||||
OPTIONAL MATCH (ae:ExtractedEntity)
|
||||
WHERE ($end_user_id IS NULL OR ae.end_user_id = $end_user_id)
|
||||
AND ae.aliases IS NOT NULL
|
||||
AND ANY(alias IN ae.aliases WHERE toLower(alias) CONTAINS toLower($query))
|
||||
WITH fulltextResults, collect(ae) AS aliasEntities
|
||||
|
||||
UNWIND (fulltextResults + [x IN aliasEntities | {entity: x, score:
|
||||
CASE
|
||||
WHEN ANY(alias IN x.aliases WHERE toLower(alias) = toLower($query)) THEN 1.0
|
||||
WHEN ANY(alias IN x.aliases WHERE toLower(alias) STARTS WITH toLower($query)) THEN 0.9
|
||||
ELSE 0.8
|
||||
END
|
||||
}]) AS row
|
||||
WITH row.entity AS e, row.score AS score
|
||||
WITH DISTINCT e, MAX(score) AS score
|
||||
OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||||
OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s)
|
||||
RETURN e.id AS id,
|
||||
e.name AS name,
|
||||
e.end_user_id AS end_user_id,
|
||||
e.entity_type AS entity_type,
|
||||
e.created_at AS created_at,
|
||||
e.expired_at AS expired_at,
|
||||
e.entity_idx AS entity_idx,
|
||||
e.statement_id AS statement_id,
|
||||
e.description AS description,
|
||||
e.aliases AS aliases,
|
||||
e.name_embedding AS name_embedding,
|
||||
e.connect_strength AS connect_strength,
|
||||
collect(DISTINCT s.id) AS statement_ids,
|
||||
collect(DISTINCT c.id) AS chunk_ids,
|
||||
COALESCE(e.activation_value, e.importance_score, 0.5) AS activation_value,
|
||||
COALESCE(e.importance_score, 0.5) AS importance_score,
|
||||
e.last_access_time AS last_access_time,
|
||||
COALESCE(e.access_count, 0) AS access_count,
|
||||
score
|
||||
ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
SEARCH_CHUNKS_BY_CONTENT = """
|
||||
CALL db.index.fulltext.queryNodes("chunksFulltext", $query) YIELD node AS c, score
|
||||
WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||||
OPTIONAL MATCH (c)-[:CONTAINS]->(s:Statement)
|
||||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity)
|
||||
RETURN c.id AS id,
|
||||
c.end_user_id AS end_user_id,
|
||||
c.content AS content,
|
||||
c.dialog_id AS dialog_id,
|
||||
c.sequence_number AS sequence_number,
|
||||
collect(DISTINCT s.id) AS statement_ids,
|
||||
collect(DISTINCT e.id) AS entity_ids,
|
||||
COALESCE(c.activation_value, 0.5) AS activation_value,
|
||||
c.last_access_time AS last_access_time,
|
||||
COALESCE(c.access_count, 0) AS access_count,
|
||||
score
|
||||
ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
# MemorySummary keyword search using fulltext index
|
||||
SEARCH_MEMORY_SUMMARIES_BY_KEYWORD = """
|
||||
CALL db.index.fulltext.queryNodes("summariesFulltext", $query) YIELD node AS m, score
|
||||
WHERE ($end_user_id IS NULL OR m.end_user_id = $end_user_id)
|
||||
OPTIONAL MATCH (m)-[:DERIVED_FROM_STATEMENT]->(s:Statement)
|
||||
RETURN m.id AS id,
|
||||
m.name AS name,
|
||||
m.end_user_id AS end_user_id,
|
||||
m.dialog_id AS dialog_id,
|
||||
m.chunk_ids AS chunk_ids,
|
||||
m.content AS content,
|
||||
m.created_at AS created_at,
|
||||
COALESCE(m.activation_value, m.importance_score, 0.5) AS activation_value,
|
||||
COALESCE(m.importance_score, 0.5) AS importance_score,
|
||||
m.last_access_time AS last_access_time,
|
||||
COALESCE(m.access_count, 0) AS access_count,
|
||||
score
|
||||
ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
# Community keyword search: matches name or summary via fulltext index
|
||||
SEARCH_COMMUNITIES_BY_KEYWORD = """
|
||||
CALL db.index.fulltext.queryNodes("communitiesFulltext", $query) YIELD node AS c, score
|
||||
WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||||
RETURN c.community_id AS id,
|
||||
c.name AS name,
|
||||
c.summary AS content,
|
||||
c.core_entities AS core_entities,
|
||||
c.member_count AS member_count,
|
||||
c.end_user_id AS end_user_id,
|
||||
c.updated_at AS updated_at,
|
||||
score
|
||||
ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
FULLTEXT_QUERY_CYPHER_MAPPING = {
|
||||
Neo4jNodeType.STATEMENT: SEARCH_STATEMENTS_BY_KEYWORD,
|
||||
Neo4jNodeType.EXTRACTEDENTITY: SEARCH_ENTITIES_BY_NAME_OR_ALIAS,
|
||||
Neo4jNodeType.CHUNK: SEARCH_CHUNKS_BY_CONTENT,
|
||||
Neo4jNodeType.MEMORYSUMMARY: SEARCH_MEMORY_SUMMARIES_BY_KEYWORD,
|
||||
Neo4jNodeType.COMMUNITY: SEARCH_COMMUNITIES_BY_KEYWORD,
|
||||
Neo4jNodeType.PERCEPTUAL: SEARCH_PERCEPTUALS_BY_KEYWORD
|
||||
ASSISTANT_ORIGINAL_NODE_SAVE = """
|
||||
UNWIND $originals AS orig
|
||||
MERGE (o:AssistantOriginal {id: orig.id})
|
||||
SET o += {
|
||||
end_user_id: orig.end_user_id,
|
||||
run_id: orig.run_id,
|
||||
dialog_id: orig.dialog_id,
|
||||
pair_id: orig.pair_id,
|
||||
text: orig.text,
|
||||
created_at: orig.created_at,
|
||||
expired_at: orig.expired_at
|
||||
}
|
||||
USER_ID_QUERY_CYPHER_MAPPING = {
|
||||
Neo4jNodeType.STATEMENT: SEARCH_STATEMENTS_BY_USER_ID,
|
||||
Neo4jNodeType.EXTRACTEDENTITY: SEARCH_ENTITIES_BY_USER_ID,
|
||||
Neo4jNodeType.CHUNK: SEARCH_CHUNKS_BY_USER_ID,
|
||||
Neo4jNodeType.MEMORYSUMMARY: SEARCH_MEMORY_SUMMARIES_BY_USER_ID,
|
||||
Neo4jNodeType.COMMUNITY: SEARCH_COMMUNITIES_BY_USER_ID,
|
||||
Neo4jNodeType.PERCEPTUAL: SEARCH_PERCEPTUAL_BY_USER_ID
|
||||
}
|
||||
NODE_ID_QUERY_CYPHER_MAPPING = {
|
||||
Neo4jNodeType.STATEMENT: SEARCH_STATEMENTS_BY_IDS,
|
||||
Neo4jNodeType.EXTRACTEDENTITY: SEARCH_ENTITIES_BY_IDS,
|
||||
Neo4jNodeType.CHUNK: SEARCH_CHUNKS_BY_IDS,
|
||||
Neo4jNodeType.MEMORYSUMMARY: SEARCH_MEMORY_SUMMARIES_BY_IDS,
|
||||
Neo4jNodeType.COMMUNITY: SEARCH_COMMUNITIES_BY_IDS,
|
||||
Neo4jNodeType.PERCEPTUAL: SEARCH_PERCEPTUAL_BY_IDS
|
||||
RETURN o.id AS uuid
|
||||
"""
|
||||
|
||||
ASSISTANT_PRUNED_NODE_SAVE = """
|
||||
UNWIND $pruneds AS p
|
||||
MERGE (pr:AssistantPruned {id: p.id})
|
||||
SET pr += {
|
||||
end_user_id: p.end_user_id,
|
||||
run_id: p.run_id,
|
||||
dialog_id: p.dialog_id,
|
||||
pair_id: p.pair_id,
|
||||
text: p.text,
|
||||
memory_type: p.memory_type,
|
||||
text_embedding: p.text_embedding,
|
||||
created_at: p.created_at,
|
||||
expired_at: p.expired_at
|
||||
}
|
||||
RETURN pr.id AS uuid
|
||||
"""
|
||||
|
||||
ASSISTANT_PRUNED_EDGE_SAVE = """
|
||||
UNWIND $edges AS edge
|
||||
MATCH (o:AssistantOriginal {id: edge.source})
|
||||
MATCH (p:AssistantPruned {id: edge.target})
|
||||
MERGE (o)-[r:PRUNED_TO]->(p)
|
||||
SET r.pair_id = edge.pair_id,
|
||||
r.end_user_id = edge.end_user_id,
|
||||
r.run_id = edge.run_id,
|
||||
r.created_at = edge.created_at
|
||||
RETURN elementId(r) AS uuid
|
||||
"""
|
||||
|
||||
ASSISTANT_DIALOG_EDGE_SAVE = """
|
||||
UNWIND $edges AS edge
|
||||
MATCH (o:AssistantOriginal {id: edge.source})
|
||||
MATCH (d:Dialogue {id: edge.target})
|
||||
MERGE (o)-[r:BELONGS_TO_DIALOG]->(d)
|
||||
SET r.end_user_id = edge.end_user_id,
|
||||
r.run_id = edge.run_id,
|
||||
r.created_at = edge.created_at
|
||||
RETURN elementId(r) AS uuid
|
||||
"""
|
||||
|
||||
@@ -24,6 +24,10 @@ from app.core.memory.models.graph_models import (
|
||||
EntityEntityEdge,
|
||||
PerceptualNode,
|
||||
PerceptualEdge,
|
||||
AssistantOriginalNode,
|
||||
AssistantPrunedNode,
|
||||
AssistantPrunedEdge,
|
||||
AssistantDialogEdge,
|
||||
)
|
||||
import logging
|
||||
|
||||
@@ -166,6 +170,10 @@ async def save_dialog_and_statements_to_neo4j(
|
||||
statement_entity_edges: List[StatementEntityEdge],
|
||||
perceptual_edges: List[PerceptualEdge],
|
||||
connector: Neo4jConnector,
|
||||
assistant_original_nodes: Optional[List[AssistantOriginalNode]] = None,
|
||||
assistant_pruned_nodes: Optional[List[AssistantPrunedNode]] = None,
|
||||
assistant_pruned_edges: Optional[List[AssistantPrunedEdge]] = None,
|
||||
assistant_dialog_edges: Optional[List[AssistantDialogEdge]] = None,
|
||||
) -> bool:
|
||||
"""Save dialogue nodes, chunk nodes, statement nodes, entities, and all relationships to Neo4j using graph models.
|
||||
|
||||
@@ -368,6 +376,55 @@ async def save_dialog_and_statements_to_neo4j(
|
||||
results['perceptual_chunk_edges'] = perceptual_edges_uuids
|
||||
logger.info(f"Successfully saved {len(perceptual_edges_uuids)} perceptual-chunk edges to Neo4j")
|
||||
|
||||
# 8. Save assistant original nodes
|
||||
if assistant_original_nodes:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_ORIGINAL_NODE_SAVE
|
||||
original_data = [node.model_dump() for node in assistant_original_nodes]
|
||||
result = await tx.run(ASSISTANT_ORIGINAL_NODE_SAVE, originals=original_data)
|
||||
original_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_originals'] = original_uuids
|
||||
logger.info(f"Successfully saved {len(original_uuids)} assistant original nodes to Neo4j")
|
||||
|
||||
# 9. Save assistant pruned nodes
|
||||
if assistant_pruned_nodes:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_PRUNED_NODE_SAVE
|
||||
pruned_data = [node.model_dump() for node in assistant_pruned_nodes]
|
||||
result = await tx.run(ASSISTANT_PRUNED_NODE_SAVE, pruneds=pruned_data)
|
||||
pruned_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_pruneds'] = pruned_uuids
|
||||
logger.info(f"Successfully saved {len(pruned_uuids)} assistant pruned nodes to Neo4j")
|
||||
|
||||
# 10. Save PRUNED_TO edges (Original → Pruned)
|
||||
if assistant_pruned_edges:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_PRUNED_EDGE_SAVE
|
||||
edge_data = [{
|
||||
"source": edge.source,
|
||||
"target": edge.target,
|
||||
"pair_id": edge.pair_id,
|
||||
"end_user_id": edge.end_user_id,
|
||||
"run_id": edge.run_id,
|
||||
"created_at": edge.created_at.isoformat() if edge.created_at else None,
|
||||
} for edge in assistant_pruned_edges]
|
||||
result = await tx.run(ASSISTANT_PRUNED_EDGE_SAVE, edges=edge_data)
|
||||
pruned_edge_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_pruned_edges'] = pruned_edge_uuids
|
||||
logger.info(f"Successfully saved {len(pruned_edge_uuids)} PRUNED_TO edges to Neo4j")
|
||||
|
||||
# 11. Save BELONGS_TO_DIALOG edges (Original → Dialogue)
|
||||
if assistant_dialog_edges:
|
||||
from app.repositories.neo4j.cypher_queries import ASSISTANT_DIALOG_EDGE_SAVE
|
||||
edge_data = [{
|
||||
"source": edge.source,
|
||||
"target": edge.target,
|
||||
"end_user_id": edge.end_user_id,
|
||||
"run_id": edge.run_id,
|
||||
"created_at": edge.created_at.isoformat() if edge.created_at else None,
|
||||
} for edge in assistant_dialog_edges]
|
||||
result = await tx.run(ASSISTANT_DIALOG_EDGE_SAVE, edges=edge_data)
|
||||
dialog_edge_uuids = [record["uuid"] async for record in result]
|
||||
results['assistant_dialog_edges'] = dialog_edge_uuids
|
||||
logger.info(f"Successfully saved {len(dialog_edge_uuids)} BELONGS_TO_DIALOG edges to Neo4j")
|
||||
|
||||
return results
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user