[changes] dashscope applies patches and modifies prompts
This commit is contained in:
@@ -2,7 +2,8 @@
|
||||
Celery Worker 入口点
|
||||
用于启动 Celery Worker: celery -A app.celery_worker worker --loglevel=info
|
||||
"""
|
||||
from celery.signals import worker_process_init
|
||||
# 必须在导入任何使用 DashScope SDK 的模块之前应用补丁
|
||||
import app.plugins.dashscope_patch # noqa: F401
|
||||
|
||||
from app.celery_app import celery_app
|
||||
from app.core.logging_config import LoggingConfig, get_logger
|
||||
|
||||
@@ -145,6 +145,7 @@ async def term_memory_save(end_user_id, strategy_type, scope):
|
||||
|
||||
async def window_dialogue(end_user_id, langchain_messages, memory_config, scope):
|
||||
"""
|
||||
TODO 考虑作为滑动窗口写入的函数
|
||||
Process dialogue based on window size and write to Neo4j
|
||||
|
||||
Manages conversation data based on a sliding window approach. When the window
|
||||
|
||||
@@ -151,3 +151,14 @@ class ExtractionPipelineConfig(BaseModel):
|
||||
forgetting_engine: ForgettingEngineConfig = Field(default_factory=ForgettingEngineConfig)
|
||||
# 情绪引擎(旁路模块,SidecarStepFactory 通过此字段判断是否启用)
|
||||
emotion_enabled: bool = Field(default=False, description="是否启用情绪提取旁路")
|
||||
|
||||
# TODO 设置控制并发数量以适配LLM的QPM限流
|
||||
# # 流水线 LLM 并发上限(statement + triplet 共享),防止 QPM 爆掉
|
||||
# # 可通过环境变量 MAX_CONCURRENT_LLM_CALLS 覆盖
|
||||
# max_concurrent_llm_calls: int = Field(
|
||||
# default_factory=lambda: int(
|
||||
# __import__("os").environ.get("MAX_CONCURRENT_LLM_CALLS", "5")
|
||||
# ),
|
||||
# ge=1, le=64,
|
||||
# description="Maximum concurrent LLM calls in the extraction pipeline",
|
||||
# )
|
||||
|
||||
@@ -98,7 +98,7 @@ class SemanticPruner:
|
||||
self._snapshot = snapshot # PipelineSnapshot 实例,用于输出剪枝快照
|
||||
|
||||
# 加载 Jinja2 模板
|
||||
self.template = prompt_env.get_template("extracat_pruning.jinja2")
|
||||
self.template = prompt_env.get_template("extract_pruning.jinja2")
|
||||
|
||||
# LRU 缓存:避免对相同消息对重复调用 LLM
|
||||
self._cache: OrderedDict[str, AssistantPruningResponse] = OrderedDict()
|
||||
@@ -360,7 +360,7 @@ class SemanticPruner:
|
||||
) -> AssistantPruningResponse:
|
||||
"""调用 LLM 从 User-Assistant 消息对中提取 Assistant 记忆摘要。
|
||||
|
||||
使用 extracat_pruning.jinja2 模板,输入格式:
|
||||
使用 extract_pruning.jinja2 模板,输入格式:
|
||||
{"msgs": [{"role": "User", "msg": "..."}, {"role": "Assistant", "msg": "..."}]}
|
||||
"""
|
||||
# 构建模板输入
|
||||
@@ -387,7 +387,7 @@ class SemanticPruner:
|
||||
|
||||
# 渲染模板
|
||||
rendered = self.template.render(dialog_text=dialog_text)
|
||||
log_template_rendering("extracat_pruning.jinja2", {
|
||||
log_template_rendering("extract_pruning.jinja2", {
|
||||
"language": self.language,
|
||||
})
|
||||
log_prompt_rendering("pruning-assistant-hint", rendered)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
场景特定配置 - 统一填充词库
|
||||
|
||||
重要性判断已完全交由 extracat_pruning.jinja2 提示词 + LLM preserve_tokens 机制承担。
|
||||
重要性判断已完全交由 extract_pruning.jinja2 提示词 + LLM preserve_tokens 机制承担。
|
||||
本模块仅保留统一填充词库(filler_phrases),用于识别无意义寒暄/表情/口头禅。
|
||||
所有场景共用同一份词库,场景差异由 LLM 语义判断处理。
|
||||
"""
|
||||
|
||||
@@ -1,180 +0,0 @@
|
||||
你是一个面向记忆存储的 Assistant 辅助信息压缩器。
|
||||
|
||||
任务:
|
||||
|
||||
- 输入是一个 JSON,对话放在 `msgs` 数组里。
|
||||
- 你只处理 `Assistant.msg`。
|
||||
- `User.msg` 只用于理解上下文,不允许出现在输出里,也不允许被复述成用户摘要。
|
||||
- 你的输出必须包含两个字段:
|
||||
1. `assistant_memory_hint`
|
||||
2. `assistant_memory_type`
|
||||
|
||||
目标:
|
||||
|
||||
- 把较长的 `Assistant.msg` 压缩成一条更短、便于检索的辅助摘要。
|
||||
- 保留建议、推荐、提醒、说明、提问、附和、重复等核心动作。
|
||||
- 删除冗长解释、寒暄、礼貌套话和低价值铺垫,但不要漏掉真正有用的信息。
|
||||
|
||||
硬约束:
|
||||
|
||||
- 不得输出或复述 `User.msg`。
|
||||
- 不得捏造新事实、新建议、新步骤、新材料或新限制。
|
||||
- 不得改变 `Assistant` 原始语义和立场。
|
||||
- 可以压缩、合并、重写 `Assistant.msg`,但必须忠于原内容。
|
||||
- `assistant_memory_hint` 必须是简短的完整句,尽量包含清晰主谓宾,不要只写零散词组。
|
||||
- 如果 `assistant_memory_hint` 里出现"室友""老师""朋友""同事""这件事"这类泛称,而上下文中存在清晰、稳定、唯一的指代对象,则优先改写成那个清晰指代对象。
|
||||
- 只有在当前两条消息里无法稳定落到唯一对象时,才保留泛称或模糊表达。
|
||||
- 如果对象本身已经足够清晰,例如"数据库作业""鸡胸肉沙拉""李教授",则不要为了"更具体"而做不必要的过度展开。
|
||||
- `assistant_memory_type` 只能从以下枚举中选择:
|
||||
`comfort | suggestion | recommendation | warning | instruction | question | agreement | repetition | other`
|
||||
- 如果 `Assistant.msg` 同时包含多个动作,`assistant_memory_hint` 可以保留多个动作,但 `assistant_memory_type` 只标记其中最主要、最值得检索的主动作。
|
||||
- 不再输出 `NULL`。即使内容价值较低,也要尽量压成一条最短的辅助摘要。
|
||||
- 如果 `Assistant.msg` 含有提问、追问或反问,`assistant_memory_hint` 必须保留提问的具体内容,不能只写"询问了用户"。
|
||||
- 如果提问里给出了明确选项、候选分支或对比项,`assistant_memory_hint` 应尽量保留这些选项,而不是只保留上位概括。
|
||||
- `question` 只在"提问/追问/反问"是这条消息的主推进动作时使用;如果消息里同时有建议和提问,但建议明显更核心,则类型标为 `suggestion`,并在 hint 里按需保留提问内容。
|
||||
- 对 `question` 类型,优先保留:
|
||||
1. 问题的核心主题
|
||||
2. 明确给出的选项或分支
|
||||
3. 必要的限定条件
|
||||
- 对 `question` 类型,不要只保留寒暄式前缀,例如"听起来不错""如果方便的话";应保留真正要用户回答的部分。
|
||||
- 只输出严格 JSON,不要输出解释。
|
||||
|
||||
压缩原则:
|
||||
|
||||
- 优先保留具体建议、推荐、提醒、操作步骤、风险提示和问题内容。
|
||||
- 对纯附和内容,压成极短摘要,例如"附和了用户对某事的看法。"
|
||||
- 对明显重复用户内容的回复,压成极短摘要,例如"重复了用户关于某事的说法。"
|
||||
- 对泛泛回应、空泛鼓励、礼貌性延展,压成最短可理解摘要,并标为 `other`。
|
||||
- 如果上下文里能确定人名、关系对象或具体事物,优先在摘要里写出明确对象,不要无必要地保留"室友""那个老师""这件事"这类泛称。
|
||||
- 如果原文里的对象已经明确且自然,就直接保留该对象,不要改写成更绕或更长的表达。
|
||||
- 如果问题中存在"是 A、B 还是 C"这类显式选项,优先保留 A、B、C,而不是只写成"询问用户偏好"。
|
||||
- 如果原文既有建议又有提问,允许在 hint 里同时保留;但 type 只标主动作。若提问是核心推进动作,则 type 标为 `question`;若建议更核心,则 type 标为 `suggestion`。
|
||||
- 优先使用显式主语来写结果,例如:
|
||||
`安慰了用户……`
|
||||
`建议用户……`
|
||||
`推荐用户……`
|
||||
`提醒用户……`
|
||||
`询问用户……`
|
||||
`附和了用户……`
|
||||
`重复了用户……`
|
||||
|
||||
类型判断补充:
|
||||
|
||||
- `question`:主动作是向用户提问、追问、澄清、确认选项或收集偏好。
|
||||
- `suggestion`:主动作是给用户建议;即使末尾顺带问一句,也仍以建议为主。
|
||||
- `recommendation`:主动作是推荐某个方案、菜谱、产品或选择。
|
||||
- `warning`:主动作是提醒风险、限制、禁忌或后果。
|
||||
- `instruction`:主动作是说明操作顺序、步骤或执行流程。
|
||||
- `comfort`:主动作是安慰、理解、支持用户情绪。
|
||||
- `agreement`:主动作是附和、认同用户说法。
|
||||
- `repetition`:主动作是重复、转述用户已有内容,没有新增有效信息。
|
||||
- `other`:不适合归入以上类型,但仍值得压成一条短摘要。
|
||||
|
||||
Few-shot 示例 1
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我室友小雯这学期一直在准备毕业论文,这两周都在改答辩PPT。她下周三答辩,我有点担心她会紧张。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "听起来你很关心小雯,也希望她答辩顺利。她现在紧张其实很正常,很多人在答辩前都会这样。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "安慰了用户对室友小雯答辩状态的担忧。",
|
||||
"assistant_memory_type": "comfort"
|
||||
}
|
||||
|
||||
Few-shot 示例 2
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我最近总失眠,已经两周了,想先自己调一调。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "如果你想先自己调整,可以先减少咖啡因摄入,尤其下午和晚上尽量不要再喝咖啡或浓茶,同时把睡前刷手机的时间压缩一些,尽量固定上床时间,先连续观察几天。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "建议用户减少咖啡因摄入、减少睡前刷手机时间并固定上床时间。",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
|
||||
Few-shot 示例 3
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我晚上想做个简单点的减脂餐,最好二十分钟左右能搞定。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "你可以做一个鸡胸肉沙拉碗,主要用鸡胸肉、生菜、黄瓜和圣女果。鸡胸肉简单煎熟切块后和蔬菜拌在一起,调味尽量用橄榄油加一点醋,不要放太多沙拉酱。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "推荐用户做鸡胸肉沙拉碗,并提醒用户调味时少放沙拉酱。",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
|
||||
Few-shot 示例 4
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "听起来不错!"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "听起来不错!你最喜欢吃什么类型的沙拉呢?是蔬菜沙拉、水果沙拉还是其他的?如果有任何特定的食材是你最喜欢的,也可以告诉我哦。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "询问用户更喜欢蔬菜沙拉、水果沙拉还是其他类型的沙拉,以及是否有偏好的食材。",
|
||||
"assistant_memory_type": "question"
|
||||
}
|
||||
|
||||
Few-shot 示例 5
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我最近总失眠,白天特别困,想先自己调一调。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "你可以先减少下午和晚上的咖啡因摄入,睡前也尽量少看手机。如果方便的话,我还想了解一下,你通常晚上大概几点上床、几点真正睡着?"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "建议用户减少下午和晚上的咖啡因摄入并减少睡前看手机,同时询问用户通常几点上床和几点入睡。",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
|
||||
现在处理下面这个输入。
|
||||
输入:{{ dialog_text }}
|
||||
|
||||
只输出严格 JSON:
|
||||
{
|
||||
"assistant_memory_hint": "<string>",
|
||||
"assistant_memory_type": "comfort | suggestion | recommendation | warning | instruction | question | agreement | repetition | other"
|
||||
}
|
||||
354
api/app/core/memory/utils/prompt/prompts/extract_pruning.jinja2
Normal file
354
api/app/core/memory/utils/prompt/prompts/extract_pruning.jinja2
Normal file
@@ -0,0 +1,354 @@
|
||||
{% if language == "zh" %}
|
||||
你是一个面向记忆存储的 Assistant 辅助信息压缩器。
|
||||
|
||||
任务:
|
||||
|
||||
- 输入是一个 JSON,对话放在 `msgs` 数组里。
|
||||
- 你只处理 `Assistant.msg`。
|
||||
- `User.msg` 只用于理解上下文,不允许出现在输出里,也不允许被复述成用户摘要。
|
||||
- 你的输出必须包含两个字段:
|
||||
1. `assistant_memory_hint`
|
||||
2. `assistant_memory_type`
|
||||
|
||||
目标:
|
||||
|
||||
- 把较长的 `Assistant.msg` 压缩成一条更短、便于检索的辅助摘要。
|
||||
- 保留建议、推荐、提醒、说明、提问、附和、重复等核心动作。
|
||||
- 删除冗长解释、寒暄、礼貌套话和低价值铺垫,但不要漏掉真正有用的信息。
|
||||
|
||||
硬约束:
|
||||
|
||||
- 不得输出或复述 `User.msg`。
|
||||
- 不得捏造新事实、新建议、新步骤、新材料或新限制。
|
||||
- 不得改变 `Assistant` 原始语义和立场。
|
||||
- 可以压缩、合并、重写 `Assistant.msg`,但必须忠于原内容。
|
||||
- `assistant_memory_hint` 必须是简短的完整句,尽量包含清晰主谓宾,不要只写零散词组。
|
||||
- 如果 `assistant_memory_hint` 里出现"室友""老师""朋友""同事""这件事"这类泛称,而上下文中存在清晰、稳定、唯一的指代对象,则优先改写成那个清晰指代对象。
|
||||
- 只有在当前两条消息里无法稳定落到唯一对象时,才保留泛称或模糊表达。
|
||||
- 如果对象本身已经足够清晰,例如"数据库作业""鸡胸肉沙拉""李教授",则不要为了"更具体"而做不必要的过度展开。
|
||||
- `assistant_memory_type` 只能从以下枚举中选择:
|
||||
`comfort | suggestion | recommendation | warning | instruction | question | agreement | repetition | other`
|
||||
- 如果 `Assistant.msg` 同时包含多个动作,`assistant_memory_hint` 可以保留多个动作,但 `assistant_memory_type` 只标记其中最主要、最值得检索的主动作。
|
||||
- 不再输出 `NULL`。即使内容价值较低,也要尽量压成一条最短的辅助摘要。
|
||||
- 如果 `Assistant.msg` 含有提问、追问或反问,`assistant_memory_hint` 必须保留提问的具体内容,不能只写"询问了用户"。
|
||||
- 如果提问里给出了明确选项、候选分支或对比项,`assistant_memory_hint` 应尽量保留这些选项,而不是只保留上位概括。
|
||||
- `question` 只在"提问/追问/反问"是这条消息的主推进动作时使用;如果消息里同时有建议和提问,但建议明显更核心,则类型标为 `suggestion`,并在 hint 里按需保留提问内容。
|
||||
- 对 `question` 类型,优先保留:
|
||||
1. 问题的核心主题
|
||||
2. 明确给出的选项或分支
|
||||
3. 必要的限定条件
|
||||
- 对 `question` 类型,不要只保留寒暄式前缀,例如"听起来不错""如果方便的话";应保留真正要用户回答的部分。
|
||||
- 只输出严格 JSON,不要输出解释。
|
||||
|
||||
压缩原则:
|
||||
|
||||
- 优先保留具体建议、推荐、提醒、操作步骤、风险提示和问题内容。
|
||||
- 对纯附和内容,压成极短摘要,例如"附和了用户对某事的看法。"
|
||||
- 对明显重复用户内容的回复,压成极短摘要,例如"重复了用户关于某事的说法。"
|
||||
- 对泛泛回应、空泛鼓励、礼貌性延展,压成最短可理解摘要,并标为 `other`。
|
||||
- 如果上下文里能确定人名、关系对象或具体事物,优先在摘要里写出明确对象,不要无必要地保留"室友""那个老师""这件事"这类泛称。
|
||||
- 如果原文里的对象已经明确且自然,就直接保留该对象,不要改写成更绕或更长的表达。
|
||||
- 如果问题中存在"是 A、B 还是 C"这类显式选项,优先保留 A、B、C,而不是只写成"询问用户偏好"。
|
||||
- 如果原文既有建议又有提问,允许在 hint 里同时保留;但 type 只标主动作。若提问是核心推进动作,则 type 标为 `question`;若建议更核心,则 type 标为 `suggestion`。
|
||||
- 优先以明确的动作动词开头来写结果,例如:
|
||||
`安慰了用户……`
|
||||
`建议用户……`
|
||||
`推荐用户……`
|
||||
`提醒用户……`
|
||||
`询问用户……`
|
||||
`附和了用户……`
|
||||
`重复了用户……`
|
||||
|
||||
类型判断补充:
|
||||
|
||||
- `question`:主动作是向用户提问、追问、澄清、确认选项或收集偏好。
|
||||
- `suggestion`:主动作是给用户建议;即使末尾顺带问一句,也仍以建议为主。
|
||||
- `recommendation`:主动作是推荐某个方案、菜谱、产品或选择。
|
||||
- `warning`:主动作是提醒风险、限制、禁忌或后果。
|
||||
- `instruction`:主动作是说明操作顺序、步骤或执行流程。
|
||||
- `comfort`:主动作是安慰、理解、支持用户情绪。
|
||||
- `agreement`:主动作是附和、认同用户说法。
|
||||
- `repetition`:主动作是重复、转述用户已有内容,没有新增有效信息。
|
||||
- `other`:不适合归入以上类型,但仍值得压成一条短摘要。
|
||||
|
||||
Few-shot 示例 1
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我室友小雯这学期一直在准备毕业论文,这两周都在改答辩PPT。她下周三答辩,我有点担心她会紧张。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "听起来你很关心小雯,也希望她答辩顺利。她现在紧张其实很正常,很多人在答辩前都会这样。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "安慰了用户对室友小雯答辩状态的担忧。",
|
||||
"assistant_memory_type": "comfort"
|
||||
}
|
||||
|
||||
Few-shot 示例 2
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我最近总失眠,已经两周了,想先自己调一调。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "如果你想先自己调整,可以先减少咖啡因摄入,尤其下午和晚上尽量不要再喝咖啡或浓茶,同时把睡前刷手机的时间压缩一些,尽量固定上床时间,先连续观察几天。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "建议用户减少咖啡因摄入、减少睡前刷手机时间并固定上床时间。",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
|
||||
Few-shot 示例 3
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我晚上想做个简单点的减脂餐,最好二十分钟左右能搞定。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "你可以做一个鸡胸肉沙拉碗,主要用鸡胸肉、生菜、黄瓜和圣女果。鸡胸肉简单煎熟切块后和蔬菜拌在一起,调味尽量用橄榄油加一点醋,不要放太多沙拉酱。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "推荐用户做鸡胸肉沙拉碗,并提醒用户调味时少放沙拉酱。",
|
||||
"assistant_memory_type": "recommendation"
|
||||
}
|
||||
|
||||
Few-shot 示例 4
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "听起来不错!"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "听起来不错!你最喜欢吃什么类型的沙拉呢?是蔬菜沙拉、水果沙拉还是其他的?如果有任何特定的食材是你最喜欢的,也可以告诉我哦。"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "询问用户更喜欢蔬菜沙拉、水果沙拉还是其他类型的沙拉,以及是否有偏好的食材。",
|
||||
"assistant_memory_type": "question"
|
||||
}
|
||||
|
||||
Few-shot 示例 5
|
||||
输入:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "我最近总失眠,白天特别困,想先自己调一调。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "你可以先减少下午和晚上的咖啡因摄入,睡前也尽量少看手机。如果方便的话,我还想了解一下,你通常晚上大概几点上床、几点真正睡着?"
|
||||
}
|
||||
]
|
||||
}
|
||||
输出:
|
||||
{
|
||||
"assistant_memory_hint": "建议用户减少下午和晚上的咖啡因摄入并减少睡前看手机,同时询问用户通常几点上床和几点入睡。",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
{% else %}
|
||||
You are an Assistant-side memory compression module designed for memory storage.
|
||||
|
||||
Task:
|
||||
|
||||
- The input is a JSON object, and the dialogue is stored in the `msgs` array.
|
||||
- You only process `Assistant.msg`.
|
||||
- `User.msg` is context only. It must not appear in the output, and it must not be rewritten into a user summary.
|
||||
- Your output must contain exactly two fields:
|
||||
1. `assistant_memory_hint`
|
||||
2. `assistant_memory_type`
|
||||
|
||||
Goal:
|
||||
|
||||
- Compress a long `Assistant.msg` into a shorter retrieval-friendly assistant summary.
|
||||
- Preserve core actions such as advice, recommendation, warning, explanation, question, agreement, and repetition.
|
||||
- Remove verbose explanation, small talk, politeness padding, and low-value lead-in, but do not drop truly useful information.
|
||||
|
||||
Hard constraints:
|
||||
|
||||
- Do not output or restate `User.msg`.
|
||||
- Do not invent new facts, advice, steps, ingredients, or constraints.
|
||||
- Do not change the original meaning or stance of `Assistant.msg`.
|
||||
- You may compress, merge, or rewrite `Assistant.msg`, but you must stay faithful to the original content.
|
||||
- `assistant_memory_hint` must be a short complete sentence, ideally with a clear subject, predicate, and object, not a loose fragment.
|
||||
- If `assistant_memory_hint` contains generic labels such as "roommate", "teacher", "friend", "coworker", or "this matter", and the context provides a clear, stable, unique referent, prefer the explicit referent.
|
||||
- Only keep generic or vague wording when the current two-message context cannot resolve it stably to a unique referent.
|
||||
- If the object is already naturally clear, such as "database homework", "chicken salad", or "Professor Li", do not over-expand it just to sound more specific.
|
||||
- `assistant_memory_type` must be chosen only from:
|
||||
`comfort | suggestion | recommendation | warning | instruction | question | agreement | repetition | other`
|
||||
- If `Assistant.msg` contains multiple actions, `assistant_memory_hint` may keep multiple actions, but `assistant_memory_type` must label only the most important and most retrieval-worthy primary action.
|
||||
- Do not output `NULL`. Even if the content is low-value, compress it into the shortest useful assistant-side summary.
|
||||
- If `Assistant.msg` contains a question, follow-up question, or counter-question, `assistant_memory_hint` must preserve the actual question content and must not reduce it to "asked the user".
|
||||
- If the question contains explicit options, candidate branches, or comparisons, `assistant_memory_hint` should preserve those options instead of collapsing them into a generic abstraction.
|
||||
- Use `question` only when asking, follow-up asking, or counter-questioning is the main forward-driving action of the message. If the message contains both advice and a question, but advice is clearly more central, use `suggestion` and keep the question content in the hint when needed.
|
||||
- For `question`, prioritize:
|
||||
1. the core topic of the question
|
||||
2. the explicit options or branches
|
||||
3. the necessary constraints
|
||||
- For `question`, do not keep only social softeners such as "that sounds nice" or "if that's convenient"; keep the actual part that requires an answer.
|
||||
- Return strict JSON only. Do not output explanations.
|
||||
|
||||
Compression principles:
|
||||
|
||||
- Prioritize concrete advice, recommendations, warnings, operational steps, risk reminders, and question content.
|
||||
- Compress pure agreement into a very short summary, such as "Agreed with the user's view on something."
|
||||
- Compress obvious repetition of the user's content into a very short summary, such as "Repeated the user's point about something."
|
||||
- Compress generic responses, vague encouragement, and polite extension into the shortest understandable summary and label them `other`.
|
||||
- If the context makes a person, relation, or concrete object identifiable, prefer the explicit object in the summary and avoid unnecessary generic terms like "roommate", "that teacher", or "this matter".
|
||||
- If the object in the original message is already clear and natural, keep it directly rather than rewriting it into a longer or more awkward form.
|
||||
- If the question contains explicit choices such as "A, B, or C", preserve A, B, and C rather than reducing it to "asked about the user's preference".
|
||||
- If the original message contains both advice and a question, both may remain in the hint, but the type should mark only the primary action. If the question is the main forward-driving action, use `question`; if the advice is more central, use `suggestion`.
|
||||
- Prefer explicit leading verbs in the result, for example:
|
||||
`Comforted the user...`
|
||||
`Suggested that the user...`
|
||||
`Recommended that the user...`
|
||||
`Warned the user...`
|
||||
`Asked the user...`
|
||||
`Agreed with the user...`
|
||||
`Repeated the user's point...`
|
||||
|
||||
Type notes:
|
||||
|
||||
- `question`: the primary action is asking, following up, clarifying, confirming options, or collecting preferences.
|
||||
- `suggestion`: the primary action is giving advice, even if a question appears at the end.
|
||||
- `recommendation`: the primary action is recommending a plan, dish, product, or choice.
|
||||
- `warning`: the primary action is warning about a risk, restriction, taboo, or consequence.
|
||||
- `instruction`: the primary action is explaining an operation order, concrete steps, or an execution flow.
|
||||
- `comfort`: the primary action is comforting, understanding, or emotionally supporting the user.
|
||||
- `agreement`: the primary action is agreeing with or affirming the user's statement.
|
||||
- `repetition`: the primary action is repeating or rephrasing content the user already said, without adding meaningful new information.
|
||||
- `other`: does not fit the types above, but still deserves a short summary.
|
||||
|
||||
English few-shot example 1
|
||||
Input:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "My roommate Xiaowen has been preparing her thesis all semester, and she has spent the last two weeks revising her defense slides. She defends next Wednesday, and I'm a little worried she'll be nervous."
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "It sounds like you really care about Xiaowen and want her defense to go well. Feeling nervous before a defense is actually very normal, and many people feel that way."
|
||||
}
|
||||
]
|
||||
}
|
||||
Output:
|
||||
{
|
||||
"assistant_memory_hint": "Comforted the user about their worry that roommate Xiaowen will be nervous at her thesis defense.",
|
||||
"assistant_memory_type": "comfort"
|
||||
}
|
||||
|
||||
English few-shot example 2
|
||||
Input:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "I've had insomnia for the past two weeks and want to try adjusting it myself first."
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "If you want to adjust it yourself first, you can start by reducing caffeine intake, especially in the afternoon and evening, cutting down screen time before bed, and keeping a consistent bedtime for a few days."
|
||||
}
|
||||
]
|
||||
}
|
||||
Output:
|
||||
{
|
||||
"assistant_memory_hint": "Suggested that the user reduce caffeine intake, reduce screen time before bed, and keep a consistent bedtime.",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
|
||||
English few-shot example 3
|
||||
Input:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "I want to make a simple low-fat dinner tonight that takes about twenty minutes."
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "You could make a chicken salad bowl with chicken breast, lettuce, cucumber, and cherry tomatoes. After cooking and slicing the chicken, mix it with the vegetables, and season it with olive oil and a little vinegar instead of a lot of salad dressing."
|
||||
}
|
||||
]
|
||||
}
|
||||
Output:
|
||||
{
|
||||
"assistant_memory_hint": "Recommended that the user make a chicken salad bowl and use less salad dressing.",
|
||||
"assistant_memory_type": "recommendation"
|
||||
}
|
||||
|
||||
English few-shot example 4
|
||||
Input:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "That sounds good!"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "That sounds good! What kind of salad do you like most? Vegetable salad, fruit salad, or something else? If you have any favorite ingredients, you can tell me too."
|
||||
}
|
||||
]
|
||||
}
|
||||
Output:
|
||||
{
|
||||
"assistant_memory_hint": "Asked whether the user prefers vegetable salad, fruit salad, or another type of salad, and whether they have any favorite ingredients.",
|
||||
"assistant_memory_type": "question"
|
||||
}
|
||||
|
||||
English few-shot example 5
|
||||
Input:
|
||||
{
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "I've been having insomnia lately, I feel especially tired during the day, and I want to adjust it myself first."
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "You can first reduce caffeine intake in the afternoon and evening and also try to look at your phone less before bed. If it's convenient, I'd also like to know what time you usually get into bed and what time you actually fall asleep."
|
||||
}
|
||||
]
|
||||
}
|
||||
Output:
|
||||
{
|
||||
"assistant_memory_hint": "Suggested that the user reduce afternoon and evening caffeine intake and reduce phone use before bed, while also asking when the user usually gets into bed and falls asleep.",
|
||||
"assistant_memory_type": "suggestion"
|
||||
}
|
||||
{% endif %}
|
||||
|
||||
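{% if language == "zh" %}现在处理下面这个输入。{% else %}Now process the input below.{% endif %}
|
||||
{% if language == "zh" %}输入:{% else %}Input: {% endif %}{{ dialog_text }}
|
||||
|
||||
{% if language == "zh" %}只输出严格 JSON:{% else %}Return strict JSON only:{% endif %}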
|
||||
{
|
||||
"assistant_memory_hint": "<string>",
|
||||
"assistant_memory_type": "comfort | suggestion | recommendation | warning | instruction | question | agreement | repetition | other"
|
||||
}
|
||||
@@ -41,7 +41,6 @@ Each output item should be a structured candidate memory statement.
|
||||
- end_user_id: 终端用户 ID
|
||||
- dialog_at: 会话时间,必须是 ISO 8601 时间点
|
||||
- target_content: 当前要处理的对话片段文本,也是唯一允许被抽取的目标文本
|
||||
- dialog_at: 会话时间,优先作为解析相对时间表达的参考时间
|
||||
- target_message_date: 目标文本对应的时间,可作为辅助时间背景;当与 dialog_at 同时存在时,优先使用 dialog_at 解析相对时间表达
|
||||
- supporting_context: 完整对话上下文,仅用于辅助理解 target_content,不能单独贡献新的可抽取事实
|
||||
- supporting_context.msgs: 按顺序提供的上下文消息,可包含 User 和 Assistant
|
||||
@@ -50,7 +49,6 @@ Each output item should be a structured candidate memory statement.
|
||||
- end_user_id: end-user identifier
|
||||
- dialog_at: session time, which must be an ISO 8601 timestamp
|
||||
- target_content: the current dialogue fragment to process, and the only text span that may be extracted from
|
||||
- dialog_at: session time, used as the primary reference for resolving relative temporal expressions
|
||||
- target_message_date: the time associated with the target content, which may serve as supporting temporal context; when both exist, prefer `dialog_at` for resolving relative expressions
|
||||
- supporting_context: full dialogue context, used only to help interpret target_content; it must not independently contribute new extractable facts
|
||||
- supporting_context.msgs: ordered contextual messages, which may include User and Assistant messages
|
||||
@@ -119,16 +117,29 @@ statement_type:
|
||||
|
||||
时间规则:
|
||||
|
||||
- 仅使用目标文本中明确陈述或可由 `target_message_date` 直接解析的时间信息;不要使用外部知识补时间。
|
||||
- 仅使用目标文本中明确陈述或可由 `dialog_at` / `target_message_date` 直接解析的时间信息;不要使用外部知识补时间。
|
||||
- 优先使用 `dialog_at` 作为“现在”来解释相对时间,例如“昨天”“上周五”“下个月”;只有在 `dialog_at` 缺失时才退回 `target_message_date`。
|
||||
- 如果相对时间可以稳定落到更具体的中文时间表达,就应直接改写进 `statement_text`,而不要保留原始模糊表达。
|
||||
- 可稳定具体化的示例包括(以下示例假设 `dialog_at` 为 2026-04-30,实际应以输入中的 `dialog_at` 为准):
|
||||
- “昨天” -> “2026年4月29日”
|
||||
- “前天晚上” -> “2026年4月28日晚上”
|
||||
- “上周三” -> “2026年4月22日”
|
||||
- “上周” -> “2026年4月20日至2026年4月26日”
|
||||
- “上周末” -> “2026年4月25日至2026年4月26日”
|
||||
- “上个月” -> “2026年3月”
|
||||
- “下周” -> “2026年5月4日至2026年5月10日”
|
||||
- 如果相对时间只能粗粒度定位,保留该粗粒度但仍尽量具体化;例如“去年冬天”可以保留为“去年冬天”,不要强行伪精确到具体日期。
|
||||
- 对开放区间时间表达,也要做相对时间消解并改写进 `statement_text`。
|
||||
- 常见开放过去区间表达包括:`最近`、`近来`、`这段时间`、`这些天`、`截至现在`、`更早之前`。
|
||||
- 常见开放未来区间表达包括:`即将`、`接下来`、`不久后`、`很快`、`未来一段时间`。
|
||||
- 这类表达无法稳定落到封闭日期区间时,可以改写为开放区间表达,例如(以下示例假设 `dialog_at` 为 2026-04-01):
|
||||
- “最近” -> “截至2026年4月1日之前的最近一段时间”
|
||||
- “近来” -> “截至2026年4月1日之前的近来一段时间”
|
||||
- “这段时间” -> “截至2026年4月1日之前的这段时间”
|
||||
- “即将” -> “在2026年4月1日之后即将发生”
|
||||
- “接下来” -> “在2026年4月1日之后接下来的一段时间”
|
||||
- “很快” -> “在2026年4月1日之后不久”
|
||||
- 如果相对时间不能稳定落到具体日期或日期区间,就保留其最小可信粗粒度,但仍尽量做相对时间消解;例如“去年冬天”可改写为“2025年冬天”,而不是保留“去年冬天”。
|
||||
- 对节假日类表达,能稳定映射到具体日期或日期区间时应具体化;例如“五一”通常可改写为具体日期,“清明节”通常也可改写为具体日期或短区间;“春节前后”这类边界不稳的表达仍保留较粗粒度。
|
||||
- `valid_at` 表示陈述开始成立或生效的时间。
|
||||
- `invalid_at` 表示陈述结束或不再成立的时间;如果仍在持续,填 `"NULL"`。
|
||||
- `dialog_at` 表示当前会话时间,每条 statement 都必须原样复制输入中的 `dialog_at`。
|
||||
@@ -168,6 +179,14 @@ User-subject normalization:
|
||||
|
||||
- If the subject of a statement is the user, always use “the user” as the subject in the extracted statement, regardless of whether the context provides the user’s real name, nickname, alias, or other identifier.
|
||||
- This is a hard rule. If a user-subject statement does not use “the user,” treat it as invalid.
|
||||
- Keep “the user” as the main retrieval anchor in English rewrites, including object position when possible.
|
||||
- For English reflexive self-expressions, preserve retrieval consistency without creating unnatural strings. Use these preferred rewrites:
|
||||
- “myself” in ordinary object position -> “the user”
|
||||
- “be myself” -> “be who the user is”
|
||||
- “embrace myself” -> “embrace who the user is”
|
||||
- “accept myself” -> “accept who the user is”
|
||||
- “express myself” -> “express the user’s thoughts” only if needed for grammaticality; otherwise keep the smallest rewrite anchored on “the user”
|
||||
- Do not rewrite fixed self-expressions into forms such as “embrace the user” or “be the user” when a more natural anchored template is available.
|
||||
|
||||
Coreference resolution:
|
||||
|
||||
@@ -202,14 +221,27 @@ Temporal rules:
|
||||
|
||||
- Use only temporal information explicitly stated in the target text or directly resolvable from `dialog_at` / `target_message_date`; do not add dates from external knowledge.
|
||||
- Prefer `dialog_at` as “now” when interpreting relative expressions such as “yesterday,” “last Friday,” or “next month”; only fall back to `target_message_date` when `dialog_at` is unavailable.
|
||||
- If a relative time can be stably grounded to a more concrete Chinese time phrase, rewrite it directly into `statement_text` rather than keeping the vague source phrase.
|
||||
- If a relative time can be stably grounded to a more concrete time expression in the output language, rewrite it directly into `statement_text` rather than keeping the vague source phrase.
|
||||
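- Examples of stable concretization (these examples assume `dialog_at` is 2026-04-30; always compute from the actual input `dialog_at`):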
|
||||
- “yesterday” -> “2026年4月29日”
|
||||
- “the night before last” -> “2026年4月28日晚上”
|
||||
- “last Wednesday” -> “2026年4月22日”
|
||||
- “last month” -> “2026年3月”
|
||||
- “next week” -> “2026年5月4日至2026年5月10日”
|
||||
- If the relative time can only be grounded coarsely, keep that coarse granularity while still making it as concrete as reasonably possible; for example, “last winter” may stay as “去年冬天” instead of being forced into fake exact dates.
|
||||
- “yesterday” -> “April 29, 2026”
|
||||
- “the night before last” -> “the evening of April 28, 2026”
|
||||
- “last Wednesday” -> “April 22, 2026”
|
||||
- “last week” -> “April 20 to April 26, 2026”
|
||||
- “last weekend” -> “April 25 to April 26, 2026”
|
||||
- “last month” -> “March 2026”
|
||||
- “next week” -> “May 4 to May 10, 2026”
|
||||
- Open-interval temporal expressions should also be resolved and rewritten inside `statement_text`.
|
||||
- Common open past-interval expressions include: `recently`, `lately`, `these days`, `over this period`, `as of now`, and `earlier`.
|
||||
- Common open future-interval expressions include: `upcoming`, `coming up`, `soon`, `before long`, and `in the near future`.
|
||||
- When they cannot be stably converted into a closed date range, rewrite them as open intervals, for example (assuming `dialog_at` is 2026-04-01):
|
||||
- `recently` -> `recently before April 1, 2026`
|
||||
- `lately` -> `lately before April 1, 2026`
|
||||
- `these days` -> `during the period leading up to April 1, 2026`
|
||||
- `upcoming` -> `upcoming after April 1, 2026`
|
||||
- `coming up` -> `coming up after April 1, 2026`
|
||||
- `soon` -> `soon after April 1, 2026`
|
||||
- If the relative time cannot be stably grounded to an exact date or date range, keep the smallest trustworthy coarse granularity but still resolve the relative reference as much as possible; for example, “last winter” may become “winter 2025” rather than remaining “last winter”.
|
||||
- For holiday expressions, concretize them when they can be stably mapped to specific dates or short date ranges; for example, Labor Day or Qingming Festival usually can be grounded, while expressions such as “around Spring Festival” should stay at a coarser granularity.
|
||||
- `valid_at` means when the statement became valid or started to hold.
|
||||
- `invalid_at` means when the statement ended or stopped being valid; use `"NULL"` if it is still ongoing.
|
||||
- `dialog_at` is the session timestamp, and every statement must copy the input `dialog_at` verbatim.
|
||||
@@ -237,9 +269,10 @@ temporal_type:
|
||||
Rewrite boundary:
|
||||
|
||||
- Minimal rewriting is allowed only to resolve reference, ellipsis, and temporal ambiguity.
|
||||
- For resolvable relative time expressions, rewrite them into grounded Chinese time phrases directly inside `statement_text`.
|
||||
- For resolvable relative time expressions, rewrite them into grounded time expressions directly inside `statement_text`, using the output language.
|
||||
- Do not keep both the vague source phrase and the grounded phrase together; output only the rewritten concrete form.
|
||||
- Do not fake precision for time expressions that cannot be grounded reliably from `dialog_at`.
|
||||
- In English, you may use a slightly more natural anchored paraphrase for reflexive user-self expressions when a literal replacement would be awkward, as long as the rewritten form still keeps “the user” as the retrieval anchor and does not change the meaning.
|
||||
- Do not introduce unsupported facts, extra inference, or stylistic summarization.
|
||||
{% endif %}
|
||||
|
||||
@@ -329,7 +362,7 @@ Rewrite boundary:
|
||||
"statements": [
|
||||
{
|
||||
"statement_id": "stmt_m3n4o5p6",
|
||||
"statement_text": "用户最近在学Python。",
|
||||
"statement_text": "用户截至2026年4月1日之前的最近一段时间在学Python。",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
@@ -340,7 +373,7 @@ Rewrite boundary:
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_q7r8s9t0",
|
||||
"statement_text": "用户最近每晚都会练一个小时Python。",
|
||||
"statement_text": "用户截至2026年4月1日之前的最近一段时间每晚都会练一个小时Python。",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
@@ -368,17 +401,17 @@ Rewrite boundary:
|
||||
"chunk_id": "chunk_c3d4e5f6",
|
||||
"end_user_id": "eu_12345678",
|
||||
"dialog_at": "2026-04-01T00:00:00Z",
|
||||
"target_content": "这周老师新布置的那两个我觉得有点难,而且我昨晚看了半天还是没太搞明白。要是周末再弄不出来,我可能就得去问助教了。",
|
||||
"target_content": "去年冬天老师布置的那两个项目我一直觉得有点难,而且我昨晚看了半天还是没太搞明白。要是这周末再弄不出来,我可能就得去问助教了。",
|
||||
"target_message_date": "2026-04-01T00:00:00",
|
||||
"supporting_context": {
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "这周老师新布置的那两个我觉得有点难,而且我昨晚看了半天还是没太搞明白。要是周末再弄不出来,我可能就得去问助教了。"
|
||||
"msg": "去年冬天老师布置的那两个项目我一直觉得有点难,而且我昨晚看了半天还是没太搞明白。要是这周末再弄不出来,我可能就得去问助教了。"
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "听起来你卡在老师这周新布置的两个内容上了,如果周末还没进展,再去问助教也可以。"
|
||||
"msg": "听起来你卡在老师去年冬天布置的那两个项目上了,如果这周末还没进展,再去问助教也可以。"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -388,7 +421,7 @@ Rewrite boundary:
|
||||
"statements": [
|
||||
{
|
||||
"statement_id": "stmt_y5z6a7b8",
|
||||
"statement_text": "用户觉得2026年3月30日至2026年4月5日老师新布置的那两个内容有点难。",
|
||||
"statement_text": "用户觉得2025年冬天老师布置的那两个项目有点难。",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": true,
|
||||
@@ -399,7 +432,7 @@ Rewrite boundary:
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_c9d0e1f2",
|
||||
"statement_text": "用户2026年3月31日晚上看了半天那两个内容还是没太搞明白。",
|
||||
"statement_text": "用户2026年3月31日晚上看了半天那两个项目还是没太搞明白。",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
@@ -506,7 +539,7 @@ Example Output: {
|
||||
"statements": [
|
||||
{
|
||||
"statement_id": "stmt_m3n4o5p6",
|
||||
"statement_text": "The user has been learning Python recently.",
|
||||
"statement_text": "The user has been learning Python recently before April 1, 2026.",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
@@ -517,7 +550,7 @@ Example Output: {
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_q7r8s9t0",
|
||||
"statement_text": "The user has recently been practicing Python for an hour every night.",
|
||||
"statement_text": "The user has been practicing Python for an hour every night recently before April 1, 2026.",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
@@ -545,17 +578,17 @@ Example Input: {
|
||||
"chunk_id": "chunk_c3d4e5f6",
|
||||
"end_user_id": "eu_12345678",
|
||||
"dialog_at": "2026-04-01T00:00:00Z",
|
||||
"target_content": "The two things the teacher assigned this week seem hard to me, and even after looking at them for a long time last night I still didn't really understand them. If I still can't finish them by the weekend, I may have to ask the TA.",
|
||||
"target_content": "The two projects the teacher assigned last winter seem difficult to me, and even after looking at them for a long time last night I still didn't really understand them. If I still can't finish them by this weekend, I may have to ask the TA.",
|
||||
"target_message_date": "2026-04-01T00:00:00",
|
||||
"supporting_context": {
|
||||
"msgs": [
|
||||
{
|
||||
"role": "User",
|
||||
"msg": "The two things the teacher assigned this week seem hard to me, and even after looking at them for a long time last night I still didn't really understand them. If I still can't finish them by the weekend, I may have to ask the TA."
|
||||
"msg": "The two projects the teacher assigned last winter seem difficult to me, and even after looking at them for a long time last night I still didn't really understand them. If I still can't finish them by this weekend, I may have to ask the TA."
|
||||
},
|
||||
{
|
||||
"role": "Assistant",
|
||||
"msg": "It sounds like you're stuck on the two things assigned this week, and asking the TA would make sense if there is still no progress by the weekend."
|
||||
"msg": "It sounds like you're stuck on the two projects assigned last winter, and asking the TA would make sense if there is still no progress by this weekend."
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -565,7 +598,7 @@ Example Output: {
|
||||
"statements": [
|
||||
{
|
||||
"statement_id": "stmt_y5z6a7b8",
|
||||
"statement_text": "The user thinks the two items assigned during 2026-03-30 to 2026-04-05 are difficult.",
|
||||
"statement_text": "The user thinks the two projects assigned in winter 2025 are difficult.",
|
||||
"statement_type": "OPINION",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": true,
|
||||
@@ -576,7 +609,7 @@ Example Output: {
|
||||
},
|
||||
{
|
||||
"statement_id": "stmt_c9d0e1f2",
|
||||
"statement_text": "The user spent a long time on the evening of 2026-03-31 looking at those two items but still did not really understand them.",
|
||||
"statement_text": "The user spent a long time on the evening of 2026-03-31 looking at those two projects but still did not really understand them.",
|
||||
"statement_type": "FACT",
|
||||
"temporal_type": "DYNAMIC",
|
||||
"has_emotional_state": false,
|
||||
@@ -609,6 +642,7 @@ Example Output: {
|
||||
- 非用户主体是否尽量写成具体名称;若无法做到,是否已正确标记 `has_unsolved_reference = true`
|
||||
- 如果最终 `statement_text` 已经落到具体实体名,`has_unsolved_reference` 是否已经改为 `false`
|
||||
- 如果 `statement_text` 中出现可由 `dialog_at` 稳定解析的相对时间,是否已经改写成更具体的日期、月份或日期区间表达
|
||||
- 如果 `statement_text` 中出现“最近”“近来”“即将”“接下来”“很快”这类开放区间时间词,是否已经改写为带 `dialog_at` 锚点的开放区间表达
|
||||
- statement_type 是否合法,且没有把一般事实机械标成 `OPINION`
|
||||
- `has_emotional_state` 是否仅用于判断是否存在情感状态,而没有被当作情绪分类字段
|
||||
- temporal_type 是否与 valid_at / invalid_at 一致
|
||||
@@ -620,6 +654,7 @@ Example Output: {
|
||||
- Render non-user subjects as concrete names when possible; otherwise mark `has_unsolved_reference = true`
|
||||
- If the final `statement_text` already resolves the reference to a concrete named entity, ensure `has_unsolved_reference = false`
|
||||
- If `statement_text` contains relative time expressions that can be stably resolved from `dialog_at`, rewrite them into more concrete date, month, or date-range expressions
|
||||
- If `statement_text` contains open-interval temporal words such as `recently`, `lately`, `upcoming`, `coming up`, or `soon`, rewrite them into open-interval expressions anchored on `dialog_at`
|
||||
- Ensure statement_type is valid and do not mechanically label ordinary facts as `OPINION`
|
||||
- Ensure `has_emotional_state` is used only for emotional-state presence detection, not emotion classification
|
||||
- Ensure temporal_type is consistent with valid_at and invalid_at
|
||||
|
||||
@@ -10,8 +10,9 @@ Extract entities and knowledge triplets from the given statement.
|
||||
- 非用户自指代词或指示表达,如“他”“她”“它”“这个”“那个”“这家”“那家”“这里”“那里”,如果能从 `supporting_context` 中稳定解析出具体指代,则必须替换为具体指代实体名。
|
||||
- 如果上述代词或指示表达不能稳定解析,则整条跳过。
|
||||
- 命名关系中新出现的称呼、别名、昵称、产品名保持原样,不做替换。
|
||||
- `description` 使用中文。
|
||||
- `type`、`predicate`、`type_description`、`predicate_description` 一律使用中文。
|
||||
- `description` 必须使用中文。
|
||||
- `type`、`predicate` 必须使用上方预定义的中文标签。
|
||||
- 除 `type`、`predicate` 外,其余输出文本字段都必须与输入语言一致;因此在中文输入下,`type_description`、`predicate_description` 也必须使用中文。
|
||||
- 每个 `triplet` 都必须携带 `valid_at` 和 `invalid_at`,并直接复制输入中的同名字段,不要自行改写或推断新的时间边界。
|
||||
{% else %}
|
||||
Important:
|
||||
@@ -22,7 +23,8 @@ Extract entities and knowledge triplets from the given statement.
|
||||
- If such references cannot be resolved stably, skip the entire statement.
|
||||
- Newly introduced names in naming or alias expressions must stay in their original form.
|
||||
- Generate `description` in English.
|
||||
- Always generate `type`, `predicate`, `type_description`, and `predicate_description` in Chinese.
|
||||
- Always generate `type` and `predicate` using the predefined Chinese labels above.
|
||||
- Except for `type` and `predicate`, all other output text fields must match the input language; therefore under English input, `type_description` and `predicate_description` must be written in English.
|
||||
- Every `triplet` MUST include `valid_at` and `invalid_at`, copied directly from the input fields with the same names; do not rewrite or infer new temporal bounds.
|
||||
{% endif %}
|
||||
|
||||
@@ -39,7 +41,7 @@ Extract entities and knowledge triplets from the given statement.
|
||||
- `supporting_context.msgs[].role`: `User` / `Assistant`
|
||||
- `supporting_context.msgs[].msg`: 消息文本
|
||||
- `speaker`: `user` / `assistant`
|
||||
- `dialog_at`: 会话时间,ISO 8601 时间点;可用于在 `description` 中标注实体的时间背景
|
||||
- `dialog_at`: 会话时间,ISO 8601 时间点
|
||||
- `valid_at`: ISO 8601 时间点,或 `NULL`
|
||||
- `invalid_at`: ISO 8601 时间点,或 `NULL`
|
||||
- `has_unsolved_reference`: 布尔值
|
||||
@@ -54,7 +56,7 @@ Extract entities and knowledge triplets from the given statement.
|
||||
- `supporting_context.msgs[].role`: `User` / `Assistant`
|
||||
- `supporting_context.msgs[].msg`: message text
|
||||
- `speaker`: `user` / `assistant`
|
||||
- `dialog_at`: session time as an ISO 8601 timestamp; may be used to anchor temporal context in entity `description`
|
||||
- `dialog_at`: session time as an ISO 8601 timestamp
|
||||
- `valid_at`: ISO 8601 timestamp or `NULL`
|
||||
- `invalid_at`: ISO 8601 timestamp or `NULL`
|
||||
- `has_unsolved_reference`: boolean
|
||||
@@ -137,15 +139,15 @@ Primary statement to analyze:
|
||||
===预定义实体类型===
|
||||
只能使用以下中文实体类型。如果没有完全匹配的类型,请选择最接近的一项,不要发明新类型。
|
||||
|
||||
- `人物`
|
||||
- definition: 可稳定指向、可被当作具体个体区分和归并的个人实体。
|
||||
- positive_examples: `用户`、`张三`、`王教授`、`小林`
|
||||
- negative_examples: `老师`、`导师`、`学生`、`他们`
|
||||
- notes: 强调“这个人是谁”,不强调他承担的社会身份;用户自指统一归为 `用户`。
|
||||
- `生命体`
|
||||
- definition: 可稳定指向、可被当作具体个体区分和归并的生命体个体。
|
||||
- positive_examples: `用户`、`张三`、`王教授`、`小林`、`用户的小狗`、`我的猫`
|
||||
- negative_examples: `老师`、`导师`、`学生`、`他们`、`一只狗`、`狗这种动物`、`一个朋友`
|
||||
- notes: 强调“这个生命体是谁或是哪一个”,不强调社会身份或泛化类别;用户自指统一归为 `用户`;有稳定所有格指向的非人类生命体可以抽取,如 `用户的小狗`。
|
||||
|
||||
- `组织`
|
||||
- definition: 公司、机构、学校、实验室、团队、社群等组织性主体。
|
||||
- positive_examples: `腾讯`、`清华大学`、`机器人公司`、`实验室`
|
||||
- positive_examples: `腾讯`、`清华大学`、`实验室`、`研究所`
|
||||
- negative_examples: `人事部`、`教研组`、`办公室`
|
||||
- notes: 如果表达的是组织内部单元,当前一级仍优先并入 `组织`,除非后续单独扩展子类。
|
||||
|
||||
@@ -156,10 +158,10 @@ Primary statement to analyze:
|
||||
- notes: 只用于边界相对稳定的人群;边界不稳或 unresolved 的表达不要归入 `群体`。
|
||||
|
||||
- `角色职业`
|
||||
- definition: 人物承担的社会角色、功能身份或职业身份。
|
||||
- definition: 人承担的社会角色、功能身份或职业身份。
|
||||
- positive_examples: `导师`、`老师`、`学生`、`医生`、`程序员`
|
||||
- negative_examples: `张三`、`王教授`、`我的朋友`
|
||||
- notes: 强调“这个人是什么身份”,不强调“这个人是谁”;如果文本落到具体个人,优先用 `人物`。
|
||||
- notes: 强调“这个人是什么身份”,不强调“这个人是谁”;如果文本落到具体个体,优先用 `生命体`。
|
||||
|
||||
- `地点设施`
|
||||
- definition: 具有地理意义或功能性空间意义的位置与场所。
|
||||
@@ -218,46 +220,45 @@ Primary statement to analyze:
|
||||
实体类型总规则:
|
||||
|
||||
- unresolved 或边界不稳的表达,不因“看起来像名词”就创建实体。
|
||||
- 情绪、心理状态、金额、数量、普通时间、一次性动作短语,默认不作为独立实体类型抽取。
|
||||
- 抽象命题片段、泛化结果、价值判断,默认不创建实体;如有保留价值且适合作为实体的稳定描述,可写入相关高价值实体的 `description`。
|
||||
|
||||
实体类型选择原则:
|
||||
|
||||
- 优先保留对用户画像、偏好、长期身份、稳定关系或持续兴趣有记忆价值的实体类型。
|
||||
- 对于“努力”“回报”“意义”“成功”这类泛化概念、抽象命题片段或价值判断,默认不要仅因句中出现就创建实体。
|
||||
- `群体` 只用于边界相对稳定、可被当作整体引用的人群;像“他们”“一些人”“一个朋友”这类边界不稳或 unresolved 的表达不要归入 `群体`。
|
||||
- `偏好习惯` 只保留稳定偏好、重复习惯或长期行为倾向;`具体目标` 只保留具体、明确、可验证的目标结果。都不允许吸纳抽象愿望、泛因果终点或情绪状态。
|
||||
- 当前阶段不抽取情绪状态实体;像“紧张”“开心”“难过”“焦虑”“放松”这类情绪或心理状态,不要归入 `知识能力`、`偏好习惯`、`具体目标` 或其他现有类型。
|
||||
- 情绪、心理状态、金额、数量、普通时间、一次性动作短语,默认不作为独立实体抽取。
|
||||
- 抽象命题片段、泛化结果、价值判断,默认不创建实体;如有保留价值,应写入相关高价值实体的 `description`。
|
||||
- 只有当某个名字、概念、对象、群体或地点在当前陈述中承担明确语义角色,或是理解有效关系所必需时,才创建实体。
|
||||
- 如果陈述里有值得保留的实体信息,但没有有效关系,可以只返回 `entities`,并把 `triplets` 设为 `[]`。
|
||||
|
||||
===关系本体大类===
|
||||
以下大类是当前 `predicate` 本体树的第一层,用于帮助理解和约束后面的具体关系白名单。输出具体 `predicate` 时仍然必须使用后文列出的细关系,而不是直接输出这些大类名称。
|
||||
{% if language == "zh" %}
|
||||
当前每个关系大类只保留一个 canonical `covered_predicates` 值;一旦判断某条关系属于该大类,输出时只能使用该唯一 predicate,不要再输出同类历史变体。
|
||||
{% else %}
|
||||
Each relation class now keeps only one canonical `covered_predicates` value. Once you decide a relation belongs to that class, you must output that single predicate only and never use legacy sibling variants.
|
||||
{% endif %}
|
||||
|
||||
- `命名关系`
|
||||
- definition: 表达实体名称、别名、称呼之间的对应或使用关系。
|
||||
- covered_predicates: `别名属于`、`使用称呼`
|
||||
- positive_examples: `山哥 -> 别名属于 -> 用户`、`我的朋友 -> 使用称呼 -> 山哥`
|
||||
- negative_examples: `导师 -> 别名属于 -> 用户`、`好人 -> 使用称呼 -> 用户`
|
||||
- definition: 表达实体名称、别名、称呼之间的对应关系。
|
||||
- covered_predicates: `别名属于`
|
||||
- positive_examples: `山哥 -> 别名属于 -> 用户`、`多多 -> 别名属于 -> 用户的小狗`
|
||||
- negative_examples: `导师 -> 别名属于 -> 用户`、`好人 -> 别名属于 -> 用户`
|
||||
- notes: 只处理名字性表达,不处理角色、职业、评价词。
|
||||
- status: `enabled`
|
||||
|
||||
- `归属身份关系`
|
||||
- definition: 表达主体所属的类别、身份、职业、角色,或其与组织、群体、集合之间的归属关系。
|
||||
- covered_predicates: `属于类型`、`担任角色`、`从事职业`、`成员属于`、`任职于`
|
||||
- positive_examples: `王教授 -> 担任角色 -> 导师`、`张三 -> 从事职业 -> 程序员`、`张三 -> 成员属于 -> 实验室成员`、`张明 -> 任职于 -> 腾讯`
|
||||
- negative_examples: `张三 -> 担任角色 -> 山哥`、`他们 -> 成员属于 -> 学校`、`用户 -> 任职于 -> 明天的面试`、`用户 -> 从事职业 -> 紧张`
|
||||
- notes: 这是一个上位父类,用于统一承接“是什么身份”与“归属哪里”两类关系。第一层不再强行区分“身份类归属”和“组织类归属”,真正的区分在子类 predicate 层完成。
|
||||
- covered_predicates: `属于类型`
|
||||
- positive_examples: `王教授 -> 属于类型 -> 导师`、`张三 -> 属于类型 -> 程序员`、`张三 -> 属于类型 -> 实验室成员`、`张明 -> 属于类型 -> 腾讯`
|
||||
- negative_examples: `张三 -> 属于类型 -> 山哥`、`他们 -> 属于类型 -> 学校`、`用户 -> 属于类型 -> 明天的面试`、`用户 -> 属于类型 -> 紧张`
|
||||
- notes: 当前统一使用 `属于类型` 作为这一大类的唯一输出 predicate。
|
||||
- status: `enabled`
|
||||
|
||||
- `空间位置关系`
|
||||
- definition: 表达实体与地点、场所、空间位置之间的稳定位置关系。
|
||||
- covered_predicates: `位于`、`拥有位置`、`居住于`
|
||||
- positive_examples: `用户 -> 居住于 -> 巴黎`、`办公室 -> 位于 -> 北京`
|
||||
- covered_predicates: `位于`
|
||||
- positive_examples: `用户 -> 位于 -> 巴黎`、`办公室 -> 位于 -> 北京`
|
||||
- negative_examples: `用户 -> 位于 -> 明天下午三点`、`这里 -> 位于 -> 学校`
|
||||
- notes: 普通时间表达和未解析位置指代不进入此类。
|
||||
- status: `enabled`
|
||||
|
||||
- `前往到访关系`
|
||||
- definition: 表达主体前往、到访某地点、场所、组织、课程或活动对象的关系。
|
||||
- definition: 表达主体前往、到访某地点、场所、组织或活动对象的关系。
|
||||
- covered_predicates: `前往`
|
||||
- positive_examples: `用户 -> 前往 -> 图书馆`、`用户 -> 前往 -> 公司`
|
||||
- negative_examples: `用户 -> 前往 -> 明天下午三点`、`用户 -> 前往 -> 复习微积分任务`
|
||||
@@ -266,50 +267,50 @@ Primary statement to analyze:
|
||||
|
||||
- `组成包含关系`
|
||||
- definition: 表达部分与整体、包含与被包含之间的结构关系。
|
||||
- covered_predicates: `组成部分`、`包含部分`
|
||||
- positive_examples: `教研组 -> 组成部分 -> 学院`、`学院 -> 包含部分 -> 教研组`
|
||||
- negative_examples: `用户 -> 组成部分 -> 图书馆`、`微积分 -> 包含部分 -> 用户`
|
||||
- notes: 只用于结构性组成关系,不用于临时搭配或抽象联系。
|
||||
- covered_predicates: `组成部分`
|
||||
- positive_examples: `教研组 -> 组成部分 -> 学院`
|
||||
- negative_examples: `用户 -> 组成部分 -> 图书馆`、`微积分 -> 组成部分 -> 用户`
|
||||
- notes: 当前统一采用 part-to-whole 方向,不用于临时搭配或抽象联系。
|
||||
- status: `enabled`
|
||||
|
||||
- `拥有持有关系`
|
||||
- definition: 表达主体拥有、持有、配有某对象、账号、联系方式或标识的关系。
|
||||
- covered_predicates: `拥有`、`拥有账号`、`拥有联系方式`、`标识为`
|
||||
- positive_examples: `用户 -> 拥有账号 -> GitHub账号`、`用户 -> 拥有联系方式 -> 邮箱`、`用户 -> 标识为 -> 学号`
|
||||
- covered_predicates: `拥有`
|
||||
- positive_examples: `用户 -> 拥有 -> GitHub账号`、`用户 -> 拥有 -> 邮箱`、`GitHub账号 -> 拥有 -> chen4`、`用户 -> 拥有 -> 用户的小狗`
|
||||
- negative_examples: `用户 -> 拥有 -> 紧张`、`努力 -> 拥有 -> 回报`
|
||||
- notes: 不用于抽象命题、情绪状态或口号式表达。
|
||||
- status: `enabled`
|
||||
|
||||
- `使用采用关系`
|
||||
- definition: 表达主体使用、采用某工具、产品、平台、语言或资源的关系。
|
||||
- covered_predicates: `使用`、`使用语言`
|
||||
- positive_examples: `用户 -> 使用 -> 微信`、`用户 -> 使用语言 -> 中文`
|
||||
- negative_examples: `用户 -> 使用 -> 成功`、`用户 -> 使用语言 -> 紧张`
|
||||
- definition: 表达主体使用、采用某工具、平台、语言或资源的关系。
|
||||
- covered_predicates: `使用`
|
||||
- positive_examples: `用户 -> 使用 -> 微信`、`用户 -> 使用 -> 中文`
|
||||
- negative_examples: `用户 -> 使用 -> 成功`、`用户 -> 使用 -> 紧张`
|
||||
- notes: 以后若扩展“采用方法”,也可挂在本大类下。
|
||||
- status: `enabled`
|
||||
|
||||
- `创建生产关系`
|
||||
- definition: 表达主体创建、撰写、生产某对象或结果的关系。
|
||||
- covered_predicates: `创建了`、`由…创建`、`撰写了`
|
||||
- positive_examples: `用户 -> 撰写了 -> 简历`、`简历 -> 由…创建 -> 用户`
|
||||
- negative_examples: `用户 -> 创建了 -> 明天下午三点`、`努力 -> 由…创建 -> 用户`
|
||||
- notes: 只用于明确的生产、创作、撰写关系。
|
||||
- covered_predicates: `创建了`
|
||||
- positive_examples: `用户 -> 创建了 -> 简历`
|
||||
- negative_examples: `用户 -> 创建了 -> 明天下午三点`、`努力 -> 创建了 -> 用户`
|
||||
- notes: 当前统一采用“创建者 -> 创建了 -> 被创建对象”的方向。
|
||||
- status: `enabled`
|
||||
|
||||
- `知识学习关系`
|
||||
- definition: 表达主体与知识、技能、学科、语言等知识能力对象之间的认知、学习或兴趣关系。
|
||||
- covered_predicates: `了解`、`学习`、`感兴趣于`
|
||||
- positive_examples: `用户 -> 学习 -> 微积分`、`用户 -> 了解 -> 机器学习`、`用户 -> 感兴趣于 -> 心理学`
|
||||
- negative_examples: `用户 -> 学习 -> 紧张`、`用户 -> 感兴趣于 -> 成功`
|
||||
- covered_predicates: `了解`
|
||||
- positive_examples: `用户 -> 了解 -> 微积分`、`用户 -> 了解 -> 机器学习`、`用户 -> 了解 -> 心理学`
|
||||
- negative_examples: `用户 -> 了解 -> 紧张`、`用户 -> 了解 -> 成功`
|
||||
- notes: 关系对象应是 `知识能力` 类,而不是情绪、价值判断或抽象结果。
|
||||
- status: `enabled`
|
||||
|
||||
- `偏好目标关系`
|
||||
- definition: 表达主体对对象的稳定偏好、厌恶,或对具体明确目标的指向关系。
|
||||
- covered_predicates: `偏好`、`不喜欢`、`想要`
|
||||
- positive_examples: `用户 -> 偏好 -> 安静环境`、`用户 -> 不喜欢 -> 辛辣食物`、`用户 -> 想要 -> 通过雅思`
|
||||
- negative_examples: `用户 -> 想要 -> 成功`、`用户 -> 偏好 -> 紧张`、`用户 -> 不喜欢 -> 努力就会有回报`
|
||||
- notes: 这是高风险大类;`想要` 只用于具体、明确、用户特异的目标,不用于抽象愿望。
|
||||
- covered_predicates: `偏好`
|
||||
- positive_examples: `用户 -> 偏好 -> 安静环境`、`用户 -> 偏好 -> 辛辣食物`、`用户 -> 偏好 -> 通过雅思`
|
||||
- negative_examples: `用户 -> 偏好 -> 成功`、`用户 -> 偏好 -> 紧张`、`用户 -> 偏好 -> 努力就会有回报`
|
||||
- notes: 当前统一使用 `偏好`;只有对象具体、明确且与用户稳定相关时才抽取。
|
||||
- status: `enabled`
|
||||
|
||||
- `职责责任关系`
|
||||
@@ -330,9 +331,9 @@ Primary statement to analyze:
|
||||
|
||||
- `弱关联关系`
|
||||
- definition: 表达两个实体之间存在明确、稳定、值得保留,但当前缺少更精确谓词可用的弱关联关系。
|
||||
- covered_predicates: `提到`、`关联于`、`相关于`
|
||||
- positive_examples: `项目 -> 关联于 -> 实验室`、`账号 -> 相关于 -> 平台`、`文档 -> 提到 -> 张三`
|
||||
- negative_examples: `努力 -> 相关于 -> 回报`、`用户 -> 提到 -> 紧张`、`成功 -> 关联于 -> 意义`
|
||||
- covered_predicates: `关联于`
|
||||
- positive_examples: `项目 -> 关联于 -> 实验室`、`账号 -> 关联于 -> 平台`、`文档 -> 关联于 -> 张三`
|
||||
- negative_examples: `努力 -> 关联于 -> 回报`、`用户 -> 关联于 -> 紧张`、`成功 -> 关联于 -> 意义`
|
||||
- notes: 受限大类;不能作为失败兜底关系,不能用来连接抽象概念、口号式表达、情绪状态或无法成立的关系。
|
||||
- status: `restricted`
|
||||
|
||||
@@ -340,38 +341,18 @@ Primary statement to analyze:
|
||||
只能使用以下中文关系类型。如果没有完全匹配的关系,请选择最接近的一项,不要发明新关系。
|
||||
|
||||
- `别名属于`: 别名指向其对应的规范实体
|
||||
- `使用称呼`: 主体使用某个名字来称呼另一实体
|
||||
- `属于类型`: 实体属于某种类别
|
||||
- `属于类型`: 实体属于某种类别、身份、职业、角色或归属对象
|
||||
- `位于`: 实体位于某地点、场所或空间位置
|
||||
- `前往`: 主体前往某个地点、场所、组织或活动对象
|
||||
- `组成部分`: 实体是另一实体的组成部分
|
||||
- `包含部分`: 实体包含另一实体作为组成部分
|
||||
- `位于`: 实体位于某地点
|
||||
- `拥有位置`: 实体具有相关位置
|
||||
- `前往`: 主体前往某个地点、场所、组织、课程或活动
|
||||
- `居住于`: 人物居住在某地点
|
||||
- `任职于`: 主体在某组织中工作或任职
|
||||
- `担任角色`: 主体承担某个角色
|
||||
- `从事职业`: 主体从事某种职业
|
||||
- `关联于`: 两个实体存在明确关联
|
||||
- `成员属于`: 主体是某组织或群体的成员
|
||||
- `拥有`: 主体拥有某对象、资源或资产
|
||||
- `使用`: 主体使用某工具、产品或服务
|
||||
- `创建了`: 主体创建了某对象、内容或成果
|
||||
- `由…创建`: 实体由某主体创建
|
||||
- `撰写了`: 主体撰写某文档或作品
|
||||
- `提到`: 主体或文本提到另一实体
|
||||
- `了解`: 主体了解某知识主题
|
||||
- `学习`: 主体正在学习某知识主题或技能
|
||||
- `感兴趣于`: 主体对某主题感兴趣
|
||||
- `偏好`: 主体偏好某对象、方式或主题
|
||||
- `不喜欢`: 主体不喜欢某对象、方式或主题
|
||||
- `想要`: 主体想获得、达成或拥有具体、明确、用户特异且值得保留的对象或目标,不用于抽象结果、泛化愿望或口号式表达
|
||||
- `负责`: 主体负责某项工作、职责或领域
|
||||
- `拥有`: 主体拥有、持有或配有某对象
|
||||
- `使用`: 主体使用、采用某工具、平台、语言或资源
|
||||
- `创建了`: 主体创建、撰写或生产某对象
|
||||
- `了解`: 主体了解、学习或持续关注某知识主题、技能、学科或语言
|
||||
- `偏好`: 主体对某对象具有稳定偏好、厌恶或具体明确目标倾向
|
||||
- `负责`: 主体负责某项工作、职责、事务或领域
|
||||
- `沟通于`: 两个实体之间发生沟通或交流
|
||||
- `拥有联系方式`: 实体具有某联系方式
|
||||
- `拥有账号`: 实体具有某账号
|
||||
- `标识为`: 实体由某标识符标识
|
||||
- `使用语言`: 主体使用某语言
|
||||
- `相关于`: 当存在明确、稳定且具有记忆价值的联系,但无更精确关系时使用的弱关系;不得用于泛化概念、抽象命题片段、口号式表达或仅为补全结构的联系
|
||||
- `关联于`: 当存在明确、稳定且具有记忆价值的联系,但无更精确关系时使用的弱关系;不得用于泛化概念、抽象命题片段、口号式表达或仅为补全结构的联系
|
||||
|
||||
===Extraction Order===
|
||||
{% if language == "zh" %}
|
||||
@@ -383,20 +364,15 @@ Primary statement to analyze:
|
||||
3. 识别 `statement_text` 中值得抽取的稳定实体。
|
||||
4. 判断这些实体之间是否存在可由预定义关系类型表达的有效关系。
|
||||
5. 最后补充实体字段和关系字段。
|
||||
|
||||
不要让附加字段主导整个抽取过程。
|
||||
{% else %}
|
||||
Follow this order:
|
||||
|
||||
0. First check `has_unsolved_reference`; if it is `true`, immediately return the empty result.
|
||||
1. Resolve references first: normalize user self-reference to `用户`; replace other stably resolvable pronouns or demonstratives with their resolved entity names.
|
||||
2. If unresolved pronouns, demonstratives, or omitted subjects still remain, immediately return the empty result.
|
||||
3. Identify stable entities worth extracting from `statement_text`.
|
||||
4. Determine whether any valid relations between those entities can be expressed using the predefined Chinese predicates.
|
||||
5. Finally fill auxiliary entity and predicate fields.
|
||||
|
||||
Do not let auxiliary fields drive the extraction process.
|
||||
{% endif %}
|
||||
{% else %}
|
||||
Follow this order:
|
||||
0. First check `has_unsolved_reference`; if it is `true`, immediately return the empty result.
|
||||
1. Resolve references first: normalize user self-reference to `用户`; replace other stably resolvable pronouns or demonstratives with their resolved entity names.
|
||||
2. If unresolved pronouns, demonstratives, or omitted subjects still remain, immediately return the empty result.
|
||||
3. Identify stable entities worth extracting from `statement_text`.
|
||||
4. Determine whether any valid relations between those entities can be expressed using the predefined Chinese predicates.
|
||||
5. Finally fill auxiliary entity and predicate fields.
|
||||
{% endif %}
|
||||
|
||||
===Guidelines===
|
||||
|
||||
@@ -419,48 +395,40 @@ Do not let auxiliary fields drive the extraction process.
|
||||
**Entity Extraction:**
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 只有当某个名字、概念、对象、群体或地点在当前陈述中承担明确语义角色,或是理解有效关系所必需时,才创建实体。
|
||||
- 不要因为表面上出现了名词、修饰词或短语,就机械地创建实体。
|
||||
- 不要把完整命题、因果链、价值判断或口号式表达拆成多个低价值实体;例如“努力就会有回报”默认不应抽取出“努力”或“回报”作为实体。
|
||||
- 普通时间表达默认不抽取为实体,包括日期、时刻、明天、下周、今晚八点等。
|
||||
- 一次性动作短语默认不抽取为实体,例如“复习微积分”“去图书馆学习”“参观卢浮宫”。
|
||||
- 不要为了表达一句带时间或地点的行动,而额外创造“任务”“计划”“事件”实体。
|
||||
- 但如果动作明确把主体和某个稳定实体连接起来,可以保留该稳定实体,并抽取轻关系。例如“我去图书馆”“我去公司开会”“我去上课”“我去看演唱会”可以抽取 `前往`。
|
||||
- 当句子只是在讨论一般道理、抽象规律、空泛结果或非个体化概念,而这些概念本身不构成可复用记忆时,不要创建实体。
|
||||
- 如果句子表达的是用户的观点、信念、判断、愿望或目标倾向,但其中抽象对象不值得作为独立实体保留,则只保留相关高价值实体,不要再创建这些低价值对象实体;只有当未抽取内容适合作为该实体的稳定描述时,才写入相关实体的 `description`。例如“用户认为努力就会有回报”可只保留 `用户`,并在 `description` 中体现这一较稳定认知倾向。
|
||||
- 对于未抽取的抽象实体、抽象命题片段或泛化结果,不要默认全部写入 `description`;只有当它们适合作为该实体的稳定描述、且对后续区分或理解该实体有帮助时,才写入 `description`。
|
||||
- 当前阶段同样不要把情绪或心理状态抽成实体;如果句子里出现“紧张”“开心”“难过”“焦虑”“放松”等,只有在它们能被稳定概括为较持久的认知或态度时,才可间接体现在相关高价值实体的 `description` 中;短期情绪状态本身不要写入 `description`。
|
||||
- 如果陈述里有值得保留的实体信息,但没有有效关系,可以只返回 `entities`,并把 `triplets` 设为 `[]`。
|
||||
- `name` 默认保持原文中的表面形式,但用户自指必须写成 `用户`,可稳定解析的其他代词必须替换为具体指代实体名。
|
||||
- 如果句子表达的是用户的观点、信念、判断、愿望或目标倾向,但其中抽象对象不值得作为独立实体保留,则只保留相关高价值实体,不要再创建这些低价值对象实体;只有当未抽取内容适合作为该实体的稳定描述时,才写入相关实体的 `description`。
|
||||
- 当前阶段不要把情绪或心理状态抽成实体;像“紧张”“开心”“难过”“焦虑”“放松”等不应映射到 `知识能力`、`偏好习惯`、`具体目标` 或其他近似类型。
|
||||
- 有稳定所有格指向的具体生命体可以作为实体抽取,例如 `用户的小狗`、`张三的猫`;这类实体的名字应保留完整所有格短语,不要简化成 `小狗` 或错误归并到所有者。
|
||||
- 泛化或未稳定指向的生命体表达不要抽取,例如 `一只狗`、`狗这种动物`、`某个朋友`。
|
||||
- `description` 必须使用中文。
|
||||
- `type` 和 `type_description` 必须使用上方预定义的中文标签与中文定义。
|
||||
- `type` 必须使用上方预定义的中文标签;`type_description` 必须直接复用对应 `type` 的预定义中文定义。
|
||||
{% else %}
|
||||
- Extract entities only when they play a clear semantic role in the statement or are necessary for understanding a valid relation.
|
||||
- Do not mechanically create entities for every noun, modifier, or surface mention.
|
||||
- Do not split generic propositions, causal slogans, or value judgments into low-value abstract entities. For example, "effort brings reward" should not create standalone entities for "effort" or "reward" by default.
|
||||
- Do not extract ordinary time expressions as entities, including dates, timestamps, "tomorrow", "next week", or "8 PM tonight".
|
||||
- Do not extract one-off action phrases as entities, such as "review calculus", "study in the library", or "visit the Louvre".
|
||||
- Do not create extra "task", "plan", or "event" entities just to represent an action with time or location modifiers.
|
||||
- But if an action clearly connects the subject to a stable entity, keep that stable entity and use a light relation. For example, statements like "I go to the library", "I go to the office", "I go to class", or "I go to a concert" can use `前往`.
|
||||
- If the sentence is only about a generic principle, abstract outcome, or non-personalized concept that is not worth remembering on its own, do not create an entity for it.
|
||||
- If a statement expresses the user's belief, judgment, opinion, wish, or goal tendency but the referenced abstract concepts are not worth keeping as standalone entities, keep only the relevant high-value entities and do not create those low-value concept entities; write the unextracted content into an entity `description` only when it is suitable as a stable description of that entity. For example, "the user believes effort brings reward" may keep only `用户` and reflect that relatively stable belief in `description`.
|
||||
- For abstract entities, proposition fragments, or generic outcomes that are not extracted, do not automatically write them into `description`; only do so when they are suitable as a stable description of the retained entity and help identify or understand it.
|
||||
- In the current stage, do not extract emotional or psychological states as entities. States such as nervousness, happiness, sadness, anxiety, or relief should not be mapped to `知识能力`, `偏好习惯目标`, or any other approximate type, and short-lived emotional states should not be written into `description`.
|
||||
- If the statement contains entity-worthy content but no valid relation, it is acceptable to return `entities` with `triplets: []`.
|
||||
- Keep `name` in its original surface form by default, but write user self-reference as `用户` and replace other stably resolvable references with their resolved entity names.
|
||||
- If a statement expresses the user's belief, judgment, opinion, wish, or goal tendency but the referenced abstract concepts are not worth keeping as standalone entities, keep only the relevant high-value entities and do not create those low-value concept entities; write the unextracted content into an entity `description` only when it is suitable as a stable description of that entity.
|
||||
- In the current stage, do not extract emotional or psychological states as entities. States such as nervousness, happiness, sadness, anxiety, or relief should not be mapped to `知识能力`, `偏好习惯`, `具体目标`, or any other approximate type.
|
||||
- A concrete living being with a stable possessive reference may be extracted as an entity, such as `the user's dog` or `Zhang San's cat`; keep the full possessive phrase as the entity name, and do not collapse it to `dog` or merge it into the owner.
|
||||
- Do not extract generic or weakly resolved living-being mentions, such as `a dog`, `dogs as a species`, or `some friend`.
|
||||
- `description` must be in English.
|
||||
- `type` and `type_description` must use the predefined Chinese labels and Chinese definitions above.
|
||||
- `type` must use the predefined Chinese label above, while `type_description` must explain that predefined type in English.
|
||||
{% endif %}
|
||||
|
||||
**Semantic Memory (`is_explicit_memory`):**
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 只有当实体明显属于语义知识记忆中的抽象知识对象时,才设为 `true`,例如概念、定义、理论、方法以及 `知识能力` 中的知识类对象。
|
||||
- 对人、组织、地点、具体物体以及大多数实例级实体,一律设为 `false`。
|
||||
- 对生命体、组织、地点、具体物体以及大多数实例级实体,一律设为 `false`。
|
||||
- 除非非常明确,否则默认设为 `false`。
|
||||
{% else %}
|
||||
- Use `true` only for abstract knowledge-oriented entities that belong in semantic knowledge memory, such as concepts, definitions, theories, methods, and knowledge-oriented members of `知识能力`.
|
||||
- Use `false` for people, organizations, locations, concrete objects, and most instance-level entities.
|
||||
- Use `false` for living beings, organizations, locations, concrete objects, and most instance-level entities.
|
||||
- Default to `false` unless the entity is clearly an abstract knowledge concept.
|
||||
{% endif %}
|
||||
|
||||
@@ -471,27 +439,19 @@ Do not let auxiliary fields drive the extraction process.
|
||||
- 优先描述实体在当前陈述和必要上下文中的身份、作用或关系。
|
||||
- `description` 只保留适合长期附着在该实体上的描述,例如稳定身份、稳定关系、长期偏好/兴趣/习惯、较稳定认知倾向或可用于区分实体的持久特征。
|
||||
- 不要把短期状态、一次性事件、临时计划、当前情绪、具体时间锚点,或只在当前句子里短暂成立的信息写进 `description`。
|
||||
- 但如果第一步已经把相对时间稳定改写成具体日期、月份或日期区间,且这段具体时间对识别当前实体有帮助,可以在 `description` 中沿用这段已经出现在 `statement_text` 里的具体时间表达。
|
||||
- triplet 这一步不要自己新增时间推理;只允许复用 `statement_text` 中已经具体化的时间表述,不要把“上周三”“上个月”再次自行展开。
|
||||
- 如果实体应保留,但当前 statement 中没有适合长期附着在该实体上的稳定描述,则 `description` 允许为空字符串 `""`;不要为了填充 `description` 而写入短期状态或临时信息。
|
||||
- 避免使用“陈述中提到的人物”“陈述中提到的组织”“陈述中提到的物品”这类低信息量模板。
|
||||
- 不要补充识别实体所不需要的外部知识。
|
||||
- 如果实体应保留,但当前 statement 中没有适合长期附着在该实体上的稳定描述,则 `description` 允许为空字符串 `""`。
|
||||
{% else %}
|
||||
- `description` should be short, context-grounded, and discriminative.
|
||||
- Prefer describing the entity's role, identity, or relation in the current statement and necessary supporting context.
|
||||
- `description` should keep only information suitable to remain attached to the entity over time, such as stable identity, stable relations, long-term preferences/interests/habits, relatively stable beliefs, or persistent distinguishing traits.
|
||||
- Do not put short-lived states, one-off events, temporary plans, current emotions, concrete time anchors, or information that only briefly holds in the current sentence into `description`.
|
||||
- But if step 1 has already rewritten a relative time into a concrete date, month, or date range, and that concrete time phrase helps identify the current entity, you may reuse that already-grounded phrase in `description`.
|
||||
- Do not perform new temporal inference in the triplet step; only reuse time wording that is already concretized in `statement_text`, and do not independently expand phrases like "last Wednesday" or "last month" again here.
|
||||
- If an entity should be retained but the current statement does not provide any suitable stable description for it, `description` may be the empty string `""`; do not fill it with short-lived states or temporary information just to avoid emptiness.
|
||||
- Avoid low-information templates such as "the person mentioned in the statement" or "the organization mentioned in the statement".
|
||||
- Do not add extra world knowledge that is not needed for identifying the entity in context.
|
||||
- If an entity should be retained but the current statement does not provide any suitable stable description for it, `description` may be the empty string `""`.
|
||||
{% endif %}
|
||||
|
||||
**Type Description (`type_description`):**
|
||||
|
||||
- `type_description` 必须直接复用对应 `type` 的中文定义。
|
||||
- 不要把当前实体实例描述写进 `type_description`。
|
||||
- {% if language == "zh" %}`type_description` 必须直接复用对应 `type` 的中文定义。{% else %}`type_description` must restate the corresponding `type` definition in English, while keeping the underlying `type` label itself in Chinese.{% endif %}
|
||||
- {% if language == "zh" %}不要把当前实体实例描述写进 `type_description`。{% else %}Do not put the current entity instance description into `type_description`.{% endif %}
|
||||
|
||||
**Triplet Extraction:**
|
||||
{% if language == "zh" %}
|
||||
@@ -499,42 +459,22 @@ Do not let auxiliary fields drive the extraction process.
|
||||
- 只有当陈述中表达了清晰关系时,才抽取 `(subject, predicate, object)`。
|
||||
- `predicate` 只能使用上方预定义的中文关系类型。
|
||||
- 如果没有任何预定义关系适用,返回 `triplets: []`。
|
||||
- 排除语气词、模糊情绪、孤立名词和缺乏明确关系结构的片段。
|
||||
- 如果陈述不支持有效关系,不要强行构造 triplet。
|
||||
- 不要为了保留一句抽象判断或泛因果命题,而强行构造“用户-拥有-努力”“努力-导致-回报”这类低价值 triplet。
|
||||
- `提到` 不用于保留泛化概念、抽象命题片段、口号式表达或仅在句面上出现但无记忆价值的对象。
|
||||
- `相关于` 不用于补救无法成立的关系,也不用于连接“努力”“回报”“成功”“意义”这类抽象概念。
|
||||
- `想要` 只用于具体、明确、用户特异且值得保留的对象或目标;如果想要的内容过于抽象或空泛,不要抽取 `想要`,应改写进相关实体的 `description`。
|
||||
- 不要为了保留情绪或心理状态而创建实体或弱关系;像“紧张”“开心”“难过”“焦虑”默认应写入相关实体的 `description`。
|
||||
- `关联于` 不用于补救无法成立的关系,也不用于连接“努力”“回报”“成功”“意义”这类抽象概念。
|
||||
- `偏好` 只用于具体、明确、用户特异且值得保留的对象或目标;如果相关内容过于抽象或空泛,不要抽取 `偏好`,应改写进相关实体的 `description`。
|
||||
- 对于这类观点句,如果相关概念本身不值得保留,也不要只为了补全结构而额外创建对应实体;允许输出仅包含 `用户` 的 `entities` 和空的 `triplets`。
|
||||
- 如果 `has_unsolved_reference` 是 `true`,不要抽取实体或 triplets。
|
||||
- `subject_name` 和 `object_name` 默认保持原文中的表面形式,但用户自指必须写成 `用户`,可稳定解析的其他代词必须替换为具体指代实体名。
|
||||
- `predicate_description` 必须直接复用对应 `predicate` 的中文定义。
|
||||
- 每个 triplet 都必须包含 `valid_at` 和 `invalid_at`,并直接复用输入中的同名字段值;如果输入是 `NULL`,这里也写 `NULL`。
|
||||
- 不要把普通时间表达作为 triplet 的宾语。
|
||||
- 不要为了表达一次性计划、安排、日程而强行构造关系。
|
||||
- 当句子表达主体去某个地点、场所、组织、课程或活动时,只要该对象本身有记忆价值,就可以抽取 `前往`,即使句中同时带有时间信息。
|
||||
- 当句子表达主体学习某个主题或技能时,可以抽取 `学习`,即使句中还包含地点或时间修饰。
|
||||
{% else %}
|
||||
- Extract `(subject, predicate, object)` only when there is a clear relation expressed in the statement.
|
||||
- `predicate` must use one of the predefined Chinese relation labels above.
|
||||
- If no predefined relation fits, return `triplets: []`.
|
||||
- Exclude fillers, vague emotions, standalone nouns, and fragments without a clear relational structure.
|
||||
- If the statement does not support a valid relation, do not force a triplet.
|
||||
- Do not force low-value triplets such as "user-has-effort" or "effort-causes-reward" just to preserve a generic causal belief or slogan-like proposition.
|
||||
- Do not use `提到` to preserve generic concepts, proposition fragments, slogan-like expressions, or surface mentions that have no memory value.
|
||||
- Do not use `相关于` as a rescue relation when no real relation exists, and do not connect abstract concepts such as "effort", "reward", "success", or "meaning" with it.
|
||||
- Use `想要` only for concrete, specific, user-grounded objects or goals worth retaining; if the desired content is too abstract or generic, do not extract `想要` and instead rewrite it into the relevant entity `description`.
|
||||
- Do not create entities or weak relations just to preserve emotional or psychological states; states such as nervousness, happiness, sadness, or anxiety should normally be written into the relevant retained entity `description`.
|
||||
- Do not use `关联于` as a rescue relation when no real relation exists, and do not connect abstract concepts such as "effort", "reward", "success", or "meaning" with it.
|
||||
- Use `偏好` only for concrete, specific, user-grounded objects or goals worth retaining; if the relevant content is too abstract or generic, do not extract `偏好` and instead rewrite it into the relevant entity `description`.
|
||||
- For such opinion statements, if the referenced concepts are not worth keeping, do not create extra entities just to complete a structure; it is valid to return only the `用户` entity with empty `triplets`.
|
||||
- If `has_unsolved_reference` is `true`, do not extract entities or triplets.
|
||||
- Keep `subject_name` and `object_name` in their original surface form by default, but write user self-reference as `用户` and replace other stably resolvable references with their resolved entity names.
|
||||
- `predicate_description` must directly reuse the corresponding Chinese definition of `predicate`.
|
||||
- Every triplet must include `valid_at` and `invalid_at`, copied directly from the input fields with the same names; if the input is `NULL`, write `NULL` here as well.
|
||||
- Do not use ordinary time expressions as triplet objects.
|
||||
- Do not force relations just to encode one-off plans, arrangements, or schedules.
|
||||
- When the statement says that the subject goes to a place, venue, organization, class, or activity, you may extract `前往` as long as that destination itself is worth remembering, even if the statement also includes time information.
|
||||
- When the statement says that the subject studies a topic or skill, you may extract `学习` even if the statement also includes location or time modifiers.
|
||||
{% endif %}
|
||||
|
||||
**Alias Relation (`别名属于`):**
|
||||
@@ -542,42 +482,23 @@ Do not let auxiliary fields drive the extraction process.
|
||||
|
||||
- 当多个名字明确指向同一实体时,使用 `别名属于`。
|
||||
- 方向始终是 `alias -> 别名属于 -> canonical entity`。
|
||||
- 这条规则适用于任何实体类型,包括人、组织、产品、地点、账号,以及用户自指场景。
|
||||
- 常见正例包括:真名、别名、昵称、网名、用户名、账号名、英文名,以及明确指向同一实体的稳定称呼。
|
||||
- 当一句话里出现多个名字都指向同一实体时,为每个别名创建单独实体,并分别连向规范实体。
|
||||
- 规范实体必须是“被命名的那个实体”,而不是与它相关的拥有者、施事者、使用者或上位对象。
|
||||
- 例如,如果句子表达“某个对象叫某个名字”,则这个名字应连向该对象本身;不要因为所有者更显眼,就把名字误连到所有者身上。
|
||||
- 对“X 的 Y 叫 Z / 名字是 Z”这类所有格命名表达,如果 `X 的 Y` 是稳定、清晰且类型允许的实体,则抽取 `Z -> 别名属于 -> X 的 Y`;不要抽取 `Z -> 别名属于 -> X`。
|
||||
- 如果所有格同时明确表达持有关系,也应抽取 `X -> 拥有 -> X 的 Y`。
|
||||
- 在用户自指场景中,规范实体应为已经规范化后的 `用户`。
|
||||
- 不要把角色、职业、身份、类别、夸赞、评价或其他非名字性描述抽成 `别名属于`。
|
||||
{% else %}
|
||||
- Use `别名属于` when multiple names clearly refer to the same entity.
|
||||
- Direction is always `alias -> 别名属于 -> canonical entity`.
|
||||
- This applies to any entity type, including people, organizations, products, places, accounts, and user/self references.
|
||||
- Typical positive cases include real names, alternative names, nicknames, screen names, usernames, account names, and stable forms of address when they clearly refer to the same entity.
|
||||
- The canonical entity must be the entity being named itself, not its owner, caller, user, or parent object.
|
||||
- For example, if the statement says that some object has a name, the alias should point to that object itself rather than a more salient owner.
|
||||
- For possessive naming patterns such as "X's Y is called Z" or "X's Y's name is Z", if `X's Y` is a stable, clear, and type-allowed entity, extract `Z -> 别名属于 -> X's Y`; do not extract `Z -> 别名属于 -> X`.
|
||||
- If the possessive phrase also explicitly expresses possession, also extract `X -> 拥有 -> X's Y`.
|
||||
- In user self-reference cases, the canonical entity should be the normalized user entity `用户`.
|
||||
- Do not use `别名属于` for roles, occupations, identities, categories, compliments, evaluations, or other non-name descriptions.
|
||||
{% endif %}
|
||||
|
||||
**Naming / Addressing Relations (`使用称呼`):**
|
||||
{% if language == "zh" %}
|
||||
|
||||
- 当一句话同时表达“命名事实”和“称呼行为”时,要区分这两层语义。
|
||||
- 如果句子明确说某个实体或群体用某个名字称呼另一实体,并且施称方在 `statement_text` 中明确出现,则要把施称方也抽成实体。
|
||||
- 在这种情况下,还要从施称方指向别名实体,抽取一条 `使用称呼` 关系。
|
||||
- 当两层语义都存在时,应同时抽取:
|
||||
1. `alias -> 别名属于 -> canonical entity`
|
||||
2. `caller -> 使用称呼 -> alias`
|
||||
- 如果施称方在句中明确出现且对语义重要,不要省略它。
|
||||
- 在命名关系中,新出现的称呼、别名、昵称、产品名必须保持原样,不要被替换成其所指实体名。
|
||||
{% else %}
|
||||
- Distinguish between a naming fact and a naming act when the statement expresses both.
|
||||
- If the statement says that some entity or group calls or addresses another entity by a name, and the caller is explicitly mentioned in `statement_text`, extract the caller as an entity.
|
||||
- In such cases, also extract a `使用称呼` relation from the caller to the alias entity.
|
||||
- When both layers are present, extract both:
|
||||
1. `alias -> 别名属于 -> canonical entity`
|
||||
2. `caller -> 使用称呼 -> alias`
|
||||
- Do not drop the caller entity if it is explicitly stated and semantically important to the naming relation.
|
||||
- In naming relations, newly introduced names, aliases, nicknames, or product names must stay in their original form rather than being replaced by their referent.
|
||||
{% endif %}
|
||||
|
||||
**subject_name / object_name Consistency:**
|
||||
{% if language == "zh" %}
|
||||
|
||||
@@ -585,27 +506,26 @@ Do not let auxiliary fields drive the extraction process.
|
||||
- 每个 triplet 中的 `object_name` 必须与 `object_id` 指向实体的 `name` 完全一致。
|
||||
- 每个 triplet 中的 `valid_at` 必须与输入中的 `valid_at` 完全一致。
|
||||
- 每个 triplet 中的 `invalid_at` 必须与输入中的 `invalid_at` 完全一致。
|
||||
- 不要在 triplet 里使用与实体名不同的表面形式。
|
||||
{% else %}
|
||||
- `subject_name` in each triplet MUST exactly match the `name` of the entity referenced by `subject_id`.
|
||||
- `object_name` in each triplet MUST exactly match the `name` of the entity referenced by `object_id`.
|
||||
- `valid_at` in each triplet MUST exactly match the input `valid_at`.
|
||||
- `invalid_at` in each triplet MUST exactly match the input `invalid_at`.
|
||||
- Do not use alternative surface forms inside triplets.
|
||||
{% endif %}
|
||||
|
||||
===Examples===
|
||||
{% if language == "zh" %}
|
||||
**示例 1**
|
||||
Statement: "我住在巴黎。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "居住于", "predicate_description": "人物居住在某地点", "object_name": "巴黎", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "位于", "predicate_description": "表达实体与地点、场所、空间位置之间的稳定位置关系。", "object_name": "巴黎", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "居住在巴黎的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "巴黎", "type": "地点设施", "type_description": "具有地理意义或功能性空间意义的位置与场所", "description": "用户居住的城市", "is_explicit_memory": false}
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "居住在巴黎的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "巴黎", "type": "地点设施", "type_description": "具有地理意义或功能性空间意义的位置与场所。", "description": "用户居住的城市", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -616,11 +536,11 @@ Input condition: supporting context has already made it clear that “他” ref
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "张明", "subject_id": 0, "predicate": "任职于", "predicate_description": "主体在某组织中工作或任职", "object_name": "腾讯", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
{"subject_name": "张明", "subject_id": 0, "predicate": "属于类型", "predicate_description": "表达主体所属的类别、身份、职业、角色,或其与组织、群体、集合之间的归属关系。", "object_name": "腾讯", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "张明", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "在腾讯工作的人员", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "腾讯", "type": "组织", "type_description": "公司、机构、学校、实验室、团队、社群等组织性主体。", "description": "张明任职的公司", "is_explicit_memory": false}
|
||||
{"entity_idx": 0, "name": "张明", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "在腾讯工作的人员", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "腾讯", "type": "组织", "type_description": "公司、机构、学校、实验室、团队、社群等组织性主体。", "description": "张明归属的组织", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -630,17 +550,89 @@ Statement: "我常去图书馆学微积分。"
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "前往", "predicate_description": "主体前往某个地点、场所、组织、课程或活动", "object_name": "图书馆", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "学习", "predicate_description": "主体正在学习某知识主题或技能", "object_name": "微积分", "object_id": 2, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "前往", "predicate_description": "表达主体前往、到访某地点、场所、组织或活动对象的关系。", "object_name": "图书馆", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "了解", "predicate_description": "表达主体与知识、技能、学科、语言等知识能力对象之间的认知、学习或兴趣关系。", "object_name": "微积分", "object_id": 2, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "经常在图书馆学习微积分的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "经常在图书馆学习微积分的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "图书馆", "type": "地点设施", "type_description": "具有地理意义或功能性空间意义的位置与场所。", "description": "用户经常前往学习的地点", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "微积分", "type": "知识能力", "type_description": "可学习、掌握、使用或讨论的知识主题、技能、学科或语言。", "description": "用户经常学习的主题", "is_explicit_memory": true}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 4**
|
||||
Statement: "我的朋友都叫我山哥。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "山哥", "subject_id": 2, "predicate": "别名属于", "predicate_description": "表达实体名称、别名、称呼之间的对应关系。", "object_name": "用户", "object_id": 0, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "被朋友称作山哥的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "我的朋友", "type": "群体", "type_description": "边界相对稳定、可被当作整体引用的一组人。", "description": "使用山哥这一称呼的人群", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "山哥", "type": "称呼别名", "type_description": "用于指代或称呼实体的名字。", "description": "朋友用来称呼用户的昵称", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 5**
|
||||
Statement: "我认为努力就会有回报。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "认为努力就会有回报的说话者", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 6**
|
||||
Statement: "我的GitHub用户名是chen4。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "拥有", "predicate_description": "表达主体拥有、持有、配有某对象、账号、联系方式或标识的关系。", "object_name": "GitHub账号", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "GitHub账号", "subject_id": 1, "predicate": "拥有", "predicate_description": "表达主体拥有、持有、配有某对象、账号、联系方式或标识的关系。", "object_name": "chen4", "object_id": 2, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "拥有该 GitHub 账号的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "GitHub账号", "type": "识别联系信息", "type_description": "账号、用户名、编号、邮箱、手机号等与识别、联系或登录相关的信息对象。", "description": "用户拥有的 GitHub 账号", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "chen4", "type": "识别联系信息", "type_description": "账号、用户名、编号、邮箱、手机号等与识别、联系或登录相关的信息对象。", "description": "该 GitHub 账号的用户名", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 7**
|
||||
Statement: "我想通过雅思。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "偏好", "predicate_description": "表达主体对对象的稳定偏好、厌恶,或对具体明确目标的指向关系。", "object_name": "通过雅思", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "想通过雅思的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "通过雅思", "type": "具体目标", "type_description": "用户具体、明确、可验证、可长期追踪的目标结果或目标性安排。", "description": "用户想达成的具体目标", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 8**
|
||||
Statement: "用户的小狗叫多多。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "拥有", "predicate_description": "表达主体拥有、持有、配有某对象、账号、联系方式或标识的关系。", "object_name": "用户的小狗", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "多多", "subject_id": 2, "predicate": "别名属于", "predicate_description": "表达实体名称、别名、称呼之间的对应关系。", "object_name": "用户的小狗", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "拥有一只叫多多的小狗的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "用户的小狗", "type": "生命体", "type_description": "可稳定指向、可被当作具体个体区分和归并的生命体个体。", "description": "用户拥有的、名字叫多多的小狗", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "多多", "type": "称呼别名", "type_description": "用于指代或称呼实体的名字。", "description": "用户的小狗的名字", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 9**
|
||||
Statement: "他2026年3月加入了这家公司。"
|
||||
Input condition: `"has_unsolved_reference": true`
|
||||
|
||||
@@ -649,99 +641,134 @@ Output:
|
||||
"triplets": [],
|
||||
"entities": []
|
||||
}
|
||||
|
||||
**示例 5**
|
||||
Statement: "我的朋友都叫我山哥。"
|
||||
{% else %}
|
||||
**Example 1**
|
||||
Statement: "I live in Paris."
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "山哥", "subject_id": 2, "predicate": "别名属于", "predicate_description": "别名指向其对应的规范实体", "object_name": "用户", "object_id": 0, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "我的朋友", "subject_id": 1, "predicate": "使用称呼", "predicate_description": "主体使用某个名字来称呼另一实体", "object_name": "山哥", "object_id": 2, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "位于", "predicate_description": "A stable location relation between an entity and a place, facility, or spatial location.", "object_name": "Paris", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "被朋友称作山哥的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "我的朋友", "type": "群体", "type_description": "边界相对稳定、可被当作整体引用的一组人。", "description": "使用山哥这一称呼的人群", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "山哥", "type": "称呼别名", "type_description": "用于指代或称呼实体的名字。", "description": "朋友用来称呼用户的昵称", "is_explicit_memory": false}
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The speaker who lives in Paris.", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "Paris", "type": "地点设施", "type_description": "A location or place with geographic meaning or functional spatial meaning.", "description": "The city where the user lives.", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 6**
|
||||
Statement: "我认为努力就会有回报。"
|
||||
**Example 2**
|
||||
Statement: "He works at Tencent."
|
||||
Input condition: supporting context has already made it clear that “he” refers to “Zhang Ming”.
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "Zhang Ming", "subject_id": 0, "predicate": "属于类型", "predicate_description": "A relation expressing the type, identity, profession, role, or organizational/group affiliation of a subject.", "object_name": "Tencent", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "Zhang Ming", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "A person who works at Tencent.", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "Tencent", "type": "组织", "type_description": "An organizational actor such as a company, institution, school, lab, team, or community.", "description": "The organization Zhang Ming belongs to.", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**Example 3**
|
||||
Statement: "I often go to the library to study calculus."
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "前往", "predicate_description": "A relation expressing that a subject goes to or visits a place, facility, organization, or other visit-worthy target.", "object_name": "the library", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "了解", "predicate_description": "A relation expressing cognition, learning, or knowledge-oriented interest between a subject and a `知识能力` object such as a knowledge topic, skill, field, or language.", "object_name": "calculus", "object_id": 2, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The speaker who often goes to the library to study calculus.", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "the library", "type": "地点设施", "type_description": "A location or place with geographic meaning or functional spatial meaning.", "description": "The place the user often goes to for studying.", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "calculus", "type": "知识能力", "type_description": "A knowledge topic, skill, field, or language that can be learned, mastered, used, or discussed.", "description": "The topic the user often studies.", "is_explicit_memory": true}
|
||||
]
|
||||
}
|
||||
|
||||
**Example 4**
|
||||
Statement: "My friends all call me Shan Ge."
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "Shan Ge", "subject_id": 2, "predicate": "别名属于", "predicate_description": "A relation expressing correspondence between names, aliases, and forms of address.", "object_name": "用户", "object_id": 0, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The speaker who is called Shan Ge by friends.", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "my friends", "type": "群体", "type_description": "A relatively stable group of people that can be referred to as a whole.", "description": "The group of people who use the name Shan Ge.", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "Shan Ge", "type": "称呼别名", "type_description": "A name used to refer to or address an entity.", "description": "The nickname used by the friends to address the user.", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**Example 5**
|
||||
Statement: "I think effort brings reward."
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "认为努力就会有回报的说话者", "is_explicit_memory": false}
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The speaker who believes that effort brings reward.", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 7**
|
||||
Statement: "我想要成功。"
|
||||
**Example 6**
|
||||
Statement: "My GitHub username is chen4."
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "拥有", "predicate_description": "A relation expressing that a subject owns, holds, carries, or is associated with an object, account, contact method, or identifier.", "object_name": "GitHub account", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "GitHub account", "subject_id": 1, "predicate": "拥有", "predicate_description": "A relation expressing that a subject owns, holds, carries, or is associated with an object, account, contact method, or identifier.", "object_name": "chen4", "object_id": 2, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The speaker who has this GitHub account.", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "GitHub account", "type": "识别联系信息", "type_description": "An information object related to identification, contact, or login, such as an account, username, ID number, email, or phone number.", "description": "The GitHub account owned by the user.", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "chen4", "type": "识别联系信息", "type_description": "An information object related to identification, contact, or login, such as an account, username, ID number, email, or phone number.", "description": "The username of the GitHub account.", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**Example 7**
|
||||
Statement: "I want to pass IELTS."
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "偏好", "predicate_description": "A relation expressing a stable preference, aversion, or a specific concrete goal of a subject.", "object_name": "pass IELTS", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The speaker who wants to pass IELTS.", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "pass IELTS", "type": "具体目标", "type_description": "A specific, explicit, verifiable, and trackable goal result or goal-oriented plan of the user.", "description": "A concrete goal the user wants to achieve.", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**Example 8**
|
||||
Statement: "My dog is called Duoduo."
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "拥有", "predicate_description": "A relation expressing that a subject owns, holds, carries, or is associated with an object, account, contact method, or identifier.", "object_name": "the user's dog", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "Duoduo", "subject_id": 2, "predicate": "别名属于", "predicate_description": "A relation expressing correspondence between names, aliases, and forms of address.", "object_name": "the user's dog", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The speaker who has a dog called Duoduo.", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "the user's dog", "type": "生命体", "type_description": "A living individual that can be stably referred to, distinguished, and deduplicated as a specific entity.", "description": "The dog owned by the user and named Duoduo.", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "Duoduo", "type": "称呼别名", "type_description": "A name used to refer to or address an entity.", "description": "The name of the user's dog.", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**Example 9**
|
||||
Statement: "He joined this company in March 2026."
|
||||
Input condition: `"has_unsolved_reference": true`
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "想要成功的说话者", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 8**
|
||||
Statement: "我最近有点紧张,不过这很正常。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 9**
|
||||
Statement: "王教授是导师。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "王教授", "subject_id": 0, "predicate": "担任角色", "predicate_description": "主体承担某个角色", "object_name": "导师", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "王教授", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "承担导师角色的具体个人", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "导师", "type": "角色职业", "type_description": "人物承担的社会角色、功能身份或职业身份。", "description": "王教授承担的角色身份", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 10**
|
||||
Statement: "我的GitHub账号用户名是chen4。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "用户", "subject_id": 0, "predicate": "拥有账号", "predicate_description": "实体具有某账号", "object_name": "GitHub账号", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"},
|
||||
{"subject_name": "GitHub账号", "subject_id": 1, "predicate": "标识为", "predicate_description": "实体由某标识符标识", "object_name": "chen4", "object_id": 2, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "拥有该 GitHub 账号的说话者", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "GitHub账号", "type": "识别联系信息", "type_description": "账号、用户名、编号、邮箱、手机号等与识别、联系或登录相关的信息对象。", "description": "用户拥有的 GitHub 账号", "is_explicit_memory": false},
|
||||
{"entity_idx": 2, "name": "chen4", "type": "识别联系信息", "type_description": "账号、用户名、编号、邮箱、手机号等与识别、联系或登录相关的信息对象。", "description": "该 GitHub 账号对应的用户名标识", "is_explicit_memory": false}
|
||||
]
|
||||
}
|
||||
|
||||
**示例 11**
|
||||
Statement: "机器人查票员和我沟通。"
|
||||
|
||||
Output:
|
||||
{
|
||||
"triplets": [
|
||||
{"subject_name": "机器人查票员", "subject_id": 0, "predicate": "沟通于", "predicate_description": "两个实体之间发生沟通或交流", "object_name": "用户", "object_id": 1, "valid_at": "NULL", "invalid_at": "NULL"}
|
||||
],
|
||||
"entities": [
|
||||
{"entity_idx": 0, "name": "机器人查票员", "type": "物品设备", "type_description": "可被持有、使用、携带的具体物体、设备、工具或交通工具。", "description": "与用户发生沟通的机器人主体", "is_explicit_memory": false},
|
||||
{"entity_idx": 1, "name": "用户", "type": "人物", "type_description": "可稳定指向、可被当作具体个体区分和归并的个人实体。", "description": "与机器人查票员沟通的说话者", "is_explicit_memory": false}
|
||||
]
|
||||
"entities": []
|
||||
}
|
||||
{% endif %}
|
||||
===End of Examples===
|
||||
|
||||
===Output Format===
|
||||
@@ -754,7 +781,7 @@ JSON 要求:
|
||||
- 字符串值中不要换行
|
||||
- `name`、`subject_name`、`object_name` 默认保持原文中的表面形式,但用户自指必须规范成 `用户`,可稳定解析的其他代词必须替换为具体指代实体名
|
||||
- `description` 必须使用中文
|
||||
- `type`、`predicate`、`type_description`、`predicate_description` 必须使用上方预定义的中文标签和中文说明
|
||||
- `type`、`predicate` 必须使用上方预定义的中文标签;`type_description`、`predicate_description` 必须使用中文说明
|
||||
- 每个 triplet 都必须包含 `valid_at` 和 `invalid_at`,并与输入中的同名字段完全一致
|
||||
- 如果 `has_unsolved_reference` 是 `true`,输出必须是 `{"entities": [], "triplets": []}`
|
||||
- 如果存在无法稳定解析的代词或指示表达,输出也必须是 `{"entities": [], "triplets": []}`
|
||||
@@ -767,7 +794,7 @@ JSON 要求:
|
||||
- No line breaks inside string values
|
||||
- `name`, `subject_name`, and `object_name` keep their original surface forms by default, but user self-reference must be normalized to `用户` and other stably resolvable references must be replaced by their resolved entity names
|
||||
- `description` must be in English
|
||||
- `type`, `predicate`, `type_description`, and `predicate_description` must use the predefined Chinese labels and Chinese definitions above
|
||||
- `type` and `predicate` must use the predefined Chinese labels above; `type_description` and `predicate_description` must be written in English
|
||||
- Every triplet must include `valid_at` and `invalid_at`, exactly matching the input fields with the same names
|
||||
- If `has_unsolved_reference` is `true`, the output must be `{"entities": [], "triplets": []}`
|
||||
- If unresolved references still remain, the output must also be `{"entities": [], "triplets": []}`
|
||||
@@ -805,4 +832,5 @@ Output JSON structure:
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
```
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
# 必须在导入任何使用 DashScope SDK 的模块之前应用补丁
|
||||
import app.plugins.dashscope_patch # noqa: F401
|
||||
|
||||
from app.repositories.neo4j.create_indexes import create_all_indexes
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
|
||||
73
api/app/plugins/dashscope_patch.py
Normal file
73
api/app/plugins/dashscope_patch.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""
|
||||
DashScope SDK 补丁:修复 __getattr__ 违反 Python 属性访问协议的 bug。
|
||||
|
||||
背景
|
||||
----
|
||||
DashScope SDK 的 DictMixin(所有响应类的基类)的 __getattr__ 实现为:
|
||||
|
||||
def __getattr__(self, attr):
|
||||
return self[attr]
|
||||
|
||||
当属性/键不存在时,它抛出 KeyError。但按照 Python 数据模型规范,
|
||||
__getattr__ 应当抛出 AttributeError,否则 hasattr()/getattr(obj, name, default)
|
||||
等内置函数会失效。
|
||||
|
||||
实际影响
|
||||
--------
|
||||
requests 库在构造 HTTPError 时会调用 hasattr(response, "request")
|
||||
(见 requests/exceptions.py:22),当 DashScope 响应对象参与异常链路时,
|
||||
hasattr 会因 KeyError 直接崩溃,掩盖了真正的 HTTP 错误(如 429 限流、超时)。
|
||||
|
||||
此时抛出的异常表现为 KeyError('request'),极具误导性,并导致项目内已有的
|
||||
429 自动重试逻辑无法捕获真正的限流错误。
|
||||
|
||||
参考
|
||||
----
|
||||
DashScope SDK 官方 Issue #114:
|
||||
https://github.com/dashscope/dashscope-sdk-python/issues/114
|
||||
|
||||
修复
|
||||
----
|
||||
对 DictMixin.__getattr__ 进行 monkey-patch,将 KeyError 转换为 AttributeError,
|
||||
使其符合 Python 语义。补丁应用于基类,因此所有派生响应类型(DashScopeAPIResponse、
|
||||
GenerationResponse、MultiModalConversationResponse 等)都能一次性受益。
|
||||
|
||||
使用方式
|
||||
--------
|
||||
在应用入口(main.py / celery_worker.py)的最顶部导入本模块,
|
||||
在任何 DashScope 调用发生前完成补丁注入:
|
||||
|
||||
import app.plugins.dashscope_patch # noqa: F401
|
||||
"""
|
||||
|
||||
import logging

logger = logging.getLogger(__name__)

# Name of the class-level flag recording that the patch is already installed.
_PATCH_MARKER = "_redbear_getattr_patched"


def apply_getattr_patch(mixin_cls):
    """Make ``mixin_cls.__getattr__`` raise AttributeError for missing keys.

    The wrapped ``__getattr__`` delegates to the original implementation and
    converts any ``KeyError`` it raises into ``AttributeError``, which is what
    ``hasattr()`` and ``getattr(obj, name, default)`` rely on.

    Args:
        mixin_cls: Class whose ``__getattr__`` should be wrapped. It must
            already define ``__getattr__``; otherwise the attribute lookup
            raises ``AttributeError``.

    Returns:
        True if the patch was installed by this call, False if the class (or
        one of its bases) already carries the patch marker.
    """
    # Guard against double application (e.g. imported by both the web entry
    # point and the Celery worker); wrapping twice would only nest wrappers.
    if getattr(mixin_cls, _PATCH_MARKER, False):
        return False

    original_getattr = mixin_cls.__getattr__

    def _safe_getattr(self, attr):
        """Delegate to the original ``__getattr__``; map KeyError to AttributeError."""
        try:
            return original_getattr(self, attr)
        except KeyError:
            # ``from None`` suppresses the KeyError context so tracebacks do not
            # show a misleading "During handling of the above exception..." chain.
            raise AttributeError(attr) from None

    mixin_cls.__getattr__ = _safe_getattr
    setattr(mixin_cls, _PATCH_MARKER, True)
    return True


try:
    from dashscope.api_entities.dashscope_response import DictMixin

    if apply_getattr_patch(DictMixin):
        logger.info(
            "DashScope SDK 补丁已生效:DictMixin.__getattr__ 在缺失键时抛 AttributeError"
        )
except ImportError:
    # DashScope SDK is not installed: skip, other providers are unaffected.
    logger.debug("未安装 dashscope,跳过 DashScope SDK 补丁")
except Exception as e:
    # A failed patch must never prevent the application from starting.
    logger.warning("应用 DashScope SDK 补丁失败,将继续启动: %s", e)
|
||||
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
import redis
|
||||
from redis.exceptions import RedisError
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
|
||||
# Import a unified Celery instance
|
||||
from app.celery_app import celery_app
|
||||
@@ -1339,12 +1340,18 @@ def write_message_task(
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.warning(f"[CELERY WRITE] 写入 last_done 时间戳失败(不影响主流程): {_e}")
|
||||
# 将 result 转为 JSON 安全结构,避免 Celery JSON 序列化 pydantic BaseModel / UUID 失败
|
||||
try:
|
||||
safe_result = jsonable_encoder(result)
|
||||
except Exception as _enc_e:
|
||||
logger.warning(f"[CELERY WRITE] jsonable_encoder 失败,回退为字符串: {_enc_e}")
|
||||
safe_result = str(result)
|
||||
return {
|
||||
"status": "SUCCESS",
|
||||
"result": result,
|
||||
"result": safe_result,
|
||||
"start_at": task_start_time,
|
||||
"end_user_id": end_user_id,
|
||||
"config_id": config_id,
|
||||
"config_id": str(config_id) if config_id is not None else None,
|
||||
"elapsed_time": elapsed_time,
|
||||
"task_id": self.request.id
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user