[add] Unify semantic pruning with ontology engineering scenes.

This commit is contained in:
lanceyq
2026-03-06 14:12:03 +08:00
parent 61d2a328fe
commit fc240849cf
10 changed files with 147 additions and 23 deletions

View File

@@ -82,7 +82,9 @@ async def get_chunked_dialogs(
pruning_config = PruningConfig(
pruning_switch=memory_config.pruning_enabled,
pruning_scene=memory_config.pruning_scene or "education",
pruning_threshold=memory_config.pruning_threshold
pruning_threshold=memory_config.pruning_threshold,
scene_id=str(memory_config.scene_id) if memory_config.scene_id else None,
ontology_classes=memory_config.ontology_classes,
)
logger.info(f"[剪枝] 加载配置: switch={pruning_config.pruning_switch}, scene={pruning_config.pruning_scene}, threshold={pruning_config.pruning_threshold}")

View File

@@ -10,7 +10,7 @@ Classes:
TemporalSearchParams: Parameters for temporal search queries
"""
from typing import Optional
from typing import Optional, List
from pydantic import BaseModel, Field
@@ -55,17 +55,26 @@ class PruningConfig(BaseModel):
Attributes:
pruning_switch: Enable or disable semantic pruning
pruning_scene: Scene type for pruning ('education', 'online_service', 'outbound')
pruning_scene: Scene name for pruning, either a built-in key
('education', 'online_service', 'outbound') or a custom scene_name
from ontology_scene table
pruning_threshold: Pruning ratio (0-0.9, max 0.9 to avoid complete removal)
scene_id: Optional ontology scene UUID, used to load custom ontology classes
ontology_classes: List of class_name strings from ontology_class table,
injected into the prompt when pruning_scene is not a built-in scene
"""
pruning_switch: bool = Field(False, description="Enable semantic pruning when True.")
pruning_scene: str = Field(
"education",
description="Scene for pruning: one of 'education', 'online_service', 'outbound'.",
description="Scene for pruning: built-in key or custom scene_name from ontology_scene.",
)
pruning_threshold: float = Field(
0.5, ge=0.0, le=0.9,
description="Pruning ratio within 0-0.9 (max 0.9 to avoid termination).")
scene_id: Optional[str] = Field(None, description="Ontology scene UUID (optional).")
ontology_classes: Optional[List[str]] = Field(
None, description="Class names from ontology_class table for custom scenes."
)
class TemporalSearchParams(BaseModel):

View File

@@ -86,19 +86,26 @@ class SemanticPruner:
self._detailed_prune_logging = True # 是否启用详细日志
self._max_debug_msgs_per_dialog = 20 # 每个对话最多记录前N条消息的详细日志
# 加载场景特定配置
# 加载场景特定配置(内置场景走专门规则,自定义场景 fallback 到通用规则)
self.scene_config: ScenePatterns = SceneConfigRegistry.get_config(
self.config.pruning_scene,
fallback_to_generic=True
)
# 检查场景是否有专门支持
is_supported = SceneConfigRegistry.is_scene_supported(self.config.pruning_scene)
if is_supported:
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 使用专门配置")
# 判断是否为内置专门场景
self._is_builtin_scene = SceneConfigRegistry.is_scene_supported(self.config.pruning_scene)
# 自定义场景的本体类型列表(用于注入提示词)
self._ontology_classes = config.ontology_classes or []
if self._is_builtin_scene:
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 使用内置专门配置")
else:
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 未预定义,使用通用配置(保守策略)")
self._log(f"[剪枝-初始化] 支持的场景: {SceneConfigRegistry.get_all_scenes()}")
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 为自定义场景,使用通用规则 + 本体类型提示词注入")
if self._ontology_classes:
self._log(f"[剪枝-初始化] 注入本体类型: {self._ontology_classes}")
else:
self._log(f"[剪枝-初始化] 未找到本体类型,将使用通用提示词")
# Load Jinja2 template
self.template = prompt_env.get_template("extracat_Pruning.jinja2")
@@ -424,12 +431,16 @@ class SemanticPruner:
self._log(f"[剪枝-缓存] LRU缓存已满删除最旧条目")
rendered = self.template.render(
pruning_scene=self.config.pruning_scene,
pruning_scene=self.config.pruning_scene,
is_builtin_scene=self._is_builtin_scene,
ontology_classes=self._ontology_classes,
dialog_text=dialog_text,
language=self.language
)
log_template_rendering("extracat_Pruning.jinja2", {
"pruning_scene": self.config.pruning_scene,
"is_builtin_scene": self._is_builtin_scene,
"ontology_classes_count": len(self._ontology_classes),
"language": self.language
})
log_prompt_rendering("pruning-extract", rendered)

View File

@@ -1,6 +1,6 @@
{#
对话级抽取与相关性判定模板(用于剪枝加速)
输入:pruning_scene, dialog_text
输入:pruning_scene, is_builtin_scene, ontology_classes, dialog_text, language
输出:严格 JSON(不要包含任何多余文本),字段:
- is_related: bool,是否与所选场景相关
- times: [string],从对话中抽取的时间相关文本(日期、时间、时间段、有效期等)
@@ -16,7 +16,8 @@
- 仅输出上述键;避免多余解释或字段。
#}
{% set scene_instructions = {
{# ── 内置场景的固定说明 ── #}
{% set builtin_scene_instructions = {
'education': {
'zh': '教育场景:教学、课程、考试、作业、老师/学生互动、学习资源、学校管理等。',
'en': 'Education Scenario: Teaching, courses, exams, homework, teacher/student interaction, learning resources, school management, etc.'
@@ -31,16 +32,39 @@
}
} %}
{% set scene_key = pruning_scene %}
{% if scene_key not in scene_instructions %}
{% set scene_key = 'education' %}
{# ── 确定最终使用的场景说明 ── #}
{% if is_builtin_scene %}
{# 内置专门场景:使用固定说明 #}
{% set scene_key = pruning_scene %}
{% if scene_key not in builtin_scene_instructions %}{% set scene_key = 'education' %}{% endif %}
{% set instruction = builtin_scene_instructions[scene_key][language] if language in ['zh', 'en'] else builtin_scene_instructions[scene_key]['zh'] %}
{% set custom_types_str = '' %}
{% else %}
{# 自定义场景:使用场景名称 + 本体类型列表构建说明 #}
{% if ontology_classes and ontology_classes | length > 0 %}
{% if language == 'en' %}
{% set instruction = 'Custom scene "' ~ pruning_scene ~ '": The dialogue is related to this scene if it involves any of the following entity types: ' ~ ontology_classes | join(', ') ~ '.' %}
{% else %}
{% set instruction = '自定义场景「' ~ pruning_scene ~ '」:对话涉及以下任意实体类型时视为相关:' ~ ontology_classes | join('、') ~ '。' %}
{% endif %}
{% set custom_types_str = ontology_classes | join('、') %}
{% else %}
{# 无本体类型时退化为通用说明 #}
{% if language == 'en' %}
{% set instruction = 'Custom scene "' ~ pruning_scene ~ '": Determine whether the dialogue content is relevant to this scene based on overall context.' %}
{% else %}
{% set instruction = '自定义场景「' ~ pruning_scene ~ '」:根据对话整体内容判断是否与该场景相关。' %}
{% endif %}
{% set custom_types_str = '' %}
{% endif %}
{% endif %}
{% set instruction = scene_instructions[scene_key][language] if language in ['zh', 'en'] else scene_instructions[scene_key]['zh'] %}
{% if language == "zh" %}
请在下方对话全文基础上,按该场景进行一次性抽取并判定相关性:
场景说明:{{ instruction }}
{% if not is_builtin_scene and custom_types_str %}
重要提示:只要对话中出现与上述实体类型({{ custom_types_str }}相关的内容即判定为相关is_related=true
{% endif %}
对话全文:
"""
@@ -60,6 +84,9 @@
{% else %}
Based on the full dialogue below, perform one-time extraction and relevance determination according to this scenario:
Scenario Description: {{ instruction }}
{% if not is_builtin_scene and custom_types_str %}
Important: If the dialogue contains content related to any of the entity types above ({{ custom_types_str }}), mark it as relevant (is_related=true).
{% endif %}
Full Dialogue:
"""