[add] Semantic pruning is unified with the ontology engineering scenario.
This commit is contained in:
@@ -82,7 +82,9 @@ async def get_chunked_dialogs(
|
||||
pruning_config = PruningConfig(
|
||||
pruning_switch=memory_config.pruning_enabled,
|
||||
pruning_scene=memory_config.pruning_scene or "education",
|
||||
pruning_threshold=memory_config.pruning_threshold
|
||||
pruning_threshold=memory_config.pruning_threshold,
|
||||
scene_id=str(memory_config.scene_id) if memory_config.scene_id else None,
|
||||
ontology_classes=memory_config.ontology_classes,
|
||||
)
|
||||
logger.info(f"[剪枝] 加载配置: switch={pruning_config.pruning_switch}, scene={pruning_config.pruning_scene}, threshold={pruning_config.pruning_threshold}")
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ Classes:
|
||||
TemporalSearchParams: Parameters for temporal search queries
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from typing import Optional, List
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
@@ -55,17 +55,26 @@ class PruningConfig(BaseModel):
|
||||
|
||||
Attributes:
|
||||
pruning_switch: Enable or disable semantic pruning
|
||||
pruning_scene: Scene type for pruning ('education', 'online_service', 'outbound')
|
||||
pruning_scene: Scene name for pruning, either a built-in key
|
||||
('education', 'online_service', 'outbound') or a custom scene_name
|
||||
from ontology_scene table
|
||||
pruning_threshold: Pruning ratio (0-0.9, max 0.9 to avoid complete removal)
|
||||
scene_id: Optional ontology scene UUID, used to load custom ontology classes
|
||||
ontology_classes: List of class_name strings from ontology_class table,
|
||||
injected into the prompt when pruning_scene is not a built-in scene
|
||||
"""
|
||||
pruning_switch: bool = Field(False, description="Enable semantic pruning when True.")
|
||||
pruning_scene: str = Field(
|
||||
"education",
|
||||
description="Scene for pruning: one of 'education', 'online_service', 'outbound'.",
|
||||
description="Scene for pruning: built-in key or custom scene_name from ontology_scene.",
|
||||
)
|
||||
pruning_threshold: float = Field(
|
||||
0.5, ge=0.0, le=0.9,
|
||||
description="Pruning ratio within 0-0.9 (max 0.9 to avoid termination).")
|
||||
scene_id: Optional[str] = Field(None, description="Ontology scene UUID (optional).")
|
||||
ontology_classes: Optional[List[str]] = Field(
|
||||
None, description="Class names from ontology_class table for custom scenes."
|
||||
)
|
||||
|
||||
|
||||
class TemporalSearchParams(BaseModel):
|
||||
|
||||
@@ -86,19 +86,26 @@ class SemanticPruner:
|
||||
self._detailed_prune_logging = True # 是否启用详细日志
|
||||
self._max_debug_msgs_per_dialog = 20 # 每个对话最多记录前N条消息的详细日志
|
||||
|
||||
# 加载场景特定配置
|
||||
# 加载场景特定配置(内置场景走专门规则,自定义场景 fallback 到通用规则)
|
||||
self.scene_config: ScenePatterns = SceneConfigRegistry.get_config(
|
||||
self.config.pruning_scene,
|
||||
fallback_to_generic=True
|
||||
)
|
||||
|
||||
# 检查场景是否有专门支持
|
||||
is_supported = SceneConfigRegistry.is_scene_supported(self.config.pruning_scene)
|
||||
if is_supported:
|
||||
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 使用专门配置")
|
||||
# 判断是否为内置专门场景
|
||||
self._is_builtin_scene = SceneConfigRegistry.is_scene_supported(self.config.pruning_scene)
|
||||
|
||||
# 自定义场景的本体类型列表(用于注入提示词)
|
||||
self._ontology_classes = config.ontology_classes or []
|
||||
|
||||
if self._is_builtin_scene:
|
||||
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 使用内置专门配置")
|
||||
else:
|
||||
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 未预定义,使用通用配置(保守策略)")
|
||||
self._log(f"[剪枝-初始化] 支持的场景: {SceneConfigRegistry.get_all_scenes()}")
|
||||
self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 为自定义场景,使用通用规则 + 本体类型提示词注入")
|
||||
if self._ontology_classes:
|
||||
self._log(f"[剪枝-初始化] 注入本体类型: {self._ontology_classes}")
|
||||
else:
|
||||
self._log(f"[剪枝-初始化] 未找到本体类型,将使用通用提示词")
|
||||
|
||||
# Load Jinja2 template
|
||||
self.template = prompt_env.get_template("extracat_Pruning.jinja2")
|
||||
@@ -424,12 +431,16 @@ class SemanticPruner:
|
||||
self._log(f"[剪枝-缓存] LRU缓存已满,删除最旧条目")
|
||||
|
||||
rendered = self.template.render(
|
||||
pruning_scene=self.config.pruning_scene,
|
||||
pruning_scene=self.config.pruning_scene,
|
||||
is_builtin_scene=self._is_builtin_scene,
|
||||
ontology_classes=self._ontology_classes,
|
||||
dialog_text=dialog_text,
|
||||
language=self.language
|
||||
)
|
||||
log_template_rendering("extracat_Pruning.jinja2", {
|
||||
"pruning_scene": self.config.pruning_scene,
|
||||
"is_builtin_scene": self._is_builtin_scene,
|
||||
"ontology_classes_count": len(self._ontology_classes),
|
||||
"language": self.language
|
||||
})
|
||||
log_prompt_rendering("pruning-extract", rendered)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{#
|
||||
对话级抽取与相关性判定模板(用于剪枝加速)
|
||||
输入:pruning_scene, dialog_text
|
||||
输入:pruning_scene, is_builtin_scene, ontology_classes, dialog_text, language
|
||||
输出:严格 JSON(不要包含任何多余文本),字段:
|
||||
- is_related: bool,是否与所选场景相关
|
||||
- times: [string],从对话中抽取的时间相关文本(日期、时间、时间段、有效期等)
|
||||
@@ -16,7 +16,8 @@
|
||||
- 仅输出上述键;避免多余解释或字段。
|
||||
#}
|
||||
|
||||
{% set scene_instructions = {
|
||||
{# ── 内置场景的固定说明 ── #}
|
||||
{% set builtin_scene_instructions = {
|
||||
'education': {
|
||||
'zh': '教育场景:教学、课程、考试、作业、老师/学生互动、学习资源、学校管理等。',
|
||||
'en': 'Education Scenario: Teaching, courses, exams, homework, teacher/student interaction, learning resources, school management, etc.'
|
||||
@@ -31,16 +32,39 @@
|
||||
}
|
||||
} %}
|
||||
|
||||
{% set scene_key = pruning_scene %}
|
||||
{% if scene_key not in scene_instructions %}
|
||||
{% set scene_key = 'education' %}
|
||||
{# ── 确定最终使用的场景说明 ── #}
|
||||
{% if is_builtin_scene %}
|
||||
{# 内置专门场景:使用固定说明 #}
|
||||
{% set scene_key = pruning_scene %}
|
||||
{% if scene_key not in builtin_scene_instructions %}{% set scene_key = 'education' %}{% endif %}
|
||||
{% set instruction = builtin_scene_instructions[scene_key][language] if language in ['zh', 'en'] else builtin_scene_instructions[scene_key]['zh'] %}
|
||||
{% set custom_types_str = '' %}
|
||||
{% else %}
|
||||
{# 自定义场景:使用场景名称 + 本体类型列表构建说明 #}
|
||||
{% if ontology_classes and ontology_classes | length > 0 %}
|
||||
{% if language == 'en' %}
|
||||
{% set instruction = 'Custom scene "' ~ pruning_scene ~ '": The dialogue is related to this scene if it involves any of the following entity types: ' ~ ontology_classes | join(', ') ~ '.' %}
|
||||
{% else %}
|
||||
{% set instruction = '自定义场景「' ~ pruning_scene ~ '」:对话涉及以下任意实体类型时视为相关:' ~ ontology_classes | join('、') ~ '。' %}
|
||||
{% endif %}
|
||||
{% set custom_types_str = ontology_classes | join('、') %}
|
||||
{% else %}
|
||||
{# 无本体类型时退化为通用说明 #}
|
||||
{% if language == 'en' %}
|
||||
{% set instruction = 'Custom scene "' ~ pruning_scene ~ '": Determine whether the dialogue content is relevant to this scene based on overall context.' %}
|
||||
{% else %}
|
||||
{% set instruction = '自定义场景「' ~ pruning_scene ~ '」:根据对话整体内容判断是否与该场景相关。' %}
|
||||
{% endif %}
|
||||
{% set custom_types_str = '' %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{% set instruction = scene_instructions[scene_key][language] if language in ['zh', 'en'] else scene_instructions[scene_key]['zh'] %}
|
||||
|
||||
{% if language == "zh" %}
|
||||
请在下方对话全文基础上,按该场景进行一次性抽取并判定相关性:
|
||||
场景说明:{{ instruction }}
|
||||
{% if not is_builtin_scene and custom_types_str %}
|
||||
重要提示:只要对话中出现与上述实体类型({{ custom_types_str }})相关的内容,即判定为相关(is_related=true)。
|
||||
{% endif %}
|
||||
|
||||
对话全文:
|
||||
"""
|
||||
@@ -60,6 +84,9 @@
|
||||
{% else %}
|
||||
Based on the full dialogue below, perform one-time extraction and relevance determination according to this scenario:
|
||||
Scenario Description: {{ instruction }}
|
||||
{% if not is_builtin_scene and custom_types_str %}
|
||||
Important: If the dialogue contains content related to any of the entity types above ({{ custom_types_str }}), mark it as relevant (is_related=true).
|
||||
{% endif %}
|
||||
|
||||
Full Dialogue:
|
||||
"""
|
||||
|
||||
@@ -233,6 +233,7 @@ class MemoryConfigRepository:
|
||||
config_desc=params.config_desc,
|
||||
workspace_id=params.workspace_id,
|
||||
scene_id=params.scene_id,
|
||||
pruning_scene=params.pruning_scene,
|
||||
llm_id=params.llm_id,
|
||||
embedding_id=params.embedding_id,
|
||||
rerank_id=params.rerank_id,
|
||||
|
||||
@@ -417,6 +417,7 @@ class MemoryConfig:
|
||||
|
||||
# Ontology scene association
|
||||
scene_id: Optional[UUID] = None
|
||||
ontology_classes: Optional[list] = field(default=None)
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate configuration after initialization."""
|
||||
|
||||
@@ -232,14 +232,15 @@ class ConfigParamsCreate(BaseModel): # 创建配置参数模型(仅 body,
|
||||
# 本体场景关联(可选)
|
||||
scene_id: Optional[uuid.UUID] = Field(None, description="本体场景ID(UUID),关联ontology_scene表")
|
||||
|
||||
# 语义剪枝场景(由 service 层根据 scene_id 自动推导,值为关联场景的 scene_name,前端无需传入)
|
||||
pruning_scene: Optional[str] = Field(None, description="语义剪枝场景,由 scene_id 对应的 scene_name 自动填充")
|
||||
|
||||
# 模型配置字段(可选,用于手动指定或自动填充)
|
||||
llm_id: Optional[str] = Field(None, description="LLM模型配置ID")
|
||||
embedding_id: Optional[str] = Field(None, description="嵌入模型配置ID")
|
||||
rerank_id: Optional[str] = Field(None, description="重排序模型配置ID")
|
||||
reflection_model_id: Optional[str] = Field(None, description="反思模型ID,默认与llm_id一致")
|
||||
emotion_model_id: Optional[str] = Field(None, description="情绪分析模型ID,默认与llm_id一致")
|
||||
|
||||
|
||||
class ConfigParamsDelete(BaseModel): # 删除配置参数模型(请求体)
|
||||
model_config = ConfigDict(populate_by_name=True, extra="forbid")
|
||||
# config_name: str = Field("配置名称", description="配置名称(字符串)")
|
||||
|
||||
@@ -107,6 +107,37 @@ def _validate_config_id(config_id, db: Session = None):
|
||||
)
|
||||
|
||||
|
||||
# 专门场景的内置 key 列表(与 SceneConfigRegistry 保持一致)
|
||||
_BUILTIN_PRUNING_SCENES = {"education", "online_service", "outbound"}
|
||||
|
||||
|
||||
def _load_ontology_classes(db: Session, scene_id, pruning_scene: Optional[str]) -> Optional[list]:
|
||||
"""当 pruning_scene 不是内置场景时,从 ontology_class 表加载类型名称列表。
|
||||
|
||||
Args:
|
||||
db: 数据库会话
|
||||
scene_id: 本体场景 UUID
|
||||
pruning_scene: 语义剪枝场景名称
|
||||
|
||||
Returns:
|
||||
class_name 字符串列表,或 None(内置场景 / 无数据时)
|
||||
"""
|
||||
if not scene_id:
|
||||
return None
|
||||
# 内置场景走 SceneConfigRegistry,不需要注入类型列表
|
||||
if pruning_scene in _BUILTIN_PRUNING_SCENES:
|
||||
return None
|
||||
try:
|
||||
from app.repositories.ontology_class_repository import OntologyClassRepository
|
||||
repo = OntologyClassRepository(db)
|
||||
classes = repo.get_classes_by_scene(scene_id)
|
||||
names = [c.class_name for c in classes if c.class_name]
|
||||
return names if names else None
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load ontology classes for scene_id={scene_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
class MemoryConfigService:
|
||||
"""
|
||||
Centralized service for memory configuration loading and validation.
|
||||
@@ -359,6 +390,7 @@ class MemoryConfigService:
|
||||
pruning_threshold=float(memory_config.pruning_threshold) if memory_config.pruning_threshold is not None else 0.5,
|
||||
# Ontology scene association
|
||||
scene_id=memory_config.scene_id,
|
||||
ontology_classes=_load_ontology_classes(self.db, memory_config.scene_id, memory_config.pruning_scene),
|
||||
)
|
||||
|
||||
elapsed_ms = (time.time() - start_time) * 1000
|
||||
|
||||
@@ -146,6 +146,10 @@ class DataConfigService: # 数据配置服务类(PostgreSQL)
|
||||
if not params.emotion_model_id:
|
||||
params.emotion_model_id = params.llm_id
|
||||
|
||||
# 根据关联的本体场景推导 pruning_scene(语义剪枝场景与本体工程场景保持一致)
|
||||
if params.scene_id and not getattr(params, 'pruning_scene', None):
|
||||
params.pruning_scene = self._resolve_pruning_scene_from_scene_id(params.scene_id)
|
||||
|
||||
config = MemoryConfigRepository.create(self.db, params)
|
||||
self.db.commit()
|
||||
return {"affected": 1, "config_id": config.config_id}
|
||||
@@ -161,6 +165,22 @@ class DataConfigService: # 数据配置服务类(PostgreSQL)
|
||||
finally:
|
||||
db_session.close()
|
||||
|
||||
def _resolve_pruning_scene_from_scene_id(self, scene_id) -> Optional[str]:
|
||||
"""根据本体场景ID获取对应的 scene_name,作为语义剪枝场景值
|
||||
|
||||
Args:
|
||||
scene_id: 本体场景UUID
|
||||
|
||||
Returns:
|
||||
scene_name 字符串,查询失败时返回 None
|
||||
"""
|
||||
try:
|
||||
from app.models.ontology_scene import OntologyScene
|
||||
scene = self.db.query(OntologyScene).filter_by(scene_id=scene_id).first()
|
||||
return scene.scene_name if scene else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
# --- Delete ---
|
||||
def delete(self, key: ConfigParamsDelete) -> Dict[str, Any]: # 删除配置参数(按配置ID)
|
||||
success = MemoryConfigRepository.delete(self.db, key.config_id)
|
||||
@@ -196,6 +216,19 @@ class DataConfigService: # 数据配置服务类(PostgreSQL)
|
||||
def get_all(self, workspace_id = None) -> List[Dict[str, Any]]: # 获取所有配置参数
|
||||
results = MemoryConfigRepository.get_all(self.db, workspace_id)
|
||||
|
||||
# 检查并修正 pruning_scene 与 scene_name 不一致的记录
|
||||
needs_commit = False
|
||||
for config, scene_name in results:
|
||||
if scene_name and config.pruning_scene != scene_name:
|
||||
logger.info(
|
||||
f"修正 pruning_scene: config_id={config.config_id} "
|
||||
f"'{config.pruning_scene}' -> '{scene_name}'"
|
||||
)
|
||||
config.pruning_scene = scene_name
|
||||
needs_commit = True
|
||||
if needs_commit:
|
||||
self.db.commit()
|
||||
|
||||
# 将 ORM 对象转换为字典列表
|
||||
data_list = []
|
||||
for config, scene_name in results:
|
||||
|
||||
@@ -152,6 +152,7 @@ def create_workspace(
|
||||
|
||||
# Initialize default ontology scenes for the workspace (先创建本体场景)
|
||||
default_scene_id = None
|
||||
default_scene_name = None
|
||||
try:
|
||||
initializer = DefaultOntologyInitializer(db)
|
||||
success, error_msg = initializer.initialize_default_scenes(
|
||||
@@ -163,7 +164,7 @@ def create_workspace(
|
||||
f"为工作空间 {db_workspace.id} 创建默认本体场景成功 (language={language})"
|
||||
)
|
||||
|
||||
# 获取默认场景ID,优先使用"在线教育"场景,如果不存在则使用"情感陪伴"场景
|
||||
# 获取默认场景ID,优先使用"在线教育"场景,如果不存在则使用"情感陪伴"场景
|
||||
from app.repositories.ontology_scene_repository import OntologySceneRepository
|
||||
from app.config.default_ontology_config import (
|
||||
ONLINE_EDUCATION_SCENE,
|
||||
@@ -179,6 +180,7 @@ def create_workspace(
|
||||
|
||||
if education_scene:
|
||||
default_scene_id = education_scene.scene_id
|
||||
default_scene_name = education_scene.scene_name
|
||||
business_logger.info(
|
||||
f"获取到教育场景ID用于默认记忆配置: {default_scene_id} (scene_name={education_scene_name})"
|
||||
)
|
||||
@@ -189,6 +191,7 @@ def create_workspace(
|
||||
|
||||
if companion_scene:
|
||||
default_scene_id = companion_scene.scene_id
|
||||
default_scene_name = companion_scene.scene_name
|
||||
business_logger.info(
|
||||
f"教育场景不存在,使用情感陪伴场景ID用于默认记忆配置: {default_scene_id} (scene_name={companion_scene_name})"
|
||||
)
|
||||
@@ -219,6 +222,7 @@ def create_workspace(
|
||||
embedding_id=embedding,
|
||||
rerank_id=rerank,
|
||||
scene_id=default_scene_id, # 传入默认场景ID(优先教育场景,其次情感陪伴场景)
|
||||
pruning_scene_name=default_scene_name, # 传入场景名称作为语义剪枝场景值
|
||||
)
|
||||
business_logger.info(
|
||||
f"为工作空间 {db_workspace.id} 创建默认记忆配置成功 (scene_id={default_scene_id})"
|
||||
@@ -1159,6 +1163,7 @@ def _create_default_memory_config(
|
||||
embedding_id: Optional[uuid.UUID] = None,
|
||||
rerank_id: Optional[uuid.UUID] = None,
|
||||
scene_id: Optional[uuid.UUID] = None,
|
||||
pruning_scene_name: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Create a default memory config for a newly created workspace.
|
||||
|
||||
@@ -1170,6 +1175,7 @@ def _create_default_memory_config(
|
||||
embedding_id: Optional embedding model ID
|
||||
rerank_id: Optional rerank model ID
|
||||
scene_id: Optional ontology scene ID (默认关联教育场景)
|
||||
pruning_scene_name: Optional pruning scene name,取自 ontology_scene.scene_name
|
||||
"""
|
||||
from app.models.memory_config_model import MemoryConfig
|
||||
|
||||
@@ -1183,7 +1189,8 @@ def _create_default_memory_config(
|
||||
llm_id=str(llm_id) if llm_id else None,
|
||||
embedding_id=str(embedding_id) if embedding_id else None,
|
||||
rerank_id=str(rerank_id) if rerank_id else None,
|
||||
scene_id=scene_id, # 关联本体场景ID
|
||||
scene_id=scene_id, # 关联本体场景ID(默认为"在线教育"场景)
|
||||
pruning_scene=pruning_scene_name, # 语义剪枝场景直接使用 scene_name
|
||||
state=True, # Active by default
|
||||
is_default=True, # Mark as workspace default
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user