[add] Semantic pruning is unified with the ontology engineering scenario.

2026-03-06 14:12:03 +08:00
parent 61d2a328fe
commit fc240849cf
10 changed files with 147 additions and 23 deletions
--- a/api/app/services/memory_config_service.py
+++ b/api/app/services/memory_config_service.py
@@ -107,6 +107,37 @@ def _validate_config_id(config_id, db: Session = None):
    )


+# Built-in keys for the dedicated pruning scenes (kept consistent with SceneConfigRegistry)
+_BUILTIN_PRUNING_SCENES = {"education", "online_service", "outbound"}
+
+
+def _load_ontology_classes(db: Session, scene_id, pruning_scene: Optional[str]) -> Optional[list]:
+    """Load ontology class names when pruning_scene is not a built-in scene.
+
+    The names are read through OntologyClassRepository.get_classes_by_scene.
+
+    Args:
+        db: Database session.
+        scene_id: Ontology scene UUID.
+        pruning_scene: Semantic pruning scene name.
+
+    Returns:
+        A list of non-empty class_name strings, or None when scene_id is falsy,
+        the scene is built-in, no names were found, or loading failed.
+    """
+    # Built-in scenes go through SceneConfigRegistry and need no injected class list.
+    if not scene_id or pruning_scene in _BUILTIN_PRUNING_SCENES:
+        return None
+    try:
+        from app.repositories.ontology_class_repository import OntologyClassRepository
+        scene_classes = OntologyClassRepository(db).get_classes_by_scene(scene_id)
+        class_names = [item.class_name for item in scene_classes if item.class_name]
+    except Exception as e:
+        logger.warning(f"Failed to load ontology classes for scene_id={scene_id}: {e}")
+        return None
+    return class_names or None
+
+
 class MemoryConfigService:
    """
    Centralized service for memory configuration loading and validation.
@@ -359,6 +390,7 @@ class MemoryConfigService:
                pruning_threshold=float(memory_config.pruning_threshold) if memory_config.pruning_threshold is not None else 0.5,
                # Ontology scene association
                scene_id=memory_config.scene_id,
+                ontology_classes=_load_ontology_classes(self.db, memory_config.scene_id, memory_config.pruning_scene),
            )

            elapsed_ms = (time.time() - start_time) * 1000
--- a/api/app/services/memory_storage_service.py
+++ b/api/app/services/memory_storage_service.py
@@ -146,6 +146,10 @@ class DataConfigService: # 数据配置服务类（PostgreSQL）
        if not params.emotion_model_id:
            params.emotion_model_id = params.llm_id

+        # 根据关联的本体场景推导 pruning_scene（语义剪枝场景与本体工程场景保持一致）
+        if params.scene_id and not getattr(params, 'pruning_scene', None):
+            params.pruning_scene = self._resolve_pruning_scene_from_scene_id(params.scene_id)
+
        config = MemoryConfigRepository.create(self.db, params)
        self.db.commit()
        return {"affected": 1, "config_id": config.config_id}
@@ -161,6 +165,22 @@ class DataConfigService: # 数据配置服务类（PostgreSQL）
        finally:
            db_session.close()

+    def _resolve_pruning_scene_from_scene_id(self, scene_id) -> Optional[str]:
+        """Return the scene_name of an ontology scene, for use as the pruning scene value.
+
+        Args:
+            scene_id: Ontology scene UUID.
+
+        Returns:
+            The matching scene's scene_name, or None when no scene matches
+            or the lookup fails.
+        """
+        try:
+            from app.models.ontology_scene import OntologyScene
+            scene = self.db.query(OntologyScene).filter_by(scene_id=scene_id).first()
+            return scene.scene_name if scene else None
+        except Exception as e:
+            # Best-effort lookup: keep returning None, but record the failure so a
+            # broken query or import is distinguishable from "scene not found".
+            logger.warning(f"Failed to resolve pruning_scene for scene_id={scene_id}: {e}")
+            return None
+
    # --- Delete ---
    def delete(self, key: ConfigParamsDelete) -> Dict[str, Any]: # 删除配置参数（按配置ID）
        success = MemoryConfigRepository.delete(self.db, key.config_id)
@@ -196,6 +216,19 @@ class DataConfigService: # 数据配置服务类（PostgreSQL）
    def get_all(self, workspace_id = None) -> List[Dict[str, Any]]: # 获取所有配置参数
        results = MemoryConfigRepository.get_all(self.db, workspace_id)

+        # 检查并修正 pruning_scene 与 scene_name 不一致的记录
+        needs_commit = False
+        for config, scene_name in results:
+            if scene_name and config.pruning_scene != scene_name:
+                logger.info(
+                    f"修正 pruning_scene: config_id={config.config_id} "
+                    f"'{config.pruning_scene}' -> '{scene_name}'"
+                )
+                config.pruning_scene = scene_name
+                needs_commit = True
+        if needs_commit:
+            self.db.commit()
+
        # 将 ORM 对象转换为字典列表
        data_list = []
        for config, scene_name in results:
--- a/api/app/services/workspace_service.py
+++ b/api/app/services/workspace_service.py
@@ -152,6 +152,7 @@ def create_workspace(

        # Initialize default ontology scenes for the workspace (先创建本体场景)
        default_scene_id = None
+        default_scene_name = None
        try:
            initializer = DefaultOntologyInitializer(db)
            success, error_msg = initializer.initialize_default_scenes(
@@ -163,7 +164,7 @@ def create_workspace(
                    f"为工作空间 {db_workspace.id} 创建默认本体场景成功 (language={language})"
                )
                
-                # 获取默认场景ID，优先使用"在线教育"场景，如果不存在则使用"情感陪伴"场景
+                # Get the default scene ID: prefer the "online education" scene; fall back to the "emotional companionship" scene if it does not exist
                from app.repositories.ontology_scene_repository import OntologySceneRepository
                from app.config.default_ontology_config import (
                    ONLINE_EDUCATION_SCENE, 
@@ -179,6 +180,7 @@ def create_workspace(
                
                if education_scene:
                    default_scene_id = education_scene.scene_id
+                    default_scene_name = education_scene.scene_name
                    business_logger.info(
                        f"获取到教育场景ID用于默认记忆配置: {default_scene_id} (scene_name={education_scene_name})"
                    )
@@ -189,6 +191,7 @@ def create_workspace(
                    
                    if companion_scene:
                        default_scene_id = companion_scene.scene_id
+                        default_scene_name = companion_scene.scene_name
                        business_logger.info(
                            f"教育场景不存在，使用情感陪伴场景ID用于默认记忆配置: {default_scene_id} (scene_name={companion_scene_name})"
                        )
@@ -219,6 +222,7 @@ def create_workspace(
                    embedding_id=embedding,
                    rerank_id=rerank,
                    scene_id=default_scene_id,  # 传入默认场景ID（优先教育场景，其次情感陪伴场景）
+                    pruning_scene_name=default_scene_name,  # 传入场景名称作为语义剪枝场景值
                )
                business_logger.info(
                    f"为工作空间 {db_workspace.id} 创建默认记忆配置成功 (scene_id={default_scene_id})"
@@ -1159,6 +1163,7 @@ def _create_default_memory_config(
    embedding_id: Optional[uuid.UUID] = None,
    rerank_id: Optional[uuid.UUID] = None,
    scene_id: Optional[uuid.UUID] = None,
+    pruning_scene_name: Optional[str] = None,
 ) -> None:
    """Create a default memory config for a newly created workspace.
    
@@ -1170,6 +1175,7 @@ def _create_default_memory_config(
        embedding_id: Optional embedding model ID
        rerank_id: Optional rerank model ID
        scene_id: Optional ontology scene ID (默认关联教育场景)
+        pruning_scene_name: Optional pruning scene name，取自 ontology_scene.scene_name
    """
    from app.models.memory_config_model import MemoryConfig
    
@@ -1183,7 +1189,8 @@ def _create_default_memory_config(
        llm_id=str(llm_id) if llm_id else None,
        embedding_id=str(embedding_id) if embedding_id else None,
        rerank_id=str(rerank_id) if rerank_id else None,
-        scene_id=scene_id,  # 关联本体场景ID
+        scene_id=scene_id,  # 关联本体场景ID（默认为"在线教育"场景）
+        pruning_scene=pruning_scene_name,  # 语义剪枝场景直接使用 scene_name
        state=True,  # Active by default
        is_default=True,  # Mark as workspace default
    )