Merge branch 'develop' into fix/memory-enduser-config

2026-02-06 16:25:57 +08:00
parent 8a0e2da03f 59d8e1bf9f
commit 2db583d62d
69 changed files with 38144 additions and 362 deletions
--- a/api/app/services/memory_config_service.py
+++ b/api/app/services/memory_config_service.py
@@ -303,6 +303,8 @@ class MemoryConfigService:
                pruning_enabled=bool(memory_config.pruning_enabled) if memory_config.pruning_enabled is not None else False,
                pruning_scene=memory_config.pruning_scene or "education",
                pruning_threshold=float(memory_config.pruning_threshold) if memory_config.pruning_threshold is not None else 0.5,
+                # Ontology scene association
+                scene_id=memory_config.scene_id,
            )

            elapsed_ms = (time.time() - start_time) * 1000
@@ -476,6 +478,43 @@ class MemoryConfigService:
            "pruning_threshold": memory_config.pruning_threshold,
        }

+    def get_ontology_types(self, memory_config: MemoryConfig):
+        """Load the ontology types attached to a memory configuration's scene.
+
+        Args:
+            memory_config: MemoryConfig whose ``scene_id`` selects the scene.
+
+        Returns:
+            The OntologyTypeList built from the scene's ontology classes, or
+            None when no scene is configured, the scene has no classes, or
+            the lookup raises (the failure is logged as a warning).
+        """
+        from app.core.memory.models.ontology_extraction_models import OntologyTypeList
+        from app.repositories.ontology_class_repository import OntologyClassRepository
+
+        scene_id = memory_config.scene_id
+        if not scene_id:
+            logger.debug("No scene_id configured, skipping ontology type fetch")
+            return None
+
+        # Best-effort lookup: any failure is logged and reported as "no types".
+        try:
+            scene_classes = OntologyClassRepository(self.db).get_by_scene(scene_id)
+            if scene_classes:
+                type_list = OntologyTypeList.from_db_models(scene_classes)
+                type_count = len(type_list.types)
+                logger.info(
+                    f"Loaded {type_count} ontology types for scene_id: {scene_id}"
+                )
+                return type_list
+            logger.info(f"No ontology classes found for scene_id: {scene_id}")
+        except Exception as exc:
+            logger.warning(
+                f"Failed to fetch ontology types for scene_id {scene_id}: {exc}",
+                exc_info=True,
+            )
+        return None
+
    def get_workspace_default_config(
        self,
        workspace_id: UUID
--- a/api/app/services/memory_storage_service.py
+++ b/api/app/services/memory_storage_service.py
@@ -280,12 +280,6 @@ class DataConfigService: # 数据配置服务类（PostgreSQL）
            if not cid:
                raise ValueError("未提供 payload.config_id，禁止启动试运行")

-            # 验证 dialogue_text 必须提供
-            dialogue_text = payload.dialogue_text.strip() if payload.dialogue_text else ""
-            logger.info(f"[PILOT_RUN_STREAM] Received dialogue_text length: {len(dialogue_text)}, preview: {dialogue_text[:100]}")
-            if not dialogue_text:
-                raise ValueError("试运行模式必须提供 dialogue_text 参数")
-
            # Load configuration from database only using centralized manager
            try:
                config_service = MemoryConfigService(self.db)
@@ -297,6 +291,30 @@ class DataConfigService: # 数据配置服务类（PostgreSQL）
            except ConfigurationError as e:
                raise RuntimeError(f"Configuration loading failed: {e}")

+            # 根据是否关联本体场景选择使用的文本
+            # 如果配置关联了本体场景（scene_id 不为空），使用 custom_text（如果提供）
+            # 否则使用 dialogue_text
+            if memory_config.scene_id:
+                # 关联了本体场景，优先使用 custom_text
+                if hasattr(payload, 'custom_text') and payload.custom_text:
+                    dialogue_text = payload.custom_text.strip()
+                    logger.info(f"[PILOT_RUN_STREAM] Using custom_text for scene_id={memory_config.scene_id}, length: {len(dialogue_text)}")
+                else:
+                    # 如果没有提供 custom_text，回退到 dialogue_text
+                    dialogue_text = payload.dialogue_text.strip() if payload.dialogue_text else ""
+                    logger.info(f"[PILOT_RUN_STREAM] No custom_text provided, using dialogue_text for scene_id={memory_config.scene_id}")
+            else:
+                # 没有关联本体场景，使用 dialogue_text
+                dialogue_text = payload.dialogue_text.strip() if payload.dialogue_text else ""
+                logger.info(f"[PILOT_RUN_STREAM] No scene_id, using dialogue_text, length: {len(dialogue_text)}")
+            
+            # 验证最终使用的文本不为空
+            if not dialogue_text:
+                raise ValueError("试运行模式必须提供有效的文本内容（dialogue_text 或 custom_text）")
+            
+            logger.info(f"[PILOT_RUN_STREAM] Final text preview: {dialogue_text[:100]}")
+
+
            # 步骤 2: 创建进度回调函数捕获管线进度
            # 使用队列在回调和生成器之间传递进度事件
            progress_queue: asyncio.Queue = asyncio.Queue()
--- a/api/app/services/ontology_service.py
+++ b/api/app/services/ontology_service.py
@@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional
 from sqlalchemy.orm import Session

 from app.core.memory.llm_tools.openai_client import OpenAIClient
-from app.core.memory.models.ontology_models import (
+from app.core.memory.models.ontology_scenario_models import (
    OntologyClass,
    OntologyExtractionResponse,
 )
@@ -49,6 +49,10 @@ class OntologyService:
    DEFAULT_LLM_TIMEOUT = 30.0
    DEFAULT_ENABLE_OWL_VALIDATION = True
    
+    # 从环境变量获取默认语言
+    from app.core.config import settings
+    DEFAULT_LANGUAGE = settings.DEFAULT_LANGUAGE
+    
    def __init__(
        self,
        llm_client: OpenAIClient,
--- a/api/app/services/pilot_run_service.py
+++ b/api/app/services/pilot_run_service.py
@@ -142,6 +142,20 @@ async def run_pilot_extraction(
            f"enable_llm_disambiguation={config.deduplication.enable_llm_disambiguation}"
        )

+        # 加载本体类型（如果配置了 scene_id），支持通用类型回退
+        ontology_types = None
+        try:
+            from app.core.memory.ontology_services.ontology_type_loader import load_ontology_types_with_fallback
+            
+            ontology_types = load_ontology_types_with_fallback(
+                scene_id=memory_config.scene_id,
+                workspace_id=memory_config.workspace_id,
+                db=db,
+                enable_general_fallback=True
+            )
+        except Exception as e:
+            logger.warning(f"Failed to load ontology types: {e}", exc_info=True)
+
        orchestrator = ExtractionOrchestrator(
            llm_client=llm_client,
            embedder_client=embedder_client,
@@ -150,6 +164,7 @@ async def run_pilot_extraction(
            progress_callback=progress_callback,
            embedding_id=str(memory_config.embedding_model_id),
            language=language,
+            ontology_types=ontology_types,
        )

        log_time("Orchestrator Initialization", time.time() - step_start, log_file)