Merge branch 'develop' into fix/memory-enduser-config

This commit is contained in:
Ke Sun
2026-02-06 16:25:57 +08:00
69 changed files with 38144 additions and 362 deletions

View File

@@ -303,6 +303,8 @@ class MemoryConfigService:
pruning_enabled=bool(memory_config.pruning_enabled) if memory_config.pruning_enabled is not None else False,
pruning_scene=memory_config.pruning_scene or "education",
pruning_threshold=float(memory_config.pruning_threshold) if memory_config.pruning_threshold is not None else 0.5,
# Ontology scene association
scene_id=memory_config.scene_id,
)
elapsed_ms = (time.time() - start_time) * 1000
@@ -476,6 +478,43 @@ class MemoryConfigService:
"pruning_threshold": memory_config.pruning_threshold,
}
def get_ontology_types(self, memory_config: MemoryConfig):
    """Load the ontology type list for a memory configuration's scene.

    Looks up the ontology classes stored for ``memory_config.scene_id`` and
    converts them with ``OntologyTypeList.from_db_models``. Any exception
    raised during the lookup or conversion is logged as a warning and
    reported as ``None`` instead of being propagated.

    Args:
        memory_config: Configuration whose ``scene_id`` selects the scene.

    Returns:
        The converted ``OntologyTypeList``, or ``None`` when no scene_id is
        configured, the scene has no ontology classes, or loading failed.
    """
    # Function-scope imports, kept exactly where the original had them.
    # NOTE(review): presumably deferred to avoid an import cycle - confirm.
    from app.core.memory.models.ontology_extraction_models import OntologyTypeList
    from app.repositories.ontology_class_repository import OntologyClassRepository

    # Guard clause: nothing to look up without an associated scene.
    if not memory_config.scene_id:
        logger.debug("No scene_id configured, skipping ontology type fetch")
        return None

    try:
        scene_classes = OntologyClassRepository(self.db).get_by_scene(memory_config.scene_id)
        if scene_classes:
            ontology_types = OntologyTypeList.from_db_models(scene_classes)
            logger.info(
                f"Loaded {len(ontology_types.types)} ontology types for scene_id: {memory_config.scene_id}"
            )
            return ontology_types

        # The scene exists in the config but has no classes attached.
        logger.info(f"No ontology classes found for scene_id: {memory_config.scene_id}")
        return None
    except Exception as e:
        # Best-effort: a failed ontology lookup is logged with its traceback
        # and reported as None rather than raised.
        logger.warning(
            f"Failed to fetch ontology types for scene_id {memory_config.scene_id}: {e}",
            exc_info=True
        )
        return None
def get_workspace_default_config(
self,
workspace_id: UUID

View File

@@ -280,12 +280,6 @@ class DataConfigService: # 数据配置服务类PostgreSQL
if not cid:
raise ValueError("未提供 payload.config_id禁止启动试运行")
# 验证 dialogue_text 必须提供
dialogue_text = payload.dialogue_text.strip() if payload.dialogue_text else ""
logger.info(f"[PILOT_RUN_STREAM] Received dialogue_text length: {len(dialogue_text)}, preview: {dialogue_text[:100]}")
if not dialogue_text:
raise ValueError("试运行模式必须提供 dialogue_text 参数")
# Load configuration from database only using centralized manager
try:
config_service = MemoryConfigService(self.db)
@@ -297,6 +291,30 @@ class DataConfigService: # 数据配置服务类PostgreSQL
except ConfigurationError as e:
raise RuntimeError(f"Configuration loading failed: {e}")
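# Choose which text to use depending on whether an ontology scene is associated.
# If the config is linked to an ontology scene (scene_id is not empty), use custom_text (if provided);
# otherwise use dialogue_text.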
if memory_config.scene_id:
# 关联了本体场景,优先使用 custom_text
if hasattr(payload, 'custom_text') and payload.custom_text:
dialogue_text = payload.custom_text.strip()
logger.info(f"[PILOT_RUN_STREAM] Using custom_text for scene_id={memory_config.scene_id}, length: {len(dialogue_text)}")
else:
# No custom_text was provided; fall back to dialogue_text
dialogue_text = payload.dialogue_text.strip() if payload.dialogue_text else ""
logger.info(f"[PILOT_RUN_STREAM] No custom_text provided, using dialogue_text for scene_id={memory_config.scene_id}")
else:
# 没有关联本体场景,使用 dialogue_text
dialogue_text = payload.dialogue_text.strip() if payload.dialogue_text else ""
logger.info(f"[PILOT_RUN_STREAM] No scene_id, using dialogue_text, length: {len(dialogue_text)}")
# 验证最终使用的文本不为空
if not dialogue_text:
raise ValueError("试运行模式必须提供有效的文本内容dialogue_text 或 custom_text")
logger.info(f"[PILOT_RUN_STREAM] Final text preview: {dialogue_text[:100]}")
# 步骤 2: 创建进度回调函数捕获管线进度
# 使用队列在回调和生成器之间传递进度事件
progress_queue: asyncio.Queue = asyncio.Queue()

View File

@@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional
from sqlalchemy.orm import Session
from app.core.memory.llm_tools.openai_client import OpenAIClient
from app.core.memory.models.ontology_models import (
from app.core.memory.models.ontology_scenario_models import (
OntologyClass,
OntologyExtractionResponse,
)
@@ -49,6 +49,10 @@ class OntologyService:
DEFAULT_LLM_TIMEOUT = 30.0
DEFAULT_ENABLE_OWL_VALIDATION = True
# 从环境变量获取默认语言
from app.core.config import settings
DEFAULT_LANGUAGE = settings.DEFAULT_LANGUAGE
def __init__(
self,
llm_client: OpenAIClient,

View File

@@ -142,6 +142,20 @@ async def run_pilot_extraction(
f"enable_llm_disambiguation={config.deduplication.enable_llm_disambiguation}"
)
# Load ontology types (when scene_id is configured), with fallback to general types
ontology_types = None
try:
from app.core.memory.ontology_services.ontology_type_loader import load_ontology_types_with_fallback
ontology_types = load_ontology_types_with_fallback(
scene_id=memory_config.scene_id,
workspace_id=memory_config.workspace_id,
db=db,
enable_general_fallback=True
)
except Exception as e:
logger.warning(f"Failed to load ontology types: {e}", exc_info=True)
orchestrator = ExtractionOrchestrator(
llm_client=llm_client,
embedder_client=embedder_client,
@@ -150,6 +164,7 @@ async def run_pilot_extraction(
progress_callback=progress_callback,
embedding_id=str(memory_config.embedding_model_id),
language=language,
ontology_types=ontology_types,
)
log_time("Orchestrator Initialization", time.time() - step_start, log_file)