From d4571fb75ba11deb3904bf0c36587800b44cbf06 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 9 Feb 2026 19:35:11 +0800 Subject: [PATCH] [fix]Fix get_classes_by_scene, add ontology_types=ontology_types --- api/app/controllers/ontology_controller.py | 2 +- .../core/memory/agent/utils/write_tools.py | 1 + .../core/memory/ontology_services/__init__.py | 9 ++ .../ontology_services/ontology_type_loader.py | 135 +++++++++++++++++- .../prompt/prompts/extract_triplet.jinja2 | 21 ++- api/app/query_ontology_matched_entities.py | 6 +- .../repositories/ontology_class_repository.py | 4 +- api/app/services/memory_config_service.py | 2 +- api/app/services/ontology_service.py | 2 +- 9 files changed, 162 insertions(+), 20 deletions(-) diff --git a/api/app/controllers/ontology_controller.py b/api/app/controllers/ontology_controller.py index 6827c55c..9d0511ea 100644 --- a/api/app/controllers/ontology_controller.py +++ b/api/app/controllers/ontology_controller.py @@ -1007,7 +1007,7 @@ async def export_owl_by_scene( # 2. 
查询场景下的所有本体类型 class_repo = OntologyClassRepository(db) - ontology_classes_db = class_repo.get_by_scene(request.scene_id) + ontology_classes_db = class_repo.get_classes_by_scene(request.scene_id) if not ontology_classes_db: api_logger.warning(f"No classes found in scene: {request.scene_id}") diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index 262f3b61..93c6ef6f 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -126,6 +126,7 @@ async def write( config=pipeline_config, embedding_id=embedding_model_id, language=language, + ontology_types=ontology_types, ) # Run the complete extraction pipeline diff --git a/api/app/core/memory/ontology_services/__init__.py b/api/app/core/memory/ontology_services/__init__.py index 9eb2f34f..db778c9c 100644 --- a/api/app/core/memory/ontology_services/__init__.py +++ b/api/app/core/memory/ontology_services/__init__.py @@ -8,6 +8,9 @@ - reload_ontology_registry: 重新加载本体注册表(实验模式) - clear_ontology_cache: 清除本体缓存 - is_general_ontology_enabled: 检查通用本体类型功能是否启用 +- load_ontology_types_for_scene: 从数据库加载场景的本体类型 +- create_empty_ontology_type_list: 创建空的本体类型列表 +- load_ontology_types_with_fallback: 加载本体类型(带通用类型回退) """ from .ontology_type_merger import OntologyTypeMerger, DEFAULT_CORE_GENERAL_TYPES @@ -17,6 +20,9 @@ from .ontology_type_loader import ( reload_ontology_registry, clear_ontology_cache, is_general_ontology_enabled, + load_ontology_types_for_scene, + create_empty_ontology_type_list, + load_ontology_types_with_fallback, ) __all__ = [ @@ -27,4 +33,7 @@ __all__ = [ "reload_ontology_registry", "clear_ontology_cache", "is_general_ontology_enabled", + "load_ontology_types_for_scene", + "create_empty_ontology_type_list", + "load_ontology_types_with_fallback", ] diff --git a/api/app/core/memory/ontology_services/ontology_type_loader.py b/api/app/core/memory/ontology_services/ontology_type_loader.py index 8d7417f7..e313a0b1 
100644 --- a/api/app/core/memory/ontology_services/ontology_type_loader.py +++ b/api/app/core/memory/ontology_services/ontology_type_loader.py @@ -5,9 +5,14 @@ Functions: load_ontology_types_for_scene: 从数据库加载场景的本体类型 is_general_ontology_enabled: 检查是否启用通用本体 + get_general_ontology_registry: 获取通用本体类型注册表(单例,懒加载) + get_ontology_type_merger: 获取类型合并服务实例 + reload_ontology_registry: 重新加载本体注册表 + clear_ontology_cache: 清除本体缓存 """ import logging +import os from typing import Optional from uuid import UUID @@ -15,6 +20,10 @@ from sqlalchemy.orm import Session logger = logging.getLogger(__name__) +# 模块级缓存(单例) +_general_registry_cache = None +_ontology_type_merger_cache = None + def load_ontology_types_for_scene( scene_id: Optional[UUID], @@ -52,8 +61,7 @@ def load_ontology_types_for_scene( # 查询场景的本体类型 ontology_repo = OntologyClassRepository(db) ontology_classes = ontology_repo.get_classes_by_scene( - scene_id=scene_id, - workspace_id=workspace_id + scene_id=scene_id ) if not ontology_classes: @@ -96,20 +104,137 @@ def create_empty_ontology_type_list() -> Optional["OntologyTypeList"]: def is_general_ontology_enabled() -> bool: """检查是否启用了通用本体 + 通过配置开关和注册表是否可用来判断。 + Returns: True 如果通用本体已启用,否则 False """ try: - from app.core.memory.ontology_services.ontology_type_merger import OntologyTypeMerger + from app.core.config import settings - merger = OntologyTypeMerger() - return merger.general_registry is not None + if not settings.ENABLE_GENERAL_ONTOLOGY_TYPES: + return False + + registry = get_general_ontology_registry() + return registry is not None and len(registry.types) > 0 except Exception as e: logger.warning(f"Failed to check general ontology status: {e}") return False +def get_general_ontology_registry(): + """获取通用本体类型注册表(单例,懒加载) + + 从配置的本体文件中解析并缓存注册表。 + + Returns: + GeneralOntologyTypeRegistry 实例,如果加载失败则返回 None + """ + global _general_registry_cache + + if _general_registry_cache is not None: + return _general_registry_cache + + try: + from app.core.config import settings + + if 
not settings.ENABLE_GENERAL_ONTOLOGY_TYPES: + logger.info("通用本体类型功能已禁用") + return None + + # 解析本体文件路径 + file_names = [f.strip() for f in settings.GENERAL_ONTOLOGY_FILES.split(",") if f.strip()] + if not file_names: + logger.warning("未配置通用本体文件") + return None + + # 构建完整路径(相对于项目根目录) + base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) + file_paths = [] + for name in file_names: + full_path = os.path.join(base_dir, name) + if os.path.exists(full_path): + file_paths.append(full_path) + else: + logger.warning(f"本体文件不存在: {full_path}") + + if not file_paths: + logger.warning("没有找到可用的通用本体文件") + return None + + # 解析本体文件 + from app.core.memory.utils.ontology.ontology_parser import MultiOntologyParser + + parser = MultiOntologyParser(file_paths) + _general_registry_cache = parser.parse_all() + logger.info(f"通用本体注册表加载完成: {len(_general_registry_cache.types)} 个类型") + + return _general_registry_cache + + except Exception as e: + logger.error(f"加载通用本体注册表失败: {e}", exc_info=True) + return None + + +def get_ontology_type_merger(): + """获取类型合并服务实例(单例,懒加载) + + Returns: + OntologyTypeMerger 实例,如果通用本体未启用则返回 None + """ + global _ontology_type_merger_cache + + if _ontology_type_merger_cache is not None: + return _ontology_type_merger_cache + + try: + registry = get_general_ontology_registry() + if registry is None: + return None + + from app.core.config import settings + from app.core.memory.ontology_services.ontology_type_merger import OntologyTypeMerger + + # 从配置读取核心类型 + core_types_str = settings.CORE_GENERAL_TYPES + core_types = [t.strip() for t in core_types_str.split(",") if t.strip()] if core_types_str else None + + _ontology_type_merger_cache = OntologyTypeMerger( + general_registry=registry, + max_types_in_prompt=settings.MAX_ONTOLOGY_TYPES_IN_PROMPT, + core_types=core_types, + ) + logger.info("OntologyTypeMerger 实例创建完成") + + return _ontology_type_merger_cache + + except Exception as e: + logger.error(f"创建 
OntologyTypeMerger 失败: {e}", exc_info=True) + return None + + +def reload_ontology_registry(): + """重新加载本体注册表(清除缓存后重新加载) + + 用于实验模式下动态更新本体配置。 + """ + clear_ontology_cache() + registry = get_general_ontology_registry() + if registry: + get_ontology_type_merger() + logger.info("本体注册表已重新加载") + return registry + + +def clear_ontology_cache(): + """清除本体缓存""" + global _general_registry_cache, _ontology_type_merger_cache + _general_registry_cache = None + _ontology_type_merger_cache = None + logger.info("本体缓存已清除") + + def load_ontology_types_with_fallback( scene_id: Optional[UUID], workspace_id: UUID, diff --git a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 index 6c68bcf6..b2f287f4 100644 --- a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 @@ -18,18 +18,21 @@ Extract entities and knowledge triplets from the given statement. {% if ontology_types %} ===Ontology Type Guidance=== -**CRITICAL: Use predefined ontology types for entity classification with the following priority:** +**CRITICAL RULE: You MUST ONLY use the predefined ontology type names listed below for the entity "type" field. Do NOT use any other type names, even if they seem reasonable.** + +**If no predefined type fits an entity, use the CLOSEST matching predefined type. NEVER invent new type names.** **Type Priority (from highest to lowest):** -1. **[场景类型] Scene Types** - Domain-specific types, use these first if applicable +1. **[场景类型] Scene Types** - Domain-specific types, ALWAYS prefer these first 2. **[通用类型] General Types** - Common types from standard ontologies (DBpedia) 3. 
**[通用父类] Parent Types** - Provide type hierarchy context **Type Matching Rules:** -- Entity type MUST exactly match one of the predefined type names -- Do NOT modify, translate, or use variations of type names -- Prefer scene types over general types when both could apply -- If uncertain between types, check the type description for guidance +- Entity type MUST exactly match one of the predefined type names below +- Do NOT use types like "Equipment", "Component", "Concept", "Action", "Condition", "Data", "Duration" unless they appear in the predefined list +- Do NOT modify, translate, abbreviate, or create variations of type names +- Prefer scene types (marked [场景类型]) over general types when both could apply +- If uncertain, check the type description to find the best match **Predefined Ontology Types:** {{ ontology_types }} @@ -42,7 +45,7 @@ The following shows type inheritance relationships (Child → Parent → Grandpa {% endfor %} {% endif %} -**Available Type Names (use EXACTLY as shown):** +**ALLOWED Type Names (use EXACTLY one of these, no exceptions):** {{ ontology_type_names | join(', ') }} {% endif %} @@ -207,6 +210,10 @@ Output: {% endif %} ===End of Examples=== +{% if ontology_types %} +**⚠️ REMINDER: The examples above use generic type names for illustration only. You MUST use ONLY the predefined ontology type names from the "ALLOWED Type Names" list above. For example, use "PredictiveMaintenance" instead of "Concept", use "ProductionLine" instead of "Equipment", etc. 
Map each entity to the closest matching predefined type.** +{% endif %} + ===Output Format=== **JSON Requirements:** diff --git a/api/app/query_ontology_matched_entities.py b/api/app/query_ontology_matched_entities.py index cef8d750..73490134 100644 --- a/api/app/query_ontology_matched_entities.py +++ b/api/app/query_ontology_matched_entities.py @@ -9,7 +9,7 @@ api\scripts\query_ontology_matched_entities.py 用法: python scripts/query_ontology_matched_entities.py [config_id] 示例: python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 - python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2 + python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2 """ import sys @@ -59,7 +59,7 @@ async def get_entities_by_end_user_id(connector: Neo4jConnector, end_user_id: st def get_ontology_types_from_scene(db, scene_id: UUID) -> Set[str]: """获取场景下所有本体类型名称""" class_repo = OntologyClassRepository(db) - ontology_classes = class_repo.get_by_scene(scene_id) + ontology_classes = class_repo.get_classes_by_scene(scene_id) return {oc.class_name for oc in ontology_classes} @@ -80,7 +80,7 @@ def get_all_ontology_types(db) -> Dict[str, Set[str]]: for scene in scenes: class_repo = OntologyClassRepository(db) - ontology_classes = class_repo.get_by_scene(scene.scene_id) + ontology_classes = class_repo.get_classes_by_scene(scene.scene_id) for oc in ontology_classes: if oc.class_name not in all_types: all_types[oc.class_name] = set() diff --git a/api/app/repositories/ontology_class_repository.py b/api/app/repositories/ontology_class_repository.py index 68f261ff..5be81ff7 100644 --- a/api/app/repositories/ontology_class_repository.py +++ b/api/app/repositories/ontology_class_repository.py @@ -202,7 +202,7 @@ class OntologyClassRepository: ) raise - def get_by_scene(self, scene_id: UUID) -> List[OntologyClass]: + def 
get_classes_by_scene(self, scene_id: UUID) -> List[OntologyClass]: """获取场景下的所有类型 按创建时间倒序排列。 @@ -215,7 +215,7 @@ class OntologyClassRepository: Examples: >>> repo = OntologyClassRepository(db) - >>> classes = repo.get_by_scene(scene_id) + >>> classes = repo.get_classes_by_scene(scene_id) """ try: logger.debug(f"Getting ontology classes by scene: {scene_id}") diff --git a/api/app/services/memory_config_service.py b/api/app/services/memory_config_service.py index c2ddbf2c..ccfd5482 100644 --- a/api/app/services/memory_config_service.py +++ b/api/app/services/memory_config_service.py @@ -550,7 +550,7 @@ class MemoryConfigService: try: ontology_repo = OntologyClassRepository(self.db) - ontology_classes = ontology_repo.get_by_scene(memory_config.scene_id) + ontology_classes = ontology_repo.get_classes_by_scene(memory_config.scene_id) if not ontology_classes: logger.info(f"No ontology classes found for scene_id: {memory_config.scene_id}") diff --git a/api/app/services/ontology_service.py b/api/app/services/ontology_service.py index 31bc5837..9eaab1c8 100644 --- a/api/app/services/ontology_service.py +++ b/api/app/services/ontology_service.py @@ -1155,7 +1155,7 @@ class OntologyService: raise ValueError("无权限访问该场景的类型") # 获取类型列表 - classes = self.class_repo.get_by_scene(scene_id) + classes = self.class_repo.get_classes_by_scene(scene_id) logger.info(f"Found {len(classes)} classes in scene {scene_id}")