From 6c8318b696946bed44f3982530c2a2791189154e Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 9 Feb 2026 19:35:11 +0800 Subject: [PATCH 1/6] [fix]Fix get_classes_by_scen, add ontology_types=ontology_types --- api/app/controllers/ontology_controller.py | 2 +- .../core/memory/agent/utils/write_tools.py | 1 + .../core/memory/ontology_services/__init__.py | 9 ++ .../ontology_services/ontology_type_loader.py | 135 +++++++++++++++++- .../prompt/prompts/extract_triplet.jinja2 | 21 ++- api/app/query_ontology_matched_entities.py | 6 +- .../repositories/ontology_class_repository.py | 4 +- api/app/services/memory_config_service.py | 2 +- api/app/services/ontology_service.py | 2 +- 9 files changed, 162 insertions(+), 20 deletions(-) diff --git a/api/app/controllers/ontology_controller.py b/api/app/controllers/ontology_controller.py index 6827c55c..9d0511ea 100644 --- a/api/app/controllers/ontology_controller.py +++ b/api/app/controllers/ontology_controller.py @@ -1007,7 +1007,7 @@ async def export_owl_by_scene( # 2. 查询场景下的所有本体类型 class_repo = OntologyClassRepository(db) - ontology_classes_db = class_repo.get_by_scene(request.scene_id) + ontology_classes_db = class_repo.get_classes_by_scene(request.scene_id) if not ontology_classes_db: api_logger.warning(f"No classes found in scene: {request.scene_id}") diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index 262f3b61..93c6ef6f 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -126,6 +126,7 @@ async def write( config=pipeline_config, embedding_id=embedding_model_id, language=language, + ontology_types=ontology_types, ) # Run the complete extraction pipeline diff --git a/api/app/core/memory/ontology_services/__init__.py b/api/app/core/memory/ontology_services/__init__.py index 9eb2f34f..db778c9c 100644 --- a/api/app/core/memory/ontology_services/__init__.py +++ b/api/app/core/memory/ontology_services/__init__.py @@ -8,6 +8,9 @@ - reload_ontology_registry: 重新加载本体注册表(实验模式) - clear_ontology_cache: 清除本体缓存 - is_general_ontology_enabled: 检查通用本体类型功能是否启用 +- load_ontology_types_for_scene: 从数据库加载场景的本体类型 +- create_empty_ontology_type_list: 创建空的本体类型列表 +- load_ontology_types_with_fallback: 加载本体类型(带通用类型回退) """ from .ontology_type_merger import OntologyTypeMerger, DEFAULT_CORE_GENERAL_TYPES @@ -17,6 +20,9 @@ from .ontology_type_loader import ( reload_ontology_registry, clear_ontology_cache, is_general_ontology_enabled, + load_ontology_types_for_scene, + create_empty_ontology_type_list, + load_ontology_types_with_fallback, ) __all__ = [ @@ -27,4 +33,7 @@ __all__ = [ "reload_ontology_registry", "clear_ontology_cache", "is_general_ontology_enabled", + "load_ontology_types_for_scene", + "create_empty_ontology_type_list", + "load_ontology_types_with_fallback", ] diff --git a/api/app/core/memory/ontology_services/ontology_type_loader.py b/api/app/core/memory/ontology_services/ontology_type_loader.py index 8d7417f7..e313a0b1 100644 --- a/api/app/core/memory/ontology_services/ontology_type_loader.py +++ b/api/app/core/memory/ontology_services/ontology_type_loader.py @@ -5,9 +5,14 @@ Functions: load_ontology_types_for_scene: 从数据库加载场景的本体类型 is_general_ontology_enabled: 检查是否启用通用本体 + get_general_ontology_registry: 获取通用本体类型注册表(单例,懒加载) + get_ontology_type_merger: 获取类型合并服务实例 + reload_ontology_registry: 重新加载本体注册表 + clear_ontology_cache: 清除本体缓存 """ import logging +import os from typing import Optional from uuid import UUID @@ -15,6 +20,10 @@ from sqlalchemy.orm import Session logger = logging.getLogger(__name__) +# 模块级缓存(单例) +_general_registry_cache = None +_ontology_type_merger_cache = None + def load_ontology_types_for_scene( scene_id: Optional[UUID], @@ -52,8 +61,7 @@ def load_ontology_types_for_scene( # 查询场景的本体类型 ontology_repo = OntologyClassRepository(db) ontology_classes = ontology_repo.get_classes_by_scene( - scene_id=scene_id, - workspace_id=workspace_id + scene_id=scene_id ) if not ontology_classes: @@ -96,20 +104,137 @@ def create_empty_ontology_type_list() -> Optional["OntologyTypeList"]: def is_general_ontology_enabled() -> bool: """检查是否启用了通用本体 + 通过配置开关和注册表是否可用来判断。 + Returns: True 如果通用本体已启用,否则 False """ try: - from app.core.memory.ontology_services.ontology_type_merger import OntologyTypeMerger + from app.core.config import settings - merger = OntologyTypeMerger() - return merger.general_registry is not None + if not settings.ENABLE_GENERAL_ONTOLOGY_TYPES: + return False + + registry = get_general_ontology_registry() + return registry is not None and len(registry.types) > 0 except Exception as e: logger.warning(f"Failed to check general ontology status: {e}") return False +def get_general_ontology_registry(): + """获取通用本体类型注册表(单例,懒加载) + + 从配置的本体文件中解析并缓存注册表。 + + Returns: + GeneralOntologyTypeRegistry 实例,如果加载失败则返回 None + """ + global _general_registry_cache + + if _general_registry_cache is not None: + return _general_registry_cache + + try: + from app.core.config import settings + + if not settings.ENABLE_GENERAL_ONTOLOGY_TYPES: + logger.info("通用本体类型功能已禁用") + return None + + # 解析本体文件路径 + file_names = [f.strip() for f in settings.GENERAL_ONTOLOGY_FILES.split(",") if f.strip()] + if not file_names: + logger.warning("未配置通用本体文件") + return None + + # 构建完整路径(相对于项目根目录) + base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) + file_paths = [] + for name in file_names: + full_path = os.path.join(base_dir, name) + if os.path.exists(full_path): + file_paths.append(full_path) + else: + logger.warning(f"本体文件不存在: {full_path}") + + if not file_paths: + logger.warning("没有找到可用的通用本体文件") + return None + + # 解析本体文件 + from app.core.memory.utils.ontology.ontology_parser import MultiOntologyParser + + parser = MultiOntologyParser(file_paths) + _general_registry_cache = parser.parse_all() + logger.info(f"通用本体注册表加载完成: {len(_general_registry_cache.types)} 个类型") + + return _general_registry_cache + + except Exception as e: + logger.error(f"加载通用本体注册表失败: {e}", exc_info=True) + return None + + +def get_ontology_type_merger(): + """获取类型合并服务实例(单例,懒加载) + + Returns: + OntologyTypeMerger 实例,如果通用本体未启用则返回 None + """ + global _ontology_type_merger_cache + + if _ontology_type_merger_cache is not None: + return _ontology_type_merger_cache + + try: + registry = get_general_ontology_registry() + if registry is None: + return None + + from app.core.config import settings + from app.core.memory.ontology_services.ontology_type_merger import OntologyTypeMerger + + # 从配置读取核心类型 + core_types_str = settings.CORE_GENERAL_TYPES + core_types = [t.strip() for t in core_types_str.split(",") if t.strip()] if core_types_str else None + + _ontology_type_merger_cache = OntologyTypeMerger( + general_registry=registry, + max_types_in_prompt=settings.MAX_ONTOLOGY_TYPES_IN_PROMPT, + core_types=core_types, + ) + logger.info("OntologyTypeMerger 实例创建完成") + + return _ontology_type_merger_cache + + except Exception as e: + logger.error(f"创建 OntologyTypeMerger 失败: {e}", exc_info=True) + return None + + +def reload_ontology_registry(): + """重新加载本体注册表(清除缓存后重新加载) + + 用于实验模式下动态更新本体配置。 + """ + clear_ontology_cache() + registry = get_general_ontology_registry() + if registry: + get_ontology_type_merger() + logger.info("本体注册表已重新加载") + return registry + + +def clear_ontology_cache(): + """清除本体缓存""" + global _general_registry_cache, _ontology_type_merger_cache + _general_registry_cache = None + _ontology_type_merger_cache = None + logger.info("本体缓存已清除") + + def load_ontology_types_with_fallback( scene_id: Optional[UUID], workspace_id: UUID, diff --git a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 index 6c68bcf6..b2f287f4 100644 --- a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 @@ -18,18 +18,21 @@ Extract entities and knowledge triplets from the given statement. {% if ontology_types %} ===Ontology Type Guidance=== -**CRITICAL: Use predefined ontology types for entity classification with the following priority:** +**CRITICAL RULE: You MUST ONLY use the predefined ontology type names listed below for the entity "type" field. Do NOT use any other type names, even if they seem reasonable.** + +**If no predefined type fits an entity, use the CLOSEST matching predefined type. NEVER invent new type names.** **Type Priority (from highest to lowest):** -1. **[场景类型] Scene Types** - Domain-specific types, use these first if applicable +1. **[场景类型] Scene Types** - Domain-specific types, ALWAYS prefer these first 2. **[通用类型] General Types** - Common types from standard ontologies (DBpedia) 3. **[通用父类] Parent Types** - Provide type hierarchy context **Type Matching Rules:** -- Entity type MUST exactly match one of the predefined type names -- Do NOT modify, translate, or use variations of type names -- Prefer scene types over general types when both could apply -- If uncertain between types, check the type description for guidance +- Entity type MUST exactly match one of the predefined type names below +- Do NOT use types like "Equipment", "Component", "Concept", "Action", "Condition", "Data", "Duration" unless they appear in the predefined list +- Do NOT modify, translate, abbreviate, or create variations of type names +- Prefer scene types (marked [场景类型]) over general types when both could apply +- If uncertain, check the type description to find the best match **Predefined Ontology Types:** {{ ontology_types }} @@ -42,7 +45,7 @@ The following shows type inheritance relationships (Child → Parent → Grandpa {% endfor %} {% endif %} -**Available Type Names (use EXACTLY as shown):** +**ALLOWED Type Names (use EXACTLY one of these, no exceptions):** {{ ontology_type_names | join(', ') }} {% endif %} @@ -207,6 +210,10 @@ Output: {% endif %} ===End of Examples=== +{% if ontology_types %} +**⚠️ REMINDER: The examples above use generic type names for illustration only. You MUST use ONLY the predefined ontology type names from the "ALLOWED Type Names" list above. For example, use "PredictiveMaintenance" instead of "Concept", use "ProductionLine" instead of "Equipment", etc. Map each entity to the closest matching predefined type.** +{% endif %} + ===Output Format=== **JSON Requirements:** diff --git a/api/app/query_ontology_matched_entities.py b/api/app/query_ontology_matched_entities.py index cef8d750..73490134 100644 --- a/api/app/query_ontology_matched_entities.py +++ b/api/app/query_ontology_matched_entities.py @@ -9,7 +9,7 @@ api\scripts\query_ontology_matched_entities.py 用法: python scripts/query_ontology_matched_entities.py [config_id] 示例: python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 - python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2 + python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2 """ import sys @@ -59,7 +59,7 @@ async def get_entities_by_end_user_id(connector: Neo4jConnector, end_user_id: st def get_ontology_types_from_scene(db, scene_id: UUID) -> Set[str]: """获取场景下所有本体类型名称""" class_repo = OntologyClassRepository(db) - ontology_classes = class_repo.get_by_scene(scene_id) + ontology_classes = class_repo.get_classes_by_scene(scene_id) return {oc.class_name for oc in ontology_classes} @@ -80,7 +80,7 @@ def get_all_ontology_types(db) -> Dict[str, Set[str]]: for scene in scenes: class_repo = OntologyClassRepository(db) - ontology_classes = class_repo.get_by_scene(scene.scene_id) + ontology_classes = class_repo.get_classes_by_scene(scene.scene_id) for oc in ontology_classes: if oc.class_name not in all_types: all_types[oc.class_name] = set() diff --git a/api/app/repositories/ontology_class_repository.py b/api/app/repositories/ontology_class_repository.py index 68f261ff..5be81ff7 100644 --- a/api/app/repositories/ontology_class_repository.py +++ b/api/app/repositories/ontology_class_repository.py @@ -202,7 +202,7 @@ class OntologyClassRepository: ) raise - def get_by_scene(self, scene_id: UUID) -> List[OntologyClass]: + def get_classes_by_scene(self, scene_id: UUID) -> List[OntologyClass]: """获取场景下的所有类型 按创建时间倒序排列。 @@ -215,7 +215,7 @@ class OntologyClassRepository: Examples: >>> repo = OntologyClassRepository(db) - >>> classes = repo.get_by_scene(scene_id) + >>> classes = repo.get_classes_by_scene(scene_id) """ try: logger.debug(f"Getting ontology classes by scene: {scene_id}") diff --git a/api/app/services/memory_config_service.py b/api/app/services/memory_config_service.py index c2ddbf2c..ccfd5482 100644 --- a/api/app/services/memory_config_service.py +++ b/api/app/services/memory_config_service.py @@ -550,7 +550,7 @@ class MemoryConfigService: try: ontology_repo = OntologyClassRepository(self.db) - ontology_classes = ontology_repo.get_by_scene(memory_config.scene_id) + ontology_classes = ontology_repo.get_classes_by_scene(memory_config.scene_id) if not ontology_classes: logger.info(f"No ontology classes found for scene_id: {memory_config.scene_id}") diff --git a/api/app/services/ontology_service.py b/api/app/services/ontology_service.py index 31bc5837..9eaab1c8 100644 --- a/api/app/services/ontology_service.py +++ b/api/app/services/ontology_service.py @@ -1155,7 +1155,7 @@ class OntologyService: raise ValueError("无权限访问该场景的类型") # 获取类型列表 - classes = self.class_repo.get_by_scene(scene_id) + classes = self.class_repo.get_classes_by_scene(scene_id) logger.info(f"Found {len(classes)} classes in scene {scene_id}") From 9b07775395fa7765c0f4c9a0063bed0b73e6f0a2 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 9 Feb 2026 20:12:24 +0800 Subject: [PATCH 2/6] [fix]Memory extraction output the core engineering effect --- api/app/query_ontology_matched_entities.py | 52 +++++----- api/app/services/memory_storage_service.py | 105 +++++++++++++++++++++ 2 files changed, 128 insertions(+), 29 deletions(-) diff --git a/api/app/query_ontology_matched_entities.py b/api/app/query_ontology_matched_entities.py index 73490134..c878d258 100644 --- a/api/app/query_ontology_matched_entities.py +++ b/api/app/query_ontology_matched_entities.py @@ -169,10 +169,10 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ print(f" 找到 {len(entities)} 个实体") - # 4. 分类实体(场景类型、通用类型、未匹配) - scene_matched_entities = [] - general_matched_entities = [] - both_matched_entities = [] # 同时匹配场景和通用类型 + # 4. 互斥分类实体:场景类型优先 > 通用类型 > 未匹配 + # 确保: 场景实体数 + 通用实体数 + 未匹配数 = 总实体数 + scene_matched_entities = [] # 匹配场景类型(含同时匹配两者的) + general_matched_entities = [] # 仅匹配通用类型(不含已归入场景的) unmatched_entities = [] scene_type_distribution = defaultdict(list) @@ -183,11 +183,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ in_scene = entity_type in scene_ontology_types in_general = entity_type in general_ontology_types - if in_scene and in_general: - both_matched_entities.append(entity) - scene_type_distribution[entity_type].append(entity) - general_type_distribution[entity_type].append(entity) - elif in_scene: + if in_scene: + # 场景类型优先,同时匹配两者的也归入场景 scene_matched_entities.append(entity) scene_type_distribution[entity_type].append(entity) elif in_general: @@ -197,9 +194,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ unmatched_entities.append(entity) # 5. 输出匹配场景类型的实体 - total_scene_matched = len(scene_matched_entities) + len(both_matched_entities) print(f"\n{'='*70}") - print(f"✅ 匹配场景本体类型的实体 (共 {total_scene_matched} 个)") + print(f"✅ 匹配场景本体类型的实体 (共 {len(scene_matched_entities)} 个)") print(f"{'='*70}") if scene_type_distribution: @@ -219,9 +215,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ print(f"\n (无匹配场景类型的实体)") # 6. 输出匹配通用类型的实体 - total_general_matched = len(general_matched_entities) + len(both_matched_entities) print(f"\n{'='*70}") - print(f"✅ 匹配通用本体类型的实体 (共 {total_general_matched} 个)") + print(f"✅ 匹配通用本体类型的实体 (共 {len(general_matched_entities)} 个)") print(f"{'='*70}") if general_type_distribution: @@ -265,7 +260,6 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ # 8. 统计摘要 total_entities = len(entities) - any_matched = total_entities - len(unmatched_entities) print(f"\n{'='*70}") print(f"📊 统计摘要") @@ -276,35 +270,35 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ print(f" 场景本体类型数: {len(scene_ontology_types)}") print(f" 通用本体类型数: {len(general_ontology_types)}") - print(f"\n 匹配率统计:") + print(f"\n 互斥分类统计 (三者之和 = 总实体数):") print(f" {'-'*50}") - scene_rate = total_scene_matched / total_entities * 100 if total_entities > 0 else 0 - general_rate = total_general_matched / total_entities * 100 if total_entities > 0 else 0 - any_rate = any_matched / total_entities * 100 if total_entities > 0 else 0 + scene_rate = len(scene_matched_entities) / total_entities * 100 if total_entities > 0 else 0 + general_rate = len(general_matched_entities) / total_entities * 100 if total_entities > 0 else 0 unmatched_rate = len(unmatched_entities) / total_entities * 100 if total_entities > 0 else 0 - print(f" 匹配场景类型: {total_scene_matched} 个 ({scene_rate:.1f}%)") - print(f" 匹配通用类型: {total_general_matched} 个 ({general_rate:.1f}%)") - print(f" 同时匹配两者: {len(both_matched_entities)} 个 ({len(both_matched_entities)/total_entities*100:.1f}%)") - print(f" 仅匹配场景类型: {len(scene_matched_entities)} 个 ({len(scene_matched_entities)/total_entities*100:.1f}%)") - print(f" 仅匹配通用类型: {len(general_matched_entities)} 个 ({len(general_matched_entities)/total_entities*100:.1f}%)") - print(f" 匹配任一类型: {any_matched} 个 ({any_rate:.1f}%)") + print(f" 匹配场景类型: {len(scene_matched_entities)} 个 ({scene_rate:.1f}%)") + print(f" 匹配通用类型: {len(general_matched_entities)} 个 ({general_rate:.1f}%)") print(f" 未匹配任何类型: {len(unmatched_entities)} 个 ({unmatched_rate:.1f}%)") + print(f" ─────────────────────────────") + print(f" 合计: {len(scene_matched_entities)} + {len(general_matched_entities)} + {len(unmatched_entities)} = {len(scene_matched_entities) + len(general_matched_entities) + len(unmatched_entities)}") - # 9. 类型分布详情 + # 9. 场景类型分布详情(全部) if scene_type_distribution: - print(f"\n 场景类型分布 (Top 10):") + print(f"\n 场景类型分布 (全部 {len(scene_type_distribution)} 种):") print(f" {'-'*50}") sorted_scene_types = sorted(scene_type_distribution.items(), key=lambda x: len(x[1]), reverse=True) - for type_name, entities_list in sorted_scene_types[:10]: + for type_name, entities_list in sorted_scene_types: print(f" - {type_name}: {len(entities_list)} 个") + print(f" 场景类型实体总数: {len(scene_matched_entities)} 个") + # 10. 通用类型分布详情(全部) if general_type_distribution: - print(f"\n 通用类型分布 (Top 10):") + print(f"\n 通用类型分布 (全部 {len(general_type_distribution)} 种):") print(f" {'-'*50}") sorted_general_types = sorted(general_type_distribution.items(), key=lambda x: len(x[1]), reverse=True) - for type_name, entities_list in sorted_general_types[:10]: + for type_name, entities_list in sorted_general_types: print(f" - {type_name}: {len(entities_list)} 个") + print(f" 通用类型实体总数: {len(general_matched_entities)} 个") except Exception as e: print(f"\n❌ 查询出错: {str(e)}") diff --git a/api/app/services/memory_storage_service.py b/api/app/services/memory_storage_service.py index 71a644cf..16dc88c9 100644 --- a/api/app/services/memory_storage_service.py +++ b/api/app/services/memory_storage_service.py @@ -407,6 +407,17 @@ class DataConfigService: # 数据配置服务类(PostgreSQL) } yield format_sse_message("result", result_data) + # 步骤 6.5: 计算本体覆盖率统计并发出 + try: + ontology_coverage = await self._compute_ontology_coverage( + extracted_result=extracted_result, + memory_config=memory_config, + ) + if ontology_coverage: + yield format_sse_message("ontology_coverage", ontology_coverage) + except Exception as cov_err: + logger.warning(f"[PILOT_RUN_STREAM] Ontology coverage computation failed: {cov_err}", exc_info=True) + # 步骤 7: 发出完成事件 yield format_sse_message("done", { "message": "试运行完成", @@ -428,6 +439,100 @@ class DataConfigService: # 数据配置服务类(PostgreSQL) }) + async def _compute_ontology_coverage( + self, + extracted_result: Dict[str, Any], + memory_config, + ) -> Optional[Dict[str, Any]]: + """根据提取结果中的实体类型,与场景/通用本体类型做互斥分类统计。 + + 分类规则(互斥):场景类型优先 > 通用类型 > 未匹配 + 确保: 场景实体数 + 通用实体数 + 未匹配数 = 总实体数 + + Returns: + 包含三部分统计的字典,或 None(无实体数据时) + """ + core_entities = extracted_result.get("core_entities", []) + if not core_entities: + return None + + # 1. 加载场景本体类型集合 + scene_ontology_types: set = set() + try: + from app.repositories.ontology_class_repository import OntologyClassRepository + + if memory_config.scene_id: + class_repo = OntologyClassRepository(self.db) + ontology_classes = class_repo.get_classes_by_scene(memory_config.scene_id) + scene_ontology_types = {oc.class_name for oc in ontology_classes} + except Exception as e: + logger.warning(f"Failed to load scene ontology types: {e}") + + # 2. 加载通用本体类型集合 + general_ontology_types: set = set() + try: + from app.core.memory.ontology_services.ontology_type_loader import ( + get_general_ontology_registry, + is_general_ontology_enabled, + ) + + if is_general_ontology_enabled(): + registry = get_general_ontology_registry() + if registry: + general_ontology_types = set(registry.types.keys()) + except Exception as e: + logger.warning(f"Failed to load general ontology types: {e}") + + # 3. 互斥分类:场景优先 > 通用 > 未匹配 + scene_distribution: list = [] + general_distribution: list = [] + unmatched_distribution: list = [] + scene_total = 0 + general_total = 0 + unmatched_total = 0 + + for item in core_entities: + entity_type = item.get("type", "") + count = item.get("count", 0) + + if entity_type in scene_ontology_types: + scene_distribution.append({"type": entity_type, "count": count}) + scene_total += count + elif entity_type in general_ontology_types: + general_distribution.append({"type": entity_type, "count": count}) + general_total += count + else: + unmatched_distribution.append({"type": entity_type, "count": count}) + unmatched_total += count + + # 按数量降序排列 + scene_distribution.sort(key=lambda x: x["count"], reverse=True) + general_distribution.sort(key=lambda x: x["count"], reverse=True) + unmatched_distribution.sort(key=lambda x: x["count"], reverse=True) + + total_entities = scene_total + general_total + unmatched_total + + return { + "scene_type_distribution": { + "type_count": len(scene_distribution), + "entity_total": scene_total, + "types": scene_distribution, + }, + "general_type_distribution": { + "type_count": len(general_distribution), + "entity_total": general_total, + "types": general_distribution, + }, + "unmatched": { + "type_count": len(unmatched_distribution), + "entity_total": unmatched_total, + "types": unmatched_distribution, + }, + "total_entities": total_entities, + "time": int(time.time() * 1000), + } + + # -------------------- Neo4j Search & Analytics (fused from data_search_service.py) -------------------- # Ensure env for connector (e.g., NEO4J_PASSWORD) load_dotenv() From b1688950c45ff3d3b5eb2caf9baf14b45bab6b77 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 9 Feb 2026 20:49:28 +0800 Subject: [PATCH 3/6] [fix]Added entity type matching and filtered out the 00NA0 status code. --- api/app/core/logging_config.py | 21 ++++++++++++--------- api/app/services/memory_storage_service.py | 9 ++++----- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/api/app/core/logging_config.py b/api/app/core/logging_config.py index 1e6c066e..40259b46 100644 --- a/api/app/core/logging_config.py +++ b/api/app/core/logging_config.py @@ -39,10 +39,13 @@ class SensitiveDataLoggingFilter(logging.Filter): class Neo4jSuccessNotificationFilter(logging.Filter): - """Neo4j 日志过滤器:过滤成功状态的通知,保留真正的警告和错误 + """Neo4j 日志过滤器:过滤成功/信息性状态的通知,保留真正的警告和错误 Neo4j 驱动会以 WARNING 级别记录所有数据库通知,包括成功的操作。 - 这个过滤器会过滤掉状态码为 '00000' (成功) 的通知,只保留真正的警告和错误。 + 这个过滤器会过滤掉以下 GQL 状态码的通知,只保留真正的警告和错误: + - 00000: 成功完成 (successful completion) + - 00N00: 无数据 (no data) + - 00NA0: 无数据,信息性通知 (no data, informational notification) 使用正则表达式进行更严格的匹配,避免误过滤无关的警告。 """ @@ -50,12 +53,12 @@ class Neo4jSuccessNotificationFilter(logging.Filter): import re # 编译正则表达式以提高性能 - # 匹配 gql_status='00000' 或 gql_status="00000",确保是完整的状态码 - GQL_STATUS_PATTERN = re.compile(r"gql_status=['\"]00000['\"]") + # 匹配所有"成功/信息性"的 GQL 状态码: + # 00000 = 成功完成, 00N00 = 无数据, 00NA0 = 无数据信息性通知 + GQL_STATUS_PATTERN = re.compile(r"gql_status=['\"](00000|00N00|00NA0)['\"]") - # 匹配 status_description 中的成功完成消息 - # 使用单词边界确保精确匹配 - SUCCESS_DESC_PATTERN = re.compile(r"status_description=['\"]note:\s*successful\s+completion['\"]", re.IGNORECASE) + # 匹配 status_description 中的成功完成或信息性通知消息 + SUCCESS_DESC_PATTERN = re.compile(r"status_description=['\"]note:\s*(successful\s+completion|no\s+data)['\"]", re.IGNORECASE) def filter(self, record: logging.LogRecord) -> bool: """ @@ -107,8 +110,8 @@ class LoggingConfig: root_logger = logging.getLogger() root_logger.setLevel(getattr(logging, settings.LOG_LEVEL.upper())) - # 为 Neo4j 驱动添加过滤器,过滤成功通知但保留真正的警告 - # Neo4j 驱动会以 WARNING 级别记录所有数据库通知,包括成功的操作(status='00000') + # 为 Neo4j 驱动添加过滤器,过滤成功/信息性通知但保留真正的警告 + # Neo4j 驱动会以 WARNING 级别记录所有数据库通知,包括成功(00000)和信息性(00NA0)通知 # 使用过滤器而不是改变日志级别,这样可以保留真正的警告和错误 neo4j_filter = Neo4jSuccessNotificationFilter() for neo4j_logger_name in ["neo4j", "neo4j.io", "neo4j.pool"]: diff --git a/api/app/services/memory_storage_service.py b/api/app/services/memory_storage_service.py index 16dc88c9..1083f750 100644 --- a/api/app/services/memory_storage_service.py +++ b/api/app/services/memory_storage_service.py @@ -399,25 +399,24 @@ class DataConfigService: # 数据配置服务类(PostgreSQL) with open(result_path, "r", encoding="utf-8") as rf: extracted_result = json.load(rf) - # 步骤 6: 发出结果事件 + # 步骤 6: 计算本体覆盖率并合并到结果中 result_data = { "config_id": cid, "time_log": os.path.join(project_root, "logs", "time.log"), "extracted_result": extracted_result, } - yield format_sse_message("result", result_data) - - # 步骤 6.5: 计算本体覆盖率统计并发出 try: ontology_coverage = await self._compute_ontology_coverage( extracted_result=extracted_result, memory_config=memory_config, ) if ontology_coverage: - yield format_sse_message("ontology_coverage", ontology_coverage) + result_data["ontology_coverage"] = ontology_coverage except Exception as cov_err: logger.warning(f"[PILOT_RUN_STREAM] Ontology coverage computation failed: {cov_err}", exc_info=True) + yield format_sse_message("result", result_data) + # 步骤 7: 发出完成事件 yield format_sse_message("done", { "message": "试运行完成", From d4571fb75ba11deb3904bf0c36587800b44cbf06 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 9 Feb 2026 19:35:11 +0800 Subject: [PATCH 4/6] [fix]Fix get_classes_by_scen, add ontology_types=ontology_types --- api/app/controllers/ontology_controller.py | 2 +- .../core/memory/agent/utils/write_tools.py | 1 + .../core/memory/ontology_services/__init__.py | 9 ++ .../ontology_services/ontology_type_loader.py | 135 +++++++++++++++++- .../prompt/prompts/extract_triplet.jinja2 | 21 ++- api/app/query_ontology_matched_entities.py | 6 +- .../repositories/ontology_class_repository.py | 4 +- api/app/services/memory_config_service.py | 2 +- api/app/services/ontology_service.py | 2 +- 9 files changed, 162 insertions(+), 20 deletions(-) diff --git a/api/app/controllers/ontology_controller.py b/api/app/controllers/ontology_controller.py index 6827c55c..9d0511ea 100644 --- a/api/app/controllers/ontology_controller.py +++ b/api/app/controllers/ontology_controller.py @@ -1007,7 +1007,7 @@ async def export_owl_by_scene( # 2. 查询场景下的所有本体类型 class_repo = OntologyClassRepository(db) - ontology_classes_db = class_repo.get_by_scene(request.scene_id) + ontology_classes_db = class_repo.get_classes_by_scene(request.scene_id) if not ontology_classes_db: api_logger.warning(f"No classes found in scene: {request.scene_id}") diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index 262f3b61..93c6ef6f 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -126,6 +126,7 @@ async def write( config=pipeline_config, embedding_id=embedding_model_id, language=language, + ontology_types=ontology_types, ) # Run the complete extraction pipeline diff --git a/api/app/core/memory/ontology_services/__init__.py b/api/app/core/memory/ontology_services/__init__.py index 9eb2f34f..db778c9c 100644 --- a/api/app/core/memory/ontology_services/__init__.py +++ b/api/app/core/memory/ontology_services/__init__.py @@ -8,6 +8,9 @@ - reload_ontology_registry: 重新加载本体注册表(实验模式) - clear_ontology_cache: 清除本体缓存 - is_general_ontology_enabled: 检查通用本体类型功能是否启用 +- load_ontology_types_for_scene: 从数据库加载场景的本体类型 +- create_empty_ontology_type_list: 创建空的本体类型列表 +- load_ontology_types_with_fallback: 加载本体类型(带通用类型回退) """ from .ontology_type_merger import OntologyTypeMerger, DEFAULT_CORE_GENERAL_TYPES @@ -17,6 +20,9 @@ from .ontology_type_loader import ( reload_ontology_registry, clear_ontology_cache, is_general_ontology_enabled, + load_ontology_types_for_scene, + create_empty_ontology_type_list, + load_ontology_types_with_fallback, ) __all__ = [ @@ -27,4 +33,7 @@ __all__ = [ "reload_ontology_registry", "clear_ontology_cache", "is_general_ontology_enabled", + "load_ontology_types_for_scene", + "create_empty_ontology_type_list", + "load_ontology_types_with_fallback", ] diff --git a/api/app/core/memory/ontology_services/ontology_type_loader.py b/api/app/core/memory/ontology_services/ontology_type_loader.py index 8d7417f7..e313a0b1 100644 --- a/api/app/core/memory/ontology_services/ontology_type_loader.py +++ b/api/app/core/memory/ontology_services/ontology_type_loader.py @@ -5,9 +5,14 @@ Functions: load_ontology_types_for_scene: 从数据库加载场景的本体类型 is_general_ontology_enabled: 检查是否启用通用本体 + get_general_ontology_registry: 获取通用本体类型注册表(单例,懒加载) + get_ontology_type_merger: 获取类型合并服务实例 + reload_ontology_registry: 重新加载本体注册表 + clear_ontology_cache: 清除本体缓存 """ import logging +import os from typing import Optional from uuid import UUID @@ -15,6 +20,10 @@ from sqlalchemy.orm import Session logger = logging.getLogger(__name__) +# 模块级缓存(单例) +_general_registry_cache = None +_ontology_type_merger_cache = None + def load_ontology_types_for_scene( scene_id: Optional[UUID], @@ -52,8 +61,7 @@ def load_ontology_types_for_scene( # 查询场景的本体类型 ontology_repo = OntologyClassRepository(db) ontology_classes = ontology_repo.get_classes_by_scene( - scene_id=scene_id, - workspace_id=workspace_id + scene_id=scene_id ) if not ontology_classes: @@ -96,20 +104,137 @@ def create_empty_ontology_type_list() -> Optional["OntologyTypeList"]: def is_general_ontology_enabled() -> bool: """检查是否启用了通用本体 + 通过配置开关和注册表是否可用来判断。 + Returns: True 如果通用本体已启用,否则 False """ try: - from app.core.memory.ontology_services.ontology_type_merger import OntologyTypeMerger + from app.core.config import settings - merger = OntologyTypeMerger() - return merger.general_registry is not None + if not settings.ENABLE_GENERAL_ONTOLOGY_TYPES: + return False + + registry = get_general_ontology_registry() + return registry is not None and len(registry.types) > 0 except Exception as e: logger.warning(f"Failed to check general ontology status: {e}") return False +def get_general_ontology_registry(): + """获取通用本体类型注册表(单例,懒加载) + + 从配置的本体文件中解析并缓存注册表。 + + Returns: + GeneralOntologyTypeRegistry 实例,如果加载失败则返回 None + """ + global _general_registry_cache + + if _general_registry_cache is not None: + return _general_registry_cache + + try: + from app.core.config import settings + + if not settings.ENABLE_GENERAL_ONTOLOGY_TYPES: + logger.info("通用本体类型功能已禁用") + return None + + # 解析本体文件路径 + file_names = [f.strip() for f in settings.GENERAL_ONTOLOGY_FILES.split(",") if f.strip()] + if not file_names: + logger.warning("未配置通用本体文件") + return None + + # 构建完整路径(相对于项目根目录) + base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) + file_paths = [] + for name in file_names: + full_path = os.path.join(base_dir, name) + if os.path.exists(full_path): + file_paths.append(full_path) + else: + logger.warning(f"本体文件不存在: {full_path}") + + if not file_paths: + logger.warning("没有找到可用的通用本体文件") + return None + + # 解析本体文件 + from app.core.memory.utils.ontology.ontology_parser import MultiOntologyParser + + parser = MultiOntologyParser(file_paths) + _general_registry_cache = parser.parse_all() + logger.info(f"通用本体注册表加载完成: {len(_general_registry_cache.types)} 个类型") + + return _general_registry_cache + + except Exception as e: + logger.error(f"加载通用本体注册表失败: {e}", exc_info=True) + return None + + +def get_ontology_type_merger(): + """获取类型合并服务实例(单例,懒加载) + + Returns: + OntologyTypeMerger 实例,如果通用本体未启用则返回 None + """ + global _ontology_type_merger_cache + + if _ontology_type_merger_cache is not None: + return _ontology_type_merger_cache + + try: + registry = get_general_ontology_registry() + if registry is None: + return None + + from app.core.config import settings + from app.core.memory.ontology_services.ontology_type_merger import OntologyTypeMerger + + # 从配置读取核心类型 + core_types_str = settings.CORE_GENERAL_TYPES + core_types = [t.strip() for t in core_types_str.split(",") if t.strip()] if core_types_str else None + + _ontology_type_merger_cache = OntologyTypeMerger( + general_registry=registry, + max_types_in_prompt=settings.MAX_ONTOLOGY_TYPES_IN_PROMPT, + core_types=core_types, + ) + logger.info("OntologyTypeMerger 实例创建完成") + + return _ontology_type_merger_cache + + except Exception as e: + logger.error(f"创建 OntologyTypeMerger 失败: {e}", exc_info=True) + return None + + +def reload_ontology_registry(): + """重新加载本体注册表(清除缓存后重新加载) + + 用于实验模式下动态更新本体配置。 + """ + clear_ontology_cache() + registry = get_general_ontology_registry() + if registry: + get_ontology_type_merger() + logger.info("本体注册表已重新加载") + return registry + + +def clear_ontology_cache(): + """清除本体缓存""" + global _general_registry_cache, _ontology_type_merger_cache + _general_registry_cache = None + _ontology_type_merger_cache = None + logger.info("本体缓存已清除") + + def load_ontology_types_with_fallback( scene_id: Optional[UUID], workspace_id: UUID, diff --git a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 index 6c68bcf6..b2f287f4 100644 --- a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 @@ -18,18 +18,21 @@ Extract entities and knowledge triplets from the given statement. {% if ontology_types %} ===Ontology Type Guidance=== -**CRITICAL: Use predefined ontology types for entity classification with the following priority:** +**CRITICAL RULE: You MUST ONLY use the predefined ontology type names listed below for the entity "type" field. Do NOT use any other type names, even if they seem reasonable.** + +**If no predefined type fits an entity, use the CLOSEST matching predefined type. NEVER invent new type names.** **Type Priority (from highest to lowest):** -1. **[场景类型] Scene Types** - Domain-specific types, use these first if applicable +1. **[场景类型] Scene Types** - Domain-specific types, ALWAYS prefer these first 2. **[通用类型] General Types** - Common types from standard ontologies (DBpedia) 3. **[通用父类] Parent Types** - Provide type hierarchy context **Type Matching Rules:** -- Entity type MUST exactly match one of the predefined type names -- Do NOT modify, translate, or use variations of type names -- Prefer scene types over general types when both could apply -- If uncertain between types, check the type description for guidance +- Entity type MUST exactly match one of the predefined type names below +- Do NOT use types like "Equipment", "Component", "Concept", "Action", "Condition", "Data", "Duration" unless they appear in the predefined list +- Do NOT modify, translate, abbreviate, or create variations of type names +- Prefer scene types (marked [场景类型]) over general types when both could apply +- If uncertain, check the type description to find the best match **Predefined Ontology Types:** {{ ontology_types }} @@ -42,7 +45,7 @@ The following shows type inheritance relationships (Child → Parent → Grandpa {% endfor %} {% endif %} -**Available Type Names (use EXACTLY as shown):** +**ALLOWED Type Names (use EXACTLY one of these, no exceptions):** {{ ontology_type_names | join(', ') }} {% endif %} @@ -207,6 +210,10 @@ Output: {% endif %} ===End of Examples=== +{% if ontology_types %} +**⚠️ REMINDER: The examples above use generic type names for illustration only. You MUST use ONLY the predefined ontology type names from the "ALLOWED Type Names" list above. For example, use "PredictiveMaintenance" instead of "Concept", use "ProductionLine" instead of "Equipment", etc. Map each entity to the closest matching predefined type.** +{% endif %} + ===Output Format=== **JSON Requirements:** diff --git a/api/app/query_ontology_matched_entities.py b/api/app/query_ontology_matched_entities.py index cef8d750..73490134 100644 --- a/api/app/query_ontology_matched_entities.py +++ b/api/app/query_ontology_matched_entities.py @@ -9,7 +9,7 @@ api\scripts\query_ontology_matched_entities.py 用法: python scripts/query_ontology_matched_entities.py [config_id] 示例: python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 - python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2 + python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2 """ import sys @@ -59,7 +59,7 @@ async def get_entities_by_end_user_id(connector: Neo4jConnector, end_user_id: st def get_ontology_types_from_scene(db, scene_id: UUID) -> Set[str]: """获取场景下所有本体类型名称""" class_repo = OntologyClassRepository(db) - ontology_classes = class_repo.get_by_scene(scene_id) + ontology_classes = class_repo.get_classes_by_scene(scene_id) return {oc.class_name for oc in ontology_classes} @@ -80,7 +80,7 @@ def get_all_ontology_types(db) -> Dict[str, Set[str]]: for scene in scenes: class_repo = OntologyClassRepository(db) - ontology_classes = class_repo.get_by_scene(scene.scene_id) + ontology_classes = class_repo.get_classes_by_scene(scene.scene_id) for oc in ontology_classes: if oc.class_name not in all_types: all_types[oc.class_name] = set() diff --git a/api/app/repositories/ontology_class_repository.py b/api/app/repositories/ontology_class_repository.py index 68f261ff..5be81ff7 100644 --- a/api/app/repositories/ontology_class_repository.py +++ b/api/app/repositories/ontology_class_repository.py @@ -202,7 +202,7 @@ class OntologyClassRepository: ) raise - def get_by_scene(self, scene_id: UUID) -> List[OntologyClass]: + def get_classes_by_scene(self, scene_id: UUID) -> List[OntologyClass]: """获取场景下的所有类型 按创建时间倒序排列。 @@ -215,7 +215,7 @@ class OntologyClassRepository: Examples: >>> repo = OntologyClassRepository(db) - >>> classes = repo.get_by_scene(scene_id) + >>> classes = repo.get_classes_by_scene(scene_id) """ try: logger.debug(f"Getting ontology classes by scene: {scene_id}") diff --git a/api/app/services/memory_config_service.py b/api/app/services/memory_config_service.py index c2ddbf2c..ccfd5482 100644 --- a/api/app/services/memory_config_service.py +++ b/api/app/services/memory_config_service.py @@ -550,7 +550,7 @@ class MemoryConfigService: try: ontology_repo = OntologyClassRepository(self.db) - ontology_classes = ontology_repo.get_by_scene(memory_config.scene_id) + ontology_classes = ontology_repo.get_classes_by_scene(memory_config.scene_id) if not ontology_classes: logger.info(f"No ontology classes found for scene_id: {memory_config.scene_id}") diff --git a/api/app/services/ontology_service.py b/api/app/services/ontology_service.py index 31bc5837..9eaab1c8 100644 --- a/api/app/services/ontology_service.py +++ b/api/app/services/ontology_service.py @@ -1155,7 +1155,7 @@ class OntologyService: raise ValueError("无权限访问该场景的类型") # 获取类型列表 - classes = self.class_repo.get_by_scene(scene_id) + classes = self.class_repo.get_classes_by_scene(scene_id) logger.info(f"Found {len(classes)} classes in scene {scene_id}") From c86ccf09319fcc19932778d93c8b470d545e6cd8 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 9 Feb 2026 20:12:24 +0800 Subject: [PATCH 5/6] [fix]Memory extraction output the core engineering effect --- api/app/query_ontology_matched_entities.py | 52 +++++----- api/app/services/memory_storage_service.py | 105 +++++++++++++++++++++ 2 files changed, 128 insertions(+), 29 deletions(-) diff --git a/api/app/query_ontology_matched_entities.py b/api/app/query_ontology_matched_entities.py index 73490134..c878d258 100644 --- a/api/app/query_ontology_matched_entities.py +++ b/api/app/query_ontology_matched_entities.py @@ -169,10 +169,10 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ print(f" 找到 {len(entities)} 个实体") - # 4. 分类实体(场景类型、通用类型、未匹配) - scene_matched_entities = [] - general_matched_entities = [] - both_matched_entities = [] # 同时匹配场景和通用类型 + # 4. 互斥分类实体:场景类型优先 > 通用类型 > 未匹配 + # 确保: 场景实体数 + 通用实体数 + 未匹配数 = 总实体数 + scene_matched_entities = [] # 匹配场景类型(含同时匹配两者的) + general_matched_entities = [] # 仅匹配通用类型(不含已归入场景的) unmatched_entities = [] scene_type_distribution = defaultdict(list) @@ -183,11 +183,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ in_scene = entity_type in scene_ontology_types in_general = entity_type in general_ontology_types - if in_scene and in_general: - both_matched_entities.append(entity) - scene_type_distribution[entity_type].append(entity) - general_type_distribution[entity_type].append(entity) - elif in_scene: + if in_scene: + # 场景类型优先,同时匹配两者的也归入场景 scene_matched_entities.append(entity) scene_type_distribution[entity_type].append(entity) elif in_general: @@ -197,9 +194,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ unmatched_entities.append(entity) # 5. 输出匹配场景类型的实体 - total_scene_matched = len(scene_matched_entities) + len(both_matched_entities) print(f"\n{'='*70}") - print(f"✅ 匹配场景本体类型的实体 (共 {total_scene_matched} 个)") + print(f"✅ 匹配场景本体类型的实体 (共 {len(scene_matched_entities)} 个)") print(f"{'='*70}") if scene_type_distribution: @@ -219,9 +215,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ print(f"\n (无匹配场景类型的实体)") # 6. 输出匹配通用类型的实体 - total_general_matched = len(general_matched_entities) + len(both_matched_entities) print(f"\n{'='*70}") - print(f"✅ 匹配通用本体类型的实体 (共 {total_general_matched} 个)") + print(f"✅ 匹配通用本体类型的实体 (共 {len(general_matched_entities)} 个)") print(f"{'='*70}") if general_type_distribution: @@ -265,7 +260,6 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ # 8. 统计摘要 total_entities = len(entities) - any_matched = total_entities - len(unmatched_entities) print(f"\n{'='*70}") print(f"📊 统计摘要") @@ -276,35 +270,35 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[ print(f" 场景本体类型数: {len(scene_ontology_types)}") print(f" 通用本体类型数: {len(general_ontology_types)}") - print(f"\n 匹配率统计:") + print(f"\n 互斥分类统计 (三者之和 = 总实体数):") print(f" {'-'*50}") - scene_rate = total_scene_matched / total_entities * 100 if total_entities > 0 else 0 - general_rate = total_general_matched / total_entities * 100 if total_entities > 0 else 0 - any_rate = any_matched / total_entities * 100 if total_entities > 0 else 0 + scene_rate = len(scene_matched_entities) / total_entities * 100 if total_entities > 0 else 0 + general_rate = len(general_matched_entities) / total_entities * 100 if total_entities > 0 else 0 unmatched_rate = len(unmatched_entities) / total_entities * 100 if total_entities > 0 else 0 - print(f" 匹配场景类型: {total_scene_matched} 个 ({scene_rate:.1f}%)") - print(f" 匹配通用类型: {total_general_matched} 个 ({general_rate:.1f}%)") - print(f" 同时匹配两者: {len(both_matched_entities)} 个 ({len(both_matched_entities)/total_entities*100:.1f}%)") - print(f" 仅匹配场景类型: {len(scene_matched_entities)} 个 ({len(scene_matched_entities)/total_entities*100:.1f}%)") - print(f" 仅匹配通用类型: {len(general_matched_entities)} 个 ({len(general_matched_entities)/total_entities*100:.1f}%)") - print(f" 匹配任一类型: {any_matched} 个 ({any_rate:.1f}%)") + print(f" 匹配场景类型: {len(scene_matched_entities)} 个 ({scene_rate:.1f}%)") + print(f" 匹配通用类型: {len(general_matched_entities)} 个 ({general_rate:.1f}%)") print(f" 未匹配任何类型: {len(unmatched_entities)} 个 ({unmatched_rate:.1f}%)") + print(f" ─────────────────────────────") + print(f" 合计: {len(scene_matched_entities)} + {len(general_matched_entities)} + {len(unmatched_entities)} = {len(scene_matched_entities) + len(general_matched_entities) + len(unmatched_entities)}") - # 9. 类型分布详情 + # 9. 场景类型分布详情(全部) if scene_type_distribution: - print(f"\n 场景类型分布 (Top 10):") + print(f"\n 场景类型分布 (全部 {len(scene_type_distribution)} 种):") print(f" {'-'*50}") sorted_scene_types = sorted(scene_type_distribution.items(), key=lambda x: len(x[1]), reverse=True) - for type_name, entities_list in sorted_scene_types[:10]: + for type_name, entities_list in sorted_scene_types: print(f" - {type_name}: {len(entities_list)} 个") + print(f" 场景类型实体总数: {len(scene_matched_entities)} 个") + # 10. 通用类型分布详情(全部) if general_type_distribution: - print(f"\n 通用类型分布 (Top 10):") + print(f"\n 通用类型分布 (全部 {len(general_type_distribution)} 种):") print(f" {'-'*50}") sorted_general_types = sorted(general_type_distribution.items(), key=lambda x: len(x[1]), reverse=True) - for type_name, entities_list in sorted_general_types[:10]: + for type_name, entities_list in sorted_general_types: print(f" - {type_name}: {len(entities_list)} 个") + print(f" 通用类型实体总数: {len(general_matched_entities)} 个") except Exception as e: print(f"\n❌ 查询出错: {str(e)}") diff --git a/api/app/services/memory_storage_service.py b/api/app/services/memory_storage_service.py index 71a644cf..16dc88c9 100644 --- a/api/app/services/memory_storage_service.py +++ b/api/app/services/memory_storage_service.py @@ -407,6 +407,17 @@ class DataConfigService: # 数据配置服务类(PostgreSQL) } yield format_sse_message("result", result_data) + # 步骤 6.5: 计算本体覆盖率统计并发出 + try: + ontology_coverage = await self._compute_ontology_coverage( + extracted_result=extracted_result, + memory_config=memory_config, + ) + if ontology_coverage: + yield format_sse_message("ontology_coverage", ontology_coverage) + except Exception as cov_err: + logger.warning(f"[PILOT_RUN_STREAM] Ontology coverage computation failed: {cov_err}", exc_info=True) + # 步骤 7: 发出完成事件 yield format_sse_message("done", { "message": "试运行完成", @@ -428,6 +439,100 @@ class DataConfigService: # 数据配置服务类(PostgreSQL) }) + async def _compute_ontology_coverage( + self, + extracted_result: Dict[str, Any], + memory_config, + ) -> Optional[Dict[str, Any]]: + """根据提取结果中的实体类型,与场景/通用本体类型做互斥分类统计。 + + 分类规则(互斥):场景类型优先 > 通用类型 > 未匹配 + 确保: 场景实体数 + 通用实体数 + 未匹配数 = 总实体数 + + Returns: + 包含三部分统计的字典,或 None(无实体数据时) + """ + core_entities = extracted_result.get("core_entities", []) + if not core_entities: + return None + + # 1. 加载场景本体类型集合 + scene_ontology_types: set = set() + try: + from app.repositories.ontology_class_repository import OntologyClassRepository + + if memory_config.scene_id: + class_repo = OntologyClassRepository(self.db) + ontology_classes = class_repo.get_classes_by_scene(memory_config.scene_id) + scene_ontology_types = {oc.class_name for oc in ontology_classes} + except Exception as e: + logger.warning(f"Failed to load scene ontology types: {e}") + + # 2. 加载通用本体类型集合 + general_ontology_types: set = set() + try: + from app.core.memory.ontology_services.ontology_type_loader import ( + get_general_ontology_registry, + is_general_ontology_enabled, + ) + + if is_general_ontology_enabled(): + registry = get_general_ontology_registry() + if registry: + general_ontology_types = set(registry.types.keys()) + except Exception as e: + logger.warning(f"Failed to load general ontology types: {e}") + + # 3. 互斥分类:场景优先 > 通用 > 未匹配 + scene_distribution: list = [] + general_distribution: list = [] + unmatched_distribution: list = [] + scene_total = 0 + general_total = 0 + unmatched_total = 0 + + for item in core_entities: + entity_type = item.get("type", "") + count = item.get("count", 0) + + if entity_type in scene_ontology_types: + scene_distribution.append({"type": entity_type, "count": count}) + scene_total += count + elif entity_type in general_ontology_types: + general_distribution.append({"type": entity_type, "count": count}) + general_total += count + else: + unmatched_distribution.append({"type": entity_type, "count": count}) + unmatched_total += count + + # 按数量降序排列 + scene_distribution.sort(key=lambda x: x["count"], reverse=True) + general_distribution.sort(key=lambda x: x["count"], reverse=True) + unmatched_distribution.sort(key=lambda x: x["count"], reverse=True) + + total_entities = scene_total + general_total + unmatched_total + + return { + "scene_type_distribution": { + "type_count": len(scene_distribution), + "entity_total": scene_total, + "types": scene_distribution, + }, + "general_type_distribution": { + "type_count": len(general_distribution), + "entity_total": general_total, + "types": general_distribution, + }, + "unmatched": { + "type_count": len(unmatched_distribution), + "entity_total": unmatched_total, + "types": unmatched_distribution, + }, + "total_entities": total_entities, + "time": int(time.time() * 1000), + } + + # -------------------- Neo4j Search & Analytics (fused from data_search_service.py) -------------------- # Ensure env for connector (e.g., NEO4J_PASSWORD) load_dotenv() From 4219e12cc0e23e0b72f13c43a49aadf19af58763 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 9 Feb 2026 20:49:28 +0800 Subject: [PATCH 6/6] [fix]Added entity type matching and filtered out the 00NA0 status code. --- api/app/core/logging_config.py | 21 ++++++++++++--------- api/app/services/memory_storage_service.py | 9 ++++----- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/api/app/core/logging_config.py b/api/app/core/logging_config.py index 1e6c066e..40259b46 100644 --- a/api/app/core/logging_config.py +++ b/api/app/core/logging_config.py @@ -39,10 +39,13 @@ class SensitiveDataLoggingFilter(logging.Filter): class Neo4jSuccessNotificationFilter(logging.Filter): - """Neo4j 日志过滤器:过滤成功状态的通知,保留真正的警告和错误 + """Neo4j 日志过滤器:过滤成功/信息性状态的通知,保留真正的警告和错误 Neo4j 驱动会以 WARNING 级别记录所有数据库通知,包括成功的操作。 - 这个过滤器会过滤掉状态码为 '00000' (成功) 的通知,只保留真正的警告和错误。 + 这个过滤器会过滤掉以下 GQL 状态码的通知,只保留真正的警告和错误: + - 00000: 成功完成 (successful completion) + - 00N00: 无数据 (no data) + - 00NA0: 无数据,信息性通知 (no data, informational notification) 使用正则表达式进行更严格的匹配,避免误过滤无关的警告。 """ @@ -50,12 +53,12 @@ class Neo4jSuccessNotificationFilter(logging.Filter): import re # 编译正则表达式以提高性能 - # 匹配 gql_status='00000' 或 gql_status="00000",确保是完整的状态码 - GQL_STATUS_PATTERN = re.compile(r"gql_status=['\"]00000['\"]") + # 匹配所有"成功/信息性"的 GQL 状态码: + # 00000 = 成功完成, 00N00 = 无数据, 00NA0 = 无数据信息性通知 + GQL_STATUS_PATTERN = re.compile(r"gql_status=['\"](00000|00N00|00NA0)['\"]") - # 匹配 status_description 中的成功完成消息 - # 使用单词边界确保精确匹配 - SUCCESS_DESC_PATTERN = re.compile(r"status_description=['\"]note:\s*successful\s+completion['\"]", re.IGNORECASE) + # 匹配 status_description 中的成功完成或信息性通知消息 + SUCCESS_DESC_PATTERN = re.compile(r"status_description=['\"]note:\s*(successful\s+completion|no\s+data)['\"]", re.IGNORECASE) def filter(self, record: logging.LogRecord) -> bool: """ @@ -107,8 +110,8 @@ class LoggingConfig: root_logger = logging.getLogger() root_logger.setLevel(getattr(logging, settings.LOG_LEVEL.upper())) - # 为 Neo4j 驱动添加过滤器,过滤成功通知但保留真正的警告 - # Neo4j 驱动会以 WARNING 级别记录所有数据库通知,包括成功的操作(status='00000') + # 为 Neo4j 驱动添加过滤器,过滤成功/信息性通知但保留真正的警告 + # Neo4j 驱动会以 WARNING 级别记录所有数据库通知,包括成功(00000)和信息性(00NA0)通知 # 使用过滤器而不是改变日志级别,这样可以保留真正的警告和错误 neo4j_filter = Neo4jSuccessNotificationFilter() for neo4j_logger_name in ["neo4j", "neo4j.io", "neo4j.pool"]: diff --git a/api/app/services/memory_storage_service.py b/api/app/services/memory_storage_service.py index 16dc88c9..1083f750 100644 --- a/api/app/services/memory_storage_service.py +++ b/api/app/services/memory_storage_service.py @@ -399,25 +399,24 @@ class DataConfigService: # 数据配置服务类(PostgreSQL) with open(result_path, "r", encoding="utf-8") as rf: extracted_result = json.load(rf) - # 步骤 6: 发出结果事件 + # 步骤 6: 计算本体覆盖率并合并到结果中 result_data = { "config_id": cid, "time_log": os.path.join(project_root, "logs", "time.log"), "extracted_result": extracted_result, } - yield format_sse_message("result", result_data) - - # 步骤 6.5: 计算本体覆盖率统计并发出 try: ontology_coverage = await self._compute_ontology_coverage( extracted_result=extracted_result, memory_config=memory_config, ) if ontology_coverage: - yield format_sse_message("ontology_coverage", ontology_coverage) + result_data["ontology_coverage"] = ontology_coverage except Exception as cov_err: logger.warning(f"[PILOT_RUN_STREAM] Ontology coverage computation failed: {cov_err}", exc_info=True) + yield format_sse_message("result", result_data) + # 步骤 7: 发出完成事件 yield format_sse_message("done", { "message": "试运行完成",