From 9b07775395fa7765c0f4c9a0063bed0b73e6f0a2 Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Mon, 9 Feb 2026 20:12:24 +0800
Subject: [PATCH] [fix] Memory extraction: output the core engineering effect (ontology coverage statistics)

---
 api/app/query_ontology_matched_entities.py |  52 +++++-----
 api/app/services/memory_storage_service.py | 105 +++++++++++++++++++++
 2 files changed, 128 insertions(+), 29 deletions(-)

diff --git a/api/app/query_ontology_matched_entities.py b/api/app/query_ontology_matched_entities.py
index 73490134..c878d258 100644
--- a/api/app/query_ontology_matched_entities.py
+++ b/api/app/query_ontology_matched_entities.py
@@ -169,10 +169,10 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
         
         print(f"   找到 {len(entities)} 个实体")
         
-        # 4. 分类实体（场景类型、通用类型、未匹配）
-        scene_matched_entities = []
-        general_matched_entities = []
-        both_matched_entities = []  # 同时匹配场景和通用类型
+        # 4. 互斥分类实体：场景类型优先 > 通用类型 > 未匹配
+        #    确保: 场景实体数 + 通用实体数 + 未匹配数 = 总实体数
+        scene_matched_entities = []   # 匹配场景类型（含同时匹配两者的）
+        general_matched_entities = [] # 仅匹配通用类型（不含已归入场景的）
         unmatched_entities = []
         
         scene_type_distribution = defaultdict(list)
@@ -183,11 +183,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
             in_scene = entity_type in scene_ontology_types
             in_general = entity_type in general_ontology_types
             
-            if in_scene and in_general:
-                both_matched_entities.append(entity)
-                scene_type_distribution[entity_type].append(entity)
-                general_type_distribution[entity_type].append(entity)
-            elif in_scene:
+            if in_scene:
+                # 场景类型优先，同时匹配两者的也归入场景
                 scene_matched_entities.append(entity)
                 scene_type_distribution[entity_type].append(entity)
             elif in_general:
@@ -197,9 +194,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
                 unmatched_entities.append(entity)
         
         # 5. 输出匹配场景类型的实体
-        total_scene_matched = len(scene_matched_entities) + len(both_matched_entities)
         print(f"\n{'='*70}")
-        print(f"✅ 匹配场景本体类型的实体 (共 {total_scene_matched} 个)")
+        print(f"✅ 匹配场景本体类型的实体 (共 {len(scene_matched_entities)} 个)")
         print(f"{'='*70}")
         
         if scene_type_distribution:
@@ -219,9 +215,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
             print(f"\n   (无匹配场景类型的实体)")
         
         # 6. 输出匹配通用类型的实体
-        total_general_matched = len(general_matched_entities) + len(both_matched_entities)
         print(f"\n{'='*70}")
-        print(f"✅ 匹配通用本体类型的实体 (共 {total_general_matched} 个)")
+        print(f"✅ 匹配通用本体类型的实体 (共 {len(general_matched_entities)} 个)")
         print(f"{'='*70}")
         
         if general_type_distribution:
@@ -265,7 +260,6 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
         
         # 8. 统计摘要
         total_entities = len(entities)
-        any_matched = total_entities - len(unmatched_entities)
         
         print(f"\n{'='*70}")
         print(f"📊 统计摘要")
@@ -276,35 +270,35 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
         print(f"   场景本体类型数: {len(scene_ontology_types)}")
         print(f"   通用本体类型数: {len(general_ontology_types)}")
         
-        print(f"\n   匹配率统计:")
+        print(f"\n   互斥分类统计 (三者之和 = 总实体数):")
         print(f"   {'-'*50}")
-        scene_rate = total_scene_matched / total_entities * 100 if total_entities > 0 else 0
-        general_rate = total_general_matched / total_entities * 100 if total_entities > 0 else 0
-        any_rate = any_matched / total_entities * 100 if total_entities > 0 else 0
+        scene_rate = len(scene_matched_entities) / total_entities * 100 if total_entities > 0 else 0
+        general_rate = len(general_matched_entities) / total_entities * 100 if total_entities > 0 else 0
         unmatched_rate = len(unmatched_entities) / total_entities * 100 if total_entities > 0 else 0
         
-        print(f"   匹配场景类型: {total_scene_matched} 个 ({scene_rate:.1f}%)")
-        print(f"   匹配通用类型: {total_general_matched} 个 ({general_rate:.1f}%)")
-        print(f"   同时匹配两者: {len(both_matched_entities)} 个 ({len(both_matched_entities)/total_entities*100:.1f}%)")
-        print(f"   仅匹配场景类型: {len(scene_matched_entities)} 个 ({len(scene_matched_entities)/total_entities*100:.1f}%)")
-        print(f"   仅匹配通用类型: {len(general_matched_entities)} 个 ({len(general_matched_entities)/total_entities*100:.1f}%)")
-        print(f"   匹配任一类型: {any_matched} 个 ({any_rate:.1f}%)")
+        print(f"   匹配场景类型: {len(scene_matched_entities)} 个 ({scene_rate:.1f}%)")
+        print(f"   匹配通用类型: {len(general_matched_entities)} 个 ({general_rate:.1f}%)")
         print(f"   未匹配任何类型: {len(unmatched_entities)} 个 ({unmatched_rate:.1f}%)")
+        print(f"   ─────────────────────────────")
+        print(f"   合计: {len(scene_matched_entities)} + {len(general_matched_entities)} + {len(unmatched_entities)} = {len(scene_matched_entities) + len(general_matched_entities) + len(unmatched_entities)}")
         
-        # 9. 类型分布详情
+        # 9. 场景类型分布详情（全部）
         if scene_type_distribution:
-            print(f"\n   场景类型分布 (Top 10):")
+            print(f"\n   场景类型分布 (全部 {len(scene_type_distribution)} 种):")
             print(f"   {'-'*50}")
             sorted_scene_types = sorted(scene_type_distribution.items(), key=lambda x: len(x[1]), reverse=True)
-            for type_name, entities_list in sorted_scene_types[:10]:
+            for type_name, entities_list in sorted_scene_types:
                 print(f"   - {type_name}: {len(entities_list)} 个")
+            print(f"   场景类型实体总数: {len(scene_matched_entities)} 个")
         
+        # 10. 通用类型分布详情（全部）
         if general_type_distribution:
-            print(f"\n   通用类型分布 (Top 10):")
+            print(f"\n   通用类型分布 (全部 {len(general_type_distribution)} 种):")
             print(f"   {'-'*50}")
             sorted_general_types = sorted(general_type_distribution.items(), key=lambda x: len(x[1]), reverse=True)
-            for type_name, entities_list in sorted_general_types[:10]:
+            for type_name, entities_list in sorted_general_types:
                 print(f"   - {type_name}: {len(entities_list)} 个")
+            print(f"   通用类型实体总数: {len(general_matched_entities)} 个")
         
     except Exception as e:
         print(f"\n❌ 查询出错: {str(e)}")
diff --git a/api/app/services/memory_storage_service.py b/api/app/services/memory_storage_service.py
index 71a644cf..16dc88c9 100644
--- a/api/app/services/memory_storage_service.py
+++ b/api/app/services/memory_storage_service.py
@@ -407,6 +407,17 @@ class DataConfigService: # 数据配置服务类（PostgreSQL）
             }
             yield format_sse_message("result", result_data)
             
+            # 步骤 6.5: 计算本体覆盖率统计并发出
+            try:
+                ontology_coverage = await self._compute_ontology_coverage(
+                    extracted_result=extracted_result,
+                    memory_config=memory_config,
+                )
+                if ontology_coverage:
+                    yield format_sse_message("ontology_coverage", ontology_coverage)
+            except Exception as cov_err:
+                logger.warning(f"[PILOT_RUN_STREAM] Ontology coverage computation failed: {cov_err}", exc_info=True)
+            
             # 步骤 7: 发出完成事件
             yield format_sse_message("done", {
                 "message": "试运行完成",
@@ -428,6 +439,100 @@ class DataConfigService: # 数据配置服务类（PostgreSQL）
             })
 
 
+    async def _compute_ontology_coverage(
+        self,
+        extracted_result: Dict[str, Any],
+        memory_config,
+    ) -> Optional[Dict[str, Any]]:
+        """Classify the extracted entity types into scene / general / unmatched buckets and tally them.
+
+        Classification is mutually exclusive, with priority: scene type > general type > unmatched.
+        This guarantees: scene entities + general entities + unmatched entities = total entities.
+
+        Returns:
+            A dict holding the three per-bucket statistics, or None when there is no entity data.
+        """
+        core_entities = extracted_result.get("core_entities", [])
+        if not core_entities:
+            return None
+
+        # 1. Load the set of scene ontology type names
+        scene_ontology_types: set = set()
+        try:
+            from app.repositories.ontology_class_repository import OntologyClassRepository
+
+            if memory_config.scene_id:
+                class_repo = OntologyClassRepository(self.db)
+                ontology_classes = class_repo.get_classes_by_scene(memory_config.scene_id)
+                scene_ontology_types = {oc.class_name for oc in ontology_classes}
+        except Exception as e:  # best-effort: on failure the scene set stays empty
+            logger.warning(f"Failed to load scene ontology types: {e}")
+
+        # 2. Load the set of general ontology type names
+        general_ontology_types: set = set()
+        try:
+            from app.core.memory.ontology_services.ontology_type_loader import (
+                get_general_ontology_registry,
+                is_general_ontology_enabled,
+            )
+
+            if is_general_ontology_enabled():
+                registry = get_general_ontology_registry()
+                if registry:
+                    general_ontology_types = set(registry.types.keys())
+        except Exception as e:  # best-effort: on failure the general set stays empty
+            logger.warning(f"Failed to load general ontology types: {e}")
+
+        # 3. Mutually exclusive classification: scene first > general > unmatched
+        scene_distribution: list = []
+        general_distribution: list = []
+        unmatched_distribution: list = []
+        scene_total = 0
+        general_total = 0
+        unmatched_total = 0
+
+        for item in core_entities:
+            entity_type = item.get("type", "")
+            count = item.get("count", 0)
+
+            if entity_type in scene_ontology_types:
+                scene_distribution.append({"type": entity_type, "count": count})
+                scene_total += count
+            elif entity_type in general_ontology_types:
+                general_distribution.append({"type": entity_type, "count": count})
+                general_total += count
+            else:
+                unmatched_distribution.append({"type": entity_type, "count": count})
+                unmatched_total += count
+
+        # Sort each bucket by count, descending
+        scene_distribution.sort(key=lambda x: x["count"], reverse=True)
+        general_distribution.sort(key=lambda x: x["count"], reverse=True)
+        unmatched_distribution.sort(key=lambda x: x["count"], reverse=True)
+
+        total_entities = scene_total + general_total + unmatched_total
+
+        return {
+            "scene_type_distribution": {
+                "type_count": len(scene_distribution),
+                "entity_total": scene_total,
+                "types": scene_distribution,
+            },
+            "general_type_distribution": {
+                "type_count": len(general_distribution),
+                "entity_total": general_total,
+                "types": general_distribution,
+            },
+            "unmatched": {
+                "type_count": len(unmatched_distribution),
+                "entity_total": unmatched_total,
+                "types": unmatched_distribution,
+            },
+            "total_entities": total_entities,
+            "time": int(time.time() * 1000),
+        }
+
+
 # -------------------- Neo4j Search & Analytics (fused from data_search_service.py) --------------------
 # Ensure env for connector (e.g., NEO4J_PASSWORD)
 load_dotenv()