[fix]Memory extraction output the core engineering effect
This commit is contained in:
@@ -169,10 +169,10 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
|
|||||||
|
|
||||||
print(f" 找到 {len(entities)} 个实体")
|
print(f" 找到 {len(entities)} 个实体")
|
||||||
|
|
||||||
# 4. 分类实体(场景类型、通用类型、未匹配)
|
# 4. 互斥分类实体:场景类型优先 > 通用类型 > 未匹配
|
||||||
scene_matched_entities = []
|
# 确保: 场景实体数 + 通用实体数 + 未匹配数 = 总实体数
|
||||||
general_matched_entities = []
|
scene_matched_entities = [] # 匹配场景类型(含同时匹配两者的)
|
||||||
both_matched_entities = [] # 同时匹配场景和通用类型
|
general_matched_entities = [] # 仅匹配通用类型(不含已归入场景的)
|
||||||
unmatched_entities = []
|
unmatched_entities = []
|
||||||
|
|
||||||
scene_type_distribution = defaultdict(list)
|
scene_type_distribution = defaultdict(list)
|
||||||
@@ -183,11 +183,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
|
|||||||
in_scene = entity_type in scene_ontology_types
|
in_scene = entity_type in scene_ontology_types
|
||||||
in_general = entity_type in general_ontology_types
|
in_general = entity_type in general_ontology_types
|
||||||
|
|
||||||
if in_scene and in_general:
|
if in_scene:
|
||||||
both_matched_entities.append(entity)
|
# 场景类型优先,同时匹配两者的也归入场景
|
||||||
scene_type_distribution[entity_type].append(entity)
|
|
||||||
general_type_distribution[entity_type].append(entity)
|
|
||||||
elif in_scene:
|
|
||||||
scene_matched_entities.append(entity)
|
scene_matched_entities.append(entity)
|
||||||
scene_type_distribution[entity_type].append(entity)
|
scene_type_distribution[entity_type].append(entity)
|
||||||
elif in_general:
|
elif in_general:
|
||||||
@@ -197,9 +194,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
|
|||||||
unmatched_entities.append(entity)
|
unmatched_entities.append(entity)
|
||||||
|
|
||||||
# 5. 输出匹配场景类型的实体
|
# 5. 输出匹配场景类型的实体
|
||||||
total_scene_matched = len(scene_matched_entities) + len(both_matched_entities)
|
|
||||||
print(f"\n{'='*70}")
|
print(f"\n{'='*70}")
|
||||||
print(f"✅ 匹配场景本体类型的实体 (共 {total_scene_matched} 个)")
|
print(f"✅ 匹配场景本体类型的实体 (共 {len(scene_matched_entities)} 个)")
|
||||||
print(f"{'='*70}")
|
print(f"{'='*70}")
|
||||||
|
|
||||||
if scene_type_distribution:
|
if scene_type_distribution:
|
||||||
@@ -219,9 +215,8 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
|
|||||||
print(f"\n (无匹配场景类型的实体)")
|
print(f"\n (无匹配场景类型的实体)")
|
||||||
|
|
||||||
# 6. 输出匹配通用类型的实体
|
# 6. 输出匹配通用类型的实体
|
||||||
total_general_matched = len(general_matched_entities) + len(both_matched_entities)
|
|
||||||
print(f"\n{'='*70}")
|
print(f"\n{'='*70}")
|
||||||
print(f"✅ 匹配通用本体类型的实体 (共 {total_general_matched} 个)")
|
print(f"✅ 匹配通用本体类型的实体 (共 {len(general_matched_entities)} 个)")
|
||||||
print(f"{'='*70}")
|
print(f"{'='*70}")
|
||||||
|
|
||||||
if general_type_distribution:
|
if general_type_distribution:
|
||||||
@@ -265,7 +260,6 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
|
|||||||
|
|
||||||
# 8. 统计摘要
|
# 8. 统计摘要
|
||||||
total_entities = len(entities)
|
total_entities = len(entities)
|
||||||
any_matched = total_entities - len(unmatched_entities)
|
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
print(f"\n{'='*70}")
|
||||||
print(f"📊 统计摘要")
|
print(f"📊 统计摘要")
|
||||||
@@ -276,35 +270,35 @@ async def query_ontology_matched_entities(end_user_id: str, config_id: Optional[
|
|||||||
print(f" 场景本体类型数: {len(scene_ontology_types)}")
|
print(f" 场景本体类型数: {len(scene_ontology_types)}")
|
||||||
print(f" 通用本体类型数: {len(general_ontology_types)}")
|
print(f" 通用本体类型数: {len(general_ontology_types)}")
|
||||||
|
|
||||||
print(f"\n 匹配率统计:")
|
print(f"\n 互斥分类统计 (三者之和 = 总实体数):")
|
||||||
print(f" {'-'*50}")
|
print(f" {'-'*50}")
|
||||||
scene_rate = total_scene_matched / total_entities * 100 if total_entities > 0 else 0
|
scene_rate = len(scene_matched_entities) / total_entities * 100 if total_entities > 0 else 0
|
||||||
general_rate = total_general_matched / total_entities * 100 if total_entities > 0 else 0
|
general_rate = len(general_matched_entities) / total_entities * 100 if total_entities > 0 else 0
|
||||||
any_rate = any_matched / total_entities * 100 if total_entities > 0 else 0
|
|
||||||
unmatched_rate = len(unmatched_entities) / total_entities * 100 if total_entities > 0 else 0
|
unmatched_rate = len(unmatched_entities) / total_entities * 100 if total_entities > 0 else 0
|
||||||
|
|
||||||
print(f" 匹配场景类型: {total_scene_matched} 个 ({scene_rate:.1f}%)")
|
print(f" 匹配场景类型: {len(scene_matched_entities)} 个 ({scene_rate:.1f}%)")
|
||||||
print(f" 匹配通用类型: {total_general_matched} 个 ({general_rate:.1f}%)")
|
print(f" 匹配通用类型: {len(general_matched_entities)} 个 ({general_rate:.1f}%)")
|
||||||
print(f" 同时匹配两者: {len(both_matched_entities)} 个 ({len(both_matched_entities)/total_entities*100:.1f}%)")
|
|
||||||
print(f" 仅匹配场景类型: {len(scene_matched_entities)} 个 ({len(scene_matched_entities)/total_entities*100:.1f}%)")
|
|
||||||
print(f" 仅匹配通用类型: {len(general_matched_entities)} 个 ({len(general_matched_entities)/total_entities*100:.1f}%)")
|
|
||||||
print(f" 匹配任一类型: {any_matched} 个 ({any_rate:.1f}%)")
|
|
||||||
print(f" 未匹配任何类型: {len(unmatched_entities)} 个 ({unmatched_rate:.1f}%)")
|
print(f" 未匹配任何类型: {len(unmatched_entities)} 个 ({unmatched_rate:.1f}%)")
|
||||||
|
print(f" ─────────────────────────────")
|
||||||
|
print(f" 合计: {len(scene_matched_entities)} + {len(general_matched_entities)} + {len(unmatched_entities)} = {len(scene_matched_entities) + len(general_matched_entities) + len(unmatched_entities)}")
|
||||||
|
|
||||||
# 9. 类型分布详情
|
# 9. 场景类型分布详情(全部)
|
||||||
if scene_type_distribution:
|
if scene_type_distribution:
|
||||||
print(f"\n 场景类型分布 (Top 10):")
|
print(f"\n 场景类型分布 (全部 {len(scene_type_distribution)} 种):")
|
||||||
print(f" {'-'*50}")
|
print(f" {'-'*50}")
|
||||||
sorted_scene_types = sorted(scene_type_distribution.items(), key=lambda x: len(x[1]), reverse=True)
|
sorted_scene_types = sorted(scene_type_distribution.items(), key=lambda x: len(x[1]), reverse=True)
|
||||||
for type_name, entities_list in sorted_scene_types[:10]:
|
for type_name, entities_list in sorted_scene_types:
|
||||||
print(f" - {type_name}: {len(entities_list)} 个")
|
print(f" - {type_name}: {len(entities_list)} 个")
|
||||||
|
print(f" 场景类型实体总数: {len(scene_matched_entities)} 个")
|
||||||
|
|
||||||
|
# 10. 通用类型分布详情(全部)
|
||||||
if general_type_distribution:
|
if general_type_distribution:
|
||||||
print(f"\n 通用类型分布 (Top 10):")
|
print(f"\n 通用类型分布 (全部 {len(general_type_distribution)} 种):")
|
||||||
print(f" {'-'*50}")
|
print(f" {'-'*50}")
|
||||||
sorted_general_types = sorted(general_type_distribution.items(), key=lambda x: len(x[1]), reverse=True)
|
sorted_general_types = sorted(general_type_distribution.items(), key=lambda x: len(x[1]), reverse=True)
|
||||||
for type_name, entities_list in sorted_general_types[:10]:
|
for type_name, entities_list in sorted_general_types:
|
||||||
print(f" - {type_name}: {len(entities_list)} 个")
|
print(f" - {type_name}: {len(entities_list)} 个")
|
||||||
|
print(f" 通用类型实体总数: {len(general_matched_entities)} 个")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\n❌ 查询出错: {str(e)}")
|
print(f"\n❌ 查询出错: {str(e)}")
|
||||||
|
|||||||
@@ -407,6 +407,17 @@ class DataConfigService: # 数据配置服务类(PostgreSQL)
|
|||||||
}
|
}
|
||||||
yield format_sse_message("result", result_data)
|
yield format_sse_message("result", result_data)
|
||||||
|
|
||||||
|
# 步骤 6.5: 计算本体覆盖率统计并发出
|
||||||
|
try:
|
||||||
|
ontology_coverage = await self._compute_ontology_coverage(
|
||||||
|
extracted_result=extracted_result,
|
||||||
|
memory_config=memory_config,
|
||||||
|
)
|
||||||
|
if ontology_coverage:
|
||||||
|
yield format_sse_message("ontology_coverage", ontology_coverage)
|
||||||
|
except Exception as cov_err:
|
||||||
|
logger.warning(f"[PILOT_RUN_STREAM] Ontology coverage computation failed: {cov_err}", exc_info=True)
|
||||||
|
|
||||||
# 步骤 7: 发出完成事件
|
# 步骤 7: 发出完成事件
|
||||||
yield format_sse_message("done", {
|
yield format_sse_message("done", {
|
||||||
"message": "试运行完成",
|
"message": "试运行完成",
|
||||||
@@ -428,6 +439,100 @@ class DataConfigService: # 数据配置服务类(PostgreSQL)
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
async def _compute_ontology_coverage(
    self,
    extracted_result: Dict[str, Any],
    memory_config,
) -> Optional[Dict[str, Any]]:
    """Classify extracted entity types against the scene and general ontologies.

    The classification is mutually exclusive, with priority
    scene type > general type > unmatched, which guarantees that
    scene total + general total + unmatched total == total entities.

    Args:
        extracted_result: Extraction output. Its ``"core_entities"`` entry is
            read as a list of dicts carrying ``"type"`` and ``"count"`` keys.
        memory_config: Configuration object; its ``scene_id`` attribute
            selects which scene ontology classes are loaded.

    Returns:
        A dict with three sections (``scene_type_distribution``,
        ``general_type_distribution``, ``unmatched``), each holding
        ``type_count``, ``entity_total`` and ``types`` (sorted by count,
        descending), plus ``total_entities`` and a millisecond ``time``
        stamp. ``None`` when there are no core entities.
    """
    core_entities = (extracted_result or {}).get("core_entities", [])
    if not core_entities:
        return None

    # 1. Load the set of scene ontology type names. Failures are logged and
    #    leave the set empty, so coverage is still reported (best effort).
    scene_ontology_types: set = set()
    try:
        from app.repositories.ontology_class_repository import OntologyClassRepository

        if memory_config.scene_id:
            class_repo = OntologyClassRepository(self.db)
            ontology_classes = class_repo.get_classes_by_scene(memory_config.scene_id)
            scene_ontology_types = {oc.class_name for oc in ontology_classes}
    except Exception as e:
        logger.warning(f"Failed to load scene ontology types: {e}")

    # 2. Load the set of general ontology type names (same best-effort policy).
    general_ontology_types: set = set()
    try:
        from app.core.memory.ontology_services.ontology_type_loader import (
            get_general_ontology_registry,
            is_general_ontology_enabled,
        )

        if is_general_ontology_enabled():
            registry = get_general_ontology_registry()
            if registry:
                general_ontology_types = set(registry.types.keys())
    except Exception as e:
        logger.warning(f"Failed to load general ontology types: {e}")

    # 3. Mutually exclusive classification: scene > general > unmatched.
    #    Counts are accumulated per type so a type that appears more than once
    #    in the input yields a single row and is not double-counted in
    #    ``type_count``. Dicts keep insertion order, so first-seen order is
    #    preserved among types with equal counts.
    scene_counts: Dict[Any, int] = {}
    general_counts: Dict[Any, int] = {}
    unmatched_counts: Dict[Any, int] = {}

    for item in core_entities:
        if not isinstance(item, dict):
            # Malformed entry: nothing to classify.
            continue

        entity_type = item.get("type", "")
        try:
            # A missing, null or non-numeric count contributes zero.
            count = int(item.get("count") or 0)
        except (TypeError, ValueError):
            count = 0

        if entity_type in scene_ontology_types:
            target = scene_counts
        elif entity_type in general_ontology_types:
            target = general_counts
        else:
            target = unmatched_counts
        target[entity_type] = target.get(entity_type, 0) + count

    def _summarize(counts: Dict[Any, int]) -> Dict[str, Any]:
        """Build one result section, with types sorted by count descending."""
        rows = sorted(
            ({"type": name, "count": total} for name, total in counts.items()),
            key=lambda row: row["count"],
            reverse=True,
        )
        return {
            "type_count": len(rows),
            "entity_total": sum(counts.values()),
            "types": rows,
        }

    scene_section = _summarize(scene_counts)
    general_section = _summarize(general_counts)
    unmatched_section = _summarize(unmatched_counts)

    total_entities = (
        scene_section["entity_total"]
        + general_section["entity_total"]
        + unmatched_section["entity_total"]
    )

    return {
        "scene_type_distribution": scene_section,
        "general_type_distribution": general_section,
        "unmatched": unmatched_section,
        "total_entities": total_entities,
        "time": int(time.time() * 1000),
    }
|
||||||
|
|
||||||
|
|
||||||
# -------------------- Neo4j Search & Analytics (fused from data_search_service.py) --------------------
|
# -------------------- Neo4j Search & Analytics (fused from data_search_service.py) --------------------
|
||||||
# Ensure env for connector (e.g., NEO4J_PASSWORD)
|
# Ensure env for connector (e.g., NEO4J_PASSWORD)
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|||||||
Reference in New Issue
Block a user