[fix]Fix get_classes_by_scene, add ontology_types=ontology_types

This commit is contained in:
lanceyq
2026-02-09 19:35:11 +08:00
parent 076e95d5c2
commit 6c8318b696
9 changed files with 162 additions and 20 deletions

View File

@@ -1007,7 +1007,7 @@ async def export_owl_by_scene(
# 2. 查询场景下的所有本体类型
class_repo = OntologyClassRepository(db)
ontology_classes_db = class_repo.get_by_scene(request.scene_id)
ontology_classes_db = class_repo.get_classes_by_scene(request.scene_id)
if not ontology_classes_db:
api_logger.warning(f"No classes found in scene: {request.scene_id}")

View File

@@ -126,6 +126,7 @@ async def write(
config=pipeline_config,
embedding_id=embedding_model_id,
language=language,
ontology_types=ontology_types,
)
# Run the complete extraction pipeline

View File

@@ -8,6 +8,9 @@
- reload_ontology_registry: 重新加载本体注册表(实验模式)
- clear_ontology_cache: 清除本体缓存
- is_general_ontology_enabled: 检查通用本体类型功能是否启用
- load_ontology_types_for_scene: 从数据库加载场景的本体类型
- create_empty_ontology_type_list: 创建空的本体类型列表
- load_ontology_types_with_fallback: 加载本体类型(带通用类型回退)
"""
from .ontology_type_merger import OntologyTypeMerger, DEFAULT_CORE_GENERAL_TYPES
@@ -17,6 +20,9 @@ from .ontology_type_loader import (
reload_ontology_registry,
clear_ontology_cache,
is_general_ontology_enabled,
load_ontology_types_for_scene,
create_empty_ontology_type_list,
load_ontology_types_with_fallback,
)
__all__ = [
@@ -27,4 +33,7 @@ __all__ = [
"reload_ontology_registry",
"clear_ontology_cache",
"is_general_ontology_enabled",
"load_ontology_types_for_scene",
"create_empty_ontology_type_list",
"load_ontology_types_with_fallback",
]

View File

@@ -5,9 +5,14 @@
Functions:
load_ontology_types_for_scene: 从数据库加载场景的本体类型
is_general_ontology_enabled: 检查是否启用通用本体
get_general_ontology_registry: 获取通用本体类型注册表(单例,懒加载)
get_ontology_type_merger: 获取类型合并服务实例
reload_ontology_registry: 重新加载本体注册表
clear_ontology_cache: 清除本体缓存
"""
import logging
import os
from typing import Optional
from uuid import UUID
@@ -15,6 +20,10 @@ from sqlalchemy.orm import Session
logger = logging.getLogger(__name__)
# 模块级缓存(单例)
_general_registry_cache = None
_ontology_type_merger_cache = None
def load_ontology_types_for_scene(
scene_id: Optional[UUID],
@@ -52,8 +61,7 @@ def load_ontology_types_for_scene(
# 查询场景的本体类型
ontology_repo = OntologyClassRepository(db)
ontology_classes = ontology_repo.get_classes_by_scene(
scene_id=scene_id,
workspace_id=workspace_id
scene_id=scene_id
)
if not ontology_classes:
@@ -96,20 +104,137 @@ def create_empty_ontology_type_list() -> Optional["OntologyTypeList"]:
def is_general_ontology_enabled() -> bool:
    """Report whether general ontology types are enabled and usable.

    The answer is derived from two things: the
    ``settings.ENABLE_GENERAL_ONTOLOGY_TYPES`` switch and whether
    ``get_general_ontology_registry()`` yields a registry that contains at
    least one type.

    Returns:
        True when the switch is on and the registry is non-empty,
        otherwise False. Any exception raised while checking is logged as
        a warning and reported as False.
    """
    try:
        # Imported lazily, matching the other loaders in this module.
        from app.core.config import settings

        # The configuration switch wins: when it is off the registry is
        # never consulted.
        if not settings.ENABLE_GENERAL_ONTOLOGY_TYPES:
            return False

        registry = get_general_ontology_registry()
        return registry is not None and len(registry.types) > 0
    except Exception as e:
        logger.warning(f"Failed to check general ontology status: {e}")
        return False
def get_general_ontology_registry():
    """Return the general ontology type registry, loading it on first use.

    The registry is parsed from the ontology files named in
    ``settings.GENERAL_ONTOLOGY_FILES`` (a comma-separated list) and kept in
    the module-level ``_general_registry_cache`` so later calls reuse it.

    Returns:
        The object produced by ``MultiOntologyParser.parse_all()``, or None
        when the feature is disabled, no file is configured, none of the
        configured files exist, or loading raises an exception.
    """
    global _general_registry_cache

    # Fast path: a previously loaded registry is reused as-is.
    if _general_registry_cache is not None:
        return _general_registry_cache

    try:
        from app.core.config import settings

        if not settings.ENABLE_GENERAL_ONTOLOGY_TYPES:
            logger.info("通用本体类型功能已禁用")
            return None

        # Split the comma-separated setting, dropping blank entries.
        configured_names = [
            entry.strip()
            for entry in settings.GENERAL_ONTOLOGY_FILES.split(",")
            if entry.strip()
        ]
        if not configured_names:
            logger.warning("未配置通用本体文件")
            return None

        # Configured names are resolved against the directory five levels
        # above this module (the original comment calls it the project root).
        root_dir = os.path.abspath(__file__)
        for _ in range(5):
            root_dir = os.path.dirname(root_dir)

        # Keep only the files that actually exist; warn about the rest.
        existing_paths = []
        for configured_name in configured_names:
            candidate = os.path.join(root_dir, configured_name)
            if not os.path.exists(candidate):
                logger.warning(f"本体文件不存在: {candidate}")
                continue
            existing_paths.append(candidate)

        if not existing_paths:
            logger.warning("没有找到可用的通用本体文件")
            return None

        from app.core.memory.utils.ontology.ontology_parser import MultiOntologyParser

        # Parse every available file and cache the result.
        _general_registry_cache = MultiOntologyParser(existing_paths).parse_all()

        logger.info(f"通用本体注册表加载完成: {len(_general_registry_cache.types)} 个类型")
        return _general_registry_cache

    except Exception as e:
        logger.error(f"加载通用本体注册表失败: {e}", exc_info=True)
        return None
def get_ontology_type_merger():
    """Return the shared OntologyTypeMerger, building it on first use.

    The merger is built from the general ontology registry plus
    ``settings.MAX_ONTOLOGY_TYPES_IN_PROMPT`` and the comma-separated
    ``settings.CORE_GENERAL_TYPES``, then kept in the module-level
    ``_ontology_type_merger_cache``.

    Returns:
        The cached ``OntologyTypeMerger`` instance, or None when no general
        ontology registry is available or construction raises an exception.
    """
    global _ontology_type_merger_cache

    # Reuse the merger created by an earlier call.
    if _ontology_type_merger_cache is not None:
        return _ontology_type_merger_cache

    try:
        registry = get_general_ontology_registry()
        if registry is None:
            return None

        from app.core.config import settings
        from app.core.memory.ontology_services.ontology_type_merger import OntologyTypeMerger

        # Core types come from configuration; an empty or unset value means
        # None is passed through to the merger.
        raw_core_types = settings.CORE_GENERAL_TYPES
        if raw_core_types:
            core_types = [
                part.strip() for part in raw_core_types.split(",") if part.strip()
            ]
        else:
            core_types = None

        _ontology_type_merger_cache = OntologyTypeMerger(
            general_registry=registry,
            max_types_in_prompt=settings.MAX_ONTOLOGY_TYPES_IN_PROMPT,
            core_types=core_types,
        )

        logger.info("OntologyTypeMerger 实例创建完成")
        return _ontology_type_merger_cache

    except Exception as e:
        logger.error(f"创建 OntologyTypeMerger 失败: {e}", exc_info=True)
        return None
def reload_ontology_registry():
    """Drop the cached ontology objects and load the registry again.

    Intended for experiment mode, where the ontology configuration may be
    changed while the process is running.

    Returns:
        Whatever ``get_general_ontology_registry()`` returns after the
        caches have been cleared (None when loading fails).
    """
    clear_ontology_cache()

    reloaded = get_general_ontology_registry()
    if reloaded:
        # Rebuild the merger as well so it is bound to the fresh registry.
        get_ontology_type_merger()

    logger.info("本体注册表已重新加载")
    return reloaded
def clear_ontology_cache():
    """Reset both module-level ontology caches to None."""
    global _general_registry_cache, _ontology_type_merger_cache

    # Registry and merger are cleared together so the next lookup rebuilds
    # both from configuration.
    _general_registry_cache = _ontology_type_merger_cache = None
    logger.info("本体缓存已清除")
def load_ontology_types_with_fallback(
scene_id: Optional[UUID],
workspace_id: UUID,

View File

@@ -18,18 +18,21 @@ Extract entities and knowledge triplets from the given statement.
{% if ontology_types %}
===Ontology Type Guidance===
**CRITICAL: Use predefined ontology types for entity classification with the following priority:**
**CRITICAL RULE: You MUST ONLY use the predefined ontology type names listed below for the entity "type" field. Do NOT use any other type names, even if they seem reasonable.**
**If no predefined type fits an entity, use the CLOSEST matching predefined type. NEVER invent new type names.**
**Type Priority (from highest to lowest):**
1. **[场景类型] Scene Types** - Domain-specific types, use these first if applicable
1. **[场景类型] Scene Types** - Domain-specific types, ALWAYS prefer these first
2. **[通用类型] General Types** - Common types from standard ontologies (DBpedia)
3. **[通用父类] Parent Types** - Provide type hierarchy context
**Type Matching Rules:**
- Entity type MUST exactly match one of the predefined type names
- Do NOT modify, translate, or use variations of type names
- Prefer scene types over general types when both could apply
- If uncertain between types, check the type description for guidance
- Entity type MUST exactly match one of the predefined type names below
- Do NOT use types like "Equipment", "Component", "Concept", "Action", "Condition", "Data", "Duration" unless they appear in the predefined list
- Do NOT modify, translate, abbreviate, or create variations of type names
- Prefer scene types (marked [场景类型]) over general types when both could apply
- If uncertain, check the type description to find the best match
**Predefined Ontology Types:**
{{ ontology_types }}
@@ -42,7 +45,7 @@ The following shows type inheritance relationships (Child → Parent → Grandpa
{% endfor %}
{% endif %}
**Available Type Names (use EXACTLY as shown):**
**ALLOWED Type Names (use EXACTLY one of these, no exceptions):**
{{ ontology_type_names | join(', ') }}
{% endif %}
@@ -207,6 +210,10 @@ Output:
{% endif %}
===End of Examples===
{% if ontology_types %}
**⚠️ REMINDER: The examples above use generic type names for illustration only. You MUST use ONLY the predefined ontology type names from the "ALLOWED Type Names" list above. For example, use "PredictiveMaintenance" instead of "Concept", use "ProductionLine" instead of "Equipment", etc. Map each entity to the closest matching predefined type.**
{% endif %}
===Output Format===
**JSON Requirements:**

View File

@@ -9,7 +9,7 @@ api\scripts\query_ontology_matched_entities.py
用法: python scripts/query_ontology_matched_entities.py <end_user_id> [config_id]
示例: python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1
python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2
python scripts/query_ontology_matched_entities.py 075660cf-08e6-40a6-a76e-308b6f52fbf1 fd547bb9-7b9e-47ea-ae53-242d208a31a2
"""
import sys
@@ -59,7 +59,7 @@ async def get_entities_by_end_user_id(connector: Neo4jConnector, end_user_id: st
def get_ontology_types_from_scene(db, scene_id: UUID) -> Set[str]:
    """Return the names of all ontology classes defined in a scene.

    Args:
        db: Database session handed to ``OntologyClassRepository``.
        scene_id: Identifier of the scene whose classes are queried.

    Returns:
        The set of ``class_name`` values of the classes returned by the
        repository for this scene.
    """
    class_repo = OntologyClassRepository(db)
    # The repository method is named get_classes_by_scene; the former
    # get_by_scene name must not be called any more.
    ontology_classes = class_repo.get_classes_by_scene(scene_id)
    return {oc.class_name for oc in ontology_classes}
@@ -80,7 +80,7 @@ def get_all_ontology_types(db) -> Dict[str, Set[str]]:
for scene in scenes:
class_repo = OntologyClassRepository(db)
ontology_classes = class_repo.get_by_scene(scene.scene_id)
ontology_classes = class_repo.get_classes_by_scene(scene.scene_id)
for oc in ontology_classes:
if oc.class_name not in all_types:
all_types[oc.class_name] = set()

View File

@@ -202,7 +202,7 @@ class OntologyClassRepository:
)
raise
def get_by_scene(self, scene_id: UUID) -> List[OntologyClass]:
def get_classes_by_scene(self, scene_id: UUID) -> List[OntologyClass]:
"""获取场景下的所有类型
按创建时间倒序排列。
@@ -215,7 +215,7 @@ class OntologyClassRepository:
Examples:
>>> repo = OntologyClassRepository(db)
>>> classes = repo.get_by_scene(scene_id)
>>> classes = repo.get_classes_by_scene(scene_id)
"""
try:
logger.debug(f"Getting ontology classes by scene: {scene_id}")

View File

@@ -550,7 +550,7 @@ class MemoryConfigService:
try:
ontology_repo = OntologyClassRepository(self.db)
ontology_classes = ontology_repo.get_by_scene(memory_config.scene_id)
ontology_classes = ontology_repo.get_classes_by_scene(memory_config.scene_id)
if not ontology_classes:
logger.info(f"No ontology classes found for scene_id: {memory_config.scene_id}")

View File

@@ -1155,7 +1155,7 @@ class OntologyService:
raise ValueError("无权限访问该场景的类型")
# 获取类型列表
classes = self.class_repo.get_by_scene(scene_id)
classes = self.class_repo.get_classes_by_scene(scene_id)
logger.info(f"Found {len(classes)} classes in scene {scene_id}")