Feature/ontology class clean (#249)
* [add] Complete ontology engineering feature implementation * [add] Add ontology feature integration and validation utilities * [add] Add OWL validator and validation utilities * [fix] Add missing render_ontology_extraction_prompt function * [fix] Add dependencies, fix functionality
This commit is contained in:
@@ -45,6 +45,7 @@ from . import (
|
||||
home_page_controller,
|
||||
memory_perceptual_controller,
|
||||
memory_working_controller,
|
||||
ontology_controller,
|
||||
)
|
||||
|
||||
# 创建管理端 API 路由器
|
||||
@@ -90,5 +91,6 @@ manager_router.include_router(implicit_memory_controller.router)
|
||||
manager_router.include_router(memory_perceptual_controller.router)
|
||||
manager_router.include_router(memory_working_controller.router)
|
||||
manager_router.include_router(file_storage_controller.router)
|
||||
manager_router.include_router(ontology_controller.router)
|
||||
|
||||
__all__ = ["manager_router"]
|
||||
|
||||
964
api/app/controllers/ontology_controller.py
Normal file
964
api/app/controllers/ontology_controller.py
Normal file
@@ -0,0 +1,964 @@
|
||||
"""本体提取API控制器
|
||||
|
||||
本模块提供本体提取系统的RESTful API端点。
|
||||
|
||||
Endpoints:
|
||||
POST /api/memory/ontology/extract - 提取本体类
|
||||
POST /api/memory/ontology/export - 导出OWL文件
|
||||
POST /api/memory/ontology/scene - 创建本体场景
|
||||
PUT /api/memory/ontology/scene/{scene_id} - 更新本体场景
|
||||
DELETE /api/memory/ontology/scene/{scene_id} - 删除本体场景
|
||||
GET /api/memory/ontology/scene/{scene_id} - 获取单个场景
|
||||
GET /api/memory/ontology/scenes - 获取场景列表
|
||||
POST /api/memory/ontology/class - 创建本体类型
|
||||
PUT /api/memory/ontology/class/{class_id} - 更新本体类型
|
||||
DELETE /api/memory/ontology/class/{class_id} - 删除本体类型
|
||||
GET /api/memory/ontology/class/{class_id} - 获取单个类型
|
||||
GET /api/memory/ontology/classes - 获取类型列表
|
||||
"""
|
||||
|
||||
import logging
|
||||
import tempfile
|
||||
from typing import Dict, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Header
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.error_codes import BizCode
|
||||
from app.core.logging_config import get_api_logger
|
||||
from app.core.response_utils import fail, success
|
||||
from app.db import get_db
|
||||
from app.dependencies import get_current_user
|
||||
from app.models.user_model import User
|
||||
from app.services.memory_base_service import Translation_English
|
||||
from app.core.memory.models.ontology_models import OntologyClass
|
||||
from typing import List
|
||||
from app.schemas.ontology_schemas import (
|
||||
ExportRequest,
|
||||
ExportResponse,
|
||||
ExtractionRequest,
|
||||
ExtractionResponse,
|
||||
SceneCreateRequest,
|
||||
SceneUpdateRequest,
|
||||
SceneResponse,
|
||||
SceneListResponse,
|
||||
ClassCreateRequest,
|
||||
ClassUpdateRequest,
|
||||
ClassResponse,
|
||||
ClassListResponse,
|
||||
)
|
||||
from app.schemas.response_schema import ApiResponse
|
||||
from app.services.ontology_service import OntologyService
|
||||
from app.core.memory.llm_tools.openai_client import OpenAIClient
|
||||
from app.core.memory.utils.validation.owl_validator import OWLValidator
|
||||
from app.services.model_service import ModelConfigService
|
||||
|
||||
|
||||
# Two loggers are kept: the shared API logger for request-level events and a
# module logger for internal diagnostics.
api_logger = get_api_logger()
logger = logging.getLogger(__name__)

# All routes declared in this module are mounted under /memory/ontology.
router = APIRouter(prefix="/memory/ontology", tags=["Ontology"])
|
||||
|
||||
|
||||
async def _translate_or_keep(model_id: str, text: str, field_label: str) -> str:
    """Translate ``text`` with ``Translation_English``; keep ``text`` on failure.

    Any exception raised by the translation call is logged as a warning
    (tagged with ``field_label``) and the untranslated text is returned.
    """
    try:
        return await Translation_English(model_id, text)
    except Exception as e:
        logger.warning(f"Failed to translate {field_label}: {e}")
        return text


async def translate_ontology_classes(
    classes: List[OntologyClass],
    model_id: str
) -> List[OntologyClass]:
    """Return translated deep copies of the given ontology classes.

    For every class the ``name_chinese``, ``description`` and each entry of
    ``examples`` are passed through ``Translation_English`` one after another.
    Empty/falsy fields are skipped. A field whose translation fails keeps its
    original value. The input objects are not modified.

    Args:
        classes: Ontology classes to translate.
        model_id: ID of the LLM model handed to ``Translation_English``.

    Returns:
        A new list with one translated copy per input class, in input order.
    """
    results = []

    for source_class in classes:
        # Work on a deep copy so the caller's objects stay untouched.
        clone = source_class.model_copy(deep=True)

        if clone.name_chinese:
            clone.name_chinese = await _translate_or_keep(
                model_id, clone.name_chinese, "name_chinese"
            )

        if clone.description:
            clone.description = await _translate_or_keep(
                model_id, clone.description, "description"
            )

        if clone.examples:
            # Sequential awaits, same order as the source list.
            clone.examples = [
                await _translate_or_keep(model_id, item, "example")
                for item in clone.examples
            ]

        results.append(clone)

    return results
|
||||
|
||||
|
||||
def _get_ontology_service(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
    llm_id: Optional[str] = None
) -> OntologyService:
    """Build an ``OntologyService`` backed by the LLM model identified by ``llm_id``.

    Looks up the model configuration, rejects composite models and models
    without API keys, then wires an ``OpenAIClient`` (using the first API key
    entry) into a new ``OntologyService``.

    Args:
        db: Database session.
        current_user: Current user (only used for logging here).
        llm_id: ID of the LLM model to use. Although the parameter has a
            ``None`` default, a value is required: a missing/empty value is
            rejected with HTTP 400. It must be parseable as a UUID.

    Returns:
        OntologyService: Service instance bound to the selected model.

    Raises:
        HTTPException: 400 when ``llm_id`` is missing, malformed, unknown,
            refers to a composite model, or has no API key; 500 for any
            other failure while building the service.
    """
    try:
        import uuid

        # llm_id is mandatory despite the None default.
        if not llm_id:
            logger.error(f"llm_id is required but not provided - user: {current_user.id}")
            raise HTTPException(
                status_code=400,
                detail="必须提供llm_id参数"
            )

        logger.info(f"Using specified LLM model: {llm_id}")

        # Validate the ID format. str() lets callers pass a UUID object as
        # well as its string form; uuid.UUID() itself only accepts strings.
        try:
            model_id = uuid.UUID(str(llm_id))
        except ValueError as e:
            logger.error(f"Invalid llm_id format: {llm_id}")
            raise HTTPException(
                status_code=400,
                detail="无效的LLM模型ID格式"
            ) from e

        # Fetch the model configuration; any lookup failure is reported as 400.
        try:
            model_config = ModelConfigService.get_model_by_id(db=db, model_id=model_id)
        except Exception as e:
            logger.error(f"Model {llm_id} not found: {str(e)}")
            raise HTTPException(
                status_code=400,
                detail=f"找不到指定的LLM模型: {llm_id}"
            ) from e

        # Composite models are not supported for ontology extraction.
        if getattr(model_config, 'is_composite', False):
            logger.error(f"Model {llm_id} is a composite model, which is not supported for ontology extraction")
            raise HTTPException(
                status_code=400,
                detail="本体提取不支持使用组合模型,请选择单个模型"
            )

        # The model must have at least one API key configured.
        if not model_config.api_keys:
            logger.error(f"Model {llm_id} has no API key configuration")
            raise HTTPException(
                status_code=400,
                detail="指定的LLM模型没有配置API密钥"
            )

        # Only the first API key entry is used.
        api_key_config = model_config.api_keys[0]

        logger.info(
            f"Using specified model - user: {current_user.id}, "
            f"model_id: {llm_id}, model_name: {api_key_config.model_name}"
        )

        from app.core.models.base import RedBearModelConfig

        llm_model_config = RedBearModelConfig(
            model_name=api_key_config.model_name,
            # Fall back to "openai" when the config object has no provider attribute.
            provider=getattr(model_config, 'provider', "openai"),
            api_key=api_key_config.api_key,
            base_url=api_key_config.api_base,
            max_retries=3,
            timeout=60.0
        )

        llm_client = OpenAIClient(model_config=llm_model_config)
        service = OntologyService(llm_client=llm_client, db=db)

        logger.debug(
            f"OntologyService created successfully - "
            f"user: {current_user.id}, model: {api_key_config.model_name}"
        )

        return service

    except HTTPException:
        # Already a well-formed client/server error; pass it through untouched.
        raise
    except Exception as e:
        logger.error(f"Failed to create OntologyService: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"创建本体提取服务失败: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
@router.post("/extract", response_model=ApiResponse)
async def extract_ontology(
    request: ExtractionRequest,
    language_type: str = Header(default="zh", alias="X-Language-Type"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Extract ontology classes from a scenario description.

    Builds an ``OntologyService`` for ``request.llm_id``, runs the extraction
    and returns the resulting classes. When the ``X-Language-Type`` header is
    anything other than ``'zh'``, the classes and the domain are additionally
    passed through the English translation helpers; a failed domain
    translation keeps the original text.

    Per the original documentation, the result is only returned to the
    frontend, which may then create selected classes through the ``/class``
    endpoint.

    Args:
        request: Extraction request carrying scenario, domain, llm_id and scene_id.
        language_type: Language selector from the ``X-Language-Type`` header,
            ``'zh'`` (default) or ``'en'``.
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: Success envelope with the extraction result, or a failure
        envelope (``BAD_REQUEST`` for validation/client errors,
        ``INTERNAL_ERROR`` otherwise).

    Response format:
        {
            "code": 200,
            "msg": "本体提取成功",
            "data": {
                "classes": [
                    {
                        "id": "147d9db50b524a9e909e01a753d3acdd",
                        "name": "Patient",
                        "name_chinese": "患者",
                        "description": "在医疗机构中接受诊疗、护理或健康管理的个体",
                        "examples": ["糖尿病患者", "术后康复患者", "门诊初诊患者"],
                        "parent_class": null,
                        "entity_type": "Person",
                        "domain": "Healthcare"
                    },
                    ...
                ],
                "domain": "Healthcare",
                "extracted_count": 7
            }
        }
    """
    api_logger.info(
        f"Ontology extraction requested by user {current_user.id}, "
        f"scenario_length={len(request.scenario)}, "
        f"domain={request.domain}, "
        f"llm_id={request.llm_id}, "
        f"scene_id={request.scene_id}, "
        f"language_type={language_type}"
    )

    try:
        # The extraction is scoped to the user's current workspace.
        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        # Called directly (not via Depends) so the llm_id from the body is used.
        service = _get_ontology_service(
            db=db,
            current_user=current_user,
            llm_id=request.llm_id
        )

        result = await service.extract_ontology(
            scenario=request.scenario,
            domain=request.domain,
            scene_id=request.scene_id,
            workspace_id=workspace_id
        )

        # Any language other than 'zh' triggers translation to English.
        if language_type != 'zh':
            api_logger.info("Translating extraction result to English")

            result.classes = await translate_ontology_classes(
                result.classes,
                request.llm_id
            )

            if result.domain:
                try:
                    result.domain = await Translation_English(
                        request.llm_id,
                        result.domain
                    )
                except Exception as e:
                    # Best effort: keep the untranslated domain.
                    logger.warning(f"Failed to translate domain: {e}")

        response = ExtractionResponse(
            classes=result.classes,
            domain=result.domain,
            extracted_count=len(result.classes)
        )

        api_logger.info(
            f"Ontology extraction completed, extracted {len(result.classes)} classes, "
            f"saved to scene {request.scene_id}, language={language_type}"
        )

        return success(data=response.model_dump(), msg="本体提取成功")

    except HTTPException as e:
        # _get_ontology_service reports bad llm_id input as HTTP 400. Without
        # this branch the generic handler below would mislabel those client
        # errors as internal errors.
        if e.status_code < 500:
            api_logger.warning(f"Invalid request in extraction: {e.detail}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e.detail))
        api_logger.error(f"Service error in extraction: {e.detail}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "本体提取失败", str(e.detail))

    except ValueError as e:
        # Validation error -> bad request.
        api_logger.warning(f"Validation error in extraction: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        # Runtime failure -> internal error.
        api_logger.error(f"Runtime error in extraction: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "本体提取失败", str(e))

    except Exception as e:
        # Anything else -> internal error.
        api_logger.error(f"Unexpected error in extraction: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "本体提取失败", str(e))
|
||||
|
||||
|
||||
@router.post("/export", response_model=ApiResponse)
async def export_owl(
    request: ExportRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Export ontology classes as OWL content.

    No LLM is involved; only ``OWLValidator`` is used. The ``json`` format is
    exported directly without validation. For the other formats the classes
    are validated first (validation issues are logged but do not abort the
    export), then exported through a temporary ``.owl`` file that is removed
    again before the handler returns.

    Args:
        request: Export request carrying classes, format and include_metadata.
        db: Database session (unused by the export itself).
        current_user: Current user (used for logging).

    Returns:
        ApiResponse: Success envelope with the exported content, or a failure
        envelope describing the problem.

    Supported formats:
        - rdfxml: standard OWL RDF/XML (complete)
        - turtle: Turtle (human readable)
        - ntriples: N-Triples (simple)
        - json: simplified JSON containing only class information

    Response format:
        {
            "code": 200,
            "msg": "OWL文件导出成功",
            "data": {
                "owl_content": "...",
                "format": "rdfxml",
                "classes_count": 7
            }
        }
    """
    import os

    api_logger.info(
        f"OWL export requested by user {current_user.id}, "
        f"classes_count={len(request.classes)}, "
        f"format={request.format}, "
        f"include_metadata={request.include_metadata}"
    )

    # Path of the temporary export file, if one was created; removed in `finally`.
    output_path = None

    try:
        valid_formats = ["rdfxml", "turtle", "ntriples", "json"]
        if request.format not in valid_formats:
            api_logger.warning(f"Invalid export format: {request.format}")
            return fail(
                BizCode.BAD_REQUEST,
                "不支持的导出格式",
                f"format必须是以下之一: {', '.join(valid_formats)}"
            )

        owl_validator = OWLValidator()

        # JSON export needs neither OWL validation nor a temporary file.
        if request.format == "json":
            owl_content = owl_validator.export_to_owl(
                world=None,
                format="json",
                classes=request.classes
            )

            response = ExportResponse(
                owl_content=owl_content,
                format=request.format,
                classes_count=len(request.classes)
            )

            api_logger.info(
                f"JSON export completed, content_length={len(owl_content)}"
            )

            return success(data=response.model_dump(), msg="OWL文件导出成功")

        # Reserve a temp file path; delete=False because the exporter writes
        # to the path after this handle is closed.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.owl',
            delete=False
        ) as tmp_file:
            output_path = tmp_file.name

        logger.debug("Validating ontology classes")
        is_valid, errors, world = owl_validator.validate_ontology_classes(
            classes=request.classes,
        )

        if not is_valid:
            # Validation issues are only logged; the export continues.
            logger.warning(
                f"OWL validation found {len(errors)} issues during export: {errors}"
            )

        if not world:
            error_msg = "Failed to create OWL world for export"
            logger.error(error_msg)
            return fail(BizCode.INTERNAL_ERROR, "创建OWL世界失败", error_msg)

        logger.info(f"Exporting to {request.format} format")
        owl_content = owl_validator.export_to_owl(
            world=world,
            output_path=output_path,
            format=request.format,
            classes=request.classes
        )

        response = ExportResponse(
            owl_content=owl_content,
            format=request.format,
            classes_count=len(request.classes)
        )

        api_logger.info(
            f"OWL export completed, format={request.format}, "
            f"content_length={len(owl_content)}"
        )

        return success(data=response.model_dump(), msg="OWL文件导出成功")

    except ValueError as e:
        # Validation error -> bad request.
        api_logger.warning(f"Validation error in export: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        # Runtime failure -> internal error.
        api_logger.error(f"Runtime error in export: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "OWL文件导出失败", str(e))

    except Exception as e:
        # Anything else -> internal error.
        api_logger.error(f"Unexpected error in export: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "OWL文件导出失败", str(e))

    finally:
        # The content is returned in the response body, so the on-disk copy is
        # no longer needed. Remove it on every exit path to avoid leaking one
        # temp file per request.
        if output_path is not None:
            try:
                os.unlink(output_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Failed to remove temporary export file {output_path}: {cleanup_error}"
                )
|
||||
|
||||
|
||||
# ==================== 本体场景管理接口 ====================
|
||||
|
||||
@router.post("/scene", response_model=ApiResponse)
async def create_scene(
    request: SceneCreateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create an ontology scene in the user's current workspace.

    Args:
        request: Scene creation request (scene_name, scene_description).
        db: Database session.
        current_user: Current user; its ``current_workspace_id`` is the target workspace.

    Returns:
        ApiResponse: Success envelope with the created scene, or a failure
        envelope (``BAD_REQUEST`` when the user has no workspace or the
        service raises ``ValueError``; ``INTERNAL_ERROR`` otherwise).
    """
    api_logger.info(
        f"Scene creation requested by user {current_user.id}, "
        f"name={request.scene_name}"
    )

    try:
        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        # Scene management needs no LLM; reuse the shared helper that builds
        # the service around a placeholder client instead of duplicating it.
        from app.controllers.ontology_secondary_routes import _get_dummy_ontology_service

        service = _get_dummy_ontology_service(db)

        scene = service.create_scene(
            scene_name=request.scene_name,
            scene_description=request.scene_description,
            workspace_id=workspace_id
        )

        # type_num is derived from the scene's classes rather than stored.
        type_num = len(scene.classes) if scene.classes else 0

        response = SceneResponse(
            scene_id=scene.scene_id,
            scene_name=scene.scene_name,
            scene_description=scene.scene_description,
            type_num=type_num,
            workspace_id=scene.workspace_id,
            created_at=scene.created_at,
            updated_at=scene.updated_at,
            classes_count=type_num
        )

        api_logger.info(f"Scene created successfully: {scene.scene_id}")

        return success(data=response.model_dump(), msg="场景创建成功")

    except ValueError as e:
        api_logger.warning(f"Validation error in scene creation: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        api_logger.error(f"Runtime error in scene creation: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "场景创建失败", str(e))

    except Exception as e:
        api_logger.error(f"Unexpected error in scene creation: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "场景创建失败", str(e))
|
||||
|
||||
|
||||
@router.put("/scene/{scene_id}", response_model=ApiResponse)
async def update_scene(
    scene_id: str,
    request: SceneUpdateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Update an ontology scene.

    The scene ID must be a valid UUID. The user's current workspace ID is
    passed to the service together with the new name and description.

    Args:
        scene_id: Scene ID (UUID string) from the path.
        request: Scene update request (scene_name, scene_description).
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: Success envelope with the updated scene, or a failure
        envelope (``BAD_REQUEST`` for a malformed ID, a missing workspace or
        a ``ValueError`` from the service; ``INTERNAL_ERROR`` otherwise).
    """
    api_logger.info(
        f"Scene update requested by user {current_user.id}, "
        f"scene_id={scene_id}"
    )

    try:
        from uuid import UUID

        # Reject malformed IDs before touching the service.
        try:
            scene_uuid = UUID(scene_id)
        except ValueError:
            api_logger.warning(f"Invalid scene_id format: {scene_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式")

        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        # Scene management needs no LLM; reuse the shared helper that builds
        # the service around a placeholder client instead of duplicating it.
        from app.controllers.ontology_secondary_routes import _get_dummy_ontology_service

        service = _get_dummy_ontology_service(db)

        scene = service.update_scene(
            scene_id=scene_uuid,
            scene_name=request.scene_name,
            scene_description=request.scene_description,
            workspace_id=workspace_id
        )

        # type_num is derived from the scene's classes rather than stored.
        type_num = len(scene.classes) if scene.classes else 0

        response = SceneResponse(
            scene_id=scene.scene_id,
            scene_name=scene.scene_name,
            scene_description=scene.scene_description,
            type_num=type_num,
            workspace_id=scene.workspace_id,
            created_at=scene.created_at,
            updated_at=scene.updated_at,
            classes_count=type_num
        )

        api_logger.info(f"Scene updated successfully: {scene_id}")

        return success(data=response.model_dump(), msg="场景更新成功")

    except ValueError as e:
        api_logger.warning(f"Validation error in scene update: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        api_logger.error(f"Runtime error in scene update: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "场景更新失败", str(e))

    except Exception as e:
        api_logger.error(f"Unexpected error in scene update: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "场景更新失败", str(e))
|
||||
|
||||
|
||||
@router.delete("/scene/{scene_id}", response_model=ApiResponse)
async def delete_scene(
    scene_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Delete an ontology scene.

    The scene ID must be a valid UUID. The user's current workspace ID is
    passed to the service along with the scene ID. Per the original
    documentation the scene's associated classes are removed as well
    (performed by the service, not visible here).

    Args:
        scene_id: Scene ID (UUID string) from the path.
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: Success envelope whose data is ``{"deleted": <flag>}``
        with the value returned by the service, or a failure envelope.
    """
    api_logger.info(
        f"Scene deletion requested by user {current_user.id}, "
        f"scene_id={scene_id}"
    )

    try:
        from uuid import UUID

        # Reject malformed IDs before touching the service.
        try:
            scene_uuid = UUID(scene_id)
        except ValueError:
            api_logger.warning(f"Invalid scene_id format: {scene_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式")

        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        # Scene management needs no LLM; reuse the shared helper that builds
        # the service around a placeholder client instead of duplicating it.
        from app.controllers.ontology_secondary_routes import _get_dummy_ontology_service

        service = _get_dummy_ontology_service(db)

        success_flag = service.delete_scene(
            scene_id=scene_uuid,
            workspace_id=workspace_id
        )

        # NOTE(review): success is logged and reported regardless of
        # success_flag's value — confirm the service raises on failure.
        api_logger.info(f"Scene deleted successfully: {scene_id}")

        return success(data={"deleted": success_flag}, msg="场景删除成功")

    except ValueError as e:
        api_logger.warning(f"Validation error in scene deletion: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        api_logger.error(f"Runtime error in scene deletion: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "场景删除失败", str(e))

    except Exception as e:
        api_logger.error(f"Unexpected error in scene deletion: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "场景删除失败", str(e))
|
||||
|
||||
|
||||
@router.get("/scenes", response_model=ApiResponse)
async def get_scenes(
    workspace_id: Optional[str] = None,
    scene_name: Optional[str] = None,
    page: Optional[int] = None,
    pagesize: Optional[int] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List scenes, optionally filtered by name and optionally paginated.

    This route only forwards its arguments to ``scenes_handler`` in
    ``app.controllers.ontology_secondary_routes``; the query parameter
    ``pagesize`` is handed over as the handler's ``page_size``.

    As documented for this endpoint:
    - with ``scene_name``: fuzzy search, returning the matching scenes
    - without ``scene_name``: all scenes of the workspace
    Matching is described as case-insensitive for Chinese and English.

    Args:
        workspace_id: Workspace ID (optional; defaults to the user's current workspace).
        scene_name: Scene name keyword (optional, fuzzy match).
        page: Page number (optional, starting at 1).
        pagesize: Items per page (optional).
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: Whatever ``scenes_handler`` returns (scene list plus
        pagination info).

    Examples:
        - Fuzzy search, unpaged: GET /scenes?workspace_id=xxx&scene_name=医疗
          "医疗" matches e.g. "医疗场景", "智慧医疗", "医疗管理系统"
        - Fuzzy search, paged: GET /scenes?workspace_id=xxx&scene_name=医疗&page=1&pagesize=10
        - Full listing, unpaged: GET /scenes?workspace_id=xxx
        - Full listing, paged: GET /scenes?workspace_id=xxx&page=1&pagesize=10

    Notes:
        - ``page`` and ``pagesize`` must be supplied together
        - ``page`` starts at 1 and ``pagesize`` must be greater than 0
        - Result shape: {"items": [...], "page": {"page": 1, "pagesize": 10, "total": 100, "hasnext": true}}
        - Without pagination the ``page`` field is null
    """
    from app.controllers.ontology_secondary_routes import scenes_handler

    result = await scenes_handler(
        workspace_id=workspace_id,
        scene_name=scene_name,
        page=page,
        page_size=pagesize,
        db=db,
        current_user=current_user,
    )
    return result
|
||||
|
||||
|
||||
# ==================== 本体类型管理接口 ====================
|
||||
|
||||
@router.post("/class", response_model=ApiResponse)
async def create_class(
    request: ClassCreateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create an ontology class under a scene.

    Thin route wrapper: the work is done by ``create_class_handler`` in
    ``app.controllers.ontology_secondary_routes``.

    Args:
        request: Class creation request.
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: The handler's response (the created class information).
    """
    from app.controllers.ontology_secondary_routes import create_class_handler

    result = await create_class_handler(request, db, current_user)
    return result
|
||||
|
||||
|
||||
@router.put("/class/{class_id}", response_model=ApiResponse)
async def update_class(
    class_id: str,
    request: ClassUpdateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Update an ontology class.

    Thin route wrapper: the work is done by ``update_class_handler`` in
    ``app.controllers.ontology_secondary_routes``. Per the original
    documentation only classes of scenes in the current workspace can be
    updated (enforced by the handler, not visible here).

    Args:
        class_id: Class ID from the path.
        request: Class update request.
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: The handler's response (the updated class information).
    """
    from app.controllers.ontology_secondary_routes import update_class_handler

    result = await update_class_handler(class_id, request, db, current_user)
    return result
|
||||
|
||||
|
||||
@router.delete("/class/{class_id}", response_model=ApiResponse)
async def delete_class(
    class_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Delete an ontology class.

    Thin route wrapper: the work is done by ``delete_class_handler`` in
    ``app.controllers.ontology_secondary_routes``. Per the original
    documentation only classes of scenes in the current workspace can be
    deleted (enforced by the handler, not visible here).

    Args:
        class_id: Class ID from the path.
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: The handler's response (the deletion result).
    """
    from app.controllers.ontology_secondary_routes import delete_class_handler

    result = await delete_class_handler(class_id, db, current_user)
    return result
|
||||
|
||||
|
||||
@router.get("/classes", response_model=ApiResponse)
async def get_classes(
    scene_id: str,
    class_name: Optional[str] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List the classes of a scene, optionally filtered by name.

    Thin route wrapper: the work is done by ``classes_handler`` in
    ``app.controllers.ontology_secondary_routes``.

    As documented for this endpoint:
    - with ``class_name``: fuzzy search, returning the matching classes
    - without ``class_name``: all classes of the scene
    Matching is described as case-insensitive for Chinese and English, and
    the result carries the scene's name and description.

    Args:
        scene_id: Scene ID (required).
        class_name: Class name keyword (optional, fuzzy match).
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: The handler's response (class list plus scene information).

    Examples:
        - Fuzzy search: GET /classes?scene_id=xxx&class_name=患者
          "患者" matches e.g. "患者", "患者信息", "门诊患者"
        - Full listing: GET /classes?scene_id=xxx

    Response Format:
        {
            "total": 3,
            "scene_id": "xxx",
            "scene_name": "医疗场景",
            "scene_description": "用于医疗领域的本体建模",
            "items": [...]
        }
    """
    from app.controllers.ontology_secondary_routes import classes_handler

    result = await classes_handler(scene_id, class_name, db, current_user)
    return result
|
||||
|
||||
|
||||
@router.get("/class/{class_id}", response_model=ApiResponse)
async def get_class(
    class_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Fetch a single ontology class by ID.

    Thin route wrapper: the work is done by ``get_class_handler`` in
    ``app.controllers.ontology_secondary_routes``. Per the original
    documentation only classes of scenes in the current workspace can be
    queried (enforced by the handler, not visible here).

    Args:
        class_id: Class ID from the path.
        db: Database session.
        current_user: Current user.

    Returns:
        ApiResponse: The handler's response (the class details).

    Response Format:
        {
            "code": 0,
            "msg": "查询成功",
            "data": {
                "class_id": "xxx",
                "class_name": "患者",
                "class_description": "在医疗机构中接受诊疗的个体",
                "scene_id": "xxx",
                "created_at": "2026-01-29T10:00:00",
                "updated_at": "2026-01-29T10:00:00"
            }
        }
    """
    from app.controllers.ontology_secondary_routes import get_class_handler

    result = await get_class_handler(class_id, db, current_user)
    return result
|
||||
611
api/app/controllers/ontology_secondary_routes.py
Normal file
611
api/app/controllers/ontology_secondary_routes.py
Normal file
@@ -0,0 +1,611 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""本体场景和类型路由(续)
|
||||
|
||||
由于主Controller文件较大,将剩余路由放在此文件中。
|
||||
"""
|
||||
|
||||
from uuid import UUID
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import Depends
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.error_codes import BizCode
|
||||
from app.core.logging_config import get_api_logger
|
||||
from app.core.response_utils import fail, success
|
||||
from app.db import get_db
|
||||
from app.dependencies import get_current_user
|
||||
from app.models.user_model import User
|
||||
from app.schemas.ontology_schemas import (
|
||||
SceneResponse,
|
||||
SceneListResponse,
|
||||
PaginationInfo,
|
||||
ClassCreateRequest,
|
||||
ClassUpdateRequest,
|
||||
ClassResponse,
|
||||
ClassListResponse,
|
||||
ClassBatchCreateResponse,
|
||||
)
|
||||
from app.schemas.response_schema import ApiResponse
|
||||
from app.services.ontology_service import OntologyService
|
||||
from app.core.memory.llm_tools.openai_client import OpenAIClient
|
||||
from app.core.models.base import RedBearModelConfig
|
||||
|
||||
|
||||
# Shared API logger used by the handlers in this module.
api_logger = get_api_logger()
|
||||
|
||||
|
||||
def _get_dummy_ontology_service(db: Session) -> OntologyService:
    """Build an ``OntologyService`` that is not backed by a real LLM.

    Scene and class management do not need an LLM, but the service
    constructor takes a client, so a client configured with placeholder
    values is supplied.

    Args:
        db: Database session handed to the service.

    Returns:
        OntologyService: Service wired to the placeholder client.
    """
    placeholder_client = OpenAIClient(
        model_config=RedBearModelConfig(
            model_name="dummy",
            provider="openai",
            api_key="dummy",
            base_url="https://api.openai.com/v1"
        )
    )
    return OntologyService(llm_client=placeholder_client, db=db)
|
||||
|
||||
|
||||
# 这些函数将被导入到主Controller中
|
||||
|
||||
def _pagination_error(page: Optional[int], page_size: Optional[int]) -> Optional[str]:
    """Return the client-facing detail for invalid pagination input, or None if valid."""
    if page is not None and page < 1:
        api_logger.warning(f"Invalid page number: {page}")
        return "页码必须大于0"

    if page_size is not None and page_size < 1:
        api_logger.warning(f"Invalid page_size: {page_size}")
        return "每页数量必须大于0"

    # page and page_size are only accepted as a pair.
    if (page is None) != (page_size is None):
        api_logger.warning(f"Incomplete pagination params: page={page}, page_size={page_size}")
        return "分页参数page和pagesize必须同时提供"

    return None


def _scene_to_response(scene) -> SceneResponse:
    """Convert one scene record (with its ``classes`` collection) into a SceneResponse."""
    classes = scene.classes or []
    type_num = len(classes)
    return SceneResponse(
        scene_id=scene.scene_id,
        scene_name=scene.scene_name,
        scene_description=scene.scene_description,
        type_num=type_num,
        # Only the first three class names are exposed as a preview.
        entity_type=[cls.class_name for cls in classes[:3]] if classes else None,
        workspace_id=scene.workspace_id,
        created_at=scene.created_at,
        updated_at=scene.updated_at,
        classes_count=type_num,
    )


async def scenes_handler(
    workspace_id: Optional[str] = None,
    scene_name: Optional[str] = None,
    page: Optional[int] = None,
    page_size: Optional[int] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List scenes in a workspace, optionally filtered by a name keyword.

    A non-blank ``scene_name`` triggers a fuzzy name search; otherwise all
    scenes of the workspace are listed. Both modes support optional
    pagination: for search the full result is fetched and sliced here, for
    listing ``page``/``page_size`` are forwarded to the service.

    Args:
        workspace_id: Workspace UUID as a string; defaults to the current
            user's current workspace when omitted.
        scene_name: Keyword for fuzzy matching; blank or missing means "list".
        page: 1-based page number; must be given together with ``page_size``.
        page_size: Items per page; must be given together with ``page``.
        db: Database session.
        current_user: Authenticated user.

    Returns:
        The ``success(...)`` envelope carrying a serialized SceneListResponse,
        or a ``fail(...)`` envelope (BAD_REQUEST for invalid input or
        ValueError, INTERNAL_ERROR for any other exception).
    """
    # Derive the label from the stripped keyword so the log matches the
    # branch that actually runs (a whitespace-only keyword is a plain list).
    keyword = scene_name.strip() if scene_name else ""
    operation = "search" if keyword else "list"
    api_logger.info(
        f"Scene {operation} requested by user {current_user.id}, "
        f"workspace_id={workspace_id}, keyword={scene_name}, page={page}, page_size={page_size}"
    )

    try:
        # Resolve the target workspace.
        if workspace_id:
            # NOTE(review): a caller-supplied workspace_id is used as-is; this
            # handler does not itself check that current_user may access it.
            # Confirm the service or an upstream dependency enforces that.
            try:
                ws_uuid = UUID(workspace_id)
            except ValueError:
                api_logger.warning(f"Invalid workspace_id format: {workspace_id}")
                return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的工作空间ID格式")
        else:
            ws_uuid = current_user.current_workspace_id
            if not ws_uuid:
                api_logger.warning(f"User {current_user.id} has no current workspace")
                return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        # Reject bad pagination input before building the service.
        pagination_problem = _pagination_error(page, page_size)
        if pagination_problem is not None:
            return fail(BizCode.BAD_REQUEST, "请求参数无效", pagination_problem)
        paginated = page is not None and page_size is not None

        service = _get_dummy_ontology_service(db)

        if keyword:
            scenes = service.search_scenes_by_name(keyword, ws_uuid)
            total = len(scenes)
            if paginated:
                start_idx = (page - 1) * page_size
                scenes = scenes[start_idx:start_idx + page_size]
        else:
            scenes, total = service.list_scenes(ws_uuid, page, page_size)

        items = [_scene_to_response(scene) for scene in scenes]

        if paginated:
            pagination_info = PaginationInfo(
                page=page,
                pagesize=page_size,
                total=total,
                hasnext=(page * page_size) < total,
            )
            response = SceneListResponse(items=items, page=pagination_info)
        else:
            response = SceneListResponse(items=items)

        if keyword:
            api_logger.info(
                f"Scene search completed: found {len(items)} scenes matching '{scene_name}' "
                f"in workspace {ws_uuid}, total={total}"
            )
        else:
            api_logger.info(f"Scene list retrieved successfully, count={len(items)}, total={total}")

        return success(data=response.model_dump(mode='json'), msg="查询成功")

    except ValueError as e:
        api_logger.warning(f"Validation error in scene {operation}: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        api_logger.error(f"Runtime error in scene {operation}: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))

    except Exception as e:
        api_logger.error(f"Unexpected error in scene {operation}: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
|
||||
|
||||
|
||||
# ==================== 本体类型管理接口 ====================
|
||||
|
||||
async def create_class_handler(
    request: ClassCreateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Create one or several ontology classes inside a scene.

    The request always carries a list of classes. A list of exactly one
    element goes through the single-create service call and yields a
    ClassResponse; any other length goes through the batch call and yields a
    ClassBatchCreateResponse with per-item errors.

    Args:
        request: Payload with ``scene_id`` and the ``classes`` to create.
        db: Database session.
        current_user: Authenticated user; its current workspace is used.

    Returns:
        A ``success(...)`` envelope with the serialized response model, or a
        ``fail(...)`` envelope (BAD_REQUEST for ValueError / missing
        workspace, INTERNAL_ERROR for any other exception).
    """
    requested = len(request.classes)
    mode = "batch" if requested != 1 else "single"

    api_logger.info(
        f"Class creation ({mode}) requested by user {current_user.id}, "
        f"scene_id={request.scene_id}, count={requested}"
    )

    def to_response(record) -> ClassResponse:
        # Map a created class record onto the API response model.
        return ClassResponse(
            class_id=record.class_id,
            class_name=record.class_name,
            class_description=record.class_description,
            scene_id=record.scene_id,
            created_at=record.created_at,
            updated_at=record.updated_at,
        )

    try:
        ws_id = current_user.current_workspace_id
        if not ws_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        service = _get_dummy_ontology_service(db)

        # Plain dicts are what the service's create calls are fed with.
        payload = [
            {
                "class_name": entry.class_name,
                "class_description": entry.class_description,
            }
            for entry in request.classes
        ]

        if requested == 1:
            only = payload[0]
            created = service.create_class(
                scene_id=request.scene_id,
                class_name=only["class_name"],
                class_description=only["class_description"],
                workspace_id=ws_id,
            )
            single = to_response(created)
            api_logger.info(f"Class created successfully: {created.class_id}")
            return success(data=single.model_dump(mode='json'), msg="类型创建成功")

        created_classes, errors = service.create_classes_batch(
            scene_id=request.scene_id,
            classes=payload,
            workspace_id=ws_id,
        )
        batch = ClassBatchCreateResponse(
            total=len(payload),
            success_count=len(created_classes),
            failed_count=len(errors),
            items=[to_response(record) for record in created_classes],
            errors=errors if errors else None,
        )

        api_logger.info(
            f"Batch class creation completed: "
            f"success={len(created_classes)}, failed={len(errors)}"
        )
        return success(data=batch.model_dump(mode='json'), msg="批量创建完成")

    except ValueError as exc:
        api_logger.warning(f"Validation error in class creation: {exc!s}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(exc))

    except Exception as exc:
        # RuntimeError and every other exception differ only in the log label.
        label = "Runtime" if isinstance(exc, RuntimeError) else "Unexpected"
        api_logger.error(f"{label} error in class creation: {exc!s}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "类型创建失败", str(exc))
|
||||
|
||||
|
||||
async def update_class_handler(
    class_id: str,
    request: ClassUpdateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Update the name and/or description of an ontology class.

    Args:
        class_id: UUID of the class, as a string.
        request: Payload carrying ``class_name`` and ``class_description``.
        db: Database session.
        current_user: Authenticated user; its current workspace is used.

    Returns:
        A ``success(...)`` envelope with the serialized ClassResponse, or a
        ``fail(...)`` envelope (BAD_REQUEST for a malformed id, missing
        workspace or ValueError; INTERNAL_ERROR for any other exception).
    """
    api_logger.info(
        f"Class update requested by user {current_user.id}, "
        f"class_id={class_id}"
    )

    try:
        # Reject ids that are not valid UUID strings.
        try:
            target_id = UUID(class_id)
        except ValueError:
            api_logger.warning(f"Invalid class_id format: {class_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的类型ID格式")

        ws_id = current_user.current_workspace_id
        if not ws_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        updated = _get_dummy_ontology_service(db).update_class(
            class_id=target_id,
            class_name=request.class_name,
            class_description=request.class_description,
            workspace_id=ws_id,
        )

        body = ClassResponse(
            class_id=updated.class_id,
            class_name=updated.class_name,
            class_description=updated.class_description,
            scene_id=updated.scene_id,
            created_at=updated.created_at,
            updated_at=updated.updated_at,
        )

        api_logger.info(f"Class updated successfully: {class_id}")
        return success(data=body.model_dump(mode='json'), msg="类型更新成功")

    except ValueError as exc:
        api_logger.warning(f"Validation error in class update: {exc!s}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(exc))

    except Exception as exc:
        # RuntimeError and every other exception differ only in the log label.
        label = "Runtime" if isinstance(exc, RuntimeError) else "Unexpected"
        api_logger.error(f"{label} error in class update: {exc!s}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "类型更新失败", str(exc))
|
||||
|
||||
|
||||
async def delete_class_handler(
    class_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Delete an ontology class from the current user's workspace.

    Args:
        class_id: UUID of the class, as a string.
        db: Database session.
        current_user: Authenticated user; its current workspace is used.

    Returns:
        A ``success(...)`` envelope whose data is ``{"deleted": <flag>}``
        with the value returned by the service, or a ``fail(...)`` envelope
        (BAD_REQUEST for a malformed id, missing workspace or ValueError;
        INTERNAL_ERROR for any other exception).
    """
    api_logger.info(
        f"Class deletion requested by user {current_user.id}, "
        f"class_id={class_id}"
    )

    try:
        # Reject ids that are not valid UUID strings.
        try:
            target_id = UUID(class_id)
        except ValueError:
            api_logger.warning(f"Invalid class_id format: {class_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的类型ID格式")

        ws_id = current_user.current_workspace_id
        if not ws_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        deleted = _get_dummy_ontology_service(db).delete_class(
            class_id=target_id,
            workspace_id=ws_id,
        )

        # NOTE(review): success is logged and reported whatever `deleted` is;
        # presumably the service raises on failure - confirm.
        api_logger.info(f"Class deleted successfully: {class_id}")
        return success(data={"deleted": deleted}, msg="类型删除成功")

    except ValueError as exc:
        api_logger.warning(f"Validation error in class deletion: {exc!s}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(exc))

    except Exception as exc:
        # RuntimeError and every other exception differ only in the log label.
        label = "Runtime" if isinstance(exc, RuntimeError) else "Unexpected"
        api_logger.error(f"{label} error in class deletion: {exc!s}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "类型删除失败", str(exc))
|
||||
|
||||
|
||||
async def get_class_handler(
    class_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Fetch a single ontology class by id.

    Args:
        class_id: UUID of the class, as a string.
        db: Database session.
        current_user: Authenticated user; its current workspace is used.

    Returns:
        A ``success(...)`` envelope with the serialized ClassResponse, or a
        ``fail(...)`` envelope: BAD_REQUEST for a malformed id or missing
        workspace, NOT_FOUND when the lookup raises ValueError, and
        INTERNAL_ERROR for any other exception.
    """
    api_logger.info(
        f"Get class requested by user {current_user.id}, "
        f"class_id={class_id}"
    )

    try:
        # Reject ids that are not valid UUID strings.
        try:
            target_id = UUID(class_id)
        except ValueError:
            api_logger.warning(f"Invalid class_id format: {class_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的类型ID格式")

        ws_id = current_user.current_workspace_id
        if not ws_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        # Per the original author's note, the lookup raises ValueError when
        # the class does not exist.
        found = _get_dummy_ontology_service(db).get_class_by_id(target_id, ws_id)

        body = ClassResponse(
            class_id=found.class_id,
            class_name=found.class_name,
            class_description=found.class_description,
            scene_id=found.scene_id,
            created_at=found.created_at,
            updated_at=found.updated_at,
        )

        api_logger.info(f"Class retrieved successfully: {class_id}")
        return success(data=body.model_dump(mode='json'), msg="查询成功")

    except ValueError as exc:
        # Class missing or not accessible from this workspace.
        # NOTE(review): the NOT_FOUND code is paired with the generic
        # "invalid parameters" message - confirm this is intended.
        api_logger.warning(f"Validation error in get class: {exc!s}")
        return fail(BizCode.NOT_FOUND, "请求参数无效", str(exc))

    except Exception as exc:
        # RuntimeError and every other exception differ only in the log label.
        label = "Runtime" if isinstance(exc, RuntimeError) else "Unexpected"
        api_logger.error(f"{label} error in get class: {exc!s}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "查询失败", str(exc))
|
||||
|
||||
|
||||
async def classes_handler(
    scene_id: str,
    class_name: Optional[str] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List the ontology classes of a scene, optionally filtered by name.

    A non-blank ``class_name`` triggers a fuzzy name search within the
    scene; otherwise every class of the scene is returned.

    Args:
        scene_id: UUID of the scene, as a string (required).
        class_name: Keyword for fuzzy matching; blank or missing means "list".
        db: Database session.
        current_user: Authenticated user; its current workspace is used.

    Returns:
        A ``success(...)`` envelope with a serialized ClassListResponse, or
        a ``fail(...)`` envelope (BAD_REQUEST for a malformed id, missing
        workspace or ValueError; NOT_FOUND when the scene lookup returns
        nothing; INTERNAL_ERROR for any other exception).
    """
    # Derive the label from the stripped keyword so the logs match the
    # branch that actually runs (a whitespace-only keyword is a plain list).
    keyword = class_name.strip() if class_name else ""
    operation = "search" if keyword else "list"
    api_logger.info(
        f"Class {operation} requested by user {current_user.id}, "
        f"keyword={class_name}, scene_id={scene_id}"
    )

    try:
        # Reject ids that are not valid UUID strings.
        try:
            scene_uuid = UUID(scene_id)
        except ValueError:
            api_logger.warning(f"Invalid scene_id format: {scene_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式")

        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        service = _get_dummy_ontology_service(db)

        # The scene's name and description are echoed in the response.
        scene = service.get_scene_by_id(scene_uuid, workspace_id)
        if not scene:
            api_logger.warning(f"Scene not found: {scene_id}")
            return fail(BizCode.NOT_FOUND, "场景不存在", f"未找到ID为 {scene_id} 的场景")

        if keyword:
            classes = service.search_classes_by_name(keyword, scene_uuid, workspace_id)
        else:
            classes = service.list_classes_by_scene(scene_uuid, workspace_id)

        items = [
            ClassResponse(
                class_id=ontology_class.class_id,
                class_name=ontology_class.class_name,
                class_description=ontology_class.class_description,
                scene_id=ontology_class.scene_id,
                created_at=ontology_class.created_at,
                updated_at=ontology_class.updated_at,
            )
            for ontology_class in classes
        ]

        response = ClassListResponse(
            total=len(items),
            scene_id=scene_uuid,
            scene_name=scene.scene_name,
            scene_description=scene.scene_description,
            items=items
        )

        if keyword:
            api_logger.info(
                f"Class search completed: found {len(items)} classes matching '{class_name}' "
                f"in scene {scene_id}"
            )
        else:
            api_logger.info(f"Class list retrieved successfully, count={len(items)}")

        return success(data=response.model_dump(mode='json'), msg="查询成功")

    except ValueError as e:
        api_logger.warning(f"Validation error in class {operation}: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        api_logger.error(f"Runtime error in class {operation}: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))

    except Exception as e:
        api_logger.error(f"Unexpected error in class {operation}: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
|
||||
@@ -58,6 +58,12 @@ from app.core.memory.models.triplet_models import (
|
||||
TripletExtractionResponse,
|
||||
)
|
||||
|
||||
# Ontology models
|
||||
from app.core.memory.models.ontology_models import (
|
||||
OntologyClass,
|
||||
OntologyExtractionResponse,
|
||||
)
|
||||
|
||||
# Variable configuration models
|
||||
from app.core.memory.models.variate_config import (
|
||||
StatementExtractionConfig,
|
||||
@@ -105,6 +111,9 @@ __all__ = [
|
||||
"Entity",
|
||||
"Triplet",
|
||||
"TripletExtractionResponse",
|
||||
# Ontology models
|
||||
"OntologyClass",
|
||||
"OntologyExtractionResponse",
|
||||
# Variable configuration
|
||||
"StatementExtractionConfig",
|
||||
"ForgettingEngineConfig",
|
||||
|
||||
135
api/app/core/memory/models/ontology_models.py
Normal file
135
api/app/core/memory/models/ontology_models.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""Models for ontology classes and extraction responses.
|
||||
|
||||
This module contains Pydantic models for representing extracted ontology classes
|
||||
from scenario descriptions, following OWL ontology engineering standards.
|
||||
|
||||
Classes:
|
||||
OntologyClass: Represents an extracted ontology class
|
||||
OntologyExtractionResponse: Response model containing extracted ontology classes
|
||||
"""
|
||||
|
||||
from typing import List, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
|
||||
class OntologyClass(BaseModel):
    """Represents an extracted ontology class from scenario description.

    An ontology class represents an abstract category or concept in a domain,
    following OWL ontology engineering standards and naming conventions.

    Attributes:
        id: Unique string identifier for the ontology class
        name: Name of the class in PascalCase format (e.g., 'MedicalProcedure')
        name_chinese: Chinese translation of the class name (e.g., '医疗程序')
        description: Textual description of the class
        examples: List of concrete instance examples of this class
        parent_class: Optional name of the parent class in the hierarchy
        entity_type: Type/category of the entity (e.g., 'Person', 'Organization', 'Concept')
        domain: Domain this class belongs to (e.g., 'Healthcare', 'Education')

    Config:
        extra: Ignore extra fields from LLM output
    """
    # The class docstring and the Field descriptions below are kept verbatim:
    # Pydantic emits both in the model's JSON schema.

    model_config = ConfigDict(extra='ignore')

    id: str = Field(description="Unique identifier for the ontology class", default_factory=lambda: uuid4().hex)
    name: str = Field(default=..., description="Name of the class in PascalCase format")
    name_chinese: Optional[str] = Field(default=None, description="Chinese translation of the class name")
    description: str = Field(default=..., description="Description of the class")
    examples: List[str] = Field(description="List of concrete instance examples", default_factory=list)
    parent_class: Optional[str] = Field(default=None, description="Name of the parent class in the hierarchy")
    entity_type: str = Field(default=..., description="Type/category of the entity")
    domain: str = Field(default=..., description="Domain this class belongs to")

    @field_validator('name')
    @classmethod
    def validate_pascal_case(cls, v: str) -> str:
        """Check the class name against the PascalCase rules used here.

        The name must be non-empty, start with an uppercase character,
        contain no spaces, and consist only of alphanumeric characters
        (as reported by ``str.isalnum``) and underscores.

        Args:
            v: Candidate class name.

        Returns:
            ``v`` unchanged when every rule holds.

        Raises:
            ValueError: On the first rule that is violated.
        """
        if not v:
            raise ValueError("Class name cannot be empty")

        first_char = v[0]
        if not first_char.isupper():
            raise ValueError(
                f"Class name '{v}' must start with an uppercase letter (PascalCase)"
            )

        # Spaces get their own message before the generic character check.
        if ' ' in v:
            raise ValueError(
                f"Class name '{v}' cannot contain spaces (PascalCase)"
            )

        has_illegal_char = any(ch != '_' and not ch.isalnum() for ch in v)
        if has_illegal_char:
            raise ValueError(
                f"Class name '{v}' contains invalid characters. "
                "Only alphanumeric characters and underscores are allowed"
            )

        return v
|
||||
|
||||
|
||||
class OntologyExtractionResponse(BaseModel):
    """Response model for ontology extraction from LLM.

    This model represents the structured output from the LLM when
    extracting ontology classes from scenario descriptions.

    Attributes:
        classes: List of extracted ontology classes
        domain: Domain/field the scenario belongs to

    Config:
        extra: Ignore extra fields from LLM output
    """
    # The class docstring and the Field descriptions below are kept verbatim:
    # Pydantic emits both in the model's JSON schema.

    model_config = ConfigDict(extra='ignore')

    classes: List[OntologyClass] = Field(description="List of extracted ontology classes", default_factory=list)
    domain: str = Field(default=..., description="Domain/field the scenario belongs to")
|
||||
@@ -8,4 +8,5 @@
|
||||
- TemporalExtractor: 时间信息提取
|
||||
- EmbeddingGenerator: 嵌入向量生成
|
||||
- MemorySummaryGenerator: 记忆摘要生成
|
||||
- OntologyExtractor: 本体类提取
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,482 @@
|
||||
"""Ontology class extraction from scenario descriptions using LLM.
|
||||
|
||||
This module provides the OntologyExtractor class for extracting ontology classes
|
||||
from natural language scenario descriptions. It uses LLM-driven extraction combined
|
||||
with two-layer validation (string validation + OWL semantic validation).
|
||||
|
||||
Classes:
|
||||
OntologyExtractor: Extracts ontology classes from scenario descriptions
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import List, Optional
|
||||
|
||||
from app.core.memory.llm_tools.openai_client import OpenAIClient
|
||||
from app.core.memory.models.ontology_models import (
|
||||
OntologyClass,
|
||||
OntologyExtractionResponse,
|
||||
)
|
||||
from app.core.memory.utils.validation.ontology_validator import OntologyValidator
|
||||
from app.core.memory.utils.validation.owl_validator import OWLValidator
|
||||
from app.core.memory.utils.prompt.prompt_utils import render_ontology_extraction_prompt
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OntologyExtractor:
|
||||
"""Extractor for ontology classes from scenario descriptions.
|
||||
|
||||
This extractor uses LLM to identify abstract classes and concepts from
|
||||
natural language scenario descriptions, following OWL ontology engineering
|
||||
standards. It performs two-layer validation:
|
||||
1. String validation (naming conventions, reserved words, duplicates)
|
||||
2. OWL semantic validation (consistency checking, circular inheritance)
|
||||
|
||||
Attributes:
|
||||
llm_client: OpenAI client for LLM calls
|
||||
validator: String validator for class names and descriptions
|
||||
owl_validator: OWL validator for semantic validation
|
||||
"""
|
||||
|
||||
def __init__(self, llm_client: OpenAIClient):
    """Set up the extractor with its LLM client and both validators.

    Args:
        llm_client: OpenAIClient instance used for the extraction calls.
    """
    string_validator = OntologyValidator()
    semantic_validator = OWLValidator()

    self.llm_client = llm_client
    self.validator = string_validator
    self.owl_validator = semantic_validator

    logger.info("OntologyExtractor initialized")
|
||||
|
||||
async def extract_ontology_classes(
    self,
    scenario: str,
    domain: Optional[str] = None,
    max_classes: int = 15,
    min_classes: int = 5,
    enable_owl_validation: bool = True,
    llm_temperature: float = 0.3,
    llm_max_tokens: int = 2000,
    max_description_length: int = 500,
    timeout: Optional[float] = None,
) -> OntologyExtractionResponse:
    """Extract ontology classes from a scenario description.

    Orchestrates the whole pipeline:
    1. Call the LLM to extract ontology classes
    2. First-layer validation (string validation and cleaning)
    3. Second-layer validation (OWL semantic validation), if enabled
    4. Filter out classes reported invalid by the OWL validation
    5. Return the remaining classes

    Failures are mostly absorbed: if the timeout-wrapped LLM call times
    out, or any other exception is raised inside the pipeline, the error
    is logged and an empty response (``classes=[]``, ``domain`` or
    ``"Unknown"``) is returned.

    Args:
        scenario: Natural language scenario description
        domain: Optional domain hint (e.g., "Healthcare", "Education")
        max_classes: Maximum number of classes to extract (default: 15)
        min_classes: Minimum expected number of classes (default: 5);
            falling below it only produces a warning
        enable_owl_validation: Whether to run OWL validation (default: True)
        llm_temperature: LLM temperature parameter (default: 0.3)
        llm_max_tokens: LLM max tokens parameter (default: 2000)
        max_description_length: Maximum description length (default: 500)
        timeout: Optional timeout in seconds for the LLM call
            (default: None, no timeout)

    Returns:
        OntologyExtractionResponse containing the validated classes, or an
        empty response on timeout/failure as described above

    Raises:
        ValueError: If scenario is empty or only whitespace
        asyncio.TimeoutError: Only when one is raised outside the
            timeout-wrapped LLM call (e.g. by the LLM call when no
            ``timeout`` was given); it is logged and re-raised

    Examples:
        >>> extractor = OntologyExtractor(llm_client)
        >>> response = await extractor.extract_ontology_classes(
        ...     scenario="A hospital manages patient records...",
        ...     domain="Healthcare",
        ...     max_classes=10,
        ...     timeout=30.0
        ... )
        >>> len(response.classes)
        7
    """
    # perf_counter is monotonic, so durations are immune to wall-clock jumps.
    start_time = time.perf_counter()

    # Validate input
    if not scenario or not scenario.strip():
        logger.error("Scenario description is empty")
        raise ValueError("Scenario description cannot be empty")

    scenario = scenario.strip()

    logger.info(
        f"Starting ontology extraction - scenario_length={len(scenario)}, "
        f"domain={domain}, max_classes={max_classes}, min_classes={min_classes}, "
        f"timeout={timeout}"
    )

    try:
        # Step 1: Call LLM for extraction, optionally bounded by a timeout
        logger.info("Step 1: Calling LLM for ontology extraction")
        llm_start_time = time.perf_counter()

        # Build the coroutine once; it only starts running when awaited.
        llm_call = self._call_llm_for_extraction(
            scenario=scenario,
            domain=domain,
            max_classes=max_classes,
            llm_temperature=llm_temperature,
            llm_max_tokens=llm_max_tokens,
        )

        if timeout is None:
            response = await llm_call
        else:
            try:
                response = await asyncio.wait_for(llm_call, timeout=timeout)
            except asyncio.TimeoutError:
                llm_duration = time.perf_counter() - llm_start_time
                logger.error(
                    f"LLM extraction timed out after {timeout} seconds "
                    f"(actual duration: {llm_duration:.2f}s)"
                )
                # Return empty response on timeout
                return OntologyExtractionResponse(
                    classes=[],
                    domain=domain or "Unknown",
                )

        llm_duration = time.perf_counter() - llm_start_time
        logger.info(
            f"LLM returned {len(response.classes)} classes in {llm_duration:.2f}s"
        )

        # Step 2: First-layer validation (string validation and cleaning)
        logger.info("Step 2: Performing first-layer validation (string validation)")
        validation_start_time = time.perf_counter()

        response = self._validate_and_clean(
            response=response,
            max_description_length=max_description_length,
        )

        validation_duration = time.perf_counter() - validation_start_time
        logger.info(
            f"After first-layer validation: {len(response.classes)} classes remain "
            f"(validation took {validation_duration:.2f}s)"
        )

        # Too few classes is reported but does not abort the extraction.
        if len(response.classes) < min_classes:
            logger.warning(
                f"Only {len(response.classes)} classes remain after validation, "
                f"which is below minimum of {min_classes}"
            )

        # Step 3: Second-layer validation (OWL semantic validation)
        if enable_owl_validation and response.classes:
            logger.info("Step 3: Performing second-layer validation (OWL validation)")
            owl_start_time = time.perf_counter()

            # The third element of the validator's result is not needed here.
            is_valid, errors, _ = self.owl_validator.validate_ontology_classes(
                classes=response.classes,
            )

            owl_duration = time.perf_counter() - owl_start_time

            if not is_valid:
                logger.warning(
                    f"OWL validation found {len(errors)} issues in {owl_duration:.2f}s: {errors}"
                )

                # Filter invalid classes based on errors
                response = self._filter_invalid_classes(
                    response=response,
                    errors=errors,
                )

                logger.info(
                    f"After second-layer validation: {len(response.classes)} classes remain"
                )
            else:
                logger.info(f"OWL validation passed successfully in {owl_duration:.2f}s")
        else:
            if not enable_owl_validation:
                logger.info("Step 3: OWL validation disabled, skipping")
            else:
                logger.info("Step 3: No classes to validate, skipping OWL validation")

        total_duration = time.perf_counter() - start_time

        # Log extraction statistics
        logger.info(
            f"Ontology extraction completed - "
            f"final_class_count={len(response.classes)}, "
            f"domain={response.domain}, "
            f"total_duration={total_duration:.2f}s, "
            f"llm_duration={llm_duration:.2f}s"
        )

        return response

    except asyncio.TimeoutError:
        # Timeouts not absorbed by the wrapped LLM call are re-raised.
        total_duration = time.perf_counter() - start_time
        logger.error(
            f"Ontology extraction timed out after {timeout} seconds "
            f"(total duration: {total_duration:.2f}s)",
            exc_info=True
        )
        raise

    except Exception as e:
        total_duration = time.perf_counter() - start_time
        logger.error(
            f"Ontology extraction failed after {total_duration:.2f}s: {str(e)}",
            exc_info=True
        )
        # Return empty response on failure
        return OntologyExtractionResponse(
            classes=[],
            domain=domain or "Unknown",
        )
|
||||
|
||||
async def _call_llm_for_extraction(
    self,
    scenario: str,
    domain: Optional[str],
    max_classes: int,
    llm_temperature: float,
    llm_max_tokens: int,
) -> OntologyExtractionResponse:
    """Ask the LLM for a structured set of ontology classes.

    The user prompt is rendered by ``render_ontology_extraction_prompt``
    (together with the JSON schema of ``OntologyExtractionResponse``) and
    sent, preceded by a fixed system instruction, to
    ``self.llm_client.response_structured``.

    Args:
        scenario: Scenario description text
        domain: Optional domain hint
        max_classes: Maximum number of classes to extract
        llm_temperature: LLM temperature parameter
        llm_max_tokens: LLM max tokens parameter

    Returns:
        The OntologyExtractionResponse produced by the LLM client

    Raises:
        Exception: Any error from prompt rendering or the LLM call is
            logged with its traceback and re-raised unchanged.

    Note:
        ``llm_temperature`` and ``llm_max_tokens`` are only written to the
        debug log here; they are not passed to ``response_structured``.
    """
    # Fixed system role text; assembled up front so the try body only holds
    # the calls that can actually fail.
    system_instruction = (
        "You are an expert ontology engineer specializing in knowledge "
        "representation and OWL standards. Extract ontology classes from "
        "scenario descriptions following the provided instructions. "
        "Return valid JSON conforming to the schema."
    )

    try:
        # Render the user prompt from the template
        user_prompt = await render_ontology_extraction_prompt(
            scenario=scenario,
            domain=domain,
            max_classes=max_classes,
            json_schema=OntologyExtractionResponse.model_json_schema(),
        )
        logger.debug(f"Rendered prompt length: {len(user_prompt)}")

        conversation = [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": user_prompt},
        ]

        logger.debug(
            f"Calling LLM with temperature={llm_temperature}, "
            f"max_tokens={llm_max_tokens}"
        )

        # Structured-output call: the client returns an instance of the model
        extraction = await self.llm_client.response_structured(
            messages=conversation,
            response_model=OntologyExtractionResponse,
        )

        class_total = len(extraction.classes)
        logger.info(
            f"LLM extraction successful - extracted {class_total} classes"
        )
        return extraction

    except Exception as exc:
        logger.error(
            f"LLM extraction failed: {str(exc)}",
            exc_info=True
        )
        raise
|
||||
|
||||
def _validate_and_clean(
    self,
    response: OntologyExtractionResponse,
    max_description_length: int,
) -> OntologyExtractionResponse:
    """Perform first-layer validation: string validation and cleaning.

    For every extracted class this method:
    1. Validates the class name with ``self.validator.validate_class_name``
    2. Sanitizes an invalid name and re-validates it; the class is dropped
       if the sanitized name is still invalid
    3. Truncates an over-long description
    Afterwards duplicate classes are removed via
    ``self.validator.remove_duplicates``.

    The class objects inside ``response`` are updated in place (name and
    description); the returned response is a new object holding the
    surviving classes.

    Args:
        response: OntologyExtractionResponse from LLM
        max_description_length: Maximum description length

    Returns:
        Cleaned OntologyExtractionResponse (``response`` itself when it
        contains no classes)
    """
    if not response.classes:
        logger.debug("No classes to validate")
        return response

    logger.debug(f"Validating {len(response.classes)} classes")

    validated_classes = []

    for ontology_class in response.classes:
        raw_name = ontology_class.name

        # validate_class_name() strips the name before checking it, so a
        # padded name such as " Patient " would pass validation while the
        # stored value kept its whitespace. Validate and store the same,
        # stripped, text.
        name = raw_name.strip() if isinstance(raw_name, str) else raw_name

        is_valid, error_msg = self.validator.validate_class_name(name)

        if not is_valid:
            logger.warning(
                f"Invalid class name '{raw_name}': {error_msg}"
            )

            # Attempt to sanitize
            name = self.validator.sanitize_class_name(name)

            logger.info(
                f"Sanitized class name: '{raw_name}' -> '{name}'"
            )

            # Re-validate sanitized name
            is_valid, error_msg = self.validator.validate_class_name(name)

            if not is_valid:
                # Report the name the LLM produced, not the sanitized
                # attempt, so the log identifies the offending class.
                logger.error(
                    f"Failed to sanitize class name '{raw_name}': {error_msg}. "
                    "Skipping this class."
                )
                continue

        if name != raw_name:
            # NOTE(review): other classes may still refer to the old name
            # (e.g. through a parent reference); such references are not
            # rewritten here — confirm whether they need to be.
            ontology_class.name = name

        # Truncate description if too long
        if ontology_class.description:
            original_length = len(ontology_class.description)
            ontology_class.description = self.validator.truncate_description(
                ontology_class.description,
                max_length=max_description_length,
            )

            if len(ontology_class.description) < original_length:
                logger.debug(
                    f"Truncated description for '{ontology_class.name}': "
                    f"{original_length} -> {len(ontology_class.description)} chars"
                )

        validated_classes.append(ontology_class)

    # Remove duplicates (case-insensitive)
    original_count = len(validated_classes)
    validated_classes = self.validator.remove_duplicates(validated_classes)

    if len(validated_classes) < original_count:
        logger.info(
            f"Removed {original_count - len(validated_classes)} duplicate classes"
        )

    # Return cleaned response
    return OntologyExtractionResponse(
        classes=validated_classes,
        domain=response.domain,
    )
|
||||
|
||||
def _filter_invalid_classes(
    self,
    response: OntologyExtractionResponse,
    errors: List[str],
) -> OntologyExtractionResponse:
    """Filter invalid classes based on OWL validation errors.

    A class is considered invalid when its name appears in at least one
    error message as a whole identifier, i.e. not directly preceded or
    followed by a letter, digit or underscore. Matching whole identifiers
    keeps a class such as ``Patient`` from being removed merely because an
    error mentions ``PatientRecord``.

    Args:
        response: OntologyExtractionResponse to filter
        errors: List of error messages from OWL validation

    Returns:
        ``response`` unchanged when there are no errors or no class is
        mentioned in them; otherwise a new OntologyExtractionResponse
        without the mentioned classes.
    """
    # Imported locally so this method does not depend on the module's
    # import section, which is not visible from here.
    import re

    if not errors:
        return response

    logger.debug(f"Filtering classes based on {len(errors)} OWL validation errors")

    # One whole-identifier pattern per distinct class name. Empty names are
    # skipped: they would "match" every message.
    name_patterns = {
        ontology_class.name: re.compile(
            rf"(?<![A-Za-z0-9_]){re.escape(ontology_class.name)}(?![A-Za-z0-9_])"
        )
        for ontology_class in response.classes
        if ontology_class.name
    }

    # Extract class names mentioned in errors
    invalid_class_names = set()

    for error in errors:
        for class_name, pattern in name_patterns.items():
            if pattern.search(error):
                invalid_class_names.add(class_name)
                logger.debug(
                    f"Class '{class_name}' marked as invalid due to error: {error}"
                )

    if not invalid_class_names:
        return response

    # Filter out invalid classes
    original_count = len(response.classes)
    filtered_classes = [
        c for c in response.classes
        if c.name not in invalid_class_names
    ]

    logger.info(
        f"Filtered out {original_count - len(filtered_classes)} invalid classes: "
        f"{invalid_class_names}"
    )

    return OntologyExtractionResponse(
        classes=filtered_classes,
        domain=response.domain,
    )
|
||||
@@ -409,3 +409,42 @@ async def render_episodic_title_and_type_prompt(content: str) -> str:
|
||||
})
|
||||
|
||||
return rendered_prompt
|
||||
|
||||
|
||||
async def render_ontology_extraction_prompt(
    scenario: str,
    domain: str | None = None,
    max_classes: int = 15,
    json_schema: dict | None = None
) -> str:
    """
    Build the ontology extraction prompt from the extract_ontology.jinja2 template.

    The four arguments are handed to the template under the same names, and
    the rendered text plus a short summary of the inputs are written to the
    prompt/template logs.

    Args:
        scenario: The scenario description text to extract ontology classes from
        domain: Optional domain hint for the scenario (e.g., "Healthcare", "Education")
        max_classes: Maximum number of classes to extract (default: 15)
        json_schema: JSON schema for the expected output format

    Returns:
        Rendered prompt content as string
    """
    template_name = "extract_ontology.jinja2"
    render_context = {
        "scenario": scenario,
        "domain": domain,
        "max_classes": max_classes,
        "json_schema": json_schema,
    }
    rendered_prompt = prompt_env.get_template(template_name).render(**render_context)

    # Record the rendered result in the prompt log
    log_prompt_rendering('ontology extraction', rendered_prompt)

    # Optional: record template rendering info. Only the scenario length is
    # logged, and the schema is represented by a fixed label.
    scenario_len = len(scenario) if scenario else 0
    log_template_rendering(template_name, {
        'scenario_len': scenario_len,
        'domain': domain,
        'max_classes': max_classes,
        'json_schema': 'OntologyExtractionResponse.schema'
    })

    return rendered_prompt
|
||||
|
||||
210
api/app/core/memory/utils/prompt/prompts/extract_ontology.jinja2
Normal file
210
api/app/core/memory/utils/prompt/prompts/extract_ontology.jinja2
Normal file
@@ -0,0 +1,210 @@
|
||||
===Task===
|
||||
Extract ontology classes from the given scenario description following ontology engineering standards.
|
||||
|
||||
===Role===
|
||||
You are a professional ontology engineer with expertise in knowledge representation and OWL (Web Ontology Language) standards. Your task is to identify abstract classes and concepts from scenario descriptions, not concrete instances.
|
||||
|
||||
===Scenario Description===
|
||||
{{ scenario }}
|
||||
|
||||
{% if domain -%}
|
||||
===Domain Hint===
|
||||
This scenario belongs to the **{{ domain }}** domain. Consider domain-specific concepts and terminology when extracting classes.
|
||||
{%- endif %}
|
||||
|
||||
===Extraction Rules===
|
||||
|
||||
**1. Abstract Classes, Not Instances:**
|
||||
- Extract abstract categories and concepts (e.g., "MedicalProcedure", "Patient", "Diagnosis")
|
||||
- Do NOT extract concrete instances (e.g., "John Smith", "Room 301", "2024-01-15")
|
||||
- Think in terms of "types of things" rather than "specific things"
|
||||
|
||||
**2. Naming Convention (PascalCase):**
|
||||
- Use PascalCase format for the "name" field: start with uppercase letter, capitalize each word, no spaces
|
||||
- Examples: "MedicalProcedure", "HealthcareProvider", "DiagnosticTest"
|
||||
- Avoid: "medical procedure", "healthcare_provider", "diagnostic-test"
|
||||
- Use clear, descriptive names in English
|
||||
- Avoid abbreviations unless they are standard in the domain (e.g., "API", "DNA")
|
||||
- Provide Chinese translation in the "name_chinese" field (e.g., "医疗程序", "医疗服务提供者", "诊断测试")
|
||||
|
||||
**3. Domain Relevance:**
|
||||
- Focus on classes that are central to the scenario's domain
|
||||
- Prioritize classes that represent key concepts, entities, or relationships
|
||||
- Avoid overly generic classes (e.g., "Thing", "Object") unless they have specific domain meaning
|
||||
|
||||
**4. Class Quantity:**
|
||||
- Extract between 5 and {{ max_classes }} classes
|
||||
- Aim for a balanced set covering the main concepts in the scenario
|
||||
- Quality over quantity: prefer well-defined classes over exhaustive lists
|
||||
|
||||
**5. Clear Descriptions:**
|
||||
- Provide concise, informative descriptions in Chinese (max 500 characters)
|
||||
- Describe what the class represents, not specific instances
|
||||
- Use clear, natural Chinese language that explains the class's role in the domain
|
||||
|
||||
**6. Concrete Examples:**
|
||||
- Provide 2-5 concrete instance examples in Chinese for each class
|
||||
- Examples should be specific, realistic instances of the class
|
||||
- Examples help clarify the class's scope and meaning
|
||||
- Use natural Chinese language for examples
|
||||
- Example format: ["示例1", "示例2", "示例3"]
|
||||
|
||||
**7. Class Hierarchy:**
|
||||
- Identify parent-child relationships where applicable
|
||||
- Use the parent_class field to specify inheritance
|
||||
- Parent class must be one of the extracted classes or a standard OWL class
|
||||
- Leave parent_class as null for top-level classes
|
||||
|
||||
**8. Entity Types:**
|
||||
- Classify each class with an appropriate entity_type
|
||||
- Common types: "Person", "Organization", "Location", "Event", "Concept", "Process", "Object", "Role"
|
||||
- Choose the most specific type that applies
|
||||
|
||||
**9. OWL Reserved Words:**
|
||||
- Do NOT use OWL reserved words as class names
|
||||
- Reserved words include: "Thing", "Nothing", "Class", "Property", "ObjectProperty", "DatatypeProperty", "AnnotationProperty", "Ontology", "Individual", "Literal"
|
||||
- If a reserved word is needed, add a domain-specific prefix (e.g., "MedicalClass" instead of "Class")
|
||||
|
||||
**10. Language Consistency:**
|
||||
- Extract all class names in English (PascalCase format) for the "name" field
|
||||
- Provide Chinese translation for class names in the "name_chinese" field
|
||||
- Descriptions MUST be in Chinese (中文)
|
||||
- Examples MUST be in Chinese (中文)
|
||||
- Use clear, natural Chinese language for descriptions and examples
|
||||
|
||||
===Examples===
|
||||
|
||||
**Example 1 (Healthcare Domain):**
|
||||
Scenario: "A hospital manages patient records, schedules appointments, and coordinates medical procedures. Doctors diagnose conditions and prescribe treatments."
|
||||
|
||||
Output:
|
||||
{
|
||||
"classes": [
|
||||
{
|
||||
"name": "Patient",
|
||||
"name_chinese": "患者",
|
||||
"description": "在医疗机构接受医疗护理或治疗的人",
|
||||
"examples": ["张三", "李四", "患有糖尿病的老年患者"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Person",
|
||||
"domain": "Healthcare"
|
||||
},
|
||||
{
|
||||
"name": "MedicalProcedure",
|
||||
"name_chinese": "医疗程序",
|
||||
"description": "为医疗诊断或治疗而执行的系统性操作流程",
|
||||
"examples": ["手术", "血液检查", "X光检查", "疫苗接种"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Process",
|
||||
"domain": "Healthcare"
|
||||
},
|
||||
{
|
||||
"name": "Diagnosis",
|
||||
"name_chinese": "诊断",
|
||||
"description": "基于症状和检查结果对疾病或状况的识别",
|
||||
"examples": ["糖尿病诊断", "癌症诊断", "流感诊断"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Concept",
|
||||
"domain": "Healthcare"
|
||||
},
|
||||
{
|
||||
"name": "Doctor",
|
||||
"name_chinese": "医生",
|
||||
"description": "诊断和治疗患者的持证医疗专业人员",
|
||||
"examples": ["全科医生", "外科医生", "心脏病专家"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Role",
|
||||
"domain": "Healthcare"
|
||||
},
|
||||
{
|
||||
"name": "Treatment",
|
||||
"name_chinese": "治疗",
|
||||
"description": "为治愈或管理疾病状况而提供的医疗护理或疗法",
|
||||
"examples": ["药物治疗", "物理治疗", "化疗", "手术治疗"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Process",
|
||||
"domain": "Healthcare"
|
||||
}
|
||||
],
|
||||
"domain": "Healthcare",
|
||||
"namespace": "http://example.org/healthcare#"
|
||||
}
|
||||
|
||||
**Example 2 (Education Domain):**
|
||||
Scenario: "A university offers courses taught by professors. Students enroll in programs, attend lectures, and complete assignments to earn degrees."
|
||||
|
||||
Output:
|
||||
{
|
||||
"classes": [
|
||||
{
|
||||
"name": "Student",
|
||||
"name_chinese": "学生",
|
||||
"description": "在教育机构注册学习的人",
|
||||
"examples": ["本科生", "研究生", "在职学生"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Role",
|
||||
"domain": "Education"
|
||||
},
|
||||
{
|
||||
"name": "Course",
|
||||
"name_chinese": "课程",
|
||||
"description": "涵盖特定学科或主题的结构化教育课程",
|
||||
"examples": ["计算机科学导论", "微积分I", "世界历史"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Concept",
|
||||
"domain": "Education"
|
||||
},
|
||||
{
|
||||
"name": "Professor",
|
||||
"name_chinese": "教授",
|
||||
"description": "教授课程并进行研究的学术教师",
|
||||
"examples": ["助理教授", "副教授", "正教授"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Role",
|
||||
"domain": "Education"
|
||||
},
|
||||
{
|
||||
"name": "AcademicProgram",
|
||||
"name_chinese": "学术项目",
|
||||
"description": "通向学位或证书的结构化课程体系",
|
||||
"examples": ["理学学士", "文学硕士", "博士项目"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Concept",
|
||||
"domain": "Education"
|
||||
},
|
||||
{
|
||||
"name": "Assignment",
|
||||
"name_chinese": "作业",
|
||||
"description": "分配给学生以评估学习成果的任务或项目",
|
||||
"examples": ["论文", "习题集", "研究报告", "实验报告"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Object",
|
||||
"domain": "Education"
|
||||
},
|
||||
{
|
||||
"name": "Lecture",
|
||||
"name_chinese": "讲座",
|
||||
"description": "由教师进行的教育性演讲或讲座",
|
||||
"examples": ["入门讲座", "客座讲座", "在线讲座"],
|
||||
"parent_class": null,
|
||||
"entity_type": "Event",
|
||||
"domain": "Education"
|
||||
}
|
||||
],
|
||||
"domain": "Education",
|
||||
"namespace": "http://example.org/education#"
|
||||
}
|
||||
|
||||
===Output Format===
|
||||
|
||||
**JSON Requirements:**
|
||||
- Use only ASCII double quotes (") for JSON structure
|
||||
- Never use Chinese quotation marks (“ ”) or Unicode quotes
|
||||
- Escape quotation marks in text with backslashes (\")
|
||||
- Ensure proper string closure and comma separation
|
||||
- No line breaks within JSON string values
|
||||
- All class names must be in PascalCase format
|
||||
- All class names must be unique (case-insensitive)
|
||||
- Extract between 5 and {{ max_classes }} classes
|
||||
|
||||
{{ json_schema }}
|
||||
10
api/app/core/memory/utils/validation/__init__.py
Normal file
10
api/app/core/memory/utils/validation/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""Validation utilities for ontology extraction.
|
||||
|
||||
This module provides validation classes for ontology class names,
|
||||
descriptions, and OWL compliance checking.
|
||||
"""
|
||||
|
||||
from .ontology_validator import OntologyValidator
|
||||
from .owl_validator import OWLValidator
|
||||
|
||||
__all__ = ['OntologyValidator', 'OWLValidator']
|
||||
268
api/app/core/memory/utils/validation/ontology_validator.py
Normal file
268
api/app/core/memory/utils/validation/ontology_validator.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""String validation for ontology class names and descriptions.
|
||||
|
||||
This module provides the OntologyValidator class for validating and sanitizing
|
||||
ontology class names according to OWL standards and naming conventions.
|
||||
|
||||
Classes:
|
||||
OntologyValidator: Validates class names, removes duplicates, and truncates descriptions
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
|
||||
from app.core.memory.models.ontology_models import OntologyClass
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OntologyValidator:
    """Validator for ontology class names and descriptions.

    This validator performs string-level validation including:
    - PascalCase naming convention validation
    - OWL reserved word checking
    - Duplicate class name removal
    - Description length truncation

    Attributes:
        OWL_RESERVED_WORDS: Set of OWL reserved words that cannot be used as class names
    """

    # OWL reserved words that cannot be used as class names
    OWL_RESERVED_WORDS = {
        'Thing', 'Nothing', 'Class', 'Property',
        'ObjectProperty', 'DatatypeProperty', 'FunctionalProperty',
        'InverseFunctionalProperty', 'TransitiveProperty', 'SymmetricProperty',
        'AsymmetricProperty', 'ReflexiveProperty', 'IrreflexiveProperty',
        'Restriction', 'Ontology', 'Individual', 'NamedIndividual',
        'Annotation', 'AnnotationProperty', 'Axiom',
        'AllDifferent', 'AllDisjointClasses', 'AllDisjointProperties',
        'Datatype', 'DataRange', 'Literal',
        'DeprecatedClass', 'DeprecatedProperty',
        'Imports', 'IncompatibleWith', 'PriorVersion', 'VersionInfo',
        'BackwardCompatibleWith', 'OntologyProperty',
    }

    def validate_class_name(self, name: str) -> Tuple[bool, str]:
        """Validate that a class name follows OWL naming conventions.

        The name is stripped of surrounding whitespace, then checked in this
        order (the first failing rule determines the error message):
        1. Must not be empty
        2. Cannot be an OWL reserved word (exact, case-sensitive match)
        3. Must start with an uppercase letter (PascalCase)
        4. Cannot contain spaces
        5. Can only contain ASCII letters, digits and underscores

        Args:
            name: The class name to validate

        Returns:
            Tuple of (is_valid, error_message)
            - is_valid: True if the name is valid, False otherwise
            - error_message: Empty string if valid, error description if invalid

        Examples:
            >>> validator = OntologyValidator()
            >>> validator.validate_class_name("MedicalProcedure")
            (True, '')
            >>> validator.validate_class_name("Medical Procedure")
            (False, "Class name 'Medical Procedure' cannot contain spaces")
            >>> validator.validate_class_name("Thing")
            (False, "Class name 'Thing' is an OWL reserved word")
        """
        logger.debug(f"Validating class name: '{name}'")

        # Check if empty
        if not name or not name.strip():
            error_msg = "Class name cannot be empty"
            logger.warning(f"Validation failed: {error_msg}")
            return False, error_msg

        name = name.strip()

        # Check if it's an OWL reserved word
        if name in self.OWL_RESERVED_WORDS:
            error_msg = f"Class name '{name}' is an OWL reserved word"
            logger.warning(f"Validation failed: {error_msg}")
            return False, error_msg

        # Check if starts with uppercase letter
        if not name[0].isupper():
            error_msg = f"Class name '{name}' must start with an uppercase letter (PascalCase)"
            logger.warning(f"Validation failed: {error_msg}")
            return False, error_msg

        # Check for spaces (reported separately from other invalid characters)
        if ' ' in name:
            error_msg = f"Class name '{name}' cannot contain spaces"
            logger.warning(f"Validation failed: {error_msg}")
            return False, error_msg

        # Check for invalid characters (only alphanumeric and underscore allowed)
        if not re.fullmatch(r'[A-Za-z0-9_]+', name):
            error_msg = f"Class name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed"
            logger.warning(f"Validation failed: {error_msg}")
            return False, error_msg

        logger.debug(f"Class name '{name}' is valid")
        return True, ""

    def sanitize_class_name(self, name: str) -> str:
        """Attempt to sanitize an invalid class name into a valid format.

        Sanitization steps:
        1. Strip whitespace
        2. Split into words on whitespace, hyphens and underscores
        3. Drop every character that is not an ASCII letter or digit
        4. Uppercase the first character of each word (the rest of the word
           is left as is) and join the words without a separator
        5. If the result is empty or starts with a digit, prefix it with 'Class'
        6. If the result is an OWL reserved word, append 'Class'

        An empty/blank input returns 'UnnamedClass'. A non-blank input that
        contains no ASCII letters or digits ends up as 'ClassClass' (step 5
        yields the reserved word 'Class', step 6 appends the suffix).

        Args:
            name: The class name to sanitize

        Returns:
            Sanitized class name that should pass validation

        Examples:
            >>> validator = OntologyValidator()
            >>> validator.sanitize_class_name("medical procedure")
            'MedicalProcedure'
            >>> validator.sanitize_class_name("patient-record")
            'PatientRecord'
            >>> validator.sanitize_class_name("123invalid")
            'Class123invalid'
        """
        logger.debug(f"Sanitizing class name: '{name}'")

        if not name or not name.strip():
            logger.warning("Empty class name provided for sanitization, returning 'UnnamedClass'")
            return "UnnamedClass"

        # Strip whitespace
        name = name.strip()
        original_name = name

        # Split on spaces, hyphens, and underscores, then capitalize each word
        sanitized_words = []
        for word in re.split(r'[\s\-_]+', name):
            # Underscores were consumed by the split, so only letters and
            # digits can remain after this substitution.
            clean_word = re.sub(r'[^A-Za-z0-9]', '', word)
            if clean_word:
                # Capitalize first letter, keep the rest untouched
                sanitized_words.append(clean_word[0].upper() + clean_word[1:])

        # Join words
        sanitized = ''.join(sanitized_words)

        # If empty or starts with number, prefix with 'Class'
        if not sanitized or sanitized[0].isdigit():
            sanitized = 'Class' + sanitized
            logger.info(f"Prefixed class name with 'Class': '{original_name}' -> '{sanitized}'")

        # If it's a reserved word, append 'Class' suffix
        if sanitized in self.OWL_RESERVED_WORDS:
            sanitized = sanitized + 'Class'
            logger.info(f"Appended 'Class' suffix to reserved word: '{original_name}' -> '{sanitized}'")

        logger.info(f"Sanitized class name: '{original_name}' -> '{sanitized}'")
        return sanitized

    def remove_duplicates(self, classes: List["OntologyClass"]) -> List["OntologyClass"]:
        """Remove duplicate ontology classes based on case-insensitive name comparison.

        When duplicates are found, keeps the first occurrence and discards subsequent ones.
        Comparison is case-insensitive to catch variations like 'Patient' and 'patient'.
        Only the ``name`` attribute of each class is read.

        Args:
            classes: List of OntologyClass objects

        Returns:
            List of OntologyClass objects with duplicates removed, in their
            original order (the input list itself when it is empty)

        Examples:
            >>> validator = OntologyValidator()
            >>> classes = [
            ...     OntologyClass(name="Patient", description="A patient", entity_type="Person", domain="Healthcare"),
            ...     OntologyClass(name="patient", description="Another patient", entity_type="Person", domain="Healthcare"),
            ...     OntologyClass(name="Doctor", description="A doctor", entity_type="Person", domain="Healthcare"),
            ... ]
            >>> unique = validator.remove_duplicates(classes)
            >>> len(unique)
            2
            >>> [c.name for c in unique]
            ['Patient', 'Doctor']
        """
        if not classes:
            logger.debug("No classes to check for duplicates")
            return classes

        logger.debug(f"Checking {len(classes)} classes for duplicates")

        seen_names = set()
        unique_classes = []
        duplicates_found = []

        for ontology_class in classes:
            # Use lowercase for comparison
            name_lower = ontology_class.name.lower()

            if name_lower not in seen_names:
                seen_names.add(name_lower)
                unique_classes.append(ontology_class)
            else:
                duplicates_found.append(ontology_class.name)
                logger.debug(f"Duplicate class found and removed: '{ontology_class.name}'")

        if duplicates_found:
            logger.info(
                f"Removed {len(duplicates_found)} duplicate classes: {duplicates_found}"
            )
        else:
            logger.debug("No duplicate classes found")

        return unique_classes

    def truncate_description(self, description: str, max_length: int = 500) -> str:
        """Truncate a description to a maximum length.

        If the description exceeds max_length, it is cut so that the result,
        including a trailing ellipsis (...), is exactly max_length characters
        long. When max_length is smaller than the ellipsis itself (less than
        3), the text is cut hard at max_length without an ellipsis, so the
        result never exceeds the limit; a negative max_length is treated as 0.

        Args:
            description: The description text to truncate
            max_length: Maximum allowed length (default: 500)

        Returns:
            Truncated description string ("" for an empty/None description)

        Examples:
            >>> validator = OntologyValidator()
            >>> long_desc = "A" * 600
            >>> truncated = validator.truncate_description(long_desc, max_length=500)
            >>> len(truncated)
            500
            >>> truncated.endswith("...")
            True
            >>> validator.truncate_description("abcdef", max_length=2)
            'ab'
        """
        if not description:
            return ""

        if len(description) <= max_length:
            return description

        ellipsis = "..."
        if max_length < len(ellipsis):
            # No room for the ellipsis: a negative slice end would keep
            # almost the whole text, so cut hard at the limit instead.
            truncated = description[:max(max_length, 0)]
        else:
            # Reserve 3 characters for "..."
            truncated = description[:max_length - len(ellipsis)] + ellipsis

        logger.debug(
            f"Truncated description from {len(description)} to {len(truncated)} characters"
        )

        return truncated
|
||||
585
api/app/core/memory/utils/validation/owl_validator.py
Normal file
585
api/app/core/memory/utils/validation/owl_validator.py
Normal file
@@ -0,0 +1,585 @@
|
||||
"""OWL semantic validation for ontology classes using Owlready2.
|
||||
|
||||
This module provides the OWLValidator class for validating ontology classes
|
||||
against OWL standards using the Owlready2 library. It performs semantic
|
||||
validation including consistency checking, circular inheritance detection,
|
||||
and OWL file export.
|
||||
|
||||
Classes:
|
||||
OWLValidator: Validates ontology classes using OWL reasoning and exports to OWL formats
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from owlready2 import (
|
||||
World,
|
||||
Thing,
|
||||
get_ontology,
|
||||
sync_reasoner_pellet,
|
||||
OwlReadyInconsistentOntologyError,
|
||||
)
|
||||
|
||||
from app.core.memory.models.ontology_models import OntologyClass
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OWLValidator:
|
||||
"""Validator for OWL semantic validation of ontology classes.
|
||||
|
||||
This validator performs semantic-level validation using Owlready2 including:
|
||||
- Creating OWL classes from ontology class definitions
|
||||
- Running consistency checking with Pellet reasoner
|
||||
- Detecting circular inheritance
|
||||
- Validating Protégé compatibility
|
||||
- Exporting ontologies to various OWL formats (RDF/XML, Turtle, N-Triples)
|
||||
|
||||
Attributes:
|
||||
base_namespace: Base URI for the ontology namespace
|
||||
"""
|
||||
|
||||
def __init__(self, base_namespace: str = "http://example.org/ontology#"):
    """Initialize the OWL validator.

    The namespace is only stored on the instance here; nothing else is
    created or loaded at construction time.

    Args:
        base_namespace: Base URI for the ontology namespace, kept as
            ``self.base_namespace`` (default: http://example.org/ontology#)
    """
    self.base_namespace = base_namespace
|
||||
|
||||
def validate_ontology_classes(
    self,
    classes: List[OntologyClass],
) -> Tuple[bool, List[str], Optional[World]]:
    """Turn ``classes`` into an OWL ontology and report structural problems.

    The classes are materialised with Owlready2 inside a fresh ``World``,
    parent links are wired up, every class is checked for circular
    inheritance and finally the Pellet reasoner is asked for a consistency
    verdict.

    Args:
        classes: Ontology class definitions to validate.

    Returns:
        A ``(is_valid, messages, world)`` tuple. ``is_valid`` is True only
        when ``messages`` is empty; "parent not found" warnings are stored
        in that same list and therefore also mark the result as invalid.
        ``world`` is the Owlready2 ``World`` holding the ontology. It is
        None for empty input or when an unexpected exception aborts the
        build, but it is still returned when the reasoner reports an
        inconsistency.
    """
    if not classes:
        return False, ["No classes provided for validation"], None

    messages = []

    try:
        # A fresh World keeps this ontology isolated from any other one.
        world = World()

        # Build the ontology IRI from the namespace: drop the trailing
        # '#' or '/' and make sure it ends with ".owl".
        onto_iri = self.base_namespace.rstrip('#/')
        if not onto_iri.endswith('.owl'):
            onto_iri = onto_iri + '.owl'

        onto = world.get_ontology(onto_iri)

        with onto:
            # name -> created OWL class, used to resolve parents below.
            owl_classes = {}

            # Pass 1: create every class directly under Thing.
            for spec in classes:
                try:
                    # The class dict is left empty on purpose so that
                    # Owlready2 manages the namespace itself.
                    owl_class = type(spec.name, (Thing,), {})

                    # rdfs:label carries the name plus the Chinese name
                    # when one is present.
                    owl_class.label = (
                        [spec.name, spec.name_chinese]
                        if spec.name_chinese
                        else [spec.name]
                    )

                    # rdfs:comment carries the description.
                    if spec.description:
                        owl_class.comment = [spec.description]

                    owl_classes[spec.name] = owl_class

                    iri_text = owl_class.iri if hasattr(owl_class, 'iri') else 'N/A'
                    logger.debug(
                        f"Created OWL class: {spec.name} "
                        f"(Chinese: {spec.name_chinese}) "
                        f"IRI: {iri_text}"
                    )

                except Exception as e:
                    error_msg = f"Failed to create OWL class '{spec.name}': {e!s}"
                    messages.append(error_msg)
                    logger.error(error_msg, exc_info=True)

            # Pass 2: wire up parent links now that all classes exist.
            for spec in classes:
                parent_name = spec.parent_class
                if not parent_name or spec.name not in owl_classes:
                    continue

                if parent_name not in owl_classes:
                    warning_msg = (
                        f"Parent class '{parent_name}' not found for '{spec.name}'"
                    )
                    messages.append(warning_msg)
                    logger.warning(warning_msg)
                    continue

                try:
                    child_class = owl_classes[spec.name]
                    parent_class = owl_classes[parent_name]

                    # Replacing is_a makes the parent the only superclass.
                    child_class.is_a = [parent_class]

                    logger.debug(
                        f"Set parent relationship: {spec.name} -> {parent_name}"
                    )

                except Exception as e:
                    error_msg = (
                        f"Failed to set parent relationship "
                        f"'{spec.name}' -> '{parent_name}': {e!s}"
                    )
                    messages.append(error_msg)
                    logger.warning(error_msg)

            # Flag every class whose parent chain loops.
            for class_name, owl_class in owl_classes.items():
                if self._has_circular_inheritance(owl_class):
                    error_msg = f"Circular inheritance detected for class '{class_name}'"
                    messages.append(error_msg)
                    logger.error(error_msg)

        # Consistency check with the Pellet reasoner.
        try:
            logger.info("Running Pellet reasoner for consistency checking...")
            sync_reasoner_pellet(world, infer_property_values=True, infer_data_property_values=True)
            logger.info("Consistency check passed")

        except OwlReadyInconsistentOntologyError as e:
            error_msg = f"Ontology is inconsistent: {e!s}"
            messages.append(error_msg)
            logger.error(error_msg)
            return False, messages, world

        except Exception as e:
            # Any other reasoner failure (the original author attributes
            # these mostly to a missing or misconfigured Java) is only
            # logged; it does not fail the validation.
            detail = str(e)
            if detail.strip():
                logger.warning(f"Reasoner check skipped: {detail}")
            else:
                logger.warning("Reasoner check skipped: Java may not be installed or configured")

        # Anything collected above, warnings included, makes the result invalid.
        return not messages, messages, world

    except Exception as e:
        error_msg = f"OWL validation failed: {e!s}"
        messages.append(error_msg)
        logger.error(error_msg, exc_info=True)
        return False, messages, None
|
||||
|
||||
def _has_circular_inheritance(self, owl_class) -> bool:
|
||||
"""Check if an OWL class has circular inheritance.
|
||||
|
||||
Circular inheritance occurs when a class inherits from itself through
|
||||
a chain of parent relationships (e.g., A -> B -> C -> A).
|
||||
|
||||
Args:
|
||||
owl_class: Owlready2 class object to check
|
||||
|
||||
Returns:
|
||||
True if circular inheritance is detected, False otherwise
|
||||
"""
|
||||
visited = set()
|
||||
current = owl_class
|
||||
|
||||
while current:
|
||||
# Get class IRI or name as identifier
|
||||
class_id = str(current.iri) if hasattr(current, 'iri') else str(current)
|
||||
|
||||
if class_id in visited:
|
||||
# Found a cycle
|
||||
return True
|
||||
|
||||
visited.add(class_id)
|
||||
|
||||
# Get parent classes (is_a relationship)
|
||||
parents = getattr(current, 'is_a', [])
|
||||
|
||||
# Filter out Thing and other base classes
|
||||
parent_classes = [p for p in parents if p != Thing and hasattr(p, 'is_a')]
|
||||
|
||||
if not parent_classes:
|
||||
# No more parents, no cycle
|
||||
break
|
||||
|
||||
# Check first parent (in single inheritance)
|
||||
current = parent_classes[0] if parent_classes else None
|
||||
|
||||
return False
|
||||
|
||||
def export_to_owl(
    self,
    world: World,
    output_path: Optional[str] = None,
    format: str = "rdfxml",
    classes: Optional[List] = None
) -> str:
    """Serialise the ontology held by ``world`` and return it as a string.

    Supported formats:
        - rdfxml: RDF/XML (default)
        - turtle: produced by saving RDF/XML and converting it
        - ntriples: N-Triples
        - json: simplified JSON built from ``classes``

    Args:
        world: Owlready2 World containing the ontology. Not used for the
            ``json`` format.
        output_path: File to write the ontology to. When omitted, a
            temporary file is used and removed again. For ``json`` the
            string is returned directly and nothing is written.
        format: One of "rdfxml", "turtle", "ntriples" or "json".
        classes: Ontology class objects; required for the ``json`` format.

    Returns:
        The exported ontology as text, after readability formatting.

    Raises:
        ValueError: If ``format`` is unsupported, ``classes`` is missing
            for ``json``, or ``world`` is falsy for an OWL format.
        RuntimeError: If anything fails while selecting, saving or reading
            the ontology (the original exception is chained).
    """
    import os
    import tempfile

    valid_formats = ["rdfxml", "turtle", "ntriples", "json"]
    if format not in valid_formats:
        raise ValueError(
            f"Unsupported format '{format}'. Must be one of: {', '.join(valid_formats)}"
        )

    # JSON is built straight from the class list; no OWL processing.
    if format == "json":
        if not classes:
            raise ValueError("Classes list is required for JSON format export")
        return self._export_to_json(classes)

    if not world:
        raise ValueError("World object is None. Cannot export ontology.")

    # Turtle is not saved directly: the ontology is saved as RDF/XML and
    # the text is converted afterwards.
    use_conversion = (format == "turtle")
    export_format = "rdfxml" if use_conversion else format

    try:
        ontologies = list(world.ontologies.values())
        if not ontologies:
            raise RuntimeError("No ontologies found in world")

        # Pick the first ontology that actually contains classes.
        onto = None
        for ont in ontologies:
            classes_count = len(list(ont.classes()))
            logger.debug(f"Checking ontology {ont.base_iri}: {classes_count} classes")
            if classes_count > 0:
                onto = ont
                break

        # Otherwise fall back to the last non-anonymous ontology ...
        if onto is None:
            for ont in reversed(ontologies):
                if ont.base_iri != "http://anonymous/":
                    onto = ont
                    break

        # ... and as a last resort to the first one.
        if onto is None:
            onto = ontologies[0]

        # Log the ontology contents to help debugging exports.
        all_classes = list(onto.classes())
        logger.info(f"Ontology IRI: {onto.base_iri}")
        logger.info(f"Ontology contains {len(all_classes)} classes")
        for cls in all_classes:
            logger.info(f"Class in ontology: {cls.name} (IRI: {cls.iri})")
            if hasattr(cls, 'label'):
                logger.debug(f" Labels: {cls.label}")
            if hasattr(cls, 'comment'):
                logger.debug(f" Comments: {cls.comment}")

        if not all_classes:
            logger.warning("No classes found in ontology! This may indicate a problem with class creation.")

        # Without an output path a temporary file is used as the target.
        is_temp = not output_path
        if is_temp:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.owl', delete=False) as tmp:
                target_path = tmp.name
        else:
            target_path = output_path
            logger.info(f"Exporting ontology to {output_path} in {export_format} format")

        try:
            onto.save(file=target_path, format=export_format)

            with open(target_path, 'r', encoding='utf-8') as f:
                content = f.read()

            if use_conversion:
                content = self._convert_to_turtle(content)

            content = self._format_owl_content(content, format)

            if use_conversion and not is_temp:
                # The file on disk still holds the intermediate RDF/XML;
                # overwrite it so it matches the returned text. If the
                # conversion fell back to RDF/XML this rewrites the same
                # document, merely trimmed.
                with open(target_path, 'w', encoding='utf-8') as f:
                    f.write(content)
        finally:
            # Only the temporary file is ours to delete.
            if is_temp and os.path.exists(target_path):
                os.remove(target_path)

        if not is_temp:
            logger.info(f"Successfully exported ontology to {output_path}")

        return content

    except Exception as e:
        error_msg = f"Failed to export ontology: {str(e)}"
        logger.error(error_msg, exc_info=True)
        raise RuntimeError(error_msg) from e
|
||||
|
||||
def _export_to_json(self, classes: List) -> str:
|
||||
"""Export ontology classes to simplified JSON format.
|
||||
|
||||
This format is more compact and easier to parse than OWL XML.
|
||||
|
||||
Args:
|
||||
classes: List of OntologyClass objects
|
||||
|
||||
Returns:
|
||||
JSON string representation (compact format)
|
||||
"""
|
||||
import json
|
||||
|
||||
result = {
|
||||
"ontology": {
|
||||
"namespace": self.base_namespace,
|
||||
"classes": []
|
||||
}
|
||||
}
|
||||
|
||||
for cls in classes:
|
||||
class_data = {
|
||||
"name": cls.name,
|
||||
"name_chinese": cls.name_chinese,
|
||||
"description": cls.description,
|
||||
"entity_type": cls.entity_type,
|
||||
"domain": cls.domain,
|
||||
"parent_class": cls.parent_class,
|
||||
"examples": cls.examples if hasattr(cls, 'examples') else []
|
||||
}
|
||||
result["ontology"]["classes"].append(class_data)
|
||||
|
||||
# 使用紧凑格式:无缩进,使用分隔符减少空格
|
||||
return json.dumps(result, ensure_ascii=False, separators=(',', ':'))
|
||||
|
||||
def _convert_to_turtle(self, rdfxml_content: str) -> str:
|
||||
"""Convert RDF/XML content to Turtle format using rdflib.
|
||||
|
||||
Args:
|
||||
rdfxml_content: RDF/XML format content
|
||||
|
||||
Returns:
|
||||
Turtle format content
|
||||
"""
|
||||
try:
|
||||
from rdflib import Graph
|
||||
|
||||
# Parse RDF/XML
|
||||
g = Graph()
|
||||
g.parse(data=rdfxml_content, format="xml")
|
||||
|
||||
# Serialize to Turtle
|
||||
turtle_content = g.serialize(format="turtle")
|
||||
|
||||
# Handle bytes vs string
|
||||
if isinstance(turtle_content, bytes):
|
||||
turtle_content = turtle_content.decode('utf-8')
|
||||
|
||||
return turtle_content
|
||||
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"rdflib is not installed. Cannot convert to Turtle format. "
|
||||
"Install with: pip install rdflib"
|
||||
)
|
||||
return rdfxml_content
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to convert to Turtle format: {e}")
|
||||
return rdfxml_content
|
||||
|
||||
def _format_owl_content(self, content: str, format: str) -> str:
|
||||
"""Format OWL content for better readability.
|
||||
|
||||
Args:
|
||||
content: Raw OWL content string
|
||||
format: Format type (rdfxml, turtle, ntriples)
|
||||
|
||||
Returns:
|
||||
Formatted OWL content string
|
||||
"""
|
||||
if format == "rdfxml":
|
||||
# Format XML with proper indentation
|
||||
try:
|
||||
import xml.dom.minidom as minidom
|
||||
dom = minidom.parseString(content)
|
||||
# Pretty print with 2-space indentation
|
||||
formatted = dom.toprettyxml(indent=" ", encoding="utf-8").decode("utf-8")
|
||||
|
||||
# Remove extra blank lines
|
||||
lines = []
|
||||
prev_blank = False
|
||||
for line in formatted.split('\n'):
|
||||
is_blank = not line.strip()
|
||||
if not (is_blank and prev_blank): # Skip consecutive blank lines
|
||||
lines.append(line)
|
||||
prev_blank = is_blank
|
||||
|
||||
formatted = '\n'.join(lines)
|
||||
|
||||
return formatted
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to format XML content: {e}")
|
||||
return content
|
||||
|
||||
elif format == "turtle":
|
||||
# Turtle format is already relatively readable
|
||||
# Just ensure consistent line endings and not empty
|
||||
if not content or content.strip() == "":
|
||||
logger.warning("Turtle content is empty, this may indicate an export issue")
|
||||
return content.strip() + '\n' if content.strip() else content
|
||||
|
||||
elif format == "ntriples":
|
||||
# N-Triples format is line-based, ensure proper line endings
|
||||
return content.strip() + '\n' if content.strip() else content
|
||||
|
||||
return content
|
||||
|
||||
def validate_with_protege_compatibility(
    self,
    classes: List[OntologyClass]
) -> Tuple[bool, List[str]]:
    """Collect warnings about things that may not work well in Protégé.

    Checks performed:
        - the namespace starts with http:// or https://
        - the namespace ends with '#' or '/'
        - class names contain none of ``< > " { } | ^ `` and the backtick
        - descriptions are at most 1000 characters long
        - class names are pure ASCII

    Args:
        classes: Ontology class definitions to inspect.

    Returns:
        ``(is_compatible, warnings)`` where ``is_compatible`` is True only
        when no warning was produced.
    """
    warnings = []
    namespace = self.base_namespace

    # Namespace shape.
    if not namespace.startswith(('http://', 'https://')):
        warnings.append(
            f"Namespace '{namespace}' should start with http:// or https:// "
            "for Protégé compatibility"
        )

    if not namespace.endswith(('#', '/')):
        warnings.append(
            f"Namespace '{namespace}' should end with # or / "
            "for Protégé compatibility"
        )

    problem_chars = ['<', '>', '"', '{', '}', '|', '^', '`']

    for entry in classes:
        name = entry.name
        description = entry.description

        # Characters the original author flags as troublesome in names.
        if any(ch in name for ch in problem_chars):
            warnings.append(
                f"Class name '{name}' contains special characters "
                "that may cause issues in Protégé"
            )

        # Very long descriptions are accepted but flagged.
        if description and len(description) > 1000:
            warnings.append(
                f"Class '{name}' has a very long description ({len(description)} chars) "
                "which may display poorly in Protégé"
            )

        # Non-ASCII names are flagged as a possible encoding problem.
        if not name.isascii():
            warnings.append(
                f"Class name '{name}' contains non-ASCII characters "
                "which may cause encoding issues in some Protégé versions"
            )

    return len(warnings) == 0, warnings
|
||||
@@ -28,6 +28,10 @@ from .tool_model import (
|
||||
ToolExecution, ToolType, ToolStatus, AuthType, ExecutionStatus
|
||||
)
|
||||
from .memory_perceptual_model import MemoryPerceptualModel
|
||||
from .ontology_scene import OntologyScene
|
||||
from .ontology_class import OntologyClass
|
||||
from .ontology_scene import OntologyScene
|
||||
from .ontology_class import OntologyClass
|
||||
|
||||
__all__ = [
|
||||
"Tenants",
|
||||
|
||||
40
api/app/models/ontology_class.py
Normal file
40
api/app/models/ontology_class.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""本体类型模型
|
||||
|
||||
本模块定义本体类型的数据模型。
|
||||
|
||||
Classes:
|
||||
OntologyClass: 本体类型表模型
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import uuid
|
||||
from sqlalchemy import Column, String, DateTime, Text, ForeignKey
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.db import Base
|
||||
|
||||
|
||||
class OntologyClass(Base):
    """Ontology class table: stores the class (type) entries extracted for a scene."""

    __tablename__ = "ontology_class"

    # Primary key; a UUID generated by uuid.uuid4 when none is supplied.
    class_id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        index=True,
        comment="类型ID",
    )

    # Class details: required name, optional free-text description.
    class_name = Column(String(200), nullable=False, comment="类型名称")
    class_description = Column(Text, nullable=True, comment="类型描述")

    # Foreign key to the owning ontology scene; ON DELETE CASCADE.
    scene_id = Column(
        UUID(as_uuid=True),
        ForeignKey("ontology_scene.scene_id", ondelete="CASCADE"),
        nullable=False,
        index=True,
        comment="所属场景ID",
    )

    # Timestamps, filled from datetime.datetime.now.
    created_at = Column(
        DateTime,
        default=datetime.datetime.now,
        nullable=False,
        comment="创建时间",
    )
    updated_at = Column(
        DateTime,
        default=datetime.datetime.now,
        onupdate=datetime.datetime.now,
        nullable=False,
        comment="更新时间",
    )

    # Relationship: each class belongs to one scene.
    scene = relationship("OntologyScene", back_populates="classes")

    def __repr__(self):
        return "<OntologyClass(id={}, name={}, scene_id={})>".format(
            self.class_id, self.class_name, self.scene_id
        )
|
||||
43
api/app/models/ontology_scene.py
Normal file
43
api/app/models/ontology_scene.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""本体场景模型
|
||||
|
||||
本模块定义本体场景的数据模型。
|
||||
|
||||
Classes:
|
||||
OntologyScene: 本体场景表模型
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import uuid
|
||||
from sqlalchemy import Column, String, DateTime, Integer, Text, ForeignKey, UniqueConstraint
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.db import Base
|
||||
|
||||
|
||||
class OntologyScene(Base):
    """Ontology scene table: a named scene that groups ontology classes."""

    __tablename__ = "ontology_scene"
    # A scene name may appear only once per workspace.
    __table_args__ = (
        UniqueConstraint('workspace_id', 'scene_name', name='uq_workspace_scene_name'),
    )

    # Primary key; a UUID generated by uuid.uuid4 when none is supplied.
    scene_id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        index=True,
        comment="场景ID",
    )

    # Scene details: required name, optional free-text description.
    scene_name = Column(String(200), nullable=False, comment="场景名称")
    scene_description = Column(Text, nullable=True, comment="场景描述")

    # Foreign key to the owning workspace; ON DELETE CASCADE.
    workspace_id = Column(
        UUID(as_uuid=True),
        ForeignKey("workspaces.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
        comment="所属工作空间ID",
    )

    # Timestamps, filled from datetime.datetime.now.
    created_at = Column(
        DateTime,
        default=datetime.datetime.now,
        nullable=False,
        comment="创建时间",
    )
    updated_at = Column(
        DateTime,
        default=datetime.datetime.now,
        onupdate=datetime.datetime.now,
        nullable=False,
        comment="更新时间",
    )

    # Relationship: one scene owns many classes; removing the scene (or
    # detaching a class from it) deletes the class as well.
    classes = relationship("OntologyClass", back_populates="scene", cascade="all, delete-orphan")

    def __repr__(self):
        return "<OntologyScene(id={}, name={})>".format(self.scene_id, self.scene_name)
|
||||
404
api/app/repositories/ontology_class_repository.py
Normal file
404
api/app/repositories/ontology_class_repository.py
Normal file
@@ -0,0 +1,404 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""本体类型Repository层
|
||||
|
||||
本模块提供本体类型的数据访问层实现。
|
||||
|
||||
Classes:
|
||||
OntologyClassRepository: 本体类型数据访问类
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.core.logging_config import get_db_logger
|
||||
from app.models.ontology_class import OntologyClass
|
||||
from app.models.ontology_scene import OntologyScene
|
||||
|
||||
|
||||
logger = get_db_logger()
|
||||
|
||||
|
||||
class OntologyClassRepository:
|
||||
"""本体类型Repository
|
||||
|
||||
提供本体类型的CRUD操作和权限检查。
|
||||
|
||||
Attributes:
|
||||
db: SQLAlchemy数据库会话
|
||||
"""
|
||||
|
||||
def __init__(self, db: Session):
    """Bind the repository to a database session.

    Args:
        db: SQLAlchemy session used by every query in this repository.
    """
    # The session is only stored here; nothing is queried yet.
    self.db: Session = db
|
||||
|
||||
def create(self, class_data: dict, scene_id: UUID) -> OntologyClass:
    """Insert a new ontology class for a scene.

    The row is added and flushed so that its generated ID is available,
    but the transaction is not committed here.

    Args:
        class_data: Mapping with ``class_name`` and ``class_description``.
        scene_id: ID of the scene the class belongs to.

    Returns:
        The newly created ``OntologyClass``.

    Raises:
        Exception: Any error raised while adding or flushing; it is
            logged and re-raised unchanged.
    """
    try:
        name = class_data.get("class_name")
        logger.info(f"Creating ontology class - name={name}, scene_id={scene_id}")

        new_class = OntologyClass(
            class_name=name,
            class_description=class_data.get("class_description"),
            scene_id=scene_id,
        )

        self.db.add(new_class)
        # Flush to obtain the ID without committing.
        self.db.flush()

        logger.info(f"Ontology class created successfully - class_id={new_class.class_id}")
        return new_class

    except Exception as exc:
        logger.error(f"Failed to create ontology class: {exc!s}", exc_info=True)
        raise
|
||||
|
||||
def get_by_id(self, class_id: UUID) -> Optional[OntologyClass]:
    """Fetch one ontology class by its primary key.

    Args:
        class_id: ID of the class to load.

    Returns:
        The matching ``OntologyClass``, or None when no row matches.

    Raises:
        Exception: Any query error; it is logged and re-raised unchanged.
    """
    try:
        logger.debug(f"Getting ontology class by ID: {class_id}")

        query = self.db.query(OntologyClass).filter(OntologyClass.class_id == class_id)
        found = query.first()

        logger.debug(
            f"Ontology class found: {class_id}"
            if found
            else f"Ontology class not found: {class_id}"
        )
        return found

    except Exception as exc:
        logger.error(f"Failed to get ontology class by ID: {exc!s}", exc_info=True)
        raise
|
||||
|
||||
def get_by_name(self, class_name: str, scene_id: UUID) -> Optional[OntologyClass]:
    """Fetch one ontology class by exact name within a scene.

    Args:
        class_name: Name to match exactly.
        scene_id: ID of the scene to look in.

    Returns:
        The first matching ``OntologyClass``, or None when no row matches.

    Raises:
        Exception: Any query error; it is logged and re-raised unchanged.
    """
    try:
        logger.debug(f"Getting ontology class by name: {class_name}, scene_id: {scene_id}")

        query = self.db.query(OntologyClass).filter(
            OntologyClass.class_name == class_name,
            OntologyClass.scene_id == scene_id,
        )
        found = query.first()

        logger.debug(
            f"Ontology class found: {class_name}"
            if found
            else f"Ontology class not found: {class_name}"
        )
        return found

    except Exception as exc:
        logger.error(f"Failed to get ontology class by name: {exc!s}", exc_info=True)
        raise
|
||||
|
||||
def search_by_name(self, keyword: str, scene_id: UUID) -> List[OntologyClass]:
    """Search a scene's ontology classes by a substring of their name.

    The match is a case-insensitive "contains" (ILIKE). The keyword is
    taken literally: ``%`` and ``_`` inside it are escaped so they match
    themselves instead of acting as wildcards.

    Args:
        keyword: Text that must occur in the class name.
        scene_id: ID of the scene to search in.

    Returns:
        Matching classes, newest first (ordered by ``created_at``
        descending).

    Raises:
        Exception: Any query error; it is logged and re-raised unchanged.
    """
    try:
        logger.debug(
            f"Searching ontology classes by keyword - "
            f"keyword={keyword}, scene_id={scene_id}"
        )

        # Escape the LIKE metacharacters, and the escape character itself
        # first, so user input cannot widen the match.
        escape_char = "/"
        literal = str(keyword).replace(escape_char, escape_char * 2)
        literal = literal.replace("%", escape_char + "%").replace("_", escape_char + "_")

        classes = self.db.query(OntologyClass).filter(
            OntologyClass.class_name.ilike(f"%{literal}%", escape=escape_char),
            OntologyClass.scene_id == scene_id
        ).order_by(
            OntologyClass.created_at.desc()
        ).all()

        logger.info(
            f"Found {len(classes)} ontology classes matching keyword '{keyword}' "
            f"in scene {scene_id}"
        )

        return classes

    except Exception as e:
        logger.error(
            f"Failed to search ontology classes by keyword: {str(e)}",
            exc_info=True
        )
        raise
|
||||
|
||||
def get_by_scene(self, scene_id: UUID) -> List[OntologyClass]:
    """List every ontology class of a scene, newest first.

    Args:
        scene_id: ID of the scene whose classes are wanted.

    Returns:
        The scene's classes ordered by ``created_at`` descending.

    Raises:
        Exception: Any query error; it is logged and re-raised unchanged.
    """
    try:
        logger.debug(f"Getting ontology classes by scene: {scene_id}")

        query = self.db.query(OntologyClass).filter(OntologyClass.scene_id == scene_id)
        results = query.order_by(OntologyClass.created_at.desc()).all()

        logger.info(f"Found {len(results)} ontology classes in scene {scene_id}")
        return results

    except Exception as exc:
        logger.error(f"Failed to get ontology classes by scene: {exc!s}", exc_info=True)
        raise
|
||||
|
||||
def update(self, class_id: UUID, update_data: dict) -> Optional[OntologyClass]:
    """Apply changes to an existing ontology class.

    ``class_name`` is only overwritten when a non-None value is supplied;
    ``class_description`` is overwritten whenever the key is present, so
    it can be reset to None. Changes are flushed, not committed.

    Args:
        class_id: ID of the class to change.
        update_data: Mapping that may contain ``class_name`` and/or
            ``class_description``.

    Returns:
        The updated ``OntologyClass``, or None when the ID is unknown.

    Raises:
        Exception: Any database error; it is logged and re-raised
            unchanged.
    """
    try:
        logger.info(f"Updating ontology class: {class_id}")

        target = self.get_by_id(class_id)
        if not target:
            logger.warning(f"Ontology class not found for update: {class_id}")
            return None

        # The name cannot be cleared: None means "leave as is".
        new_name = update_data.get("class_name")
        if new_name is not None:
            target.class_name = new_name

        # The description may be set to None explicitly.
        if "class_description" in update_data:
            target.class_description = update_data["class_description"]

        self.db.flush()
        logger.info(f"Ontology class updated successfully: {class_id}")
        return target

    except Exception as exc:
        logger.error(f"Failed to update ontology class: {exc!s}", exc_info=True)
        raise
|
||||
|
||||
def delete(self, class_id: UUID) -> bool:
    """Delete an ontology class by ID.

    The deletion is flushed, not committed.

    Args:
        class_id: ID of the class to remove.

    Returns:
        True when the class existed and was deleted, False when no class
        has that ID.

    Raises:
        Exception: Any database error; it is logged and re-raised
            unchanged.
    """
    try:
        logger.info(f"Deleting ontology class: {class_id}")

        target = self.get_by_id(class_id)
        if not target:
            logger.warning(f"Ontology class not found for delete: {class_id}")
            return False

        self.db.delete(target)
        self.db.flush()

        logger.info(f"Ontology class deleted successfully: {class_id}")
        return True

    except Exception as exc:
        logger.error(f"Failed to delete ontology class: {exc!s}", exc_info=True)
        raise
|
||||
|
||||
def check_ownership(self, class_id: UUID, workspace_id: UUID) -> bool:
    """Tell whether a class belongs to a workspace via its scene.

    The class is joined to its scene and the scene's ``workspace_id`` is
    compared with the given one.

    Args:
        class_id: ID of the class to check.
        workspace_id: ID of the workspace that should own it.

    Returns:
        True when at least one matching row exists, False otherwise.

    Raises:
        Exception: Any query error; it is logged and re-raised unchanged.
    """
    try:
        logger.debug(
            f"Checking class ownership - class_id={class_id}, workspace_id={workspace_id}"
        )

        joined = self.db.query(OntologyClass).join(
            OntologyScene,
            OntologyClass.scene_id == OntologyScene.scene_id,
        )
        matches = joined.filter(
            OntologyClass.class_id == class_id,
            OntologyScene.workspace_id == workspace_id,
        ).count()

        is_owner = matches > 0

        logger.debug(f"Class ownership check result: {is_owner} - class_id={class_id}")
        return is_owner

    except Exception as exc:
        logger.error(f"Failed to check class ownership: {exc!s}", exc_info=True)
        raise
|
||||
|
||||
def get_scene_id_by_class(self, class_id: UUID) -> Optional[UUID]:
    """Return the ID of the scene that an ontology class belongs to.

    The class is loaded through ``self.get_by_id`` and its ``scene_id``
    attribute is returned.

    Args:
        class_id: ID of the ontology class.

    Returns:
        Optional[UUID]: The owning scene's ID, or None when the class is
        not found.

    Raises:
        Exception: Any error raised during the lookup is logged and
            re-raised.

    Examples:
        >>> repo = OntologyClassRepository(db)
        >>> scene_id = repo.get_scene_id_by_class(class_id)
    """
    try:
        logger.debug(f"Getting scene ID by class: {class_id}")

        record = self.get_by_id(class_id)
        if record:
            logger.debug(
                f"Found scene ID: {record.scene_id} for class: {class_id}"
            )
            return record.scene_id

        logger.debug(f"Class not found: {class_id}")
        return None

    except Exception as exc:
        logger.error(
            f"Failed to get scene ID by class: {exc!s}",
            exc_info=True,
        )
        raise
|
||||
394
api/app/repositories/ontology_scene_repository.py
Normal file
394
api/app/repositories/ontology_scene_repository.py
Normal file
@@ -0,0 +1,394 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""本体场景Repository层
|
||||
|
||||
本模块提供本体场景的数据访问层实现。
|
||||
|
||||
Classes:
|
||||
OntologySceneRepository: 本体场景数据访问类
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.core.logging_config import get_db_logger
|
||||
from app.models.ontology_scene import OntologyScene
|
||||
|
||||
|
||||
logger = get_db_logger()
|
||||
|
||||
|
||||
class OntologySceneRepository:
    """Data-access layer for ontology scenes.

    Provides CRUD operations, name lookup and search, and a workspace
    ownership check on top of a SQLAlchemy session. Write methods only
    call ``flush()``; no method in this class commits the transaction.

    Attributes:
        db: SQLAlchemy database session used for every query.
    """

    # Escape character passed to ILIKE so that wildcards typed by the user
    # are matched literally instead of being interpreted by the database.
    _LIKE_ESCAPE = "\\"

    def __init__(self, db: Session):
        """Initialise the repository.

        Args:
            db: SQLAlchemy database session.
        """
        self.db = db

    @classmethod
    def _escape_like(cls, keyword: str) -> str:
        """Escape LIKE wildcards (``%``, ``_``) and the escape character itself."""
        esc = cls._LIKE_ESCAPE
        # The escape character must be doubled first, otherwise the
        # backslashes added for % and _ would themselves get escaped.
        return (
            keyword.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )

    def create(self, scene_data: dict, workspace_id: UUID) -> OntologyScene:
        """Create an ontology scene.

        Args:
            scene_data: Scene data dict; ``scene_name`` and
                ``scene_description`` are read with ``dict.get``.
            workspace_id: ID of the owning workspace.

        Returns:
            OntologyScene: The newly created (flushed, uncommitted) scene.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> scene = repo.create(
            ...     {"scene_name": "Healthcare", "scene_description": "..."},
            ...     workspace_id
            ... )
        """
        try:
            logger.info(
                f"Creating ontology scene - "
                f"name={scene_data.get('scene_name')}, "
                f"workspace_id={workspace_id}"
            )

            scene = OntologyScene(
                scene_name=scene_data.get("scene_name"),
                scene_description=scene_data.get("scene_description"),
                workspace_id=workspace_id
            )

            self.db.add(scene)
            self.db.flush()  # Flush to obtain the ID without committing

            logger.info(
                f"Ontology scene created successfully - "
                f"scene_id={scene.scene_id}"
            )

            return scene

        except Exception as e:
            logger.error(
                f"Failed to create ontology scene: {str(e)}",
                exc_info=True
            )
            raise

    def get_by_id(self, scene_id: UUID) -> Optional[OntologyScene]:
        """Fetch a scene by its ID.

        Args:
            scene_id: Scene ID.

        Returns:
            Optional[OntologyScene]: The scene, or None if it does not exist.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> scene = repo.get_by_id(scene_id)
        """
        try:
            logger.debug(f"Getting ontology scene by ID: {scene_id}")

            scene = self.db.query(OntologyScene).filter(
                OntologyScene.scene_id == scene_id
            ).first()

            if scene:
                logger.debug(f"Ontology scene found: {scene_id}")
            else:
                logger.debug(f"Ontology scene not found: {scene_id}")

            return scene

        except Exception as e:
            logger.error(
                f"Failed to get ontology scene by ID: {str(e)}",
                exc_info=True
            )
            raise

    def get_by_name(self, scene_name: str, workspace_id: UUID) -> Optional[OntologyScene]:
        """Fetch a scene by exact name within a workspace.

        The scene's ``classes`` relationship is eagerly loaded.

        Args:
            scene_name: Scene name (exact match).
            workspace_id: Workspace ID.

        Returns:
            Optional[OntologyScene]: The scene, or None if it does not exist.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> scene = repo.get_by_name("Healthcare", workspace_id)
        """
        try:
            logger.debug(
                f"Getting ontology scene by name - "
                f"scene_name={scene_name}, workspace_id={workspace_id}"
            )

            scene = self.db.query(OntologyScene).options(
                joinedload(OntologyScene.classes)
            ).filter(
                OntologyScene.scene_name == scene_name,
                OntologyScene.workspace_id == workspace_id
            ).first()

            if scene:
                logger.debug(f"Ontology scene found: {scene_name}")
            else:
                logger.debug(f"Ontology scene not found: {scene_name}")

            return scene

        except Exception as e:
            logger.error(
                f"Failed to get ontology scene by name: {str(e)}",
                exc_info=True
            )
            raise

    def search_by_name(self, keyword: str, workspace_id: UUID) -> List[OntologyScene]:
        """Search scenes in a workspace whose name contains a keyword.

        Matching uses case-insensitive ILIKE with the keyword treated as a
        literal substring: ``%``, ``_`` and backslash in ``keyword`` are
        escaped so they are not interpreted as wildcards. Results are
        ordered by ``updated_at`` descending and have ``classes`` eagerly
        loaded.

        Args:
            keyword: Search keyword.
            workspace_id: Workspace ID.

        Returns:
            List[OntologyScene]: Matching scenes.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> scenes = repo.search_by_name("health", workspace_id)
        """
        try:
            logger.debug(
                f"Searching ontology scenes by keyword - "
                f"keyword={keyword}, workspace_id={workspace_id}"
            )

            # Escape user input so "100%" or "_" match literally rather than
            # acting as LIKE wildcards.
            pattern = f"%{self._escape_like(keyword)}%"

            scenes = self.db.query(OntologyScene).options(
                joinedload(OntologyScene.classes)
            ).filter(
                OntologyScene.scene_name.ilike(pattern, escape=self._LIKE_ESCAPE),
                OntologyScene.workspace_id == workspace_id
            ).order_by(
                OntologyScene.updated_at.desc()
            ).all()

            logger.info(
                f"Found {len(scenes)} ontology scenes matching keyword '{keyword}' "
                f"in workspace {workspace_id}"
            )

            return scenes

        except Exception as e:
            logger.error(
                f"Failed to search ontology scenes by keyword: {str(e)}",
                exc_info=True
            )
            raise

    def get_by_workspace(self, workspace_id: UUID, page: Optional[int] = None, page_size: Optional[int] = None) -> tuple:
        """List the scenes of a workspace, optionally paginated.

        ``classes`` is eagerly loaded via ``joinedload``. Results are ordered
        by ``updated_at`` descending. Pagination is applied only when both
        ``page`` and ``page_size`` are given; otherwise all scenes are
        returned.

        Args:
            workspace_id: Workspace ID.
            page: Page number (optional, 1-based).
            page_size: Number of items per page (optional).

        Returns:
            tuple: ``(scenes, total)`` where ``total`` is the count before
            pagination.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> scenes, total = repo.get_by_workspace(workspace_id)
            >>> scenes, total = repo.get_by_workspace(workspace_id, page=1, page_size=10)
        """
        try:
            logger.debug(f"Getting ontology scenes by workspace: {workspace_id}, page={page}, page_size={page_size}")

            # Base query shared by the count and the (optionally paged) fetch
            query = self.db.query(OntologyScene).options(
                joinedload(OntologyScene.classes)
            ).filter(
                OntologyScene.workspace_id == workspace_id
            ).order_by(
                OntologyScene.updated_at.desc()
            )

            # Total is taken before offset/limit are applied
            total = query.count()

            # Apply pagination only when both parameters were provided
            if page is not None and page_size is not None:
                offset = (page - 1) * page_size
                query = query.offset(offset).limit(page_size)
                logger.debug(f"Applying pagination: offset={offset}, limit={page_size}")

            scenes = query.all()

            logger.info(
                f"Found {len(scenes)} ontology scenes (total: {total}) in workspace {workspace_id}"
            )

            return scenes, total

        except Exception as e:
            logger.error(
                f"Failed to get ontology scenes by workspace: {str(e)}",
                exc_info=True
            )
            raise

    def update(self, scene_id: UUID, update_data: dict) -> Optional[OntologyScene]:
        """Update a scene's fields.

        ``scene_name`` is changed only when present and not None.
        ``scene_description`` is assigned whenever the key is present, so an
        explicit None overwrites the stored description.

        Args:
            scene_id: Scene ID.
            update_data: Dict of fields to update.

        Returns:
            Optional[OntologyScene]: The updated scene, or None if it does
            not exist.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> scene = repo.update(
            ...     scene_id,
            ...     {"scene_name": "New name"}
            ... )
        """
        try:
            logger.info(f"Updating ontology scene: {scene_id}")

            scene = self.get_by_id(scene_id)
            if not scene:
                logger.warning(f"Ontology scene not found for update: {scene_id}")
                return None

            if "scene_name" in update_data and update_data["scene_name"] is not None:
                scene.scene_name = update_data["scene_name"]

            if "scene_description" in update_data:
                scene.scene_description = update_data["scene_description"]

            self.db.flush()

            logger.info(f"Ontology scene updated successfully: {scene_id}")

            return scene

        except Exception as e:
            logger.error(
                f"Failed to update ontology scene: {str(e)}",
                exc_info=True
            )
            raise

    def delete(self, scene_id: UUID) -> bool:
        """Delete a scene.

        NOTE(review): the original documentation states that the scene's
        classes are removed through database cascade configuration
        (ondelete="CASCADE"); that configuration is not visible here and
        should be confirmed on the models.

        Args:
            scene_id: Scene ID.

        Returns:
            bool: True when the scene was deleted, False when it does not
            exist.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> success = repo.delete(scene_id)
        """
        try:
            logger.info(f"Deleting ontology scene: {scene_id}")

            scene = self.get_by_id(scene_id)
            if not scene:
                logger.warning(f"Ontology scene not found for delete: {scene_id}")
                return False

            self.db.delete(scene)
            self.db.flush()

            logger.info(
                f"Ontology scene deleted successfully (cascade): {scene_id}"
            )

            return True

        except Exception as e:
            logger.error(
                f"Failed to delete ontology scene: {str(e)}",
                exc_info=True
            )
            raise

    def check_ownership(self, scene_id: UUID, workspace_id: UUID) -> bool:
        """Tell whether a scene belongs to a workspace.

        Args:
            scene_id: Scene ID.
            workspace_id: Workspace ID.

        Returns:
            bool: True if a scene with this ID exists in the workspace,
            otherwise False.

        Raises:
            Exception: Any database error is logged and re-raised.

        Examples:
            >>> repo = OntologySceneRepository(db)
            >>> is_owner = repo.check_ownership(scene_id, workspace_id)
        """
        try:
            logger.debug(
                f"Checking scene ownership - "
                f"scene_id={scene_id}, workspace_id={workspace_id}"
            )

            count = self.db.query(OntologyScene).filter(
                OntologyScene.scene_id == scene_id,
                OntologyScene.workspace_id == workspace_id
            ).count()

            is_owner = count > 0

            logger.debug(
                f"Scene ownership check result: {is_owner} - "
                f"scene_id={scene_id}"
            )

            return is_owner

        except Exception as e:
            logger.error(
                f"Failed to check scene ownership: {str(e)}",
                exc_info=True
            )
            raise
|
||||
461
api/app/schemas/ontology_schemas.py
Normal file
461
api/app/schemas/ontology_schemas.py
Normal file
@@ -0,0 +1,461 @@
|
||||
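"""Request and response models for the ontology extraction API.

This module defines the Pydantic models used for all API requests and
responses of the ontology extraction system.

Classes:
    ExtractionRequest: Ontology extraction request model
    ExtractionResponse: Ontology extraction response model
    ExportRequest: OWL file export request model
    ExportResponse: OWL file export response model
    OntologyResultResponse: Stored extraction result model (millisecond timestamps)
    SceneCreateRequest: Scene creation request model
    SceneUpdateRequest: Scene update request model
    SceneResponse: Scene response model
    PaginationInfo: Pagination info model
    SceneListResponse: Scene list response model
    ClassItem: Single class item model
    ClassCreateRequest: Class creation request model
    ClassUpdateRequest: Class update request model
    ClassResponse: Class response model
    ClassBatchCreateResponse: Batch class creation response model
    ClassListResponse: Class list response model
"""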
|
||||
|
||||
from typing import List, Optional
|
||||
import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field, field_serializer, ConfigDict
|
||||
|
||||
from app.core.memory.models.ontology_models import OntologyClass
|
||||
|
||||
|
||||
class ExtractionRequest(BaseModel):
    """Request body for the ontology extraction endpoint.

    Attributes:
        scenario: Scenario description text; must be at least 1 character.
        domain: Optional domain hint (e.g. Healthcare, Education).
        llm_id: ID of the LLM model; required.
        scene_id: Scene ID; required. Per the field description, extracted
            classes are saved to this scene.

    Examples:
        >>> request = ExtractionRequest(
        ...     scenario="A hospital manages patient records...",
        ...     domain="Healthcare",
        ...     llm_id="550e8400-e29b-41d4-a716-446655440000",
        ...     scene_id="660e8400-e29b-41d4-a716-446655440000"
        ... )
    """

    scenario: str = Field(min_length=1, description="场景描述文本")
    domain: Optional[str] = Field(default=None, description="可选的领域提示")
    llm_id: str = Field(description="LLM模型ID")
    scene_id: UUID = Field(description="场景ID,用于将提取的类保存到指定场景")
|
||||
|
||||
|
||||
class ExtractionResponse(BaseModel):
    """Response body for the ontology extraction endpoint.

    Attributes:
        classes: Extracted ontology classes; defaults to an empty list.
        domain: Identified domain.
        extracted_count: Number of extracted classes.

    Examples:
        >>> response = ExtractionResponse(
        ...     classes=[...],
        ...     domain="Healthcare",
        ...     extracted_count=7
        ... )
    """

    classes: List[OntologyClass] = Field(
        default_factory=list,
        description="提取的本体类列表",
    )
    domain: str = Field(description="识别的领域")
    extracted_count: int = Field(description="提取的类数量")
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
    """Request body for the OWL file export endpoint.

    Attributes:
        classes: Ontology classes to export; at least one is required.
        format: Export format. The description lists rdfxml, turtle,
            ntriples and json; the value is not validated by this model.
        include_metadata: Whether to include full OWL metadata
            (namespaces etc.); defaults to True.

    Examples:
        >>> request = ExportRequest(
        ...     classes=[...],
        ...     format="rdfxml",
        ...     include_metadata=True
        ... )
    """

    classes: List[OntologyClass] = Field(
        min_length=1,
        description="要导出的本体类列表",
    )
    format: str = Field(
        default="rdfxml",
        description="导出格式: rdfxml, turtle, ntriples, json",
    )
    include_metadata: bool = Field(
        default=True,
        description="是否包含完整的OWL元数据",
    )
|
||||
|
||||
|
||||
class ExportResponse(BaseModel):
    """Response body for the OWL file export endpoint.

    Attributes:
        owl_content: Content of the OWL file.
        format: Export format.
        classes_count: Number of exported classes.

    Examples:
        >>> response = ExportResponse(
        ...     owl_content="<?xml version='1.0'?>...",
        ...     format="rdfxml",
        ...     classes_count=7
        ... )
    """

    owl_content: str = Field(description="OWL文件内容")
    format: str = Field(description="导出格式")
    classes_count: int = Field(description="导出的类数量")
|
||||
|
||||
|
||||
class OntologyResultResponse(BaseModel):
    """Response model for a stored ontology extraction result.

    Can be built from attribute-bearing objects (``from_attributes=True``).
    When dumped to JSON, ``created_at`` is emitted as a millisecond
    timestamp.

    Attributes:
        id: Result ID (UUID).
        scenario: Scenario description text.
        domain: Domain (optional).
        classes_json: Extracted ontology class data as a dict.
        extracted_count: Number of extracted classes.
        user_id: User ID (optional).
        created_at: Creation time.

    Examples:
        >>> response = OntologyResultResponse(
        ...     id=uuid.uuid4(),
        ...     scenario="A hospital manages patient records...",
        ...     domain="Healthcare",
        ...     classes_json={"classes": [...]},
        ...     extracted_count=7,
        ...     user_id=123,
        ...     created_at=datetime.now()
        ... )
    """

    id: UUID = Field(..., description="结果ID")
    scenario: str = Field(..., description="场景描述文本")
    domain: Optional[str] = Field(None, description="领域")
    classes_json: dict = Field(..., description="提取的本体类数据(JSON格式)")
    extracted_count: int = Field(..., description="提取的类数量")
    user_id: Optional[int] = Field(None, description="用户ID")
    created_at: datetime.datetime = Field(..., description="创建时间")

    @field_serializer("created_at", when_used="json")
    def _serialize_created_at(self, dt: datetime.datetime):
        """Serialize the creation time as a millisecond timestamp."""
        return int(dt.timestamp() * 1000) if dt else None

    # Pydantic v2 configuration, matching SceneResponse and ClassResponse;
    # the class-based ``Config`` form is deprecated in v2.
    model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
|
||||
# ==================== 本体场景相关 Schema ====================
|
||||
|
||||
class SceneCreateRequest(BaseModel):
    """Request model for creating an ontology scene.

    Attributes:
        scene_name: Scene name; required, 1-200 characters.
        scene_description: Scene description; optional.

    Examples:
        >>> request = SceneCreateRequest(
        ...     scene_name="Healthcare",
        ...     scene_description="Ontology modelling for the medical domain"
        ... )
    """

    scene_name: str = Field(description="场景名称", min_length=1, max_length=200)
    scene_description: Optional[str] = Field(default=None, description="场景描述")
|
||||
|
||||
|
||||
class SceneUpdateRequest(BaseModel):
    """Request model for updating an existing ontology scene.

    Attributes:
        scene_name: Scene name; optional, 1-200 characters when given.
        scene_description: Scene description; optional.

    Examples:
        >>> request = SceneUpdateRequest(
        ...     scene_name="Updated scene name",
        ...     scene_description="Updated description"
        ... )
    """

    scene_name: Optional[str] = Field(
        default=None,
        description="场景名称",
        min_length=1,
        max_length=200,
    )
    scene_description: Optional[str] = Field(default=None, description="场景描述")
|
||||
|
||||
|
||||
class SceneResponse(BaseModel):
    """Response model describing an ontology scene.

    Can be built from attribute-bearing objects (``from_attributes=True``).
    When dumped to JSON, ``created_at`` and ``updated_at`` are emitted as
    millisecond timestamps.

    Attributes:
        scene_id: Scene ID.
        scene_name: Scene name.
        scene_description: Scene description (optional).
        type_num: Number of types; required.
        entity_type: Entity type list (optional); per the field description,
            up to 3 class names.
        workspace_id: ID of the owning workspace.
        created_at: Creation time.
        updated_at: Last update time.
        classes_count: Number of types; defaults to 0.
            NOTE(review): described identically to ``type_num``; confirm how
            the two differ.

    Examples:
        >>> response = SceneResponse(
        ...     scene_id=uuid.uuid4(),
        ...     scene_name="Healthcare",
        ...     scene_description="Ontology modelling for the medical domain",
        ...     type_num=0,
        ...     workspace_id=uuid.uuid4(),
        ...     created_at=datetime.now(),
        ...     updated_at=datetime.now(),
        ...     classes_count=5
        ... )
    """

    model_config = ConfigDict(from_attributes=True)

    scene_id: UUID = Field(description="场景ID")
    scene_name: str = Field(description="场景名称")
    scene_description: Optional[str] = Field(default=None, description="场景描述")
    type_num: int = Field(description="类型数量")
    entity_type: Optional[List[str]] = Field(
        default=None,
        description="实体类型列表(最多3个class_name)",
    )
    workspace_id: UUID = Field(description="所属工作空间ID")
    created_at: datetime.datetime = Field(description="创建时间(毫秒时间戳)")
    updated_at: datetime.datetime = Field(description="更新时间(毫秒时间戳)")
    classes_count: int = Field(default=0, description="类型数量")

    @field_serializer("created_at", "updated_at", when_used="json")
    def _to_epoch_millis(self, dt: datetime.datetime):
        """Serialize a timestamp field as milliseconds since the epoch."""
        if not dt:
            return None
        return int(dt.timestamp() * 1000)
|
||||
|
||||
|
||||
class PaginationInfo(BaseModel):
    """Pagination metadata attached to list responses.

    Attributes:
        page: Current page number.
        pagesize: Number of items per page.
        total: Total number of items.
        hasnext: Whether a next page exists.
    """

    page: int = Field(description="当前页码")
    pagesize: int = Field(description="每页数量")
    total: int = Field(description="总数量")
    hasnext: bool = Field(description="是否有下一页")
|
||||
|
||||
|
||||
class SceneListResponse(BaseModel):
    """Response model for a list of ontology scenes, with optional paging.

    Attributes:
        items: Scene list.
        page: Pagination info (optional; defaults to None).

    Examples:
        >>> # Without pagination
        >>> response = SceneListResponse(
        ...     items=[scene1, scene2]
        ... )
        >>> # With pagination
        >>> response = SceneListResponse(
        ...     items=[scene1, scene2, ...],
        ...     page=PaginationInfo(page=1, pagesize=100, total=150, hasnext=True)
        ... )
    """

    items: List[SceneResponse] = Field(description="场景列表")
    page: Optional[PaginationInfo] = Field(default=None, description="分页信息")
|
||||
|
||||
|
||||
# ==================== 本体类型相关 Schema ====================
|
||||
|
||||
class ClassItem(BaseModel):
    """A single ontology class entry inside a create request.

    Attributes:
        class_name: Class name; required, 1-200 characters.
        class_description: Class description; optional.

    Examples:
        >>> item = ClassItem(
        ...     class_name="Patient",
        ...     class_description="Hospital patient information"
        ... )
    """

    class_name: str = Field(description="类型名称", min_length=1, max_length=200)
    class_description: Optional[str] = Field(default=None, description="类型描述")
|
||||
|
||||
|
||||
class ClassCreateRequest(BaseModel):
    """Request model for creating ontology classes (always list-based).

    The number of list elements decides the mode:
    - one element: single creation
    - several elements: batch creation

    Attributes:
        scene_id: ID of the owning scene; required.
        classes: Classes to create; required, at least 1 element.

    Examples:
        # Single creation (one element)
        >>> request = ClassCreateRequest(
        ...     scene_id=uuid.uuid4(),
        ...     classes=[
        ...         ClassItem(class_name="Patient", class_description="Hospital patient information")
        ...     ]
        ... )

        # Batch creation (several elements)
        >>> request = ClassCreateRequest(
        ...     scene_id=uuid.uuid4(),
        ...     classes=[
        ...         ClassItem(class_name="Patient", class_description="Hospital patient information"),
        ...         ClassItem(class_name="Doctor", class_description="Hospital doctor information"),
        ...         ClassItem(class_name="Medicine", class_description="Hospital medicine information")
        ...     ]
        ... )
    """

    scene_id: UUID = Field(description="所属场景ID")
    classes: List[ClassItem] = Field(
        description="类型列表,至少包含 1 个元素",
        min_length=1,
    )
|
||||
|
||||
|
||||
class ClassUpdateRequest(BaseModel):
    """Request model for updating an existing ontology class.

    Attributes:
        class_name: Class name; optional, 1-200 characters when given.
        class_description: Class description; optional.

    Examples:
        >>> request = ClassUpdateRequest(
        ...     class_name="Updated class name",
        ...     class_description="Updated description"
        ... )
    """

    class_name: Optional[str] = Field(
        default=None,
        description="类型名称",
        min_length=1,
        max_length=200,
    )
    class_description: Optional[str] = Field(default=None, description="类型描述")
|
||||
|
||||
|
||||
class ClassResponse(BaseModel):
    """Response model describing an ontology class.

    Can be built from attribute-bearing objects (``from_attributes=True``).
    When dumped to JSON, ``created_at`` and ``updated_at`` are emitted as
    millisecond timestamps.

    Attributes:
        class_id: Class ID.
        class_name: Class name.
        class_description: Class description (optional).
        scene_id: ID of the owning scene.
        created_at: Creation time.
        updated_at: Last update time.

    Examples:
        >>> response = ClassResponse(
        ...     class_id=uuid.uuid4(),
        ...     class_name="Patient",
        ...     class_description="Hospital patient information",
        ...     scene_id=uuid.uuid4(),
        ...     created_at=datetime.now(),
        ...     updated_at=datetime.now()
        ... )
    """

    model_config = ConfigDict(from_attributes=True)

    class_id: UUID = Field(description="类型ID")
    class_name: str = Field(description="类型名称")
    class_description: Optional[str] = Field(default=None, description="类型描述")
    scene_id: UUID = Field(description="所属场景ID")
    created_at: datetime.datetime = Field(description="创建时间(毫秒时间戳)")
    updated_at: datetime.datetime = Field(description="更新时间(毫秒时间戳)")

    @field_serializer("created_at", "updated_at", when_used="json")
    def _to_epoch_millis(self, dt: datetime.datetime):
        """Serialize a timestamp field as milliseconds since the epoch."""
        if not dt:
            return None
        return int(dt.timestamp() * 1000)
|
||||
|
||||
|
||||
class ClassBatchCreateResponse(BaseModel):
    """Response model summarising a batch class creation.

    Attributes:
        total: Number of classes whose creation was attempted.
        success_count: Number of classes created successfully.
        failed_count: Number of failures; defaults to 0.
        items: Classes that were created successfully.
        errors: Error messages for the failures (optional).

    Examples:
        >>> response = ClassBatchCreateResponse(
        ...     total=3,
        ...     success_count=2,
        ...     failed_count=1,
        ...     items=[class1, class2],
        ...     errors=["Failed to create class 'Medicine': name already exists"]
        ... )
    """

    total: int = Field(description="总共尝试创建的数量")
    success_count: int = Field(description="成功创建的数量")
    failed_count: int = Field(default=0, description="失败的数量")
    items: List[ClassResponse] = Field(description="成功创建的类型列表")
    errors: Optional[List[str]] = Field(default=None, description="失败的错误信息列表")
|
||||
|
||||
|
||||
class ClassListResponse(BaseModel):
    """Response model for the list of ontology classes in a scene.

    Attributes:
        total: Total number of classes.
        scene_id: ID of the owning scene.
        scene_name: Scene name.
        scene_description: Scene description (optional).
        items: Class list.

    Examples:
        >>> response = ClassListResponse(
        ...     total=3,
        ...     scene_id=uuid.uuid4(),
        ...     scene_name="Healthcare",
        ...     scene_description="Ontology modelling for the medical domain",
        ...     items=[class1, class2, class3]
        ... )
    """

    total: int = Field(description="总数量")
    scene_id: UUID = Field(description="所属场景ID")
    scene_name: str = Field(description="场景名称")
    scene_description: Optional[str] = Field(default=None, description="场景描述")
    items: List[ClassResponse] = Field(description="类型列表")
|
||||
1162
api/app/services/ontology_service.py
Normal file
1162
api/app/services/ontology_service.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -140,6 +140,7 @@ dependencies = [
|
||||
"oss2>=2.19.1",
|
||||
"flower>=2.0.1",
|
||||
"aiofiles>=23.0.0",
|
||||
"owlready2>=0.46",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
|
||||
Reference in New Issue
Block a user