Feature/ontology class clean (#249)

* [add] Complete ontology engineering feature implementation

* [add] Add ontology feature integration and validation utilities

* [add] Add OWL validator and validation utilities

* [fix] Add missing render_ontology_extraction_prompt function

* [fix]Add dependencies, fix functionality
This commit is contained in:
乐力齐
2026-01-30 15:16:39 +08:00
committed by GitHub
parent e7370489e8
commit 696b0475a8
20 changed files with 5825 additions and 0 deletions

View File

@@ -45,6 +45,7 @@ from . import (
home_page_controller,
memory_perceptual_controller,
memory_working_controller,
ontology_controller,
)
# 创建管理端 API 路由器
@@ -90,5 +91,6 @@ manager_router.include_router(implicit_memory_controller.router)
manager_router.include_router(memory_perceptual_controller.router)
manager_router.include_router(memory_working_controller.router)
manager_router.include_router(file_storage_controller.router)
manager_router.include_router(ontology_controller.router)
__all__ = ["manager_router"]

View File

@@ -0,0 +1,964 @@
"""本体提取API控制器
本模块提供本体提取系统的RESTful API端点。
Endpoints:
POST /api/memory/ontology/extract - 提取本体类
POST /api/memory/ontology/export - 导出OWL文件
POST /api/memory/ontology/scene - 创建本体场景
PUT /api/memory/ontology/scene/{scene_id} - 更新本体场景
DELETE /api/memory/ontology/scene/{scene_id} - 删除本体场景
GET /api/memory/ontology/scene/{scene_id} - 获取单个场景
GET /api/memory/ontology/scenes - 获取场景列表
POST /api/memory/ontology/class - 创建本体类型
PUT /api/memory/ontology/class/{class_id} - 更新本体类型
DELETE /api/memory/ontology/class/{class_id} - 删除本体类型
GET /api/memory/ontology/class/{class_id} - 获取单个类型
GET /api/memory/ontology/classes - 获取类型列表
"""
import logging
import tempfile
from typing import Dict, Optional
from fastapi import APIRouter, Depends, HTTPException, Header
from sqlalchemy.orm import Session
from app.core.error_codes import BizCode
from app.core.logging_config import get_api_logger
from app.core.response_utils import fail, success
from app.db import get_db
from app.dependencies import get_current_user
from app.models.user_model import User
from app.services.memory_base_service import Translation_English
from app.core.memory.models.ontology_models import OntologyClass
from typing import List
from app.schemas.ontology_schemas import (
ExportRequest,
ExportResponse,
ExtractionRequest,
ExtractionResponse,
SceneCreateRequest,
SceneUpdateRequest,
SceneResponse,
SceneListResponse,
ClassCreateRequest,
ClassUpdateRequest,
ClassResponse,
ClassListResponse,
)
from app.schemas.response_schema import ApiResponse
from app.services.ontology_service import OntologyService
from app.core.memory.llm_tools.openai_client import OpenAIClient
from app.core.memory.utils.validation.owl_validator import OWLValidator
from app.services.model_service import ModelConfigService
api_logger = get_api_logger()
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/memory/ontology",
tags=["Ontology"],
)
async def translate_ontology_classes(
classes: List[OntologyClass],
model_id: str
) -> List[OntologyClass]:
"""翻译本体类列表
将本体类的中文字段翻译为英文,包括:
- name_chinese: 中文名称
- description: 描述
- examples: 示例列表
Args:
classes: 本体类列表
model_id: LLM模型ID用于翻译
Returns:
List[OntologyClass]: 翻译后的本体类列表
"""
translated_classes = []
for ontology_class in classes:
# 创建类的副本,避免修改原对象
translated_class = ontology_class.model_copy(deep=True)
# 翻译 name_chinese 字段
if translated_class.name_chinese:
try:
translated_class.name_chinese = await Translation_English(
model_id,
translated_class.name_chinese
)
except Exception as e:
logger.warning(f"Failed to translate name_chinese: {e}")
# 保留原文
# 翻译 description 字段
if translated_class.description:
try:
translated_class.description = await Translation_English(
model_id,
translated_class.description
)
except Exception as e:
logger.warning(f"Failed to translate description: {e}")
# 保留原文
# 翻译 examples 列表
if translated_class.examples:
translated_examples = []
for example in translated_class.examples:
try:
translated_example = await Translation_English(
model_id,
example
)
translated_examples.append(translated_example)
except Exception as e:
logger.warning(f"Failed to translate example: {e}")
translated_examples.append(example) # 保留原文
translated_class.examples = translated_examples
translated_classes.append(translated_class)
return translated_classes
def _get_ontology_service(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
llm_id: str = None
) -> OntologyService:
"""获取OntologyService实例的依赖注入函数
指定的llm_id获取LLM配置,创建OpenAIClient和OntologyService实例。
Args:
db: 数据库会话
current_user: 当前用户
llm_id: 可选的LLM模型ID,如果提供则使用指定模型,否则使用工作空间默认模型
Returns:
OntologyService: 本体提取服务实例
Raises:
HTTPException: 如果无法获取LLM配置
"""
try:
import uuid
# 必须提供llm_id
if not llm_id:
logger.error(f"llm_id is required but not provided - user: {current_user.id}")
raise HTTPException(
status_code=400,
detail="必须提供llm_id参数"
)
logger.info(f"Using specified LLM model: {llm_id}")
# 验证llm_id格式
try:
model_id = uuid.UUID(llm_id)
except ValueError:
logger.error(f"Invalid llm_id format: {llm_id}")
raise HTTPException(
status_code=400,
detail="无效的LLM模型ID格式"
)
# 获取指定的模型配置
try:
model_config = ModelConfigService.get_model_by_id(db=db, model_id=model_id)
except Exception as e:
logger.error(f"Model {llm_id} not found: {str(e)}")
raise HTTPException(
status_code=400,
detail=f"找不到指定的LLM模型: {llm_id}"
)
# 检查是否为组合模型
if hasattr(model_config, 'is_composite') and model_config.is_composite:
logger.error(f"Model {llm_id} is a composite model, which is not supported for ontology extraction")
raise HTTPException(
status_code=400,
detail="本体提取不支持使用组合模型,请选择单个模型"
)
# 验证模型配置了API密钥
if not model_config.api_keys:
logger.error(f"Model {llm_id} has no API key configuration")
raise HTTPException(
status_code=400,
detail="指定的LLM模型没有配置API密钥"
)
api_key_config = model_config.api_keys[0]
logger.info(
f"Using specified model - user: {current_user.id}, "
f"model_id: {llm_id}, model_name: {api_key_config.model_name}"
)
# 创建模型配置对象
from app.core.models.base import RedBearModelConfig
llm_model_config = RedBearModelConfig(
model_name=api_key_config.model_name,
provider=model_config.provider if hasattr(model_config, 'provider') else "openai",
api_key=api_key_config.api_key,
base_url=api_key_config.api_base,
max_retries=3,
timeout=60.0
)
# 创建OpenAI客户端
llm_client = OpenAIClient(model_config=llm_model_config)
# 创建OntologyService
service = OntologyService(llm_client=llm_client, db=db)
logger.debug(
f"OntologyService created successfully - "
f"user: {current_user.id}, model: {api_key_config.model_name}"
)
return service
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to create OntologyService: {str(e)}", exc_info=True)
raise HTTPException(
status_code=500,
detail=f"创建本体提取服务失败: {str(e)}"
)
@router.post("/extract", response_model=ApiResponse)
async def extract_ontology(
request: ExtractionRequest,
language_type: str = Header(default="zh", alias="X-Language-Type"),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""提取本体类
从场景描述中提取符合OWL规范的本体类。
提取结果仅返回给前端,不会自动保存到数据库。
前端可以从返回结果中选择需要的类型,然后调用 /class 接口创建类型。
支持中英文切换,通过 X-Language-Type Header 指定语言。
Args:
request: 提取请求,包含scenario、domain、llm_id和scene_id
language_type: 语言类型,'zh'(中文)或 'en'(英文),默认 'zh'
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含提取结果的响应
Response format:
{
"code": 200,
"msg": "本体提取成功",
"data": {
"classes": [
{
"id": "147d9db50b524a9e909e01a753d3acdd",
"name": "Patient",
"name_chinese": "患者",
"description": "在医疗机构中接受诊疗、护理或健康管理的个体",
"examples": ["糖尿病患者", "术后康复患者", "门诊初诊患者"],
"parent_class": null,
"entity_type": "Person",
"domain": "Healthcare"
},
...
],
"domain": "Healthcare",
"extracted_count": 7
}
}
"""
api_logger.info(
f"Ontology extraction requested by user {current_user.id}, "
f"scenario_length={len(request.scenario)}, "
f"domain={request.domain}, "
f"llm_id={request.llm_id}, "
f"scene_id={request.scene_id}, "
f"language_type={language_type}"
)
try:
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建OntologyService实例,传入llm_id
service = _get_ontology_service(
db=db,
current_user=current_user,
llm_id=request.llm_id
)
# 调用服务层执行提取传入scene_id和workspace_id
result = await service.extract_ontology(
scenario=request.scenario,
domain=request.domain,
scene_id=request.scene_id,
workspace_id=workspace_id
)
# ===== 新增:翻译逻辑 =====
# 如果需要英文,则翻译数据
if language_type != 'zh':
api_logger.info(f"Translating extraction result to English")
# 翻译 classes 列表
result.classes = await translate_ontology_classes(
result.classes,
request.llm_id
)
# 翻译 domain 字段
if result.domain:
try:
result.domain = await Translation_English(
request.llm_id,
result.domain
)
except Exception as e:
logger.warning(f"Failed to translate domain: {e}")
# 保留原文
# ===== 翻译逻辑结束 =====
# 构建响应
response = ExtractionResponse(
classes=result.classes,
domain=result.domain,
extracted_count=len(result.classes)
)
api_logger.info(
f"Ontology extraction completed, extracted {len(result.classes)} classes, "
f"saved to scene {request.scene_id}, language={language_type}"
)
return success(data=response.model_dump(), msg="本体提取成功")
except ValueError as e:
# 验证错误 (400)
api_logger.warning(f"Validation error in extraction: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
# 运行时错误 (500)
api_logger.error(f"Runtime error in extraction: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "本体提取失败", str(e))
except Exception as e:
# 未知错误 (500)
api_logger.error(f"Unexpected error in extraction: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "本体提取失败", str(e))
@router.post("/export", response_model=ApiResponse)
async def export_owl(
request: ExportRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""导出OWL文件
将提取的本体类导出为OWL文件,支持多种格式。
导出操作不需要LLM,只使用OWL验证器和Owlready2库。
Args:
request: 导出请求,包含classes、format和include_metadata
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含OWL文件内容的响应
Supported formats:
- rdfxml: 标准OWL RDF/XML格式(完整)
- turtle: Turtle格式(可读性好)
- ntriples: N-Triples格式(简单)
- json: JSON格式(简化,只包含类信息)
Response format:
{
"code": 200,
"msg": "OWL文件导出成功",
"data": {
"owl_content": "...",
"format": "rdfxml",
"classes_count": 7
}
}
"""
api_logger.info(
f"OWL export requested by user {current_user.id}, "
f"classes_count={len(request.classes)}, "
f"format={request.format}, "
f"include_metadata={request.include_metadata}"
)
try:
# 验证格式
valid_formats = ["rdfxml", "turtle", "ntriples", "json"]
if request.format not in valid_formats:
api_logger.warning(f"Invalid export format: {request.format}")
return fail(
BizCode.BAD_REQUEST,
"不支持的导出格式",
f"format必须是以下之一: {', '.join(valid_formats)}"
)
# JSON格式直接导出,不需要OWL验证
if request.format == "json":
owl_validator = OWLValidator()
owl_content = owl_validator.export_to_owl(
world=None,
format="json",
classes=request.classes
)
response = ExportResponse(
owl_content=owl_content,
format=request.format,
classes_count=len(request.classes)
)
api_logger.info(
f"JSON export completed, content_length={len(owl_content)}"
)
return success(data=response.model_dump(), msg="OWL文件导出成功")
# 创建临时文件路径
with tempfile.NamedTemporaryFile(
mode='w',
suffix='.owl',
delete=False
) as tmp_file:
output_path = tmp_file.name
# 导出操作不需要LLM,直接使用OWL验证器
owl_validator = OWLValidator()
# 验证本体类
logger.debug("Validating ontology classes")
is_valid, errors, world = owl_validator.validate_ontology_classes(
classes=request.classes,
)
if not is_valid:
logger.warning(
f"OWL validation found {len(errors)} issues during export: {errors}"
)
# 继续导出,但记录警告
if not world:
error_msg = "Failed to create OWL world for export"
logger.error(error_msg)
return fail(BizCode.INTERNAL_ERROR, "创建OWL世界失败", error_msg)
# 导出OWL文件
logger.info(f"Exporting to {request.format} format")
owl_content = owl_validator.export_to_owl(
world=world,
output_path=output_path,
format=request.format,
classes=request.classes
)
# 构建响应
response = ExportResponse(
owl_content=owl_content,
format=request.format,
classes_count=len(request.classes)
)
api_logger.info(
f"OWL export completed, format={request.format}, "
f"content_length={len(owl_content)}"
)
return success(data=response.model_dump(), msg="OWL文件导出成功")
except ValueError as e:
# 验证错误 (400)
api_logger.warning(f"Validation error in export: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
# 运行时错误 (500)
api_logger.error(f"Runtime error in export: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "OWL文件导出失败", str(e))
except Exception as e:
# 未知错误 (500)
api_logger.error(f"Unexpected error in export: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "OWL文件导出失败", str(e))
# ==================== 本体场景管理接口 ====================
@router.post("/scene", response_model=ApiResponse)
async def create_scene(
request: SceneCreateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""创建本体场景
在当前工作空间下创建新的本体场景。
Args:
request: 场景创建请求
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含创建的场景信息
"""
api_logger.info(
f"Scene creation requested by user {current_user.id}, "
f"name={request.scene_name}"
)
try:
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建OntologyService实例不需要LLM
from app.core.memory.llm_tools.openai_client import OpenAIClient
from app.core.models.base import RedBearModelConfig
# 创建一个空的LLM配置场景管理不需要LLM
dummy_config = RedBearModelConfig(
model_name="dummy",
provider="openai",
api_key="dummy",
base_url="https://api.openai.com/v1"
)
llm_client = OpenAIClient(model_config=dummy_config)
service = OntologyService(llm_client=llm_client, db=db)
# 调用服务层创建场景
scene = service.create_scene(
scene_name=request.scene_name,
scene_description=request.scene_description,
workspace_id=workspace_id
)
# 构建响应
# 动态计算 type_num
type_num = len(scene.classes) if scene.classes else 0
response = SceneResponse(
scene_id=scene.scene_id,
scene_name=scene.scene_name,
scene_description=scene.scene_description,
type_num=type_num,
workspace_id=scene.workspace_id,
created_at=scene.created_at,
updated_at=scene.updated_at,
classes_count=type_num
)
api_logger.info(f"Scene created successfully: {scene.scene_id}")
return success(data=response.model_dump(), msg="场景创建成功")
except ValueError as e:
api_logger.warning(f"Validation error in scene creation: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in scene creation: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "场景创建失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in scene creation: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "场景创建失败", str(e))
@router.put("/scene/{scene_id}", response_model=ApiResponse)
async def update_scene(
scene_id: str,
request: SceneUpdateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""更新本体场景
更新指定场景的信息,只能更新当前工作空间下的场景。
Args:
scene_id: 场景ID
request: 场景更新请求
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含更新后的场景信息
"""
api_logger.info(
f"Scene update requested by user {current_user.id}, "
f"scene_id={scene_id}"
)
try:
from uuid import UUID
# 验证UUID格式
try:
scene_uuid = UUID(scene_id)
except ValueError:
api_logger.warning(f"Invalid scene_id format: {scene_id}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式")
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建OntologyService实例
from app.core.memory.llm_tools.openai_client import OpenAIClient
from app.core.models.base import RedBearModelConfig
dummy_config = RedBearModelConfig(
model_name="dummy",
provider="openai",
api_key="dummy",
base_url="https://api.openai.com/v1"
)
llm_client = OpenAIClient(model_config=dummy_config)
service = OntologyService(llm_client=llm_client, db=db)
# 调用服务层更新场景
scene = service.update_scene(
scene_id=scene_uuid,
scene_name=request.scene_name,
scene_description=request.scene_description,
workspace_id=workspace_id
)
# 构建响应
# 动态计算 type_num
type_num = len(scene.classes) if scene.classes else 0
response = SceneResponse(
scene_id=scene.scene_id,
scene_name=scene.scene_name,
scene_description=scene.scene_description,
type_num=type_num,
workspace_id=scene.workspace_id,
created_at=scene.created_at,
updated_at=scene.updated_at,
classes_count=type_num
)
api_logger.info(f"Scene updated successfully: {scene_id}")
return success(data=response.model_dump(), msg="场景更新成功")
except ValueError as e:
api_logger.warning(f"Validation error in scene update: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in scene update: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "场景更新失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in scene update: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "场景更新失败", str(e))
@router.delete("/scene/{scene_id}", response_model=ApiResponse)
async def delete_scene(
scene_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""删除本体场景
删除指定场景及其所有关联类型,只能删除当前工作空间下的场景。
Args:
scene_id: 场景ID
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 删除结果
"""
api_logger.info(
f"Scene deletion requested by user {current_user.id}, "
f"scene_id={scene_id}"
)
try:
from uuid import UUID
# 验证UUID格式
try:
scene_uuid = UUID(scene_id)
except ValueError:
api_logger.warning(f"Invalid scene_id format: {scene_id}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式")
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建OntologyService实例
from app.core.memory.llm_tools.openai_client import OpenAIClient
from app.core.models.base import RedBearModelConfig
dummy_config = RedBearModelConfig(
model_name="dummy",
provider="openai",
api_key="dummy",
base_url="https://api.openai.com/v1"
)
llm_client = OpenAIClient(model_config=dummy_config)
service = OntologyService(llm_client=llm_client, db=db)
# 调用服务层删除场景
success_flag = service.delete_scene(
scene_id=scene_uuid,
workspace_id=workspace_id
)
api_logger.info(f"Scene deleted successfully: {scene_id}")
return success(data={"deleted": success_flag}, msg="场景删除成功")
except ValueError as e:
api_logger.warning(f"Validation error in scene deletion: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in scene deletion: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "场景删除失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in scene deletion: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "场景删除失败", str(e))
@router.get("/scenes", response_model=ApiResponse)
async def get_scenes(
workspace_id: Optional[str] = None,
scene_name: Optional[str] = None,
page: Optional[int] = None,
pagesize: Optional[int] = None,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""获取场景列表(支持模糊搜索和全量查询,全量查询支持分页)
根据是否提供 scene_name 参数,执行不同的查询:
- 提供 scene_name进行模糊搜索返回匹配的场景列表支持分页
- 不提供 scene_name返回工作空间下的所有场景支持分页
支持中文和英文的模糊匹配,不区分大小写。
Args:
workspace_id: 工作空间ID可选默认当前用户工作空间
scene_name: 场景名称关键词(可选,支持模糊匹配)
page: 页码可选从1开始
pagesize: 每页数量(可选)
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含场景列表和分页信息
Examples:
- 模糊搜索不分页GET /scenes?workspace_id=xxx&scene_name=医疗
输入 "医疗" 可以匹配到 "医疗场景""智慧医疗""医疗管理系统"
- 模糊搜索分页GET /scenes?workspace_id=xxx&scene_name=医疗&page=1&pagesize=10
返回匹配 "医疗" 的第1页每页10条数据
- 全量查询不分页GET /scenes?workspace_id=xxx
返回工作空间下的所有场景
- 全量查询分页GET /scenes?workspace_id=xxx&page=1&pagesize=10
返回第1页每页10条数据
Notes:
- 分页参数 page 和 pagesize 必须同时提供
- page 从1开始pagesize 必须大于0
- 返回格式:{"items": [...], "page": {"page": 1, "pagesize": 10, "total": 100, "hasnext": true}}
- 不分页时page 字段为 null
"""
from app.controllers.ontology_secondary_routes import scenes_handler
return await scenes_handler(workspace_id, scene_name, page, pagesize, db, current_user)
# ==================== 本体类型管理接口 ====================
@router.post("/class", response_model=ApiResponse)
async def create_class(
request: ClassCreateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""创建本体类型
在指定场景下创建新的本体类型。
Args:
request: 类型创建请求
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含创建的类型信息
"""
from app.controllers.ontology_secondary_routes import create_class_handler
return await create_class_handler(request, db, current_user)
@router.put("/class/{class_id}", response_model=ApiResponse)
async def update_class(
class_id: str,
request: ClassUpdateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""更新本体类型
更新指定类型的信息,只能更新当前工作空间下场景的类型。
Args:
class_id: 类型ID
request: 类型更新请求
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含更新后的类型信息
"""
from app.controllers.ontology_secondary_routes import update_class_handler
return await update_class_handler(class_id, request, db, current_user)
@router.delete("/class/{class_id}", response_model=ApiResponse)
async def delete_class(
class_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""删除本体类型
删除指定类型,只能删除当前工作空间下场景的类型。
Args:
class_id: 类型ID
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 删除结果
"""
from app.controllers.ontology_secondary_routes import delete_class_handler
return await delete_class_handler(class_id, db, current_user)
@router.get("/classes", response_model=ApiResponse)
async def get_classes(
scene_id: str,
class_name: Optional[str] = None,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""获取类型列表(支持模糊搜索和全量查询)
根据是否提供 class_name 参数,执行不同的查询:
- 提供 class_name进行模糊搜索返回匹配的类型列表
- 不提供 class_name返回场景下的所有类型
支持中文和英文的模糊匹配,不区分大小写。
返回结果包含场景的基本信息scene_name 和 scene_description
Args:
scene_id: 场景ID必填
class_name: 类型名称关键词(可选,支持模糊匹配)
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含类型列表和场景信息
Examples:
- 模糊搜索GET /classes?scene_id=xxx&class_name=患者
输入 "患者" 可以匹配到 "患者""患者信息""门诊患者"
- 全量查询GET /classes?scene_id=xxx
返回场景下的所有类型
Response Format:
{
"total": 3,
"scene_id": "xxx",
"scene_name": "医疗场景",
"scene_description": "用于医疗领域的本体建模",
"items": [...]
}
"""
from app.controllers.ontology_secondary_routes import classes_handler
return await classes_handler(scene_id, class_name, db, current_user)
@router.get("/class/{class_id}", response_model=ApiResponse)
async def get_class(
class_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""获取单个本体类型
根据类型ID获取类型的详细信息只能查询当前工作空间下场景的类型。
Args:
class_id: 类型ID
db: 数据库会话
current_user: 当前用户
Returns:
ApiResponse: 包含类型详细信息
Response Format:
{
"code": 0,
"msg": "查询成功",
"data": {
"class_id": "xxx",
"class_name": "患者",
"class_description": "在医疗机构中接受诊疗的个体",
"scene_id": "xxx",
"created_at": "2026-01-29T10:00:00",
"updated_at": "2026-01-29T10:00:00"
}
}
"""
from app.controllers.ontology_secondary_routes import get_class_handler
return await get_class_handler(class_id, db, current_user)

View File

@@ -0,0 +1,611 @@
# -*- coding: utf-8 -*-
"""本体场景和类型路由(续)
由于主Controller文件较大将剩余路由放在此文件中。
"""
from uuid import UUID
from typing import Optional
from fastapi import Depends
from sqlalchemy.orm import Session
from app.core.error_codes import BizCode
from app.core.logging_config import get_api_logger
from app.core.response_utils import fail, success
from app.db import get_db
from app.dependencies import get_current_user
from app.models.user_model import User
from app.schemas.ontology_schemas import (
SceneResponse,
SceneListResponse,
PaginationInfo,
ClassCreateRequest,
ClassUpdateRequest,
ClassResponse,
ClassListResponse,
ClassBatchCreateResponse,
)
from app.schemas.response_schema import ApiResponse
from app.services.ontology_service import OntologyService
from app.core.memory.llm_tools.openai_client import OpenAIClient
from app.core.models.base import RedBearModelConfig
api_logger = get_api_logger()
def _get_dummy_ontology_service(db: Session) -> OntologyService:
"""获取OntologyService实例不需要LLM
场景和类型管理不需要LLM创建一个dummy配置。
"""
dummy_config = RedBearModelConfig(
model_name="dummy",
provider="openai",
api_key="dummy",
base_url="https://api.openai.com/v1"
)
llm_client = OpenAIClient(model_config=dummy_config)
return OntologyService(llm_client=llm_client, db=db)
# 这些函数将被导入到主Controller中
async def scenes_handler(
workspace_id: Optional[str] = None,
scene_name: Optional[str] = None,
page: Optional[int] = None,
page_size: Optional[int] = None,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""获取场景列表(支持模糊搜索和全量查询,全量查询支持分页)
当提供 scene_name 参数时,进行模糊搜索(不分页);
当不提供 scene_name 参数时,返回所有场景(支持分页)。
Args:
workspace_id: 工作空间ID可选默认当前用户工作空间
scene_name: 场景名称关键词(可选,支持模糊匹配)
page: 页码可选从1开始仅在全量查询时有效
page_size: 每页数量(可选,仅在全量查询时有效)
db: 数据库会话
current_user: 当前用户
"""
operation = "search" if scene_name else "list"
api_logger.info(
f"Scene {operation} requested by user {current_user.id}, "
f"workspace_id={workspace_id}, keyword={scene_name}, page={page}, page_size={page_size}"
)
try:
# 确定工作空间ID
if workspace_id:
try:
ws_uuid = UUID(workspace_id)
except ValueError:
api_logger.warning(f"Invalid workspace_id format: {workspace_id}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的工作空间ID格式")
else:
ws_uuid = current_user.current_workspace_id
if not ws_uuid:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建Service
service = _get_dummy_ontology_service(db)
# 根据是否提供 scene_name 决定查询方式
if scene_name and scene_name.strip():
# 验证分页参数(模糊搜索也支持分页)
if page is not None and page < 1:
api_logger.warning(f"Invalid page number: {page}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "页码必须大于0")
if page_size is not None and page_size < 1:
api_logger.warning(f"Invalid page_size: {page_size}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "每页数量必须大于0")
# 如果只提供了page或page_size中的一个返回错误
if (page is not None and page_size is None) or (page is None and page_size is not None):
api_logger.warning(f"Incomplete pagination params: page={page}, page_size={page_size}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "分页参数page和pagesize必须同时提供")
# 模糊搜索场景(支持分页)
scenes = service.search_scenes_by_name(scene_name.strip(), ws_uuid)
total = len(scenes)
# 如果提供了分页参数,进行分页处理
if page is not None and page_size is not None:
start_idx = (page - 1) * page_size
end_idx = start_idx + page_size
scenes = scenes[start_idx:end_idx]
# 构建响应
items = []
for scene in scenes:
# 获取前3个class_name作为entity_type
entity_type = [cls.class_name for cls in scene.classes[:3]] if scene.classes else None
# 动态计算 type_num
type_num = len(scene.classes) if scene.classes else 0
items.append(SceneResponse(
scene_id=scene.scene_id,
scene_name=scene.scene_name,
scene_description=scene.scene_description,
type_num=type_num,
entity_type=entity_type,
workspace_id=scene.workspace_id,
created_at=scene.created_at,
updated_at=scene.updated_at,
classes_count=type_num
))
# 构建响应(包含分页信息)
if page is not None and page_size is not None:
# 计算是否有下一页
hasnext = (page * page_size) < total
pagination_info = PaginationInfo(
page=page,
pagesize=page_size,
total=total,
hasnext=hasnext
)
response = SceneListResponse(items=items, page=pagination_info)
else:
response = SceneListResponse(items=items)
api_logger.info(
f"Scene search completed: found {len(items)} scenes matching '{scene_name}' "
f"in workspace {ws_uuid}, total={total}"
)
else:
# 获取所有场景(支持分页)
# 验证分页参数
if page is not None and page < 1:
api_logger.warning(f"Invalid page number: {page}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "页码必须大于0")
if page_size is not None and page_size < 1:
api_logger.warning(f"Invalid page_size: {page_size}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "每页数量必须大于0")
# 如果只提供了page或page_size中的一个返回错误
if (page is not None and page_size is None) or (page is None and page_size is not None):
api_logger.warning(f"Incomplete pagination params: page={page}, page_size={page_size}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "分页参数page和pagesize必须同时提供")
scenes, total = service.list_scenes(ws_uuid, page, page_size)
# 构建响应
items = []
for scene in scenes:
# 获取前3个class_name作为entity_type
entity_type = [cls.class_name for cls in scene.classes[:3]] if scene.classes else None
# 动态计算 type_num
type_num = len(scene.classes) if scene.classes else 0
items.append(SceneResponse(
scene_id=scene.scene_id,
scene_name=scene.scene_name,
scene_description=scene.scene_description,
type_num=type_num,
entity_type=entity_type,
workspace_id=scene.workspace_id,
created_at=scene.created_at,
updated_at=scene.updated_at,
classes_count=type_num
))
# 构建响应(包含分页信息)
if page is not None and page_size is not None:
# 计算是否有下一页
hasnext = (page * page_size) < total
pagination_info = PaginationInfo(
page=page,
pagesize=page_size,
total=total,
hasnext=hasnext
)
response = SceneListResponse(items=items, page=pagination_info)
else:
response = SceneListResponse(items=items)
api_logger.info(f"Scene list retrieved successfully, count={len(items)}, total={total}")
return success(data=response.model_dump(mode='json'), msg="查询成功")
except ValueError as e:
api_logger.warning(f"Validation error in scene {operation}: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in scene {operation}: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in scene {operation}: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
# ==================== 本体类型管理接口 ====================
async def create_class_handler(
request: ClassCreateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""创建本体类型(统一使用列表形式,支持单个或批量)"""
# 根据列表长度判断是单个还是批量
count = len(request.classes)
mode = "single" if count == 1 else "batch"
api_logger.info(
f"Class creation ({mode}) requested by user {current_user.id}, "
f"scene_id={request.scene_id}, count={count}"
)
try:
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建Service
service = _get_dummy_ontology_service(db)
# 准备类型数据
classes_data = [
{
"class_name": item.class_name,
"class_description": item.class_description
}
for item in request.classes
]
if count == 1:
# 单个创建
class_data = classes_data[0]
ontology_class = service.create_class(
scene_id=request.scene_id,
class_name=class_data["class_name"],
class_description=class_data["class_description"],
workspace_id=workspace_id
)
# 构建单个响应
response = ClassResponse(
class_id=ontology_class.class_id,
class_name=ontology_class.class_name,
class_description=ontology_class.class_description,
scene_id=ontology_class.scene_id,
created_at=ontology_class.created_at,
updated_at=ontology_class.updated_at
)
api_logger.info(f"Class created successfully: {ontology_class.class_id}")
return success(data=response.model_dump(mode='json'), msg="类型创建成功")
else:
# 批量创建
created_classes, errors = service.create_classes_batch(
scene_id=request.scene_id,
classes=classes_data,
workspace_id=workspace_id
)
# 构建批量响应
items = []
for ontology_class in created_classes:
items.append(ClassResponse(
class_id=ontology_class.class_id,
class_name=ontology_class.class_name,
class_description=ontology_class.class_description,
scene_id=ontology_class.scene_id,
created_at=ontology_class.created_at,
updated_at=ontology_class.updated_at
))
response = ClassBatchCreateResponse(
total=len(classes_data),
success_count=len(created_classes),
failed_count=len(errors),
items=items,
errors=errors if errors else None
)
api_logger.info(
f"Batch class creation completed: "
f"success={len(created_classes)}, failed={len(errors)}"
)
return success(data=response.model_dump(mode='json'), msg="批量创建完成")
except ValueError as e:
api_logger.warning(f"Validation error in class creation: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in class creation: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "类型创建失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in class creation: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "类型创建失败", str(e))
async def update_class_handler(
class_id: str,
request: ClassUpdateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""更新本体类型"""
api_logger.info(
f"Class update requested by user {current_user.id}, "
f"class_id={class_id}"
)
try:
# 验证UUID格式
try:
class_uuid = UUID(class_id)
except ValueError:
api_logger.warning(f"Invalid class_id format: {class_id}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的类型ID格式")
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建Service
service = _get_dummy_ontology_service(db)
# 更新类型
ontology_class = service.update_class(
class_id=class_uuid,
class_name=request.class_name,
class_description=request.class_description,
workspace_id=workspace_id
)
# 构建响应
response = ClassResponse(
class_id=ontology_class.class_id,
class_name=ontology_class.class_name,
class_description=ontology_class.class_description,
scene_id=ontology_class.scene_id,
created_at=ontology_class.created_at,
updated_at=ontology_class.updated_at
)
api_logger.info(f"Class updated successfully: {class_id}")
return success(data=response.model_dump(mode='json'), msg="类型更新成功")
except ValueError as e:
api_logger.warning(f"Validation error in class update: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in class update: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "类型更新失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in class update: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "类型更新失败", str(e))
async def delete_class_handler(
class_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""删除本体类型"""
api_logger.info(
f"Class deletion requested by user {current_user.id}, "
f"class_id={class_id}"
)
try:
# 验证UUID格式
try:
class_uuid = UUID(class_id)
except ValueError:
api_logger.warning(f"Invalid class_id format: {class_id}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的类型ID格式")
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建Service
service = _get_dummy_ontology_service(db)
# 删除类型
success_flag = service.delete_class(
class_id=class_uuid,
workspace_id=workspace_id
)
api_logger.info(f"Class deleted successfully: {class_id}")
return success(data={"deleted": success_flag}, msg="类型删除成功")
except ValueError as e:
api_logger.warning(f"Validation error in class deletion: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in class deletion: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "类型删除失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in class deletion: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "类型删除失败", str(e))
async def get_class_handler(
class_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""获取单个本体类型"""
api_logger.info(
f"Get class requested by user {current_user.id}, "
f"class_id={class_id}"
)
try:
# 验证UUID格式
try:
class_uuid = UUID(class_id)
except ValueError:
api_logger.warning(f"Invalid class_id format: {class_id}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的类型ID格式")
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建Service
service = _get_dummy_ontology_service(db)
# 获取类型会抛出ValueError如果不存在
ontology_class = service.get_class_by_id(class_uuid, workspace_id)
# 构建响应
response = ClassResponse(
class_id=ontology_class.class_id,
class_name=ontology_class.class_name,
class_description=ontology_class.class_description,
scene_id=ontology_class.scene_id,
created_at=ontology_class.created_at,
updated_at=ontology_class.updated_at
)
api_logger.info(f"Class retrieved successfully: {class_id}")
return success(data=response.model_dump(mode='json'), msg="查询成功")
except ValueError as e:
# 类型不存在或无权限访问
api_logger.warning(f"Validation error in get class: {str(e)}")
return fail(BizCode.NOT_FOUND, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in get class: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in get class: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
async def classes_handler(
scene_id: str,
class_name: Optional[str] = None,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""获取类型列表(支持模糊搜索和全量查询)
当提供 class_name 参数时,进行模糊搜索;
当不提供 class_name 参数时,返回场景下的所有类型。
Args:
scene_id: 场景ID必填
class_name: 类型名称关键词(可选,支持模糊匹配)
db: 数据库会话
current_user: 当前用户
"""
operation = "search" if class_name else "list"
api_logger.info(
f"Class {operation} requested by user {current_user.id}, "
f"keyword={class_name}, scene_id={scene_id}"
)
try:
# 验证UUID格式
try:
scene_uuid = UUID(scene_id)
except ValueError:
api_logger.warning(f"Invalid scene_id format: {scene_id}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式")
# 获取当前工作空间ID
workspace_id = current_user.current_workspace_id
if not workspace_id:
api_logger.warning(f"User {current_user.id} has no current workspace")
return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")
# 创建Service
service = _get_dummy_ontology_service(db)
# 获取场景信息
scene = service.get_scene_by_id(scene_uuid, workspace_id)
if not scene:
api_logger.warning(f"Scene not found: {scene_id}")
return fail(BizCode.NOT_FOUND, "场景不存在", f"未找到ID为 {scene_id} 的场景")
# 根据是否提供 class_name 决定查询方式
if class_name and class_name.strip():
# 模糊搜索类型
classes = service.search_classes_by_name(class_name.strip(), scene_uuid, workspace_id)
else:
# 获取所有类型
classes = service.list_classes_by_scene(scene_uuid, workspace_id)
# 构建响应
items = []
for ontology_class in classes:
items.append(ClassResponse(
class_id=ontology_class.class_id,
class_name=ontology_class.class_name,
class_description=ontology_class.class_description,
scene_id=ontology_class.scene_id,
created_at=ontology_class.created_at,
updated_at=ontology_class.updated_at
))
response = ClassListResponse(
total=len(items),
scene_id=scene_uuid,
scene_name=scene.scene_name,
scene_description=scene.scene_description,
items=items
)
if class_name:
api_logger.info(
f"Class search completed: found {len(items)} classes matching '{class_name}' "
f"in scene {scene_id}"
)
else:
api_logger.info(f"Class list retrieved successfully, count={len(items)}")
return success(data=response.model_dump(mode='json'), msg="查询成功")
except ValueError as e:
api_logger.warning(f"Validation error in class {operation}: {str(e)}")
return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))
except RuntimeError as e:
api_logger.error(f"Runtime error in class {operation}: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
except Exception as e:
api_logger.error(f"Unexpected error in class {operation}: {str(e)}", exc_info=True)
return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))

View File

@@ -58,6 +58,12 @@ from app.core.memory.models.triplet_models import (
TripletExtractionResponse,
)
# Ontology models
from app.core.memory.models.ontology_models import (
OntologyClass,
OntologyExtractionResponse,
)
# Variable configuration models
from app.core.memory.models.variate_config import (
StatementExtractionConfig,
@@ -105,6 +111,9 @@ __all__ = [
"Entity",
"Triplet",
"TripletExtractionResponse",
# Ontology models
"OntologyClass",
"OntologyExtractionResponse",
# Variable configuration
"StatementExtractionConfig",
"ForgettingEngineConfig",

View File

@@ -0,0 +1,135 @@
"""Models for ontology classes and extraction responses.
This module contains Pydantic models for representing extracted ontology classes
from scenario descriptions, following OWL ontology engineering standards.
Classes:
OntologyClass: Represents an extracted ontology class
OntologyExtractionResponse: Response model containing extracted ontology classes
"""
from typing import List, Optional
from uuid import uuid4
from pydantic import BaseModel, ConfigDict, Field, field_validator
class OntologyClass(BaseModel):
"""Represents an extracted ontology class from scenario description.
An ontology class represents an abstract category or concept in a domain,
following OWL ontology engineering standards and naming conventions.
Attributes:
id: Unique string identifier for the ontology class
name: Name of the class in PascalCase format (e.g., 'MedicalProcedure')
name_chinese: Chinese translation of the class name (e.g., '医疗程序')
description: Textual description of the class
examples: List of concrete instance examples of this class
parent_class: Optional name of the parent class in the hierarchy
entity_type: Type/category of the entity (e.g., 'Person', 'Organization', 'Concept')
domain: Domain this class belongs to (e.g., 'Healthcare', 'Education')
Config:
extra: Ignore extra fields from LLM output
"""
model_config = ConfigDict(extra='ignore')
id: str = Field(
default_factory=lambda: uuid4().hex,
description="Unique identifier for the ontology class"
)
name: str = Field(
...,
description="Name of the class in PascalCase format"
)
name_chinese: Optional[str] = Field(
None,
description="Chinese translation of the class name"
)
description: str = Field(
...,
description="Description of the class"
)
examples: List[str] = Field(
default_factory=list,
description="List of concrete instance examples"
)
parent_class: Optional[str] = Field(
None,
description="Name of the parent class in the hierarchy"
)
entity_type: str = Field(
...,
description="Type/category of the entity"
)
domain: str = Field(
...,
description="Domain this class belongs to"
)
@field_validator('name')
@classmethod
def validate_pascal_case(cls, v: str) -> str:
"""Validate that the class name follows PascalCase convention.
PascalCase rules:
- Must start with an uppercase letter
- Cannot contain spaces
- Should not contain special characters except underscores
Args:
v: The class name to validate
Returns:
The validated class name
Raises:
ValueError: If the name doesn't follow PascalCase convention
"""
if not v:
raise ValueError("Class name cannot be empty")
if not v[0].isupper():
raise ValueError(
f"Class name '{v}' must start with an uppercase letter (PascalCase)"
)
if ' ' in v:
raise ValueError(
f"Class name '{v}' cannot contain spaces (PascalCase)"
)
# Check for invalid characters (allow alphanumeric and underscore only)
if not all(c.isalnum() or c == '_' for c in v):
raise ValueError(
f"Class name '{v}' contains invalid characters. "
"Only alphanumeric characters and underscores are allowed"
)
return v
class OntologyExtractionResponse(BaseModel):
"""Response model for ontology extraction from LLM.
This model represents the structured output from the LLM when
extracting ontology classes from scenario descriptions.
Attributes:
classes: List of extracted ontology classes
domain: Domain/field the scenario belongs to
Config:
extra: Ignore extra fields from LLM output
"""
model_config = ConfigDict(extra='ignore')
classes: List[OntologyClass] = Field(
default_factory=list,
description="List of extracted ontology classes"
)
domain: str = Field(
...,
description="Domain/field the scenario belongs to"
)

View File

@@ -8,4 +8,5 @@
- TemporalExtractor: 时间信息提取
- EmbeddingGenerator: 嵌入向量生成
- MemorySummaryGenerator: 记忆摘要生成
- OntologyExtractor: 本体类提取
"""

View File

@@ -0,0 +1,482 @@
"""Ontology class extraction from scenario descriptions using LLM.
This module provides the OntologyExtractor class for extracting ontology classes
from natural language scenario descriptions. It uses LLM-driven extraction combined
with two-layer validation (string validation + OWL semantic validation).
Classes:
OntologyExtractor: Extracts ontology classes from scenario descriptions
"""
import asyncio
import logging
import time
from typing import List, Optional
from app.core.memory.llm_tools.openai_client import OpenAIClient
from app.core.memory.models.ontology_models import (
OntologyClass,
OntologyExtractionResponse,
)
from app.core.memory.utils.validation.ontology_validator import OntologyValidator
from app.core.memory.utils.validation.owl_validator import OWLValidator
from app.core.memory.utils.prompt.prompt_utils import render_ontology_extraction_prompt
logger = logging.getLogger(__name__)
class OntologyExtractor:
"""Extractor for ontology classes from scenario descriptions.
This extractor uses LLM to identify abstract classes and concepts from
natural language scenario descriptions, following OWL ontology engineering
standards. It performs two-layer validation:
1. String validation (naming conventions, reserved words, duplicates)
2. OWL semantic validation (consistency checking, circular inheritance)
Attributes:
llm_client: OpenAI client for LLM calls
validator: String validator for class names and descriptions
owl_validator: OWL validator for semantic validation
"""
def __init__(self, llm_client: OpenAIClient):
"""Initialize the OntologyExtractor.
Args:
llm_client: OpenAIClient instance for LLM processing
"""
self.llm_client = llm_client
self.validator = OntologyValidator()
self.owl_validator = OWLValidator()
logger.info("OntologyExtractor initialized")
async def extract_ontology_classes(
self,
scenario: str,
domain: Optional[str] = None,
max_classes: int = 15,
min_classes: int = 5,
enable_owl_validation: bool = True,
llm_temperature: float = 0.3,
llm_max_tokens: int = 2000,
max_description_length: int = 500,
timeout: Optional[float] = None,
) -> OntologyExtractionResponse:
"""Extract ontology classes from a scenario description.
This is the main extraction method that orchestrates the entire process:
1. Call LLM to extract ontology classes
2. Perform first-layer validation (string validation and cleaning)
3. Perform second-layer validation (OWL semantic validation)
4. Filter invalid classes based on validation errors
5. Return validated ontology classes
Args:
scenario: Natural language scenario description
domain: Optional domain hint (e.g., "Healthcare", "Education")
max_classes: Maximum number of classes to extract (default: 15)
min_classes: Minimum number of classes to extract (default: 5)
enable_owl_validation: Whether to enable OWL validation (default: True)
llm_temperature: LLM temperature parameter (default: 0.3)
llm_max_tokens: LLM max tokens parameter (default: 2000)
max_description_length: Maximum description length (default: 500)
timeout: Optional timeout in seconds for LLM call (default: None, no timeout)
Returns:
OntologyExtractionResponse containing validated ontology classes
Raises:
ValueError: If scenario is empty or invalid
asyncio.TimeoutError: If extraction times out
Examples:
>>> extractor = OntologyExtractor(llm_client)
>>> response = await extractor.extract_ontology_classes(
... scenario="A hospital manages patient records...",
... domain="Healthcare",
... max_classes=10,
... timeout=30.0
... )
>>> len(response.classes)
7
"""
# Start timing
start_time = time.time()
# Validate input
if not scenario or not scenario.strip():
logger.error("Scenario description is empty")
raise ValueError("Scenario description cannot be empty")
scenario = scenario.strip()
logger.info(
f"Starting ontology extraction - scenario_length={len(scenario)}, "
f"domain={domain}, max_classes={max_classes}, min_classes={min_classes}, "
f"timeout={timeout}"
)
try:
# Step 1: Call LLM for extraction with timeout
logger.info("Step 1: Calling LLM for ontology extraction")
llm_start_time = time.time()
if timeout is not None:
# Wrap LLM call with timeout
try:
response = await asyncio.wait_for(
self._call_llm_for_extraction(
scenario=scenario,
domain=domain,
max_classes=max_classes,
llm_temperature=llm_temperature,
llm_max_tokens=llm_max_tokens,
),
timeout=timeout
)
except asyncio.TimeoutError:
llm_duration = time.time() - llm_start_time
logger.error(
f"LLM extraction timed out after {timeout} seconds "
f"(actual duration: {llm_duration:.2f}s)"
)
# Return empty response on timeout
return OntologyExtractionResponse(
classes=[],
domain=domain or "Unknown",
)
else:
# No timeout specified, call directly
response = await self._call_llm_for_extraction(
scenario=scenario,
domain=domain,
max_classes=max_classes,
llm_temperature=llm_temperature,
llm_max_tokens=llm_max_tokens,
)
llm_duration = time.time() - llm_start_time
logger.info(
f"LLM returned {len(response.classes)} classes in {llm_duration:.2f}s"
)
# Step 2: First-layer validation (string validation and cleaning)
logger.info("Step 2: Performing first-layer validation (string validation)")
validation_start_time = time.time()
response = self._validate_and_clean(
response=response,
max_description_length=max_description_length,
)
validation_duration = time.time() - validation_start_time
logger.info(
f"After first-layer validation: {len(response.classes)} classes remain "
f"(validation took {validation_duration:.2f}s)"
)
# Check if we have enough classes after first-layer validation
if len(response.classes) < min_classes:
logger.warning(
f"Only {len(response.classes)} classes remain after validation, "
f"which is below minimum of {min_classes}"
)
# Step 3: Second-layer validation (OWL semantic validation)
if enable_owl_validation and response.classes:
logger.info("Step 3: Performing second-layer validation (OWL validation)")
owl_start_time = time.time()
is_valid, errors, world = self.owl_validator.validate_ontology_classes(
classes=response.classes,
)
owl_duration = time.time() - owl_start_time
if not is_valid:
logger.warning(
f"OWL validation found {len(errors)} issues in {owl_duration:.2f}s: {errors}"
)
# Filter invalid classes based on errors
response = self._filter_invalid_classes(
response=response,
errors=errors,
)
logger.info(
f"After second-layer validation: {len(response.classes)} classes remain"
)
else:
logger.info(f"OWL validation passed successfully in {owl_duration:.2f}s")
else:
if not enable_owl_validation:
logger.info("Step 3: OWL validation disabled, skipping")
else:
logger.info("Step 3: No classes to validate, skipping OWL validation")
# Calculate total duration
total_duration = time.time() - start_time
# Log extraction statistics
logger.info(
f"Ontology extraction completed - "
f"final_class_count={len(response.classes)}, "
f"domain={response.domain}, "
f"total_duration={total_duration:.2f}s, "
f"llm_duration={llm_duration:.2f}s"
)
return response
except asyncio.TimeoutError:
# Re-raise timeout errors
total_duration = time.time() - start_time
logger.error(
f"Ontology extraction timed out after {timeout} seconds "
f"(total duration: {total_duration:.2f}s)",
exc_info=True
)
raise
except Exception as e:
total_duration = time.time() - start_time
logger.error(
f"Ontology extraction failed after {total_duration:.2f}s: {str(e)}",
exc_info=True
)
# Return empty response on failure
return OntologyExtractionResponse(
classes=[],
domain=domain or "Unknown",
)
async def _call_llm_for_extraction(
self,
scenario: str,
domain: Optional[str],
max_classes: int,
llm_temperature: float,
llm_max_tokens: int,
) -> OntologyExtractionResponse:
"""Call LLM to extract ontology classes from scenario.
This method renders the extraction prompt using the Jinja2 template
and calls the LLM with structured output to get ontology classes.
Args:
scenario: Scenario description text
domain: Optional domain hint
max_classes: Maximum number of classes to extract
llm_temperature: LLM temperature parameter
llm_max_tokens: LLM max tokens parameter
Returns:
OntologyExtractionResponse from LLM
Raises:
Exception: If LLM call fails
"""
try:
# Render prompt using template
prompt_content = await render_ontology_extraction_prompt(
scenario=scenario,
domain=domain,
max_classes=max_classes,
json_schema=OntologyExtractionResponse.model_json_schema(),
)
logger.debug(f"Rendered prompt length: {len(prompt_content)}")
# Create messages for LLM
messages = [
{
"role": "system",
"content": (
"You are an expert ontology engineer specializing in knowledge "
"representation and OWL standards. Extract ontology classes from "
"scenario descriptions following the provided instructions. "
"Return valid JSON conforming to the schema."
),
},
{
"role": "user",
"content": prompt_content,
},
]
# Call LLM with structured output
logger.debug(
f"Calling LLM with temperature={llm_temperature}, "
f"max_tokens={llm_max_tokens}"
)
response = await self.llm_client.response_structured(
messages=messages,
response_model=OntologyExtractionResponse,
)
logger.info(
f"LLM extraction successful - extracted {len(response.classes)} classes"
)
return response
except Exception as e:
logger.error(
f"LLM extraction failed: {str(e)}",
exc_info=True
)
raise
def _validate_and_clean(
self,
response: OntologyExtractionResponse,
max_description_length: int,
) -> OntologyExtractionResponse:
"""Perform first-layer validation: string validation and cleaning.
This method validates and cleans the extracted ontology classes:
1. Validate class names (PascalCase, no reserved words)
2. Sanitize invalid class names
3. Truncate long descriptions
4. Remove duplicate classes
Args:
response: OntologyExtractionResponse from LLM
max_description_length: Maximum description length
Returns:
Cleaned OntologyExtractionResponse
"""
if not response.classes:
logger.debug("No classes to validate")
return response
logger.debug(f"Validating {len(response.classes)} classes")
validated_classes = []
for ontology_class in response.classes:
# Validate class name
is_valid, error_msg = self.validator.validate_class_name(
ontology_class.name
)
if not is_valid:
logger.warning(
f"Invalid class name '{ontology_class.name}': {error_msg}"
)
# Attempt to sanitize
sanitized_name = self.validator.sanitize_class_name(
ontology_class.name
)
logger.info(
f"Sanitized class name: '{ontology_class.name}' -> '{sanitized_name}'"
)
# Update class name
ontology_class.name = sanitized_name
# Re-validate sanitized name
is_valid, error_msg = self.validator.validate_class_name(
sanitized_name
)
if not is_valid:
logger.error(
f"Failed to sanitize class name '{ontology_class.name}': {error_msg}. "
"Skipping this class."
)
continue
# Truncate description if too long
if ontology_class.description:
original_length = len(ontology_class.description)
ontology_class.description = self.validator.truncate_description(
ontology_class.description,
max_length=max_description_length,
)
if len(ontology_class.description) < original_length:
logger.debug(
f"Truncated description for '{ontology_class.name}': "
f"{original_length} -> {len(ontology_class.description)} chars"
)
validated_classes.append(ontology_class)
# Remove duplicates (case-insensitive)
original_count = len(validated_classes)
validated_classes = self.validator.remove_duplicates(validated_classes)
if len(validated_classes) < original_count:
logger.info(
f"Removed {original_count - len(validated_classes)} duplicate classes"
)
# Return cleaned response
return OntologyExtractionResponse(
classes=validated_classes,
domain=response.domain,
)
def _filter_invalid_classes(
self,
response: OntologyExtractionResponse,
errors: List[str],
) -> OntologyExtractionResponse:
"""Filter invalid classes based on OWL validation errors.
This method analyzes OWL validation errors and removes classes
that caused validation failures (e.g., circular inheritance,
inconsistencies).
Args:
response: OntologyExtractionResponse to filter
errors: List of error messages from OWL validation
Returns:
Filtered OntologyExtractionResponse
"""
if not errors:
return response
logger.debug(f"Filtering classes based on {len(errors)} OWL validation errors")
# Extract class names mentioned in errors
invalid_class_names = set()
for error in errors:
# Look for class names in error messages
for ontology_class in response.classes:
if ontology_class.name in error:
invalid_class_names.add(ontology_class.name)
logger.debug(
f"Class '{ontology_class.name}' marked as invalid due to error: {error}"
)
# Filter out invalid classes
if invalid_class_names:
original_count = len(response.classes)
filtered_classes = [
c for c in response.classes
if c.name not in invalid_class_names
]
logger.info(
f"Filtered out {original_count - len(filtered_classes)} invalid classes: "
f"{invalid_class_names}"
)
return OntologyExtractionResponse(
classes=filtered_classes,
domain=response.domain,
)
return response

View File

@@ -409,3 +409,42 @@ async def render_episodic_title_and_type_prompt(content: str) -> str:
})
return rendered_prompt
async def render_ontology_extraction_prompt(
scenario: str,
domain: str | None = None,
max_classes: int = 15,
json_schema: dict | None = None
) -> str:
"""
Renders the ontology extraction prompt using the extract_ontology.jinja2 template.
Args:
scenario: The scenario description text to extract ontology classes from
domain: Optional domain hint for the scenario (e.g., "Healthcare", "Education")
max_classes: Maximum number of classes to extract (default: 15)
json_schema: JSON schema for the expected output format
Returns:
Rendered prompt content as string
"""
template = prompt_env.get_template("extract_ontology.jinja2")
rendered_prompt = template.render(
scenario=scenario,
domain=domain,
max_classes=max_classes,
json_schema=json_schema
)
# 记录渲染结果到提示日志
log_prompt_rendering('ontology extraction', rendered_prompt)
# 可选:记录模板渲染信息
log_template_rendering('extract_ontology.jinja2', {
'scenario_len': len(scenario) if scenario else 0,
'domain': domain,
'max_classes': max_classes,
'json_schema': 'OntologyExtractionResponse.schema'
})
return rendered_prompt

View File

@@ -0,0 +1,210 @@
===Task===
Extract ontology classes from the given scenario description following ontology engineering standards.
===Role===
You are a professional ontology engineer with expertise in knowledge representation and OWL (Web Ontology Language) standards. Your task is to identify abstract classes and concepts from scenario descriptions, not concrete instances.
===Scenario Description===
{{ scenario }}
{% if domain -%}
===Domain Hint===
This scenario belongs to the **{{ domain }}** domain. Consider domain-specific concepts and terminology when extracting classes.
{%- endif %}
===Extraction Rules===
**1. Abstract Classes, Not Instances:**
- Extract abstract categories and concepts (e.g., "MedicalProcedure", "Patient", "Diagnosis")
- Do NOT extract concrete instances (e.g., "John Smith", "Room 301", "2024-01-15")
- Think in terms of "types of things" rather than "specific things"
**2. Naming Convention (PascalCase):**
- Use PascalCase format for the "name" field: start with uppercase letter, capitalize each word, no spaces
- Examples: "MedicalProcedure", "HealthcareProvider", "DiagnosticTest"
- Avoid: "medical procedure", "healthcare_provider", "diagnostic-test"
- Use clear, descriptive names in English
- Avoid abbreviations unless they are standard in the domain (e.g., "API", "DNA")
- Provide Chinese translation in the "name_chinese" field (e.g., "医疗程序", "医疗服务提供者", "诊断测试")
**3. Domain Relevance:**
- Focus on classes that are central to the scenario's domain
- Prioritize classes that represent key concepts, entities, or relationships
- Avoid overly generic classes (e.g., "Thing", "Object") unless they have specific domain meaning
**4. Class Quantity:**
- Extract between 5 and {{ max_classes }} classes
- Aim for a balanced set covering the main concepts in the scenario
- Quality over quantity: prefer well-defined classes over exhaustive lists
**5. Clear Descriptions:**
- Provide concise, informative descriptions in Chinese (max 500 characters)
- Describe what the class represents, not specific instances
- Use clear, natural Chinese language that explains the class's role in the domain
**6. Concrete Examples:**
- Provide 2-5 concrete instance examples in Chinese for each class
- Examples should be specific, realistic instances of the class
- Examples help clarify the class's scope and meaning
- Use natural Chinese language for examples
- Example format: ["示例1", "示例2", "示例3"]
**7. Class Hierarchy:**
- Identify parent-child relationships where applicable
- Use the parent_class field to specify inheritance
- Parent class must be one of the extracted classes or a standard OWL class
- Leave parent_class as null for top-level classes
**8. Entity Types:**
- Classify each class with an appropriate entity_type
- Common types: "Person", "Organization", "Location", "Event", "Concept", "Process", "Object", "Role"
- Choose the most specific type that applies
**9. OWL Reserved Words:**
- Do NOT use OWL reserved words as class names
- Reserved words include: "Thing", "Nothing", "Class", "Property", "ObjectProperty", "DatatypeProperty", "AnnotationProperty", "Ontology", "Individual", "Literal"
- If a reserved word is needed, add a domain-specific prefix (e.g., "MedicalClass" instead of "Class")
**10. Language Consistency:**
- Extract all class names in English (PascalCase format) for the "name" field
- Provide Chinese translation for class names in the "name_chinese" field
- Descriptions MUST be in Chinese (中文)
- Examples MUST be in Chinese (中文)
- Use clear, natural Chinese language for descriptions and examples
===Examples===
**Example 1 (Healthcare Domain):**
Scenario: "A hospital manages patient records, schedules appointments, and coordinates medical procedures. Doctors diagnose conditions and prescribe treatments."
Output:
{
"classes": [
{
"name": "Patient",
"name_chinese": "患者",
"description": "在医疗机构接受医疗护理或治疗的人",
"examples": ["张三", "李四", "患有糖尿病的老年患者"],
"parent_class": null,
"entity_type": "Person",
"domain": "Healthcare"
},
{
"name": "MedicalProcedure",
"name_chinese": "医疗程序",
"description": "为医疗诊断或治疗而执行的系统性操作流程",
"examples": ["手术", "血液检查", "X光检查", "疫苗接种"],
"parent_class": null,
"entity_type": "Process",
"domain": "Healthcare"
},
{
"name": "Diagnosis",
"name_chinese": "诊断",
"description": "基于症状和检查结果对疾病或状况的识别",
"examples": ["糖尿病诊断", "癌症诊断", "流感诊断"],
"parent_class": null,
"entity_type": "Concept",
"domain": "Healthcare"
},
{
"name": "Doctor",
"name_chinese": "医生",
"description": "诊断和治疗患者的持证医疗专业人员",
"examples": ["全科医生", "外科医生", "心脏病专家"],
"parent_class": null,
"entity_type": "Role",
"domain": "Healthcare"
},
{
"name": "Treatment",
"name_chinese": "治疗",
"description": "为治愈或管理疾病状况而提供的医疗护理或疗法",
"examples": ["药物治疗", "物理治疗", "化疗", "手术治疗"],
"parent_class": null,
"entity_type": "Process",
"domain": "Healthcare"
}
],
"domain": "Healthcare",
"namespace": "http://example.org/healthcare#"
}
**Example 2 (Education Domain):**
Scenario: "A university offers courses taught by professors. Students enroll in programs, attend lectures, and complete assignments to earn degrees."
Output:
{
"classes": [
{
"name": "Student",
"name_chinese": "学生",
"description": "在教育机构注册学习的人",
"examples": ["本科生", "研究生", "在职学生"],
"parent_class": null,
"entity_type": "Role",
"domain": "Education"
},
{
"name": "Course",
"name_chinese": "课程",
"description": "涵盖特定学科或主题的结构化教育课程",
"examples": ["计算机科学导论", "微积分I", "世界历史"],
"parent_class": null,
"entity_type": "Concept",
"domain": "Education"
},
{
"name": "Professor",
"name_chinese": "教授",
"description": "教授课程并进行研究的学术教师",
"examples": ["助理教授", "副教授", "正教授"],
"parent_class": null,
"entity_type": "Role",
"domain": "Education"
},
{
"name": "AcademicProgram",
"name_chinese": "学术项目",
"description": "通向学位或证书的结构化课程体系",
"examples": ["理学学士", "文学硕士", "博士项目"],
"parent_class": null,
"entity_type": "Concept",
"domain": "Education"
},
{
"name": "Assignment",
"name_chinese": "作业",
"description": "分配给学生以评估学习成果的任务或项目",
"examples": ["论文", "习题集", "研究报告", "实验报告"],
"parent_class": null,
"entity_type": "Object",
"domain": "Education"
},
{
"name": "Lecture",
"name_chinese": "讲座",
"description": "由教师进行的教育性演讲或讲座",
"examples": ["入门讲座", "客座讲座", "在线讲座"],
"parent_class": null,
"entity_type": "Event",
"domain": "Education"
}
],
"domain": "Education",
"namespace": "http://example.org/education#"
}
===Output Format===
**JSON Requirements:**
- Use only ASCII double quotes (") for JSON structure
- Never use Chinese quotation marks ("") or Unicode quotes
- Escape quotation marks in text with backslashes (\")
- Ensure proper string closure and comma separation
- No line breaks within JSON string values
- All class names must be in PascalCase format
- All class names must be unique (case-insensitive)
- Extract between 5 and {{ max_classes }} classes
{{ json_schema }}

View File

@@ -0,0 +1,10 @@
"""Validation utilities for ontology extraction.
This module provides validation classes for ontology class names,
descriptions, and OWL compliance checking.
"""
from .ontology_validator import OntologyValidator
from .owl_validator import OWLValidator
__all__ = ['OntologyValidator', 'OWLValidator']

View File

@@ -0,0 +1,268 @@
"""String validation for ontology class names and descriptions.
This module provides the OntologyValidator class for validating and sanitizing
ontology class names according to OWL standards and naming conventions.
Classes:
OntologyValidator: Validates class names, removes duplicates, and truncates descriptions
"""
import logging
import re
from typing import List, Tuple
from app.core.memory.models.ontology_models import OntologyClass
logger = logging.getLogger(__name__)
class OntologyValidator:
"""Validator for ontology class names and descriptions.
This validator performs string-level validation including:
- PascalCase naming convention validation
- OWL reserved word checking
- Duplicate class name removal
- Description length truncation
Attributes:
OWL_RESERVED_WORDS: Set of OWL reserved words that cannot be used as class names
"""
# OWL reserved words that cannot be used as class names
OWL_RESERVED_WORDS = {
'Thing', 'Nothing', 'Class', 'Property',
'ObjectProperty', 'DatatypeProperty', 'FunctionalProperty',
'InverseFunctionalProperty', 'TransitiveProperty', 'SymmetricProperty',
'AsymmetricProperty', 'ReflexiveProperty', 'IrreflexiveProperty',
'Restriction', 'Ontology', 'Individual', 'NamedIndividual',
'Annotation', 'AnnotationProperty', 'Axiom',
'AllDifferent', 'AllDisjointClasses', 'AllDisjointProperties',
'Datatype', 'DataRange', 'Literal',
'DeprecatedClass', 'DeprecatedProperty',
'Imports', 'IncompatibleWith', 'PriorVersion', 'VersionInfo',
'BackwardCompatibleWith', 'OntologyProperty',
}
def validate_class_name(self, name: str) -> Tuple[bool, str]:
"""Validate that a class name follows OWL naming conventions.
Validation rules:
1. Must not be empty
2. Must start with an uppercase letter (PascalCase)
3. Cannot contain spaces
4. Can only contain alphanumeric characters and underscores
5. Cannot be an OWL reserved word
Args:
name: The class name to validate
Returns:
Tuple of (is_valid, error_message)
- is_valid: True if the name is valid, False otherwise
- error_message: Empty string if valid, error description if invalid
Examples:
>>> validator = OntologyValidator()
>>> validator.validate_class_name("MedicalProcedure")
(True, "")
>>> validator.validate_class_name("medical procedure")
(False, "Class name 'medical procedure' cannot contain spaces")
>>> validator.validate_class_name("Thing")
(False, "Class name 'Thing' is an OWL reserved word")
"""
logger.debug(f"Validating class name: '{name}'")
# Check if empty
if not name or not name.strip():
error_msg = "Class name cannot be empty"
logger.warning(f"Validation failed: {error_msg}")
return False, error_msg
name = name.strip()
# Check if it's an OWL reserved word
if name in self.OWL_RESERVED_WORDS:
error_msg = f"Class name '{name}' is an OWL reserved word"
logger.warning(f"Validation failed: {error_msg}")
return False, error_msg
# Check if starts with uppercase letter
if not name[0].isupper():
error_msg = f"Class name '{name}' must start with an uppercase letter (PascalCase)"
logger.warning(f"Validation failed: {error_msg}")
return False, error_msg
# Check for spaces
if ' ' in name:
error_msg = f"Class name '{name}' cannot contain spaces"
logger.warning(f"Validation failed: {error_msg}")
return False, error_msg
# Check for invalid characters (only alphanumeric and underscore allowed)
if not re.match(r'^[A-Za-z0-9_]+$', name):
error_msg = f"Class name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed"
logger.warning(f"Validation failed: {error_msg}")
return False, error_msg
logger.debug(f"Class name '{name}' is valid")
return True, ""
def sanitize_class_name(self, name: str) -> str:
"""Attempt to sanitize an invalid class name into a valid format.
Sanitization steps:
1. Strip whitespace
2. Remove invalid characters
3. Replace spaces with empty string (PascalCase)
4. Capitalize first letter of each word
5. If result is empty or starts with number, prefix with 'Class'
Args:
name: The class name to sanitize
Returns:
Sanitized class name that should pass validation
Examples:
>>> validator = OntologyValidator()
>>> validator.sanitize_class_name("medical procedure")
'MedicalProcedure'
>>> validator.sanitize_class_name("patient-record")
'PatientRecord'
>>> validator.sanitize_class_name("123invalid")
'Class123Invalid'
"""
logger.debug(f"Sanitizing class name: '{name}'")
if not name or not name.strip():
logger.warning("Empty class name provided for sanitization, returning 'UnnamedClass'")
return "UnnamedClass"
# Strip whitespace
name = name.strip()
original_name = name
# Split on spaces, hyphens, and underscores, then capitalize each word
words = re.split(r'[\s\-_]+', name)
# Capitalize first letter of each word and keep rest as is
sanitized_words = []
for word in words:
if word:
# Remove non-alphanumeric characters except underscore
clean_word = re.sub(r'[^A-Za-z0-9_]', '', word)
if clean_word:
# Capitalize first letter
sanitized_words.append(clean_word[0].upper() + clean_word[1:])
# Join words
sanitized = ''.join(sanitized_words)
# If empty or starts with number, prefix with 'Class'
if not sanitized or sanitized[0].isdigit():
sanitized = 'Class' + sanitized
logger.info(f"Prefixed class name with 'Class': '{original_name}' -> '{sanitized}'")
# If it's a reserved word, append 'Class' suffix
if sanitized in self.OWL_RESERVED_WORDS:
sanitized = sanitized + 'Class'
logger.info(f"Appended 'Class' suffix to reserved word: '{original_name}' -> '{sanitized}'")
logger.info(f"Sanitized class name: '{original_name}' -> '{sanitized}'")
return sanitized
def remove_duplicates(self, classes: List[OntologyClass]) -> List[OntologyClass]:
"""Remove duplicate ontology classes based on case-insensitive name comparison.
When duplicates are found, keeps the first occurrence and discards subsequent ones.
Comparison is case-insensitive to catch variations like 'Patient' and 'patient'.
Args:
classes: List of OntologyClass objects
Returns:
List of OntologyClass objects with duplicates removed
Examples:
>>> validator = OntologyValidator()
>>> classes = [
... OntologyClass(name="Patient", description="A patient", entity_type="Person", domain="Healthcare"),
... OntologyClass(name="patient", description="Another patient", entity_type="Person", domain="Healthcare"),
... OntologyClass(name="Doctor", description="A doctor", entity_type="Person", domain="Healthcare"),
... ]
>>> unique = validator.remove_duplicates(classes)
>>> len(unique)
2
>>> [c.name for c in unique]
['Patient', 'Doctor']
"""
if not classes:
logger.debug("No classes to check for duplicates")
return classes
logger.debug(f"Checking {len(classes)} classes for duplicates")
seen_names = set()
unique_classes = []
duplicates_found = []
for ontology_class in classes:
# Use lowercase for comparison
name_lower = ontology_class.name.lower()
if name_lower not in seen_names:
seen_names.add(name_lower)
unique_classes.append(ontology_class)
else:
duplicates_found.append(ontology_class.name)
logger.debug(f"Duplicate class found and removed: '{ontology_class.name}'")
if duplicates_found:
logger.info(
f"Removed {len(duplicates_found)} duplicate classes: {duplicates_found}"
)
else:
logger.debug("No duplicate classes found")
return unique_classes
def truncate_description(self, description: str, max_length: int = 500) -> str:
"""Truncate a description to a maximum length.
If the description exceeds max_length, it will be truncated and
an ellipsis (...) will be appended to indicate truncation.
Args:
description: The description text to truncate
max_length: Maximum allowed length (default: 500)
Returns:
Truncated description string
Examples:
>>> validator = OntologyValidator()
>>> long_desc = "A" * 600
>>> truncated = validator.truncate_description(long_desc, max_length=500)
>>> len(truncated)
500
>>> truncated.endswith("...")
True
"""
if not description:
return ""
if len(description) <= max_length:
return description
# Truncate and add ellipsis
# Reserve 3 characters for "..."
truncate_at = max_length - 3
truncated = description[:truncate_at] + "..."
logger.debug(
f"Truncated description from {len(description)} to {len(truncated)} characters"
)
return truncated

View File

@@ -0,0 +1,585 @@
"""OWL semantic validation for ontology classes using Owlready2.
This module provides the OWLValidator class for validating ontology classes
against OWL standards using the Owlready2 library. It performs semantic
validation including consistency checking, circular inheritance detection,
and OWL file export.
Classes:
OWLValidator: Validates ontology classes using OWL reasoning and exports to OWL formats
"""
import logging
from typing import List, Optional, Tuple
from owlready2 import (
World,
Thing,
get_ontology,
sync_reasoner_pellet,
OwlReadyInconsistentOntologyError,
)
from app.core.memory.models.ontology_models import OntologyClass
logger = logging.getLogger(__name__)
class OWLValidator:
"""Validator for OWL semantic validation of ontology classes.
This validator performs semantic-level validation using Owlready2 including:
- Creating OWL classes from ontology class definitions
- Running consistency checking with Pellet reasoner
- Detecting circular inheritance
- Validating Protégé compatibility
- Exporting ontologies to various OWL formats (RDF/XML, Turtle, N-Triples)
Attributes:
base_namespace: Base URI for the ontology namespace
"""
def __init__(self, base_namespace: str = "http://example.org/ontology#"):
"""Initialize the OWL validator.
Args:
base_namespace: Base URI for the ontology namespace (default: http://example.org/ontology#)
"""
self.base_namespace = base_namespace
def validate_ontology_classes(
self,
classes: List[OntologyClass],
) -> Tuple[bool, List[str], Optional[World]]:
"""Validate extracted ontology classes against OWL standards.
This method creates an OWL ontology from the provided classes using Owlready2,
runs consistency checking with the Pellet reasoner, and detects common issues
like circular inheritance.
Args:
classes: List of OntologyClass objects to validate
Returns:
Tuple of (is_valid, error_messages, world):
- is_valid: True if ontology is valid and consistent, False otherwise
- error_messages: List of error/warning messages
- world: Owlready2 World object containing the ontology (None if validation failed)
Examples:
>>> validator = OWLValidator()
>>> classes = [
... OntologyClass(name="Patient", description="A patient", entity_type="Person", domain="Healthcare"),
... OntologyClass(name="Doctor", description="A doctor", entity_type="Person", domain="Healthcare"),
... ]
>>> is_valid, errors, world = validator.validate_ontology_classes(classes)
>>> is_valid
True
>>> len(errors)
0
"""
if not classes:
return False, ["No classes provided for validation"], None
errors = []
try:
# Create a new world (isolated ontology environment)
world = World()
# Use a proper ontology IRI
# Owlready2 expects the IRI to end with .owl or similar
onto_iri = self.base_namespace.rstrip('#/')
if not onto_iri.endswith('.owl'):
onto_iri = onto_iri + '.owl'
# Create ontology
onto = world.get_ontology(onto_iri)
with onto:
# Dictionary to store created OWL classes for parent reference
owl_classes = {}
# First pass: Create all classes without parent relationships
for ontology_class in classes:
try:
# Create OWL class dynamically using type() with Thing as base
# The key is to NOT set namespace in the dict, let Owlready2 handle it
owl_class = type(
ontology_class.name, # Class name
(Thing,), # Base classes
{} # Class dict (empty, let Owlready2 manage)
)
# Add label (rdfs:label) - include both English and Chinese names
labels = [ontology_class.name]
if ontology_class.name_chinese:
labels.append(ontology_class.name_chinese)
owl_class.label = labels
# Add comment (rdfs:comment) with description
if ontology_class.description:
owl_class.comment = [ontology_class.description]
# Store for parent relationship setup
owl_classes[ontology_class.name] = owl_class
logger.debug(
f"Created OWL class: {ontology_class.name} "
f"(Chinese: {ontology_class.name_chinese}) "
f"IRI: {owl_class.iri if hasattr(owl_class, 'iri') else 'N/A'}"
)
except Exception as e:
error_msg = f"Failed to create OWL class '{ontology_class.name}': {str(e)}"
errors.append(error_msg)
logger.error(error_msg, exc_info=True)
# Second pass: Set up parent relationships
for ontology_class in classes:
if ontology_class.parent_class and ontology_class.name in owl_classes:
parent_name = ontology_class.parent_class
# Check if parent exists
if parent_name in owl_classes:
try:
child_class = owl_classes[ontology_class.name]
parent_class = owl_classes[parent_name]
# Set parent by modifying is_a
child_class.is_a = [parent_class]
logger.debug(
f"Set parent relationship: {ontology_class.name} -> {parent_name}"
)
except Exception as e:
error_msg = (
f"Failed to set parent relationship "
f"'{ontology_class.name}' -> '{parent_name}': {str(e)}"
)
errors.append(error_msg)
logger.warning(error_msg)
else:
warning_msg = (
f"Parent class '{parent_name}' not found for '{ontology_class.name}'"
)
errors.append(warning_msg)
logger.warning(warning_msg)
# Check for circular inheritance
for class_name, owl_class in owl_classes.items():
if self._has_circular_inheritance(owl_class):
error_msg = f"Circular inheritance detected for class '{class_name}'"
errors.append(error_msg)
logger.error(error_msg)
# Run consistency checking with Pellet reasoner
try:
logger.info("Running Pellet reasoner for consistency checking...")
sync_reasoner_pellet(world, infer_property_values=True, infer_data_property_values=True)
logger.info("Consistency check passed")
except OwlReadyInconsistentOntologyError as e:
error_msg = f"Ontology is inconsistent: {str(e)}"
errors.append(error_msg)
logger.error(error_msg)
return False, errors, world
except Exception as e:
# Reasoner errors are often due to Java not being installed or configured
# Log as warning but don't fail validation - ontology structure is still valid
warning_msg = f"Reasoner check skipped: {str(e)}"
if str(e).strip(): # Only log if there's an actual error message
logger.warning(warning_msg)
else:
logger.warning("Reasoner check skipped: Java may not be installed or configured")
# Continue - ontology structure is valid even without reasoner check
# If we have errors (excluding warnings), validation failed
is_valid = len(errors) == 0
return is_valid, errors, world
except Exception as e:
error_msg = f"OWL validation failed: {str(e)}"
errors.append(error_msg)
logger.error(error_msg, exc_info=True)
return False, errors, None
def _has_circular_inheritance(self, owl_class) -> bool:
"""Check if an OWL class has circular inheritance.
Circular inheritance occurs when a class inherits from itself through
a chain of parent relationships (e.g., A -> B -> C -> A).
Args:
owl_class: Owlready2 class object to check
Returns:
True if circular inheritance is detected, False otherwise
"""
visited = set()
current = owl_class
while current:
# Get class IRI or name as identifier
class_id = str(current.iri) if hasattr(current, 'iri') else str(current)
if class_id in visited:
# Found a cycle
return True
visited.add(class_id)
# Get parent classes (is_a relationship)
parents = getattr(current, 'is_a', [])
# Filter out Thing and other base classes
parent_classes = [p for p in parents if p != Thing and hasattr(p, 'is_a')]
if not parent_classes:
# No more parents, no cycle
break
# Check first parent (in single inheritance)
current = parent_classes[0] if parent_classes else None
return False
def export_to_owl(
self,
world: World,
output_path: Optional[str] = None,
format: str = "rdfxml",
classes: Optional[List] = None
) -> str:
"""Export ontology to OWL file in specified format.
Supported formats:
- rdfxml: RDF/XML format (default, most compatible)
- turtle: Turtle format (more readable)
- ntriples: N-Triples format (simplest)
- json: JSON format (simplified, human-readable)
Args:
world: Owlready2 World object containing the ontology
output_path: Optional file path to save the ontology (if None, returns string)
format: Export format - "rdfxml", "turtle", "ntriples", or "json" (default: "rdfxml")
classes: Optional list of OntologyClass objects (required for json format)
Returns:
String representation of the exported ontology
Raises:
ValueError: If format is not supported
RuntimeError: If export fails
Examples:
>>> validator = OWLValidator()
>>> is_valid, errors, world = validator.validate_ontology_classes(classes)
>>> owl_content = validator.export_to_owl(world, "ontology.owl", format="rdfxml")
"""
# Validate format
valid_formats = ["rdfxml", "turtle", "ntriples", "json"]
if format not in valid_formats:
raise ValueError(
f"Unsupported format '{format}'. Must be one of: {', '.join(valid_formats)}"
)
# JSON format doesn't need OWL processing
if format == "json":
if not classes:
raise ValueError("Classes list is required for JSON format export")
return self._export_to_json(classes)
# For OWL formats, world is required
if not world:
raise ValueError("World object is None. Cannot export ontology.")
# Note: Owlready2 has issues with turtle format export
# We'll handle it specially by converting from rdfxml
use_conversion = (format == "turtle")
try:
# Get all ontologies in the world
ontologies = list(world.ontologies.values())
if not ontologies:
raise RuntimeError("No ontologies found in world")
# Find the ontology with classes (skip anonymous/empty ontologies)
onto = None
for ont in ontologies:
classes_count = len(list(ont.classes()))
logger.debug(f"Checking ontology {ont.base_iri}: {classes_count} classes")
if classes_count > 0:
onto = ont
break
# If no ontology with classes found, use the last non-anonymous one
if onto is None:
for ont in reversed(ontologies):
if ont.base_iri != "http://anonymous/":
onto = ont
break
# If still no ontology, use the first one
if onto is None:
onto = ontologies[0]
# Log ontology contents for debugging
logger.info(f"Ontology IRI: {onto.base_iri}")
logger.info(f"Ontology contains {len(list(onto.classes()))} classes")
# List all classes in the ontology
all_classes = list(onto.classes())
for cls in all_classes:
logger.info(f"Class in ontology: {cls.name} (IRI: {cls.iri})")
if hasattr(cls, 'label'):
logger.debug(f" Labels: {cls.label}")
if hasattr(cls, 'comment'):
logger.debug(f" Comments: {cls.comment}")
if len(all_classes) == 0:
logger.warning("No classes found in ontology! This may indicate a problem with class creation.")
if output_path:
# Save to file
export_format = "rdfxml" if use_conversion else format
logger.info(f"Exporting ontology to {output_path} in {export_format} format")
onto.save(file=output_path, format=export_format)
# Read back the file content to return
with open(output_path, 'r', encoding='utf-8') as f:
content = f.read()
# Convert to turtle if needed
if use_conversion:
content = self._convert_to_turtle(content)
logger.info(f"Successfully exported ontology to {output_path}")
# Format the content for better readability
content = self._format_owl_content(content, format)
return content
else:
# Export to string (save to temporary location and read)
import tempfile
import os
with tempfile.NamedTemporaryFile(mode='w', suffix='.owl', delete=False) as tmp:
tmp_path = tmp.name
try:
export_format = "rdfxml" if use_conversion else format
onto.save(file=tmp_path, format=export_format)
with open(tmp_path, 'r', encoding='utf-8') as f:
content = f.read()
# Convert to turtle if needed
if use_conversion:
content = self._convert_to_turtle(content)
# Format the content for better readability
content = self._format_owl_content(content, format)
return content
finally:
# Clean up temporary file
if os.path.exists(tmp_path):
os.remove(tmp_path)
except Exception as e:
error_msg = f"Failed to export ontology: {str(e)}"
logger.error(error_msg, exc_info=True)
raise RuntimeError(error_msg) from e
def _export_to_json(self, classes: List) -> str:
"""Export ontology classes to simplified JSON format.
This format is more compact and easier to parse than OWL XML.
Args:
classes: List of OntologyClass objects
Returns:
JSON string representation (compact format)
"""
import json
result = {
"ontology": {
"namespace": self.base_namespace,
"classes": []
}
}
for cls in classes:
class_data = {
"name": cls.name,
"name_chinese": cls.name_chinese,
"description": cls.description,
"entity_type": cls.entity_type,
"domain": cls.domain,
"parent_class": cls.parent_class,
"examples": cls.examples if hasattr(cls, 'examples') else []
}
result["ontology"]["classes"].append(class_data)
# 使用紧凑格式:无缩进,使用分隔符减少空格
return json.dumps(result, ensure_ascii=False, separators=(',', ':'))
def _convert_to_turtle(self, rdfxml_content: str) -> str:
"""Convert RDF/XML content to Turtle format using rdflib.
Args:
rdfxml_content: RDF/XML format content
Returns:
Turtle format content
"""
try:
from rdflib import Graph
# Parse RDF/XML
g = Graph()
g.parse(data=rdfxml_content, format="xml")
# Serialize to Turtle
turtle_content = g.serialize(format="turtle")
# Handle bytes vs string
if isinstance(turtle_content, bytes):
turtle_content = turtle_content.decode('utf-8')
return turtle_content
except ImportError:
logger.warning(
"rdflib is not installed. Cannot convert to Turtle format. "
"Install with: pip install rdflib"
)
return rdfxml_content
except Exception as e:
logger.error(f"Failed to convert to Turtle format: {e}")
return rdfxml_content
def _format_owl_content(self, content: str, format: str) -> str:
"""Format OWL content for better readability.
Args:
content: Raw OWL content string
format: Format type (rdfxml, turtle, ntriples)
Returns:
Formatted OWL content string
"""
if format == "rdfxml":
# Format XML with proper indentation
try:
import xml.dom.minidom as minidom
dom = minidom.parseString(content)
# Pretty print with 2-space indentation
formatted = dom.toprettyxml(indent=" ", encoding="utf-8").decode("utf-8")
# Remove extra blank lines
lines = []
prev_blank = False
for line in formatted.split('\n'):
is_blank = not line.strip()
if not (is_blank and prev_blank): # Skip consecutive blank lines
lines.append(line)
prev_blank = is_blank
formatted = '\n'.join(lines)
return formatted
except Exception as e:
logger.warning(f"Failed to format XML content: {e}")
return content
elif format == "turtle":
# Turtle format is already relatively readable
# Just ensure consistent line endings and not empty
if not content or content.strip() == "":
logger.warning("Turtle content is empty, this may indicate an export issue")
return content.strip() + '\n' if content.strip() else content
elif format == "ntriples":
# N-Triples format is line-based, ensure proper line endings
return content.strip() + '\n' if content.strip() else content
return content
def validate_with_protege_compatibility(
self,
classes: List[OntologyClass]
) -> Tuple[bool, List[str]]:
"""Validate that ontology classes are compatible with Protégé editor.
Protégé compatibility checks:
- Class names are valid OWL identifiers
- No special characters that Protégé cannot handle
- Namespace is properly formatted
- Labels and comments are properly encoded
Args:
classes: List of OntologyClass objects to validate
Returns:
Tuple of (is_compatible, warnings):
- is_compatible: True if compatible with Protégé, False otherwise
- warnings: List of compatibility warning messages
Examples:
>>> validator = OWLValidator()
>>> classes = [OntologyClass(name="Patient", description="A patient", entity_type="Person", domain="Healthcare")]
>>> is_compatible, warnings = validator.validate_with_protege_compatibility(classes)
>>> is_compatible
True
"""
warnings = []
# Check namespace format
if not self.base_namespace.startswith(('http://', 'https://')):
warnings.append(
f"Namespace '{self.base_namespace}' should start with http:// or https:// "
"for Protégé compatibility"
)
if not self.base_namespace.endswith(('#', '/')):
warnings.append(
f"Namespace '{self.base_namespace}' should end with # or / "
"for Protégé compatibility"
)
# Check each class
for ontology_class in classes:
# Check for special characters that might cause issues
if any(char in ontology_class.name for char in ['<', '>', '"', '{', '}', '|', '^', '`']):
warnings.append(
f"Class name '{ontology_class.name}' contains special characters "
"that may cause issues in Protégé"
)
# Check description length (Protégé can handle long descriptions but may display poorly)
if ontology_class.description and len(ontology_class.description) > 1000:
warnings.append(
f"Class '{ontology_class.name}' has a very long description ({len(ontology_class.description)} chars) "
"which may display poorly in Protégé"
)
# Check for non-ASCII characters (Protégé supports them but encoding issues may occur)
if not ontology_class.name.isascii():
warnings.append(
f"Class name '{ontology_class.name}' contains non-ASCII characters "
"which may cause encoding issues in some Protégé versions"
)
# If no warnings, it's compatible
is_compatible = len(warnings) == 0
return is_compatible, warnings

View File

@@ -28,6 +28,10 @@ from .tool_model import (
ToolExecution, ToolType, ToolStatus, AuthType, ExecutionStatus
)
from .memory_perceptual_model import MemoryPerceptualModel
from .ontology_scene import OntologyScene
from .ontology_class import OntologyClass
from .ontology_scene import OntologyScene
from .ontology_class import OntologyClass
__all__ = [
"Tenants",

View File

@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""本体类型模型
本模块定义本体类型的数据模型。
Classes:
OntologyClass: 本体类型表模型
"""
import datetime
import uuid
from sqlalchemy import Column, String, DateTime, Text, ForeignKey
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship
from app.db import Base
class OntologyClass(Base):
"""本体类型表 - 用于存储某个场景提取出来的本体类型信息"""
__tablename__ = "ontology_class"
# 主键
class_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True, comment="类型ID")
# 类型信息
class_name = Column(String(200), nullable=False, comment="类型名称")
class_description = Column(Text, nullable=True, comment="类型描述")
# 外键:关联到本体场景
scene_id = Column(UUID(as_uuid=True), ForeignKey("ontology_scene.scene_id", ondelete="CASCADE"), nullable=False, index=True, comment="所属场景ID")
# 时间戳
created_at = Column(DateTime, default=datetime.datetime.now, nullable=False, comment="创建时间")
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now, nullable=False, comment="更新时间")
# 关系:类型属于某个场景
scene = relationship("OntologyScene", back_populates="classes")
def __repr__(self):
return f"<OntologyClass(id={self.class_id}, name={self.class_name}, scene_id={self.scene_id})>"

View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
"""本体场景模型
本模块定义本体场景的数据模型。
Classes:
OntologyScene: 本体场景表模型
"""
import datetime
import uuid
from sqlalchemy import Column, String, DateTime, Integer, Text, ForeignKey, UniqueConstraint
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship
from app.db import Base
class OntologyScene(Base):
"""本体场景表 - 用于存储本体场景下不同的类型信息"""
__tablename__ = "ontology_scene"
__table_args__ = (
UniqueConstraint('workspace_id', 'scene_name', name='uq_workspace_scene_name'),
)
# 主键
scene_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True, comment="场景ID")
# 场景信息
scene_name = Column(String(200), nullable=False, comment="场景名称")
scene_description = Column(Text, nullable=True, comment="场景描述")
# 外键:关联到工作空间
workspace_id = Column(UUID(as_uuid=True), ForeignKey("workspaces.id", ondelete="CASCADE"), nullable=False, index=True, comment="所属工作空间ID")
# 时间戳
created_at = Column(DateTime, default=datetime.datetime.now, nullable=False, comment="创建时间")
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now, nullable=False, comment="更新时间")
# 关系:一个场景可以有多个类型
classes = relationship("OntologyClass", back_populates="scene", cascade="all, delete-orphan")
def __repr__(self):
return f"<OntologyScene(id={self.scene_id}, name={self.scene_name})>"

View File

@@ -0,0 +1,404 @@
# -*- coding: utf-8 -*-
"""本体类型Repository层
本模块提供本体类型的数据访问层实现。
Classes:
OntologyClassRepository: 本体类型数据访问类
"""
import logging
from typing import List, Optional
from uuid import UUID
from sqlalchemy.orm import Session, joinedload
from app.core.logging_config import get_db_logger
from app.models.ontology_class import OntologyClass
from app.models.ontology_scene import OntologyScene
logger = get_db_logger()
class OntologyClassRepository:
"""本体类型Repository
提供本体类型的CRUD操作和权限检查。
Attributes:
db: SQLAlchemy数据库会话
"""
def __init__(self, db: Session):
"""初始化Repository
Args:
db: SQLAlchemy数据库会话
"""
self.db = db
def create(self, class_data: dict, scene_id: UUID) -> OntologyClass:
"""创建本体类型
Args:
class_data: 类型数据字典包含class_name和class_description
scene_id: 所属场景ID
Returns:
OntologyClass: 创建的类型对象
Raises:
Exception: 数据库操作失败
Examples:
>>> repo = OntologyClassRepository(db)
>>> ontology_class = repo.create(
... {"class_name": "患者", "class_description": "描述"},
... scene_id
... )
"""
try:
logger.info(
f"Creating ontology class - "
f"name={class_data.get('class_name')}, "
f"scene_id={scene_id}"
)
ontology_class = OntologyClass(
class_name=class_data.get("class_name"),
class_description=class_data.get("class_description"),
scene_id=scene_id
)
self.db.add(ontology_class)
self.db.flush() # 获取ID但不提交
logger.info(
f"Ontology class created successfully - "
f"class_id={ontology_class.class_id}"
)
return ontology_class
except Exception as e:
logger.error(
f"Failed to create ontology class: {str(e)}",
exc_info=True
)
raise
def get_by_id(self, class_id: UUID) -> Optional[OntologyClass]:
"""根据ID获取类型
Args:
class_id: 类型ID
Returns:
Optional[OntologyClass]: 类型对象不存在则返回None
Examples:
>>> repo = OntologyClassRepository(db)
>>> ontology_class = repo.get_by_id(class_id)
"""
try:
logger.debug(f"Getting ontology class by ID: {class_id}")
ontology_class = self.db.query(OntologyClass).filter(
OntologyClass.class_id == class_id
).first()
if ontology_class:
logger.debug(f"Ontology class found: {class_id}")
else:
logger.debug(f"Ontology class not found: {class_id}")
return ontology_class
except Exception as e:
logger.error(
f"Failed to get ontology class by ID: {str(e)}",
exc_info=True
)
raise
def get_by_name(self, class_name: str, scene_id: UUID) -> Optional[OntologyClass]:
"""根据类型名称和场景ID获取类型精确匹配
Args:
class_name: 类型名称
scene_id: 场景ID
Returns:
Optional[OntologyClass]: 类型对象不存在则返回None
Examples:
>>> repo = OntologyClassRepository(db)
>>> ontology_class = repo.get_by_name("患者", scene_id)
"""
try:
logger.debug(f"Getting ontology class by name: {class_name}, scene_id: {scene_id}")
ontology_class = self.db.query(OntologyClass).filter(
OntologyClass.class_name == class_name,
OntologyClass.scene_id == scene_id
).first()
if ontology_class:
logger.debug(f"Ontology class found: {class_name}")
else:
logger.debug(f"Ontology class not found: {class_name}")
return ontology_class
except Exception as e:
logger.error(
f"Failed to get ontology class by name: {str(e)}",
exc_info=True
)
raise
def search_by_name(self, keyword: str, scene_id: UUID) -> List[OntologyClass]:
"""根据关键词模糊搜索类型
使用 LIKE 进行模糊匹配,支持中文和英文。
Args:
keyword: 搜索关键词
scene_id: 场景ID
Returns:
List[OntologyClass]: 匹配的类型列表
Examples:
>>> repo = OntologyClassRepository(db)
>>> classes = repo.search_by_name("患者", scene_id)
"""
try:
logger.debug(
f"Searching ontology classes by keyword - "
f"keyword={keyword}, scene_id={scene_id}"
)
# 使用 ilike 进行不区分大小写的模糊匹配
classes = self.db.query(OntologyClass).filter(
OntologyClass.class_name.ilike(f"%{keyword}%"),
OntologyClass.scene_id == scene_id
).order_by(
OntologyClass.created_at.desc()
).all()
logger.info(
f"Found {len(classes)} ontology classes matching keyword '{keyword}' "
f"in scene {scene_id}"
)
return classes
except Exception as e:
logger.error(
f"Failed to search ontology classes by keyword: {str(e)}",
exc_info=True
)
raise
def get_by_scene(self, scene_id: UUID) -> List[OntologyClass]:
"""获取场景下的所有类型
按创建时间倒序排列。
Args:
scene_id: 场景ID
Returns:
List[OntologyClass]: 类型列表
Examples:
>>> repo = OntologyClassRepository(db)
>>> classes = repo.get_by_scene(scene_id)
"""
try:
logger.debug(f"Getting ontology classes by scene: {scene_id}")
classes = self.db.query(OntologyClass).filter(
OntologyClass.scene_id == scene_id
).order_by(
OntologyClass.created_at.desc()
).all()
logger.info(
f"Found {len(classes)} ontology classes in scene {scene_id}"
)
return classes
except Exception as e:
logger.error(
f"Failed to get ontology classes by scene: {str(e)}",
exc_info=True
)
raise
def update(self, class_id: UUID, update_data: dict) -> Optional[OntologyClass]:
"""更新类型信息
Args:
class_id: 类型ID
update_data: 更新数据字典
Returns:
Optional[OntologyClass]: 更新后的类型对象不存在则返回None
Raises:
Exception: 数据库操作失败
Examples:
>>> repo = OntologyClassRepository(db)
>>> ontology_class = repo.update(
... class_id,
... {"class_name": "新名称"}
... )
"""
try:
logger.info(f"Updating ontology class: {class_id}")
ontology_class = self.get_by_id(class_id)
if not ontology_class:
logger.warning(f"Ontology class not found for update: {class_id}")
return None
# 更新字段
if "class_name" in update_data and update_data["class_name"] is not None:
ontology_class.class_name = update_data["class_name"]
if "class_description" in update_data:
ontology_class.class_description = update_data["class_description"]
self.db.flush()
logger.info(f"Ontology class updated successfully: {class_id}")
return ontology_class
except Exception as e:
logger.error(
f"Failed to update ontology class: {str(e)}",
exc_info=True
)
raise
def delete(self, class_id: UUID) -> bool:
"""删除类型
Args:
class_id: 类型ID
Returns:
bool: 删除成功返回True类型不存在返回False
Raises:
Exception: 数据库操作失败
Examples:
>>> repo = OntologyClassRepository(db)
>>> success = repo.delete(class_id)
"""
try:
logger.info(f"Deleting ontology class: {class_id}")
ontology_class = self.get_by_id(class_id)
if not ontology_class:
logger.warning(f"Ontology class not found for delete: {class_id}")
return False
self.db.delete(ontology_class)
self.db.flush()
logger.info(f"Ontology class deleted successfully: {class_id}")
return True
except Exception as e:
logger.error(
f"Failed to delete ontology class: {str(e)}",
exc_info=True
)
raise
def check_ownership(self, class_id: UUID, workspace_id: UUID) -> bool:
"""检查类型是否属于指定工作空间(通过场景关联)
Args:
class_id: 类型ID
workspace_id: 工作空间ID
Returns:
bool: 属于返回True否则返回False
Examples:
>>> repo = OntologyClassRepository(db)
>>> is_owner = repo.check_ownership(class_id, workspace_id)
"""
try:
logger.debug(
f"Checking class ownership - "
f"class_id={class_id}, workspace_id={workspace_id}"
)
count = self.db.query(OntologyClass).join(
OntologyScene,
OntologyClass.scene_id == OntologyScene.scene_id
).filter(
OntologyClass.class_id == class_id,
OntologyScene.workspace_id == workspace_id
).count()
is_owner = count > 0
logger.debug(
f"Class ownership check result: {is_owner} - "
f"class_id={class_id}"
)
return is_owner
except Exception as e:
logger.error(
f"Failed to check class ownership: {str(e)}",
exc_info=True
)
raise
def get_scene_id_by_class(self, class_id: UUID) -> Optional[UUID]:
"""根据类型ID获取所属场景ID
Args:
class_id: 类型ID
Returns:
Optional[UUID]: 场景ID类型不存在则返回None
Examples:
>>> repo = OntologyClassRepository(db)
>>> scene_id = repo.get_scene_id_by_class(class_id)
"""
try:
logger.debug(f"Getting scene ID by class: {class_id}")
ontology_class = self.get_by_id(class_id)
if not ontology_class:
logger.debug(f"Class not found: {class_id}")
return None
logger.debug(
f"Found scene ID: {ontology_class.scene_id} for class: {class_id}"
)
return ontology_class.scene_id
except Exception as e:
logger.error(
f"Failed to get scene ID by class: {str(e)}",
exc_info=True
)
raise

View File

@@ -0,0 +1,394 @@
# -*- coding: utf-8 -*-
"""本体场景Repository层
本模块提供本体场景的数据访问层实现。
Classes:
OntologySceneRepository: 本体场景数据访问类
"""
import logging
from typing import List, Optional
from uuid import UUID
from sqlalchemy.orm import Session, joinedload
from app.core.logging_config import get_db_logger
from app.models.ontology_scene import OntologyScene
logger = get_db_logger()
class OntologySceneRepository:
"""本体场景Repository
提供本体场景的CRUD操作和权限检查。
Attributes:
db: SQLAlchemy数据库会话
"""
def __init__(self, db: Session):
"""初始化Repository
Args:
db: SQLAlchemy数据库会话
"""
self.db = db
def create(self, scene_data: dict, workspace_id: UUID) -> OntologyScene:
"""创建本体场景
Args:
scene_data: 场景数据字典包含scene_name和scene_description
workspace_id: 所属工作空间ID
Returns:
OntologyScene: 创建的场景对象
Raises:
Exception: 数据库操作失败
Examples:
>>> repo = OntologySceneRepository(db)
>>> scene = repo.create(
... {"scene_name": "医疗场景", "scene_description": "描述"},
... workspace_id
... )
"""
try:
logger.info(
f"Creating ontology scene - "
f"name={scene_data.get('scene_name')}, "
f"workspace_id={workspace_id}"
)
scene = OntologyScene(
scene_name=scene_data.get("scene_name"),
scene_description=scene_data.get("scene_description"),
workspace_id=workspace_id
)
self.db.add(scene)
self.db.flush() # 获取ID但不提交
logger.info(
f"Ontology scene created successfully - "
f"scene_id={scene.scene_id}"
)
return scene
except Exception as e:
logger.error(
f"Failed to create ontology scene: {str(e)}",
exc_info=True
)
raise
def get_by_id(self, scene_id: UUID) -> Optional[OntologyScene]:
"""根据ID获取场景
Args:
scene_id: 场景ID
Returns:
Optional[OntologyScene]: 场景对象不存在则返回None
Examples:
>>> repo = OntologySceneRepository(db)
>>> scene = repo.get_by_id(scene_id)
"""
try:
logger.debug(f"Getting ontology scene by ID: {scene_id}")
scene = self.db.query(OntologyScene).filter(
OntologyScene.scene_id == scene_id
).first()
if scene:
logger.debug(f"Ontology scene found: {scene_id}")
else:
logger.debug(f"Ontology scene not found: {scene_id}")
return scene
except Exception as e:
logger.error(
f"Failed to get ontology scene by ID: {str(e)}",
exc_info=True
)
raise
def get_by_name(self, scene_name: str, workspace_id: UUID) -> Optional[OntologyScene]:
"""根据场景名称和工作空间ID获取场景精确匹配
Args:
scene_name: 场景名称
workspace_id: 工作空间ID
Returns:
Optional[OntologyScene]: 场景对象不存在则返回None
Examples:
>>> repo = OntologySceneRepository(db)
>>> scene = repo.get_by_name("医疗场景", workspace_id)
"""
try:
logger.debug(
f"Getting ontology scene by name - "
f"scene_name={scene_name}, workspace_id={workspace_id}"
)
scene = self.db.query(OntologyScene).options(
joinedload(OntologyScene.classes)
).filter(
OntologyScene.scene_name == scene_name,
OntologyScene.workspace_id == workspace_id
).first()
if scene:
logger.debug(f"Ontology scene found: {scene_name}")
else:
logger.debug(f"Ontology scene not found: {scene_name}")
return scene
except Exception as e:
logger.error(
f"Failed to get ontology scene by name: {str(e)}",
exc_info=True
)
raise
def search_by_name(self, keyword: str, workspace_id: UUID) -> List[OntologyScene]:
"""根据关键词模糊搜索场景
使用 LIKE 进行模糊匹配,支持中文和英文。
Args:
keyword: 搜索关键词
workspace_id: 工作空间ID
Returns:
List[OntologyScene]: 匹配的场景列表
Examples:
>>> repo = OntologySceneRepository(db)
>>> scenes = repo.search_by_name("医疗", workspace_id)
"""
try:
logger.debug(
f"Searching ontology scenes by keyword - "
f"keyword={keyword}, workspace_id={workspace_id}"
)
# 使用 ilike 进行不区分大小写的模糊匹配
scenes = self.db.query(OntologyScene).options(
joinedload(OntologyScene.classes)
).filter(
OntologyScene.scene_name.ilike(f"%{keyword}%"),
OntologyScene.workspace_id == workspace_id
).order_by(
OntologyScene.updated_at.desc()
).all()
logger.info(
f"Found {len(scenes)} ontology scenes matching keyword '{keyword}' "
f"in workspace {workspace_id}"
)
return scenes
except Exception as e:
logger.error(
f"Failed to search ontology scenes by keyword: {str(e)}",
exc_info=True
)
raise
def get_by_workspace(self, workspace_id: UUID, page: Optional[int] = None, page_size: Optional[int] = None) -> tuple:
"""获取工作空间下的所有场景(支持分页)
使用joinedload预加载classes关系以统计数量。
Args:
workspace_id: 工作空间ID
page: 页码可选从1开始
page_size: 每页数量(可选)
Returns:
tuple: (场景列表, 总数量)
Examples:
>>> repo = OntologySceneRepository(db)
>>> scenes, total = repo.get_by_workspace(workspace_id)
>>> scenes, total = repo.get_by_workspace(workspace_id, page=1, page_size=10)
"""
try:
logger.debug(f"Getting ontology scenes by workspace: {workspace_id}, page={page}, page_size={page_size}")
# 构建基础查询
query = self.db.query(OntologyScene).options(
joinedload(OntologyScene.classes)
).filter(
OntologyScene.workspace_id == workspace_id
).order_by(
OntologyScene.updated_at.desc()
)
# 获取总数
total = query.count()
# 如果提供了分页参数,应用分页
if page is not None and page_size is not None:
offset = (page - 1) * page_size
query = query.offset(offset).limit(page_size)
logger.debug(f"Applying pagination: offset={offset}, limit={page_size}")
scenes = query.all()
logger.info(
f"Found {len(scenes)} ontology scenes (total: {total}) in workspace {workspace_id}"
)
return scenes, total
except Exception as e:
logger.error(
f"Failed to get ontology scenes by workspace: {str(e)}",
exc_info=True
)
raise
def update(self, scene_id: UUID, update_data: dict) -> Optional[OntologyScene]:
"""更新场景信息
Args:
scene_id: 场景ID
update_data: 更新数据字典
Returns:
Optional[OntologyScene]: 更新后的场景对象不存在则返回None
Raises:
Exception: 数据库操作失败
Examples:
>>> repo = OntologySceneRepository(db)
>>> scene = repo.update(
... scene_id,
... {"scene_name": "新名称"}
... )
"""
try:
logger.info(f"Updating ontology scene: {scene_id}")
scene = self.get_by_id(scene_id)
if not scene:
logger.warning(f"Ontology scene not found for update: {scene_id}")
return None
# 更新字段
if "scene_name" in update_data and update_data["scene_name"] is not None:
scene.scene_name = update_data["scene_name"]
if "scene_description" in update_data:
scene.scene_description = update_data["scene_description"]
self.db.flush()
logger.info(f"Ontology scene updated successfully: {scene_id}")
return scene
except Exception as e:
logger.error(
f"Failed to update ontology scene: {str(e)}",
exc_info=True
)
raise
def delete(self, scene_id: UUID) -> bool:
"""删除场景(级联删除类型)
依赖数据库级联删除配置ondelete="CASCADE")。
Args:
scene_id: 场景ID
Returns:
bool: 删除成功返回True场景不存在返回False
Raises:
Exception: 数据库操作失败
Examples:
>>> repo = OntologySceneRepository(db)
>>> success = repo.delete(scene_id)
"""
try:
logger.info(f"Deleting ontology scene: {scene_id}")
scene = self.get_by_id(scene_id)
if not scene:
logger.warning(f"Ontology scene not found for delete: {scene_id}")
return False
self.db.delete(scene)
self.db.flush()
logger.info(
f"Ontology scene deleted successfully (cascade): {scene_id}"
)
return True
except Exception as e:
logger.error(
f"Failed to delete ontology scene: {str(e)}",
exc_info=True
)
raise
def check_ownership(self, scene_id: UUID, workspace_id: UUID) -> bool:
"""检查场景是否属于指定工作空间
Args:
scene_id: 场景ID
workspace_id: 工作空间ID
Returns:
bool: 属于返回True否则返回False
Examples:
>>> repo = OntologySceneRepository(db)
>>> is_owner = repo.check_ownership(scene_id, workspace_id)
"""
try:
logger.debug(
f"Checking scene ownership - "
f"scene_id={scene_id}, workspace_id={workspace_id}"
)
count = self.db.query(OntologyScene).filter(
OntologyScene.scene_id == scene_id,
OntologyScene.workspace_id == workspace_id
).count()
is_owner = count > 0
logger.debug(
f"Scene ownership check result: {is_owner} - "
f"scene_id={scene_id}"
)
return is_owner
except Exception as e:
logger.error(
f"Failed to check scene ownership: {str(e)}",
exc_info=True
)
raise

View File

@@ -0,0 +1,461 @@
"""本体提取API的请求和响应模型
本模块定义了本体提取系统的所有API请求和响应的Pydantic模型。
Classes:
ExtractionRequest: 本体提取请求模型
ExtractionResponse: 本体提取响应模型
ExportRequest: OWL文件导出请求模型
ExportResponse: OWL文件导出响应模型
OntologyResultResponse: 本体提取结果响应模型(带毫秒时间戳)
SceneCreateRequest: 场景创建请求模型
SceneUpdateRequest: 场景更新请求模型
SceneResponse: 场景响应模型
SceneListResponse: 场景列表响应模型
ClassCreateRequest: 类型创建请求模型
ClassUpdateRequest: 类型更新请求模型
ClassResponse: 类型响应模型
ClassListResponse: 类型列表响应模型
"""
from typing import List, Optional
import datetime
from uuid import UUID
from pydantic import BaseModel, Field, field_serializer, ConfigDict
from app.core.memory.models.ontology_models import OntologyClass
class ExtractionRequest(BaseModel):
"""本体提取请求模型
用于POST /api/ontology/extract端点的请求体。
Attributes:
scenario: 场景描述文本,不能为空
domain: 可选的领域提示(如Healthcare, Education等)
llm_id: LLM模型ID,必须提供
scene_id: 场景ID,必须提供,用于将提取的类保存到指定场景
Examples:
>>> request = ExtractionRequest(
... scenario="医院管理患者记录...",
... domain="Healthcare",
... llm_id="550e8400-e29b-41d4-a716-446655440000",
... scene_id="660e8400-e29b-41d4-a716-446655440000"
... )
"""
scenario: str = Field(..., description="场景描述文本", min_length=1)
domain: Optional[str] = Field(None, description="可选的领域提示")
llm_id: str = Field(..., description="LLM模型ID")
scene_id: UUID = Field(..., description="场景ID,用于将提取的类保存到指定场景")
class ExtractionResponse(BaseModel):
"""本体提取响应模型
用于POST /api/ontology/extract端点的响应体。
Attributes:
classes: 提取的本体类列表
domain: 识别的领域
extracted_count: 提取的类数量
Examples:
>>> response = ExtractionResponse(
... classes=[...],
... domain="Healthcare",
... extracted_count=7
... )
"""
classes: List[OntologyClass] = Field(default_factory=list, description="提取的本体类列表")
domain: str = Field(..., description="识别的领域")
extracted_count: int = Field(..., description="提取的类数量")
class ExportRequest(BaseModel):
"""OWL文件导出请求模型
用于POST /api/ontology/export端点的请求体。
Attributes:
classes: 要导出的本体类列表
format: 导出格式,可选值: rdfxml, turtle, ntriples, json
include_metadata: 是否包含完整的OWL元数据(命名空间等),默认True
Examples:
>>> request = ExportRequest(
... classes=[...],
... format="rdfxml",
... include_metadata=True
... )
"""
classes: List[OntologyClass] = Field(..., description="要导出的本体类列表", min_length=1)
format: str = Field("rdfxml", description="导出格式: rdfxml, turtle, ntriples, json")
include_metadata: bool = Field(True, description="是否包含完整的OWL元数据")
class ExportResponse(BaseModel):
"""OWL文件导出响应模型
用于POST /api/ontology/export端点的响应体。
Attributes:
owl_content: OWL文件内容
format: 导出格式
classes_count: 导出的类数量
Examples:
>>> response = ExportResponse(
... owl_content="<?xml version='1.0'?>...",
... format="rdfxml",
... classes_count=7
... )
"""
owl_content: str = Field(..., description="OWL文件内容")
format: str = Field(..., description="导出格式")
classes_count: int = Field(..., description="导出的类数量")
class OntologyResultResponse(BaseModel):
"""本体提取结果响应模型
用于返回数据库中存储的提取结果,时间戳为毫秒级。
Attributes:
id: 结果ID (UUID)
scenario: 场景描述文本
domain: 领域
classes_json: 提取的本体类数据(JSON格式)
extracted_count: 提取的类数量
user_id: 用户ID
created_at: 创建时间(毫秒时间戳)
Examples:
>>> response = OntologyResultResponse(
... id=uuid.uuid4(),
... scenario="医院管理患者记录...",
... domain="Healthcare",
... classes_json={"classes": [...]},
... extracted_count=7,
... user_id=123,
... created_at=datetime.now()
... )
"""
id: UUID = Field(..., description="结果ID")
scenario: str = Field(..., description="场景描述文本")
domain: Optional[str] = Field(None, description="领域")
classes_json: dict = Field(..., description="提取的本体类数据(JSON格式)")
extracted_count: int = Field(..., description="提取的类数量")
user_id: Optional[int] = Field(None, description="用户ID")
created_at: datetime.datetime = Field(..., description="创建时间")
@field_serializer("created_at", when_used="json")
def _serialize_created_at(self, dt: datetime.datetime):
"""将创建时间序列化为毫秒时间戳"""
return int(dt.timestamp() * 1000) if dt else None
class Config:
from_attributes = True
# ==================== 本体场景相关 Schema ====================
class SceneCreateRequest(BaseModel):
"""场景创建请求模型
用于创建新的本体场景。
Attributes:
scene_name: 场景名称必填1-200字符
scene_description: 场景描述,可选
Examples:
>>> request = SceneCreateRequest(
... scene_name="医疗场景",
... scene_description="用于医疗领域的本体建模"
... )
"""
scene_name: str = Field(..., min_length=1, max_length=200, description="场景名称")
scene_description: Optional[str] = Field(None, description="场景描述")
class SceneUpdateRequest(BaseModel):
"""场景更新请求模型
用于更新已有本体场景信息。
Attributes:
scene_name: 场景名称可选1-200字符
scene_description: 场景描述,可选
Examples:
>>> request = SceneUpdateRequest(
... scene_name="更新后的场景名称",
... scene_description="更新后的描述"
... )
"""
scene_name: Optional[str] = Field(None, min_length=1, max_length=200, description="场景名称")
scene_description: Optional[str] = Field(None, description="场景描述")
class SceneResponse(BaseModel):
"""场景响应模型
用于返回本体场景信息。
Attributes:
scene_id: 场景ID
scene_name: 场景名称
scene_description: 场景描述
type_num: 类型数量
workspace_id: 所属工作空间ID
created_at: 创建时间(毫秒时间戳)
updated_at: 更新时间(毫秒时间戳)
classes_count: 类型数量
Examples:
>>> response = SceneResponse(
... scene_id=uuid.uuid4(),
... scene_name="医疗场景",
... scene_description="用于医疗领域的本体建模",
... type_num=0,
... workspace_id=uuid.uuid4(),
... created_at=datetime.now(),
... updated_at=datetime.now(),
... classes_count=5
... )
"""
scene_id: UUID = Field(..., description="场景ID")
scene_name: str = Field(..., description="场景名称")
scene_description: Optional[str] = Field(None, description="场景描述")
type_num: int = Field(..., description="类型数量")
entity_type: Optional[List[str]] = Field(None, description="实体类型列表最多3个class_name")
workspace_id: UUID = Field(..., description="所属工作空间ID")
created_at: datetime.datetime = Field(..., description="创建时间(毫秒时间戳)")
updated_at: datetime.datetime = Field(..., description="更新时间(毫秒时间戳)")
classes_count: int = Field(0, description="类型数量")
@field_serializer("created_at", when_used="json")
def _serialize_created_at(self, dt: datetime.datetime):
"""将创建时间序列化为毫秒时间戳"""
return int(dt.timestamp() * 1000) if dt else None
@field_serializer("updated_at", when_used="json")
def _serialize_updated_at(self, dt: datetime.datetime):
"""将更新时间序列化为毫秒时间戳"""
return int(dt.timestamp() * 1000) if dt else None
model_config = ConfigDict(from_attributes=True)
class PaginationInfo(BaseModel):
"""分页信息模型
Attributes:
page: 当前页码
pagesize: 每页数量
total: 总数量
hasnext: 是否有下一页
"""
page: int = Field(..., description="当前页码")
pagesize: int = Field(..., description="每页数量")
total: int = Field(..., description="总数量")
hasnext: bool = Field(..., description="是否有下一页")
class SceneListResponse(BaseModel):
"""场景列表响应模型(支持分页)
用于返回本体场景列表。
Attributes:
items: 场景列表
page: 分页信息(可选,分页时返回)
Examples:
>>> # 不分页
>>> response = SceneListResponse(
... items=[scene1, scene2]
... )
>>> # 分页
>>> response = SceneListResponse(
... items=[scene1, scene2, ...],
... page=PaginationInfo(page=1, pagesize=100, total=150, hasnext=True)
... )
"""
items: List[SceneResponse] = Field(..., description="场景列表")
page: Optional[PaginationInfo] = Field(None, description="分页信息")
# ==================== 本体类型相关 Schema ====================
class ClassItem(BaseModel):
"""单个类型信息模型
Attributes:
class_name: 类型名称必填1-200字符
class_description: 类型描述,可选
Examples:
>>> item = ClassItem(
... class_name="患者",
... class_description="医院患者信息"
... )
"""
class_name: str = Field(..., min_length=1, max_length=200, description="类型名称")
class_description: Optional[str] = Field(None, description="类型描述")
class ClassCreateRequest(BaseModel):
"""类型创建请求模型(统一使用列表形式)
通过列表中元素数量决定创建模式:
- 列表包含 1 个元素:单个创建
- 列表包含多个元素:批量创建
Attributes:
scene_id: 所属场景ID必填
classes: 类型列表,必填,至少包含 1 个元素
Examples:
# 单个创建(列表中 1 个元素)
>>> request = ClassCreateRequest(
... scene_id=uuid.uuid4(),
... classes=[
... ClassItem(class_name="患者", class_description="医院患者信息")
... ]
... )
# 批量创建(列表中多个元素)
>>> request = ClassCreateRequest(
... scene_id=uuid.uuid4(),
... classes=[
... ClassItem(class_name="患者", class_description="医院患者信息"),
... ClassItem(class_name="医生", class_description="医院医生信息"),
... ClassItem(class_name="药品", class_description="医院药品信息")
... ]
... )
"""
scene_id: UUID = Field(..., description="所属场景ID")
classes: List[ClassItem] = Field(..., min_length=1, description="类型列表,至少包含 1 个元素")
class ClassUpdateRequest(BaseModel):
"""类型更新请求模型
用于更新已有本体类型信息。
Attributes:
class_name: 类型名称可选1-200字符
class_description: 类型描述,可选
Examples:
>>> request = ClassUpdateRequest(
... class_name="更新后的类型名称",
... class_description="更新后的描述"
... )
"""
class_name: Optional[str] = Field(None, min_length=1, max_length=200, description="类型名称")
class_description: Optional[str] = Field(None, description="类型描述")
class ClassResponse(BaseModel):
"""类型响应模型
用于返回本体类型信息。
Attributes:
class_id: 类型ID
class_name: 类型名称
class_description: 类型描述
scene_id: 所属场景ID
created_at: 创建时间(毫秒时间戳)
updated_at: 更新时间(毫秒时间戳)
Examples:
>>> response = ClassResponse(
... class_id=uuid.uuid4(),
... class_name="患者",
... class_description="医院患者信息",
... scene_id=uuid.uuid4(),
... created_at=datetime.now(),
... updated_at=datetime.now()
... )
"""
class_id: UUID = Field(..., description="类型ID")
class_name: str = Field(..., description="类型名称")
class_description: Optional[str] = Field(None, description="类型描述")
scene_id: UUID = Field(..., description="所属场景ID")
created_at: datetime.datetime = Field(..., description="创建时间(毫秒时间戳)")
updated_at: datetime.datetime = Field(..., description="更新时间(毫秒时间戳)")
@field_serializer("created_at", when_used="json")
def _serialize_created_at(self, dt: datetime.datetime):
"""将创建时间序列化为毫秒时间戳"""
return int(dt.timestamp() * 1000) if dt else None
@field_serializer("updated_at", when_used="json")
def _serialize_updated_at(self, dt: datetime.datetime):
"""将更新时间序列化为毫秒时间戳"""
return int(dt.timestamp() * 1000) if dt else None
model_config = ConfigDict(from_attributes=True)
class ClassBatchCreateResponse(BaseModel):
"""批量创建类型响应模型
用于返回批量创建的结果统计和详情。
Attributes:
total: 总共尝试创建的数量
success_count: 成功创建的数量
failed_count: 失败的数量
items: 成功创建的类型列表
errors: 失败的错误信息列表(可选)
Examples:
>>> response = ClassBatchCreateResponse(
... total=3,
... success_count=2,
... failed_count=1,
... items=[class1, class2],
... errors=["创建类型 '药品' 失败: 类型名称已存在"]
... )
"""
total: int = Field(..., description="总共尝试创建的数量")
success_count: int = Field(..., description="成功创建的数量")
failed_count: int = Field(0, description="失败的数量")
items: List[ClassResponse] = Field(..., description="成功创建的类型列表")
errors: Optional[List[str]] = Field(None, description="失败的错误信息列表")
class ClassListResponse(BaseModel):
"""类型列表响应模型
用于返回本体类型列表。
Attributes:
total: 总数量
scene_id: 所属场景ID
scene_name: 场景名称
scene_description: 场景描述
items: 类型列表
Examples:
>>> response = ClassListResponse(
... total=3,
... scene_id=uuid.uuid4(),
... scene_name="医疗场景",
... scene_description="用于医疗领域的本体建模",
... items=[class1, class2, class3]
... )
"""
total: int = Field(..., description="总数量")
scene_id: UUID = Field(..., description="所属场景ID")
scene_name: str = Field(..., description="场景名称")
scene_description: Optional[str] = Field(None, description="场景描述")
items: List[ClassResponse] = Field(..., description="类型列表")

File diff suppressed because it is too large Load Diff

View File

@@ -140,6 +140,7 @@ dependencies = [
"oss2>=2.19.1",
"flower>=2.0.1",
"aiofiles>=23.0.0",
"owlready2>=0.46",
]
[tool.pytest.ini_options]