From 696b0475a836e31e931e668686ee93bfde5e8156 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=90=E5=8A=9B=E9=BD=90?= <162269739+lanceyq@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:16:39 +0800 Subject: [PATCH] Feature/ontology class clean (#249) * [add] Complete ontology engineering feature implementation * [add] Add ontology feature integration and validation utilities * [add] Add OWL validator and validation utilities * [fix] Add missing render_ontology_extraction_prompt function * [fix]Add dependencies, fix functionality --- api/app/controllers/__init__.py | 2 + api/app/controllers/ontology_controller.py | 964 ++++++++++++++ .../controllers/ontology_secondary_routes.py | 611 +++++++++ api/app/core/memory/models/__init__.py | 9 + api/app/core/memory/models/ontology_models.py | 135 ++ .../knowledge_extraction/__init__.py | 1 + .../ontology_extraction.py | 482 +++++++ .../core/memory/utils/prompt/prompt_utils.py | 39 + .../prompt/prompts/extract_ontology.jinja2 | 210 +++ .../core/memory/utils/validation/__init__.py | 10 + .../utils/validation/ontology_validator.py | 268 ++++ .../memory/utils/validation/owl_validator.py | 585 +++++++++ api/app/models/__init__.py | 4 + api/app/models/ontology_class.py | 40 + api/app/models/ontology_scene.py | 43 + .../repositories/ontology_class_repository.py | 404 ++++++ .../repositories/ontology_scene_repository.py | 394 ++++++ api/app/schemas/ontology_schemas.py | 461 +++++++ api/app/services/ontology_service.py | 1162 +++++++++++++++++ api/pyproject.toml | 1 + 20 files changed, 5825 insertions(+) create mode 100644 api/app/controllers/ontology_controller.py create mode 100644 api/app/controllers/ontology_secondary_routes.py create mode 100644 api/app/core/memory/models/ontology_models.py create mode 100644 api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/ontology_extraction.py create mode 100644 api/app/core/memory/utils/prompt/prompts/extract_ontology.jinja2 create mode 100644 
api/app/core/memory/utils/validation/__init__.py create mode 100644 api/app/core/memory/utils/validation/ontology_validator.py create mode 100644 api/app/core/memory/utils/validation/owl_validator.py create mode 100644 api/app/models/ontology_class.py create mode 100644 api/app/models/ontology_scene.py create mode 100644 api/app/repositories/ontology_class_repository.py create mode 100644 api/app/repositories/ontology_scene_repository.py create mode 100644 api/app/schemas/ontology_schemas.py create mode 100644 api/app/services/ontology_service.py diff --git a/api/app/controllers/__init__.py b/api/app/controllers/__init__.py index 3701f14d..765ef967 100644 --- a/api/app/controllers/__init__.py +++ b/api/app/controllers/__init__.py @@ -45,6 +45,7 @@ from . import ( home_page_controller, memory_perceptual_controller, memory_working_controller, + ontology_controller, ) # 创建管理端 API 路由器 @@ -90,5 +91,6 @@ manager_router.include_router(implicit_memory_controller.router) manager_router.include_router(memory_perceptual_controller.router) manager_router.include_router(memory_working_controller.router) manager_router.include_router(file_storage_controller.router) +manager_router.include_router(ontology_controller.router) __all__ = ["manager_router"] diff --git a/api/app/controllers/ontology_controller.py b/api/app/controllers/ontology_controller.py new file mode 100644 index 00000000..1cf8e64e --- /dev/null +++ b/api/app/controllers/ontology_controller.py @@ -0,0 +1,964 @@ +"""本体提取API控制器 + +本模块提供本体提取系统的RESTful API端点。 + +Endpoints: + POST /api/memory/ontology/extract - 提取本体类 + POST /api/memory/ontology/export - 导出OWL文件 + POST /api/memory/ontology/scene - 创建本体场景 + PUT /api/memory/ontology/scene/{scene_id} - 更新本体场景 + DELETE /api/memory/ontology/scene/{scene_id} - 删除本体场景 + GET /api/memory/ontology/scene/{scene_id} - 获取单个场景 + GET /api/memory/ontology/scenes - 获取场景列表 + POST /api/memory/ontology/class - 创建本体类型 + PUT /api/memory/ontology/class/{class_id} - 更新本体类型 + DELETE 
/api/memory/ontology/class/{class_id} - 删除本体类型 + GET /api/memory/ontology/class/{class_id} - 获取单个类型 + GET /api/memory/ontology/classes - 获取类型列表 +""" + +import logging +import tempfile +from typing import Dict, Optional + +from fastapi import APIRouter, Depends, HTTPException, Header +from sqlalchemy.orm import Session + +from app.core.error_codes import BizCode +from app.core.logging_config import get_api_logger +from app.core.response_utils import fail, success +from app.db import get_db +from app.dependencies import get_current_user +from app.models.user_model import User +from app.services.memory_base_service import Translation_English +from app.core.memory.models.ontology_models import OntologyClass +from typing import List +from app.schemas.ontology_schemas import ( + ExportRequest, + ExportResponse, + ExtractionRequest, + ExtractionResponse, + SceneCreateRequest, + SceneUpdateRequest, + SceneResponse, + SceneListResponse, + ClassCreateRequest, + ClassUpdateRequest, + ClassResponse, + ClassListResponse, +) +from app.schemas.response_schema import ApiResponse +from app.services.ontology_service import OntologyService +from app.core.memory.llm_tools.openai_client import OpenAIClient +from app.core.memory.utils.validation.owl_validator import OWLValidator +from app.services.model_service import ModelConfigService + + +api_logger = get_api_logger() +logger = logging.getLogger(__name__) + +router = APIRouter( + prefix="/memory/ontology", + tags=["Ontology"], +) + + +async def translate_ontology_classes( + classes: List[OntologyClass], + model_id: str +) -> List[OntologyClass]: + """翻译本体类列表 + + 将本体类的中文字段翻译为英文,包括: + - name_chinese: 中文名称 + - description: 描述 + - examples: 示例列表 + + Args: + classes: 本体类列表 + model_id: LLM模型ID,用于翻译 + + Returns: + List[OntologyClass]: 翻译后的本体类列表 + """ + translated_classes = [] + + for ontology_class in classes: + # 创建类的副本,避免修改原对象 + translated_class = ontology_class.model_copy(deep=True) + + # 翻译 name_chinese 字段 + if 
translated_class.name_chinese: + try: + translated_class.name_chinese = await Translation_English( + model_id, + translated_class.name_chinese + ) + except Exception as e: + logger.warning(f"Failed to translate name_chinese: {e}") + # 保留原文 + + # 翻译 description 字段 + if translated_class.description: + try: + translated_class.description = await Translation_English( + model_id, + translated_class.description + ) + except Exception as e: + logger.warning(f"Failed to translate description: {e}") + # 保留原文 + + # 翻译 examples 列表 + if translated_class.examples: + translated_examples = [] + for example in translated_class.examples: + try: + translated_example = await Translation_English( + model_id, + example + ) + translated_examples.append(translated_example) + except Exception as e: + logger.warning(f"Failed to translate example: {e}") + translated_examples.append(example) # 保留原文 + translated_class.examples = translated_examples + + translated_classes.append(translated_class) + + return translated_classes + + +def _get_ontology_service( + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), + llm_id: str = None +) -> OntologyService: + """获取OntologyService实例的依赖注入函数 + + 指定的llm_id获取LLM配置,创建OpenAIClient和OntologyService实例。 + + Args: + db: 数据库会话 + current_user: 当前用户 + llm_id: 可选的LLM模型ID,如果提供则使用指定模型,否则使用工作空间默认模型 + + Returns: + OntologyService: 本体提取服务实例 + + Raises: + HTTPException: 如果无法获取LLM配置 + """ + try: + import uuid + + # 必须提供llm_id + if not llm_id: + logger.error(f"llm_id is required but not provided - user: {current_user.id}") + raise HTTPException( + status_code=400, + detail="必须提供llm_id参数" + ) + + logger.info(f"Using specified LLM model: {llm_id}") + + # 验证llm_id格式 + try: + model_id = uuid.UUID(llm_id) + except ValueError: + logger.error(f"Invalid llm_id format: {llm_id}") + raise HTTPException( + status_code=400, + detail="无效的LLM模型ID格式" + ) + + # 获取指定的模型配置 + try: + model_config = ModelConfigService.get_model_by_id(db=db, model_id=model_id) + 
except Exception as e: + logger.error(f"Model {llm_id} not found: {str(e)}") + raise HTTPException( + status_code=400, + detail=f"找不到指定的LLM模型: {llm_id}" + ) + + # 检查是否为组合模型 + if hasattr(model_config, 'is_composite') and model_config.is_composite: + logger.error(f"Model {llm_id} is a composite model, which is not supported for ontology extraction") + raise HTTPException( + status_code=400, + detail="本体提取不支持使用组合模型,请选择单个模型" + ) + + # 验证模型配置了API密钥 + if not model_config.api_keys: + logger.error(f"Model {llm_id} has no API key configuration") + raise HTTPException( + status_code=400, + detail="指定的LLM模型没有配置API密钥" + ) + + api_key_config = model_config.api_keys[0] + + logger.info( + f"Using specified model - user: {current_user.id}, " + f"model_id: {llm_id}, model_name: {api_key_config.model_name}" + ) + + # 创建模型配置对象 + from app.core.models.base import RedBearModelConfig + + llm_model_config = RedBearModelConfig( + model_name=api_key_config.model_name, + provider=model_config.provider if hasattr(model_config, 'provider') else "openai", + api_key=api_key_config.api_key, + base_url=api_key_config.api_base, + max_retries=3, + timeout=60.0 + ) + + # 创建OpenAI客户端 + llm_client = OpenAIClient(model_config=llm_model_config) + + # 创建OntologyService + service = OntologyService(llm_client=llm_client, db=db) + + logger.debug( + f"OntologyService created successfully - " + f"user: {current_user.id}, model: {api_key_config.model_name}" + ) + + return service + + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to create OntologyService: {str(e)}", exc_info=True) + raise HTTPException( + status_code=500, + detail=f"创建本体提取服务失败: {str(e)}" + ) + + +@router.post("/extract", response_model=ApiResponse) +async def extract_ontology( + request: ExtractionRequest, + language_type: str = Header(default="zh", alias="X-Language-Type"), + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """提取本体类 + + 从场景描述中提取符合OWL规范的本体类。 + 
提取结果仅返回给前端,不会自动保存到数据库。 + 前端可以从返回结果中选择需要的类型,然后调用 /class 接口创建类型。 + 支持中英文切换,通过 X-Language-Type Header 指定语言。 + + Args: + request: 提取请求,包含scenario、domain、llm_id和scene_id + language_type: 语言类型,'zh'(中文)或 'en'(英文),默认 'zh' + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含提取结果的响应 + + Response format: + { + "code": 200, + "msg": "本体提取成功", + "data": { + "classes": [ + { + "id": "147d9db50b524a9e909e01a753d3acdd", + "name": "Patient", + "name_chinese": "患者", + "description": "在医疗机构中接受诊疗、护理或健康管理的个体", + "examples": ["糖尿病患者", "术后康复患者", "门诊初诊患者"], + "parent_class": null, + "entity_type": "Person", + "domain": "Healthcare" + }, + ... + ], + "domain": "Healthcare", + "extracted_count": 7 + } + } + """ + api_logger.info( + f"Ontology extraction requested by user {current_user.id}, " + f"scenario_length={len(request.scenario)}, " + f"domain={request.domain}, " + f"llm_id={request.llm_id}, " + f"scene_id={request.scene_id}, " + f"language_type={language_type}" + ) + + try: + # 获取当前工作空间ID + workspace_id = current_user.current_workspace_id + if not workspace_id: + api_logger.warning(f"User {current_user.id} has no current workspace") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间") + + # 创建OntologyService实例,传入llm_id + service = _get_ontology_service( + db=db, + current_user=current_user, + llm_id=request.llm_id + ) + + # 调用服务层执行提取,传入scene_id和workspace_id + result = await service.extract_ontology( + scenario=request.scenario, + domain=request.domain, + scene_id=request.scene_id, + workspace_id=workspace_id + ) + + # ===== 新增:翻译逻辑 ===== + # 如果需要英文,则翻译数据 + if language_type != 'zh': + api_logger.info(f"Translating extraction result to English") + + # 翻译 classes 列表 + result.classes = await translate_ontology_classes( + result.classes, + request.llm_id + ) + + # 翻译 domain 字段 + if result.domain: + try: + result.domain = await Translation_English( + request.llm_id, + result.domain + ) + except Exception as e: + logger.warning(f"Failed to translate domain: {e}") + # 保留原文 + 
# ===== 翻译逻辑结束 ===== + + # 构建响应 + response = ExtractionResponse( + classes=result.classes, + domain=result.domain, + extracted_count=len(result.classes) + ) + + api_logger.info( + f"Ontology extraction completed, extracted {len(result.classes)} classes, " + f"saved to scene {request.scene_id}, language={language_type}" + ) + + return success(data=response.model_dump(), msg="本体提取成功") + + except ValueError as e: + # 验证错误 (400) + api_logger.warning(f"Validation error in extraction: {str(e)}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e)) + + except RuntimeError as e: + # 运行时错误 (500) + api_logger.error(f"Runtime error in extraction: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "本体提取失败", str(e)) + + except Exception as e: + # 未知错误 (500) + api_logger.error(f"Unexpected error in extraction: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "本体提取失败", str(e)) + + +@router.post("/export", response_model=ApiResponse) +async def export_owl( + request: ExportRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """导出OWL文件 + + 将提取的本体类导出为OWL文件,支持多种格式。 + 导出操作不需要LLM,只使用OWL验证器和Owlready2库。 + + Args: + request: 导出请求,包含classes、format和include_metadata + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含OWL文件内容的响应 + + Supported formats: + - rdfxml: 标准OWL RDF/XML格式(完整) + - turtle: Turtle格式(可读性好) + - ntriples: N-Triples格式(简单) + - json: JSON格式(简化,只包含类信息) + + Response format: + { + "code": 200, + "msg": "OWL文件导出成功", + "data": { + "owl_content": "...", + "format": "rdfxml", + "classes_count": 7 + } + } + """ + api_logger.info( + f"OWL export requested by user {current_user.id}, " + f"classes_count={len(request.classes)}, " + f"format={request.format}, " + f"include_metadata={request.include_metadata}" + ) + + try: + # 验证格式 + valid_formats = ["rdfxml", "turtle", "ntriples", "json"] + if request.format not in valid_formats: + api_logger.warning(f"Invalid export format: {request.format}") + return fail( 
+ BizCode.BAD_REQUEST, + "不支持的导出格式", + f"format必须是以下之一: {', '.join(valid_formats)}" + ) + + # JSON格式直接导出,不需要OWL验证 + if request.format == "json": + owl_validator = OWLValidator() + owl_content = owl_validator.export_to_owl( + world=None, + format="json", + classes=request.classes + ) + + response = ExportResponse( + owl_content=owl_content, + format=request.format, + classes_count=len(request.classes) + ) + + api_logger.info( + f"JSON export completed, content_length={len(owl_content)}" + ) + + return success(data=response.model_dump(), msg="OWL文件导出成功") + + # 创建临时文件路径 + with tempfile.NamedTemporaryFile( + mode='w', + suffix='.owl', + delete=False + ) as tmp_file: + output_path = tmp_file.name + + # 导出操作不需要LLM,直接使用OWL验证器 + owl_validator = OWLValidator() + + # 验证本体类 + logger.debug("Validating ontology classes") + is_valid, errors, world = owl_validator.validate_ontology_classes( + classes=request.classes, + ) + + if not is_valid: + logger.warning( + f"OWL validation found {len(errors)} issues during export: {errors}" + ) + # 继续导出,但记录警告 + + if not world: + error_msg = "Failed to create OWL world for export" + logger.error(error_msg) + return fail(BizCode.INTERNAL_ERROR, "创建OWL世界失败", error_msg) + + # 导出OWL文件 + logger.info(f"Exporting to {request.format} format") + owl_content = owl_validator.export_to_owl( + world=world, + output_path=output_path, + format=request.format, + classes=request.classes + ) + + # 构建响应 + response = ExportResponse( + owl_content=owl_content, + format=request.format, + classes_count=len(request.classes) + ) + + api_logger.info( + f"OWL export completed, format={request.format}, " + f"content_length={len(owl_content)}" + ) + + return success(data=response.model_dump(), msg="OWL文件导出成功") + + except ValueError as e: + # 验证错误 (400) + api_logger.warning(f"Validation error in export: {str(e)}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e)) + + except RuntimeError as e: + # 运行时错误 (500) + api_logger.error(f"Runtime error in export: {str(e)}", 
exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "OWL文件导出失败", str(e)) + + except Exception as e: + # 未知错误 (500) + api_logger.error(f"Unexpected error in export: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "OWL文件导出失败", str(e)) + + +# ==================== 本体场景管理接口 ==================== + +@router.post("/scene", response_model=ApiResponse) +async def create_scene( + request: SceneCreateRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """创建本体场景 + + 在当前工作空间下创建新的本体场景。 + + Args: + request: 场景创建请求 + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含创建的场景信息 + """ + api_logger.info( + f"Scene creation requested by user {current_user.id}, " + f"name={request.scene_name}" + ) + + try: + # 获取当前工作空间ID + workspace_id = current_user.current_workspace_id + if not workspace_id: + api_logger.warning(f"User {current_user.id} has no current workspace") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间") + + # 创建OntologyService实例(不需要LLM) + from app.core.memory.llm_tools.openai_client import OpenAIClient + from app.core.models.base import RedBearModelConfig + + # 创建一个空的LLM配置(场景管理不需要LLM) + dummy_config = RedBearModelConfig( + model_name="dummy", + provider="openai", + api_key="dummy", + base_url="https://api.openai.com/v1" + ) + llm_client = OpenAIClient(model_config=dummy_config) + service = OntologyService(llm_client=llm_client, db=db) + + # 调用服务层创建场景 + scene = service.create_scene( + scene_name=request.scene_name, + scene_description=request.scene_description, + workspace_id=workspace_id + ) + + # 构建响应 + # 动态计算 type_num + type_num = len(scene.classes) if scene.classes else 0 + + response = SceneResponse( + scene_id=scene.scene_id, + scene_name=scene.scene_name, + scene_description=scene.scene_description, + type_num=type_num, + workspace_id=scene.workspace_id, + created_at=scene.created_at, + updated_at=scene.updated_at, + classes_count=type_num + ) + + api_logger.info(f"Scene created successfully: 
{scene.scene_id}") + + return success(data=response.model_dump(), msg="场景创建成功") + + except ValueError as e: + api_logger.warning(f"Validation error in scene creation: {str(e)}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e)) + + except RuntimeError as e: + api_logger.error(f"Runtime error in scene creation: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "场景创建失败", str(e)) + + except Exception as e: + api_logger.error(f"Unexpected error in scene creation: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "场景创建失败", str(e)) + + +@router.put("/scene/{scene_id}", response_model=ApiResponse) +async def update_scene( + scene_id: str, + request: SceneUpdateRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """更新本体场景 + + 更新指定场景的信息,只能更新当前工作空间下的场景。 + + Args: + scene_id: 场景ID + request: 场景更新请求 + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含更新后的场景信息 + """ + api_logger.info( + f"Scene update requested by user {current_user.id}, " + f"scene_id={scene_id}" + ) + + try: + from uuid import UUID + + # 验证UUID格式 + try: + scene_uuid = UUID(scene_id) + except ValueError: + api_logger.warning(f"Invalid scene_id format: {scene_id}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式") + + # 获取当前工作空间ID + workspace_id = current_user.current_workspace_id + if not workspace_id: + api_logger.warning(f"User {current_user.id} has no current workspace") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间") + + # 创建OntologyService实例 + from app.core.memory.llm_tools.openai_client import OpenAIClient + from app.core.models.base import RedBearModelConfig + + dummy_config = RedBearModelConfig( + model_name="dummy", + provider="openai", + api_key="dummy", + base_url="https://api.openai.com/v1" + ) + llm_client = OpenAIClient(model_config=dummy_config) + service = OntologyService(llm_client=llm_client, db=db) + + # 调用服务层更新场景 + scene = service.update_scene( + scene_id=scene_uuid, + 
scene_name=request.scene_name, + scene_description=request.scene_description, + workspace_id=workspace_id + ) + + # 构建响应 + # 动态计算 type_num + type_num = len(scene.classes) if scene.classes else 0 + + response = SceneResponse( + scene_id=scene.scene_id, + scene_name=scene.scene_name, + scene_description=scene.scene_description, + type_num=type_num, + workspace_id=scene.workspace_id, + created_at=scene.created_at, + updated_at=scene.updated_at, + classes_count=type_num + ) + + api_logger.info(f"Scene updated successfully: {scene_id}") + + return success(data=response.model_dump(), msg="场景更新成功") + + except ValueError as e: + api_logger.warning(f"Validation error in scene update: {str(e)}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e)) + + except RuntimeError as e: + api_logger.error(f"Runtime error in scene update: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "场景更新失败", str(e)) + + except Exception as e: + api_logger.error(f"Unexpected error in scene update: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "场景更新失败", str(e)) + + +@router.delete("/scene/{scene_id}", response_model=ApiResponse) +async def delete_scene( + scene_id: str, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """删除本体场景 + + 删除指定场景及其所有关联类型,只能删除当前工作空间下的场景。 + + Args: + scene_id: 场景ID + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 删除结果 + """ + api_logger.info( + f"Scene deletion requested by user {current_user.id}, " + f"scene_id={scene_id}" + ) + + try: + from uuid import UUID + + # 验证UUID格式 + try: + scene_uuid = UUID(scene_id) + except ValueError: + api_logger.warning(f"Invalid scene_id format: {scene_id}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式") + + # 获取当前工作空间ID + workspace_id = current_user.current_workspace_id + if not workspace_id: + api_logger.warning(f"User {current_user.id} has no current workspace") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间") + + # 创建OntologyService实例 
+ from app.core.memory.llm_tools.openai_client import OpenAIClient + from app.core.models.base import RedBearModelConfig + + dummy_config = RedBearModelConfig( + model_name="dummy", + provider="openai", + api_key="dummy", + base_url="https://api.openai.com/v1" + ) + llm_client = OpenAIClient(model_config=dummy_config) + service = OntologyService(llm_client=llm_client, db=db) + + # 调用服务层删除场景 + success_flag = service.delete_scene( + scene_id=scene_uuid, + workspace_id=workspace_id + ) + + api_logger.info(f"Scene deleted successfully: {scene_id}") + + return success(data={"deleted": success_flag}, msg="场景删除成功") + + except ValueError as e: + api_logger.warning(f"Validation error in scene deletion: {str(e)}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e)) + + except RuntimeError as e: + api_logger.error(f"Runtime error in scene deletion: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "场景删除失败", str(e)) + + except Exception as e: + api_logger.error(f"Unexpected error in scene deletion: {str(e)}", exc_info=True) + return fail(BizCode.INTERNAL_ERROR, "场景删除失败", str(e)) + + +@router.get("/scenes", response_model=ApiResponse) +async def get_scenes( + workspace_id: Optional[str] = None, + scene_name: Optional[str] = None, + page: Optional[int] = None, + pagesize: Optional[int] = None, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """获取场景列表(支持模糊搜索和全量查询,全量查询支持分页) + + 根据是否提供 scene_name 参数,执行不同的查询: + - 提供 scene_name:进行模糊搜索,返回匹配的场景列表(支持分页) + - 不提供 scene_name:返回工作空间下的所有场景(支持分页) + + 支持中文和英文的模糊匹配,不区分大小写。 + + Args: + workspace_id: 工作空间ID(可选,默认当前用户工作空间) + scene_name: 场景名称关键词(可选,支持模糊匹配) + page: 页码(可选,从1开始) + pagesize: 每页数量(可选) + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含场景列表和分页信息 + + Examples: + - 模糊搜索(不分页):GET /scenes?workspace_id=xxx&scene_name=医疗 + 输入 "医疗" 可以匹配到 "医疗场景"、"智慧医疗"、"医疗管理系统" 等 + - 模糊搜索(分页):GET /scenes?workspace_id=xxx&scene_name=医疗&page=1&pagesize=10 + 返回匹配 "医疗" 的第1页,每页10条数据 + - 全量查询(不分页):GET 
/scenes?workspace_id=xxx + 返回工作空间下的所有场景 + - 全量查询(分页):GET /scenes?workspace_id=xxx&page=1&pagesize=10 + 返回第1页,每页10条数据 + + Notes: + - 分页参数 page 和 pagesize 必须同时提供 + - page 从1开始,pagesize 必须大于0 + - 返回格式:{"items": [...], "page": {"page": 1, "pagesize": 10, "total": 100, "hasnext": true}} + - 不分页时,page 字段为 null + """ + from app.controllers.ontology_secondary_routes import scenes_handler + return await scenes_handler(workspace_id, scene_name, page, pagesize, db, current_user) + + +# ==================== 本体类型管理接口 ==================== + +@router.post("/class", response_model=ApiResponse) +async def create_class( + request: ClassCreateRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """创建本体类型 + + 在指定场景下创建新的本体类型。 + + Args: + request: 类型创建请求 + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含创建的类型信息 + """ + from app.controllers.ontology_secondary_routes import create_class_handler + return await create_class_handler(request, db, current_user) + + +@router.put("/class/{class_id}", response_model=ApiResponse) +async def update_class( + class_id: str, + request: ClassUpdateRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """更新本体类型 + + 更新指定类型的信息,只能更新当前工作空间下场景的类型。 + + Args: + class_id: 类型ID + request: 类型更新请求 + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含更新后的类型信息 + """ + from app.controllers.ontology_secondary_routes import update_class_handler + return await update_class_handler(class_id, request, db, current_user) + + +@router.delete("/class/{class_id}", response_model=ApiResponse) +async def delete_class( + class_id: str, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """删除本体类型 + + 删除指定类型,只能删除当前工作空间下场景的类型。 + + Args: + class_id: 类型ID + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 删除结果 + """ + from app.controllers.ontology_secondary_routes import delete_class_handler + return await 
delete_class_handler(class_id, db, current_user) + + +@router.get("/classes", response_model=ApiResponse) +async def get_classes( + scene_id: str, + class_name: Optional[str] = None, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """获取类型列表(支持模糊搜索和全量查询) + + 根据是否提供 class_name 参数,执行不同的查询: + - 提供 class_name:进行模糊搜索,返回匹配的类型列表 + - 不提供 class_name:返回场景下的所有类型 + + 支持中文和英文的模糊匹配,不区分大小写。 + 返回结果包含场景的基本信息(scene_name 和 scene_description)。 + + Args: + scene_id: 场景ID(必填) + class_name: 类型名称关键词(可选,支持模糊匹配) + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含类型列表和场景信息 + + Examples: + - 模糊搜索:GET /classes?scene_id=xxx&class_name=患者 + 输入 "患者" 可以匹配到 "患者"、"患者信息"、"门诊患者" 等 + - 全量查询:GET /classes?scene_id=xxx + 返回场景下的所有类型 + + Response Format: + { + "total": 3, + "scene_id": "xxx", + "scene_name": "医疗场景", + "scene_description": "用于医疗领域的本体建模", + "items": [...] + } + """ + from app.controllers.ontology_secondary_routes import classes_handler + return await classes_handler(scene_id, class_name, db, current_user) + + +@router.get("/class/{class_id}", response_model=ApiResponse) +async def get_class( + class_id: str, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """获取单个本体类型 + + 根据类型ID获取类型的详细信息,只能查询当前工作空间下场景的类型。 + + Args: + class_id: 类型ID + db: 数据库会话 + current_user: 当前用户 + + Returns: + ApiResponse: 包含类型详细信息 + + Response Format: + { + "code": 0, + "msg": "查询成功", + "data": { + "class_id": "xxx", + "class_name": "患者", + "class_description": "在医疗机构中接受诊疗的个体", + "scene_id": "xxx", + "created_at": "2026-01-29T10:00:00", + "updated_at": "2026-01-29T10:00:00" + } + } + """ + from app.controllers.ontology_secondary_routes import get_class_handler + return await get_class_handler(class_id, db, current_user) diff --git a/api/app/controllers/ontology_secondary_routes.py b/api/app/controllers/ontology_secondary_routes.py new file mode 100644 index 00000000..99017eea --- /dev/null +++ 
b/api/app/controllers/ontology_secondary_routes.py @@ -0,0 +1,611 @@ +# -*- coding: utf-8 -*- +"""本体场景和类型路由(续) + +由于主Controller文件较大,将剩余路由放在此文件中。 +""" + +from uuid import UUID +from typing import Optional + +from fastapi import Depends +from sqlalchemy.orm import Session + +from app.core.error_codes import BizCode +from app.core.logging_config import get_api_logger +from app.core.response_utils import fail, success +from app.db import get_db +from app.dependencies import get_current_user +from app.models.user_model import User +from app.schemas.ontology_schemas import ( + SceneResponse, + SceneListResponse, + PaginationInfo, + ClassCreateRequest, + ClassUpdateRequest, + ClassResponse, + ClassListResponse, + ClassBatchCreateResponse, +) +from app.schemas.response_schema import ApiResponse +from app.services.ontology_service import OntologyService +from app.core.memory.llm_tools.openai_client import OpenAIClient +from app.core.models.base import RedBearModelConfig + + +api_logger = get_api_logger() + + +def _get_dummy_ontology_service(db: Session) -> OntologyService: + """获取OntologyService实例(不需要LLM) + + 场景和类型管理不需要LLM,创建一个dummy配置。 + """ + dummy_config = RedBearModelConfig( + model_name="dummy", + provider="openai", + api_key="dummy", + base_url="https://api.openai.com/v1" + ) + llm_client = OpenAIClient(model_config=dummy_config) + return OntologyService(llm_client=llm_client, db=db) + + +# 这些函数将被导入到主Controller中 + +async def scenes_handler( + workspace_id: Optional[str] = None, + scene_name: Optional[str] = None, + page: Optional[int] = None, + page_size: Optional[int] = None, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """获取场景列表(支持模糊搜索和全量查询,全量查询支持分页) + + 当提供 scene_name 参数时,进行模糊搜索(不分页); + 当不提供 scene_name 参数时,返回所有场景(支持分页)。 + + Args: + workspace_id: 工作空间ID(可选,默认当前用户工作空间) + scene_name: 场景名称关键词(可选,支持模糊匹配) + page: 页码(可选,从1开始,仅在全量查询时有效) + page_size: 每页数量(可选,仅在全量查询时有效) + db: 数据库会话 + current_user: 当前用户 + """ + operation = "search" if 
scene_name else "list" + api_logger.info( + f"Scene {operation} requested by user {current_user.id}, " + f"workspace_id={workspace_id}, keyword={scene_name}, page={page}, page_size={page_size}" + ) + + try: + # 确定工作空间ID + if workspace_id: + try: + ws_uuid = UUID(workspace_id) + except ValueError: + api_logger.warning(f"Invalid workspace_id format: {workspace_id}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的工作空间ID格式") + else: + ws_uuid = current_user.current_workspace_id + if not ws_uuid: + api_logger.warning(f"User {current_user.id} has no current workspace") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间") + + # 创建Service + service = _get_dummy_ontology_service(db) + + # 根据是否提供 scene_name 决定查询方式 + if scene_name and scene_name.strip(): + # 验证分页参数(模糊搜索也支持分页) + if page is not None and page < 1: + api_logger.warning(f"Invalid page number: {page}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "页码必须大于0") + + if page_size is not None and page_size < 1: + api_logger.warning(f"Invalid page_size: {page_size}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "每页数量必须大于0") + + # 如果只提供了page或page_size中的一个,返回错误 + if (page is not None and page_size is None) or (page is None and page_size is not None): + api_logger.warning(f"Incomplete pagination params: page={page}, page_size={page_size}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "分页参数page和pagesize必须同时提供") + + # 模糊搜索场景(支持分页) + scenes = service.search_scenes_by_name(scene_name.strip(), ws_uuid) + total = len(scenes) + + # 如果提供了分页参数,进行分页处理 + if page is not None and page_size is not None: + start_idx = (page - 1) * page_size + end_idx = start_idx + page_size + scenes = scenes[start_idx:end_idx] + + # 构建响应 + items = [] + for scene in scenes: + # 获取前3个class_name作为entity_type + entity_type = [cls.class_name for cls in scene.classes[:3]] if scene.classes else None + # 动态计算 type_num + type_num = len(scene.classes) if scene.classes else 0 + + items.append(SceneResponse( + scene_id=scene.scene_id, + 
scene_name=scene.scene_name, + scene_description=scene.scene_description, + type_num=type_num, + entity_type=entity_type, + workspace_id=scene.workspace_id, + created_at=scene.created_at, + updated_at=scene.updated_at, + classes_count=type_num + )) + + # 构建响应(包含分页信息) + if page is not None and page_size is not None: + # 计算是否有下一页 + hasnext = (page * page_size) < total + + pagination_info = PaginationInfo( + page=page, + pagesize=page_size, + total=total, + hasnext=hasnext + ) + response = SceneListResponse(items=items, page=pagination_info) + else: + response = SceneListResponse(items=items) + + api_logger.info( + f"Scene search completed: found {len(items)} scenes matching '{scene_name}' " + f"in workspace {ws_uuid}, total={total}" + ) + else: + # 获取所有场景(支持分页) + # 验证分页参数 + if page is not None and page < 1: + api_logger.warning(f"Invalid page number: {page}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "页码必须大于0") + + if page_size is not None and page_size < 1: + api_logger.warning(f"Invalid page_size: {page_size}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "每页数量必须大于0") + + # 如果只提供了page或page_size中的一个,返回错误 + if (page is not None and page_size is None) or (page is None and page_size is not None): + api_logger.warning(f"Incomplete pagination params: page={page}, page_size={page_size}") + return fail(BizCode.BAD_REQUEST, "请求参数无效", "分页参数page和pagesize必须同时提供") + + scenes, total = service.list_scenes(ws_uuid, page, page_size) + + # 构建响应 + items = [] + for scene in scenes: + # 获取前3个class_name作为entity_type + entity_type = [cls.class_name for cls in scene.classes[:3]] if scene.classes else None + # 动态计算 type_num + type_num = len(scene.classes) if scene.classes else 0 + + items.append(SceneResponse( + scene_id=scene.scene_id, + scene_name=scene.scene_name, + scene_description=scene.scene_description, + type_num=type_num, + entity_type=entity_type, + workspace_id=scene.workspace_id, + created_at=scene.created_at, + updated_at=scene.updated_at, + classes_count=type_num + )) + + 
async def create_class_handler(
    request: ClassCreateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create one or several ontology classes inside a scene.

    The request always carries a list. A list with exactly one entry is
    created through ``service.create_class`` and answered with a single
    ``ClassResponse``; any other length is handed to
    ``service.create_classes_batch`` and answered with a
    ``ClassBatchCreateResponse`` that reports successes and failures.

    Args:
        request: Payload providing ``scene_id`` and the ``classes`` list.
        db: Database session injected by the framework.
        current_user: Authenticated user; ``current_workspace_id`` scopes
            the operation.

    Returns:
        A ``success(...)`` envelope, or a ``fail(...)`` envelope with
        BAD_REQUEST (no workspace / ``ValueError``) or INTERNAL_ERROR
        (any other exception).
    """

    def _to_response(record):
        # Project one persisted class onto the API response schema.
        return ClassResponse(
            class_id=record.class_id,
            class_name=record.class_name,
            class_description=record.class_description,
            scene_id=record.scene_id,
            created_at=record.created_at,
            updated_at=record.updated_at
        )

    # The list length alone decides between the single and the batch path.
    requested = len(request.classes)
    mode = "batch" if requested != 1 else "single"

    api_logger.info(
        f"Class creation ({mode}) requested by user {current_user.id}, "
        f"scene_id={request.scene_id}, count={requested}"
    )

    try:
        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        service = _get_dummy_ontology_service(db)

        # Plain dicts are what both service entry points are fed with.
        payload = [
            {
                "class_name": entry.class_name,
                "class_description": entry.class_description
            }
            for entry in request.classes
        ]

        if requested != 1:
            # Batch path: partial failure is reported, not raised.
            created, errors = service.create_classes_batch(
                scene_id=request.scene_id,
                classes=payload,
                workspace_id=workspace_id
            )
            items = [_to_response(record) for record in created]

            summary = ClassBatchCreateResponse(
                total=len(payload),
                success_count=len(created),
                failed_count=len(errors),
                items=items,
                errors=errors or None
            )

            api_logger.info(
                f"Batch class creation completed: "
                f"success={len(created)}, failed={len(errors)}"
            )

            return success(data=summary.model_dump(mode='json'), msg="批量创建完成")

        # Single path.
        only = payload[0]
        record = service.create_class(
            scene_id=request.scene_id,
            class_name=only["class_name"],
            class_description=only["class_description"],
            workspace_id=workspace_id
        )
        single = _to_response(record)

        api_logger.info(f"Class created successfully: {record.class_id}")

        return success(data=single.model_dump(mode='json'), msg="类型创建成功")

    except ValueError as exc:
        api_logger.warning(f"Validation error in class creation: {exc}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(exc))

    except Exception as exc:
        # RuntimeError and every other failure differ only in the log label.
        origin = "Runtime" if isinstance(exc, RuntimeError) else "Unexpected"
        api_logger.error(f"{origin} error in class creation: {exc}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "类型创建失败", str(exc))
async def delete_class_handler(
    class_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Delete one ontology class identified by its UUID string.

    Args:
        class_id: Class identifier as received from the route; it must
            parse as a UUID.
        db: Database session injected by the framework.
        current_user: Authenticated user; ``current_workspace_id`` scopes
            the deletion.

    Returns:
        ``success`` with ``{"deleted": <service result>}``, or a
        ``fail(...)`` envelope: BAD_REQUEST for a malformed id, a missing
        workspace or a ``ValueError`` from the service, INTERNAL_ERROR for
        anything else.
    """
    api_logger.info(
        f"Class deletion requested by user {current_user.id}, "
        f"class_id={class_id}"
    )

    try:
        # Reject malformed identifiers before touching the service layer.
        try:
            target_id = UUID(class_id)
        except ValueError:
            api_logger.warning(f"Invalid class_id format: {class_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的类型ID格式")

        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        deleted = _get_dummy_ontology_service(db).delete_class(
            class_id=target_id,
            workspace_id=workspace_id
        )

        api_logger.info(f"Class deleted successfully: {class_id}")

        return success(data={"deleted": deleted}, msg="类型删除成功")

    except ValueError as exc:
        api_logger.warning(f"Validation error in class deletion: {exc}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(exc))

    except Exception as exc:
        # RuntimeError and every other failure differ only in the log label.
        origin = "Runtime" if isinstance(exc, RuntimeError) else "Unexpected"
        api_logger.error(f"{origin} error in class deletion: {exc}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "类型删除失败", str(exc))
async def classes_handler(
    scene_id: str,
    class_name: Optional[str] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List the classes of a scene, optionally filtered by a name keyword.

    When ``class_name`` contains a non-blank keyword the classes are
    searched by that keyword (surrounding whitespace removed); otherwise
    every class of the scene is returned. A blank or whitespace-only
    ``class_name`` is treated as "no keyword" everywhere: in the query
    that is run, in the operation label and in the log lines.

    Args:
        scene_id: Scene identifier as a UUID string (required).
        class_name: Optional keyword for the class-name search.
        db: Database session injected by the framework.
        current_user: Authenticated user; ``current_workspace_id`` scopes
            the query.

    Returns:
        ``success`` wrapping a ``ClassListResponse``, or a ``fail(...)``
        envelope: BAD_REQUEST for a malformed id, a missing workspace or a
        ``ValueError``; NOT_FOUND when the scene lookup returns nothing;
        INTERNAL_ERROR for anything else.
    """
    # Normalise the keyword once so that the branch taken below, the
    # operation label and the completion log can never disagree.
    keyword = class_name.strip() if class_name else ""
    operation = "search" if keyword else "list"
    api_logger.info(
        f"Class {operation} requested by user {current_user.id}, "
        f"keyword={class_name}, scene_id={scene_id}"
    )

    try:
        # Reject malformed identifiers before touching the service layer.
        try:
            scene_uuid = UUID(scene_id)
        except ValueError:
            api_logger.warning(f"Invalid scene_id format: {scene_id}")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "无效的场景ID格式")

        workspace_id = current_user.current_workspace_id
        if not workspace_id:
            api_logger.warning(f"User {current_user.id} has no current workspace")
            return fail(BizCode.BAD_REQUEST, "请求参数无效", "当前用户没有工作空间")

        service = _get_dummy_ontology_service(db)

        # The scene supplies the name/description echoed in the response.
        scene = service.get_scene_by_id(scene_uuid, workspace_id)
        if not scene:
            api_logger.warning(f"Scene not found: {scene_id}")
            return fail(BizCode.NOT_FOUND, "场景不存在", f"未找到ID为 {scene_id} 的场景")

        if keyword:
            classes = service.search_classes_by_name(keyword, scene_uuid, workspace_id)
        else:
            classes = service.list_classes_by_scene(scene_uuid, workspace_id)

        items = [
            ClassResponse(
                class_id=ontology_class.class_id,
                class_name=ontology_class.class_name,
                class_description=ontology_class.class_description,
                scene_id=ontology_class.scene_id,
                created_at=ontology_class.created_at,
                updated_at=ontology_class.updated_at
            )
            for ontology_class in classes
        ]

        response = ClassListResponse(
            total=len(items),
            scene_id=scene_uuid,
            scene_name=scene.scene_name,
            scene_description=scene.scene_description,
            items=items
        )

        if keyword:
            api_logger.info(
                f"Class search completed: found {len(items)} classes matching '{keyword}' "
                f"in scene {scene_id}"
            )
        else:
            api_logger.info(f"Class list retrieved successfully, count={len(items)}")

        return success(data=response.model_dump(mode='json'), msg="查询成功")

    except ValueError as e:
        api_logger.warning(f"Validation error in class {operation}: {str(e)}")
        return fail(BizCode.BAD_REQUEST, "请求参数无效", str(e))

    except RuntimeError as e:
        api_logger.error(f"Runtime error in class {operation}: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))

    except Exception as e:
        api_logger.error(f"Unexpected error in class {operation}: {str(e)}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, "查询失败", str(e))
class OntologyClass(BaseModel):
    """Represents an extracted ontology class from scenario description.

    An ontology class represents an abstract category or concept in a domain,
    following OWL ontology engineering standards and naming conventions.

    Attributes:
        id: Unique string identifier for the ontology class
        name: Name of the class in PascalCase format (e.g., 'MedicalProcedure')
        name_chinese: Chinese translation of the class name (e.g., '医疗程序')
        description: Textual description of the class
        examples: List of concrete instance examples of this class
        parent_class: Optional name of the parent class in the hierarchy
        entity_type: Type/category of the entity (e.g., 'Person', 'Organization', 'Concept')
        domain: Domain this class belongs to (e.g., 'Healthcare', 'Education')

    Config:
        extra: Ignore extra fields from LLM output
    """
    # NOTE: the class docstring above is left untouched on purpose. Pydantic
    # copies it into the generated JSON schema, and this file's extractor
    # renders that schema into the LLM prompt, so its wording is runtime data.

    # Unknown keys in the LLM output are dropped instead of failing validation.
    model_config = ConfigDict(extra='ignore')

    # Generated locally (uuid4 hex) when the LLM does not supply one.
    id: str = Field(
        default_factory=lambda: uuid4().hex,
        description="Unique identifier for the ontology class",
    )
    name: str = Field(..., description="Name of the class in PascalCase format")
    name_chinese: Optional[str] = Field(
        None,
        description="Chinese translation of the class name",
    )
    description: str = Field(..., description="Description of the class")
    examples: List[str] = Field(
        default_factory=list,
        description="List of concrete instance examples",
    )
    parent_class: Optional[str] = Field(
        None,
        description="Name of the parent class in the hierarchy",
    )
    entity_type: str = Field(..., description="Type/category of the entity")
    domain: str = Field(..., description="Domain this class belongs to")

    @field_validator('name')
    @classmethod
    def validate_pascal_case(cls, v: str) -> str:
        """Reject class names that break the naming rules.

        The checks run in this order and the first failure wins:
        non-empty, first character uppercase, no space characters, and
        every character alphanumeric (per ``str.isalnum``) or ``_``.

        Args:
            v: Candidate class name.

        Returns:
            ``v`` unchanged when every check passes.

        Raises:
            ValueError: With a message naming the first rule that failed.
        """
        if not v:
            raise ValueError("Class name cannot be empty")

        leading = v[0]
        if not leading.isupper():
            raise ValueError(
                f"Class name '{v}' must start with an uppercase letter (PascalCase)"
            )

        if v.find(' ') != -1:
            raise ValueError(
                f"Class name '{v}' cannot contain spaces (PascalCase)"
            )

        # Anything that is neither alphanumeric nor an underscore is illegal.
        has_illegal_char = any(not (ch.isalnum() or ch == '_') for ch in v)
        if has_illegal_char:
            raise ValueError(
                f"Class name '{v}' contains invalid characters. "
                "Only alphanumeric characters and underscores are allowed"
            )

        return v
+ + Attributes: + classes: List of extracted ontology classes + domain: Domain/field the scenario belongs to + + Config: + extra: Ignore extra fields from LLM output + """ + model_config = ConfigDict(extra='ignore') + + classes: List[OntologyClass] = Field( + default_factory=list, + description="List of extracted ontology classes" + ) + domain: str = Field( + ..., + description="Domain/field the scenario belongs to" + ) diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/__init__.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/__init__.py index 53815124..0bc09622 100644 --- a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/__init__.py +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/__init__.py @@ -8,4 +8,5 @@ - TemporalExtractor: 时间信息提取 - EmbeddingGenerator: 嵌入向量生成 - MemorySummaryGenerator: 记忆摘要生成 +- OntologyExtractor: 本体类提取 """ diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/ontology_extraction.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/ontology_extraction.py new file mode 100644 index 00000000..d1b79bd1 --- /dev/null +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/ontology_extraction.py @@ -0,0 +1,482 @@ +"""Ontology class extraction from scenario descriptions using LLM. + +This module provides the OntologyExtractor class for extracting ontology classes +from natural language scenario descriptions. It uses LLM-driven extraction combined +with two-layer validation (string validation + OWL semantic validation). 
+ +Classes: + OntologyExtractor: Extracts ontology classes from scenario descriptions +""" + +import asyncio +import logging +import time +from typing import List, Optional + +from app.core.memory.llm_tools.openai_client import OpenAIClient +from app.core.memory.models.ontology_models import ( + OntologyClass, + OntologyExtractionResponse, +) +from app.core.memory.utils.validation.ontology_validator import OntologyValidator +from app.core.memory.utils.validation.owl_validator import OWLValidator +from app.core.memory.utils.prompt.prompt_utils import render_ontology_extraction_prompt + + +logger = logging.getLogger(__name__) + + +class OntologyExtractor: + """Extractor for ontology classes from scenario descriptions. + + This extractor uses LLM to identify abstract classes and concepts from + natural language scenario descriptions, following OWL ontology engineering + standards. It performs two-layer validation: + 1. String validation (naming conventions, reserved words, duplicates) + 2. OWL semantic validation (consistency checking, circular inheritance) + + Attributes: + llm_client: OpenAI client for LLM calls + validator: String validator for class names and descriptions + owl_validator: OWL validator for semantic validation + """ + + def __init__(self, llm_client: OpenAIClient): + """Initialize the OntologyExtractor. + + Args: + llm_client: OpenAIClient instance for LLM processing + """ + self.llm_client = llm_client + self.validator = OntologyValidator() + self.owl_validator = OWLValidator() + + logger.info("OntologyExtractor initialized") + + async def extract_ontology_classes( + self, + scenario: str, + domain: Optional[str] = None, + max_classes: int = 15, + min_classes: int = 5, + enable_owl_validation: bool = True, + llm_temperature: float = 0.3, + llm_max_tokens: int = 2000, + max_description_length: int = 500, + timeout: Optional[float] = None, + ) -> OntologyExtractionResponse: + """Extract ontology classes from a scenario description. 
+ + This is the main extraction method that orchestrates the entire process: + 1. Call LLM to extract ontology classes + 2. Perform first-layer validation (string validation and cleaning) + 3. Perform second-layer validation (OWL semantic validation) + 4. Filter invalid classes based on validation errors + 5. Return validated ontology classes + + Args: + scenario: Natural language scenario description + domain: Optional domain hint (e.g., "Healthcare", "Education") + max_classes: Maximum number of classes to extract (default: 15) + min_classes: Minimum number of classes to extract (default: 5) + enable_owl_validation: Whether to enable OWL validation (default: True) + llm_temperature: LLM temperature parameter (default: 0.3) + llm_max_tokens: LLM max tokens parameter (default: 2000) + max_description_length: Maximum description length (default: 500) + timeout: Optional timeout in seconds for LLM call (default: None, no timeout) + + Returns: + OntologyExtractionResponse containing validated ontology classes + + Raises: + ValueError: If scenario is empty or invalid + asyncio.TimeoutError: If extraction times out + + Examples: + >>> extractor = OntologyExtractor(llm_client) + >>> response = await extractor.extract_ontology_classes( + ... scenario="A hospital manages patient records...", + ... domain="Healthcare", + ... max_classes=10, + ... timeout=30.0 + ... 
) + >>> len(response.classes) + 7 + """ + # Start timing + start_time = time.time() + + # Validate input + if not scenario or not scenario.strip(): + logger.error("Scenario description is empty") + raise ValueError("Scenario description cannot be empty") + + scenario = scenario.strip() + + logger.info( + f"Starting ontology extraction - scenario_length={len(scenario)}, " + f"domain={domain}, max_classes={max_classes}, min_classes={min_classes}, " + f"timeout={timeout}" + ) + + try: + # Step 1: Call LLM for extraction with timeout + logger.info("Step 1: Calling LLM for ontology extraction") + llm_start_time = time.time() + + if timeout is not None: + # Wrap LLM call with timeout + try: + response = await asyncio.wait_for( + self._call_llm_for_extraction( + scenario=scenario, + domain=domain, + max_classes=max_classes, + llm_temperature=llm_temperature, + llm_max_tokens=llm_max_tokens, + ), + timeout=timeout + ) + except asyncio.TimeoutError: + llm_duration = time.time() - llm_start_time + logger.error( + f"LLM extraction timed out after {timeout} seconds " + f"(actual duration: {llm_duration:.2f}s)" + ) + # Return empty response on timeout + return OntologyExtractionResponse( + classes=[], + domain=domain or "Unknown", + ) + else: + # No timeout specified, call directly + response = await self._call_llm_for_extraction( + scenario=scenario, + domain=domain, + max_classes=max_classes, + llm_temperature=llm_temperature, + llm_max_tokens=llm_max_tokens, + ) + + llm_duration = time.time() - llm_start_time + logger.info( + f"LLM returned {len(response.classes)} classes in {llm_duration:.2f}s" + ) + + # Step 2: First-layer validation (string validation and cleaning) + logger.info("Step 2: Performing first-layer validation (string validation)") + validation_start_time = time.time() + + response = self._validate_and_clean( + response=response, + max_description_length=max_description_length, + ) + + validation_duration = time.time() - validation_start_time + logger.info( + 
f"After first-layer validation: {len(response.classes)} classes remain " + f"(validation took {validation_duration:.2f}s)" + ) + + # Check if we have enough classes after first-layer validation + if len(response.classes) < min_classes: + logger.warning( + f"Only {len(response.classes)} classes remain after validation, " + f"which is below minimum of {min_classes}" + ) + + # Step 3: Second-layer validation (OWL semantic validation) + if enable_owl_validation and response.classes: + logger.info("Step 3: Performing second-layer validation (OWL validation)") + owl_start_time = time.time() + + is_valid, errors, world = self.owl_validator.validate_ontology_classes( + classes=response.classes, + ) + + owl_duration = time.time() - owl_start_time + + if not is_valid: + logger.warning( + f"OWL validation found {len(errors)} issues in {owl_duration:.2f}s: {errors}" + ) + + # Filter invalid classes based on errors + response = self._filter_invalid_classes( + response=response, + errors=errors, + ) + + logger.info( + f"After second-layer validation: {len(response.classes)} classes remain" + ) + else: + logger.info(f"OWL validation passed successfully in {owl_duration:.2f}s") + else: + if not enable_owl_validation: + logger.info("Step 3: OWL validation disabled, skipping") + else: + logger.info("Step 3: No classes to validate, skipping OWL validation") + + # Calculate total duration + total_duration = time.time() - start_time + + # Log extraction statistics + logger.info( + f"Ontology extraction completed - " + f"final_class_count={len(response.classes)}, " + f"domain={response.domain}, " + f"total_duration={total_duration:.2f}s, " + f"llm_duration={llm_duration:.2f}s" + ) + + return response + + except asyncio.TimeoutError: + # Re-raise timeout errors + total_duration = time.time() - start_time + logger.error( + f"Ontology extraction timed out after {timeout} seconds " + f"(total duration: {total_duration:.2f}s)", + exc_info=True + ) + raise + except Exception as e: + 
async def _call_llm_for_extraction(
    self,
    scenario: str,
    domain: Optional[str],
    max_classes: int,
    llm_temperature: float,
    llm_max_tokens: int,
) -> OntologyExtractionResponse:
    """Render the extraction prompt and request structured LLM output.

    The user prompt is produced by ``render_ontology_extraction_prompt``
    from the scenario, the domain hint, the class limit and the JSON
    schema of ``OntologyExtractionResponse``. It is sent together with a
    fixed system prompt through ``self.llm_client.response_structured``.

    Args:
        scenario: Scenario description text.
        domain: Optional domain hint.
        max_classes: Upper bound on the number of classes to request.
        llm_temperature: Temperature requested by the caller.
        llm_max_tokens: Token budget requested by the caller.

    Returns:
        The ``OntologyExtractionResponse`` produced by the LLM client.

    Raises:
        Exception: Any failure while rendering the prompt or calling the
            LLM is logged and re-raised unchanged.
    """
    system_prompt = (
        "You are an expert ontology engineer specializing in knowledge "
        "representation and OWL standards. Extract ontology classes from "
        "scenario descriptions following the provided instructions. "
        "Return valid JSON conforming to the schema."
    )

    try:
        schema = OntologyExtractionResponse.model_json_schema()
        user_prompt = await render_ontology_extraction_prompt(
            scenario=scenario,
            domain=domain,
            max_classes=max_classes,
            json_schema=schema,
        )

        logger.debug(f"Rendered prompt length: {len(user_prompt)}")

        # NOTE(review): llm_temperature and llm_max_tokens are only logged
        # here; they are not passed to response_structured below. Confirm
        # whether the client accepts them or applies them elsewhere.
        logger.debug(
            f"Calling LLM with temperature={llm_temperature}, "
            f"max_tokens={llm_max_tokens}"
        )

        extraction = await self.llm_client.response_structured(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            response_model=OntologyExtractionResponse,
        )

        logger.info(
            f"LLM extraction successful - extracted {len(extraction.classes)} classes"
        )

        return extraction

    except Exception as exc:
        # Log with traceback here, then let the caller decide how to recover.
        logger.error(
            f"LLM extraction failed: {exc}",
            exc_info=True
        )
        raise
def _filter_invalid_classes(
    self,
    response: OntologyExtractionResponse,
    errors: List[str],
) -> OntologyExtractionResponse:
    """Drop the classes that OWL validation errors refer to.

    A class counts as invalid when its name occurs in at least one error
    message as a whole identifier, i.e. not directly preceded or followed
    by another word character. Plain substring matching would also discard
    ``Patient`` because of an error that only mentions ``PatientRecord``.

    Args:
        response: Extraction response whose ``classes`` are filtered.
        errors: Error messages reported by the OWL validator.

    Returns:
        ``response`` itself when ``errors`` is empty or names none of its
        classes; otherwise a new ``OntologyExtractionResponse`` holding the
        surviving classes and the original ``domain``.
    """
    # Function-scope import: ``re`` is needed by this method only.
    import re

    if not errors:
        return response

    logger.debug(f"Filtering classes based on {len(errors)} OWL validation errors")

    # One compiled pattern per distinct class name, built once rather than
    # per error. The look-arounds require the name to stand alone, so
    # "Patient" does not match inside "PatientRecord" or "Patient_Record".
    name_patterns = {
        name: re.compile(rf"(?<!\w){re.escape(name)}(?!\w)")
        for name in dict.fromkeys(c.name for c in response.classes)
    }

    invalid_class_names = set()
    for error in errors:
        for name, pattern in name_patterns.items():
            if pattern.search(error):
                invalid_class_names.add(name)
                logger.debug(
                    f"Class '{name}' marked as invalid due to error: {error}"
                )

    # Errors that name no known class leave the response untouched.
    if not invalid_class_names:
        return response

    filtered_classes = [
        c for c in response.classes
        if c.name not in invalid_class_names
    ]

    logger.info(
        f"Filtered out {len(response.classes) - len(filtered_classes)} invalid classes: "
        f"{invalid_class_names}"
    )

    return OntologyExtractionResponse(
        classes=filtered_classes,
        domain=response.domain,
    )
+ + Args: + scenario: The scenario description text to extract ontology classes from + domain: Optional domain hint for the scenario (e.g., "Healthcare", "Education") + max_classes: Maximum number of classes to extract (default: 15) + json_schema: JSON schema for the expected output format + + Returns: + Rendered prompt content as string + """ + template = prompt_env.get_template("extract_ontology.jinja2") + rendered_prompt = template.render( + scenario=scenario, + domain=domain, + max_classes=max_classes, + json_schema=json_schema + ) + + # 记录渲染结果到提示日志 + log_prompt_rendering('ontology extraction', rendered_prompt) + # 可选:记录模板渲染信息 + log_template_rendering('extract_ontology.jinja2', { + 'scenario_len': len(scenario) if scenario else 0, + 'domain': domain, + 'max_classes': max_classes, + 'json_schema': 'OntologyExtractionResponse.schema' + }) + + return rendered_prompt diff --git a/api/app/core/memory/utils/prompt/prompts/extract_ontology.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_ontology.jinja2 new file mode 100644 index 00000000..80594ad9 --- /dev/null +++ b/api/app/core/memory/utils/prompt/prompts/extract_ontology.jinja2 @@ -0,0 +1,210 @@ +===Task=== +Extract ontology classes from the given scenario description following ontology engineering standards. + +===Role=== +You are a professional ontology engineer with expertise in knowledge representation and OWL (Web Ontology Language) standards. Your task is to identify abstract classes and concepts from scenario descriptions, not concrete instances. + +===Scenario Description=== +{{ scenario }} + +{% if domain -%} +===Domain Hint=== +This scenario belongs to the **{{ domain }}** domain. Consider domain-specific concepts and terminology when extracting classes. +{%- endif %} + +===Extraction Rules=== + +**1. 
Abstract Classes, Not Instances:** +- Extract abstract categories and concepts (e.g., "MedicalProcedure", "Patient", "Diagnosis") +- Do NOT extract concrete instances (e.g., "John Smith", "Room 301", "2024-01-15") +- Think in terms of "types of things" rather than "specific things" + +**2. Naming Convention (PascalCase):** +- Use PascalCase format for the "name" field: start with uppercase letter, capitalize each word, no spaces +- Examples: "MedicalProcedure", "HealthcareProvider", "DiagnosticTest" +- Avoid: "medical procedure", "healthcare_provider", "diagnostic-test" +- Use clear, descriptive names in English +- Avoid abbreviations unless they are standard in the domain (e.g., "API", "DNA") +- Provide Chinese translation in the "name_chinese" field (e.g., "医疗程序", "医疗服务提供者", "诊断测试") + +**3. Domain Relevance:** +- Focus on classes that are central to the scenario's domain +- Prioritize classes that represent key concepts, entities, or relationships +- Avoid overly generic classes (e.g., "Thing", "Object") unless they have specific domain meaning + +**4. Class Quantity:** +- Extract between 5 and {{ max_classes }} classes +- Aim for a balanced set covering the main concepts in the scenario +- Quality over quantity: prefer well-defined classes over exhaustive lists + +**5. Clear Descriptions:** +- Provide concise, informative descriptions in Chinese (max 500 characters) +- Describe what the class represents, not specific instances +- Use clear, natural Chinese language that explains the class's role in the domain + +**6. Concrete Examples:** +- Provide 2-5 concrete instance examples in Chinese for each class +- Examples should be specific, realistic instances of the class +- Examples help clarify the class's scope and meaning +- Use natural Chinese language for examples +- Example format: ["示例1", "示例2", "示例3"] + +**7. 
Class Hierarchy:** +- Identify parent-child relationships where applicable +- Use the parent_class field to specify inheritance +- Parent class must be one of the extracted classes or a standard OWL class +- Leave parent_class as null for top-level classes + +**8. Entity Types:** +- Classify each class with an appropriate entity_type +- Common types: "Person", "Organization", "Location", "Event", "Concept", "Process", "Object", "Role" +- Choose the most specific type that applies + +**9. OWL Reserved Words:** +- Do NOT use OWL reserved words as class names +- Reserved words include: "Thing", "Nothing", "Class", "Property", "ObjectProperty", "DatatypeProperty", "AnnotationProperty", "Ontology", "Individual", "Literal" +- If a reserved word is needed, add a domain-specific prefix (e.g., "MedicalClass" instead of "Class") + +**10. Language Consistency:** +- Extract all class names in English (PascalCase format) for the "name" field +- Provide Chinese translation for class names in the "name_chinese" field +- Descriptions MUST be in Chinese (中文) +- Examples MUST be in Chinese (中文) +- Use clear, natural Chinese language for descriptions and examples + +===Examples=== + +**Example 1 (Healthcare Domain):** +Scenario: "A hospital manages patient records, schedules appointments, and coordinates medical procedures. Doctors diagnose conditions and prescribe treatments." 
+ +Output: +{ + "classes": [ + { + "name": "Patient", + "name_chinese": "患者", + "description": "在医疗机构接受医疗护理或治疗的人", + "examples": ["张三", "李四", "患有糖尿病的老年患者"], + "parent_class": null, + "entity_type": "Person", + "domain": "Healthcare" + }, + { + "name": "MedicalProcedure", + "name_chinese": "医疗程序", + "description": "为医疗诊断或治疗而执行的系统性操作流程", + "examples": ["手术", "血液检查", "X光检查", "疫苗接种"], + "parent_class": null, + "entity_type": "Process", + "domain": "Healthcare" + }, + { + "name": "Diagnosis", + "name_chinese": "诊断", + "description": "基于症状和检查结果对疾病或状况的识别", + "examples": ["糖尿病诊断", "癌症诊断", "流感诊断"], + "parent_class": null, + "entity_type": "Concept", + "domain": "Healthcare" + }, + { + "name": "Doctor", + "name_chinese": "医生", + "description": "诊断和治疗患者的持证医疗专业人员", + "examples": ["全科医生", "外科医生", "心脏病专家"], + "parent_class": null, + "entity_type": "Role", + "domain": "Healthcare" + }, + { + "name": "Treatment", + "name_chinese": "治疗", + "description": "为治愈或管理疾病状况而提供的医疗护理或疗法", + "examples": ["药物治疗", "物理治疗", "化疗", "手术治疗"], + "parent_class": null, + "entity_type": "Process", + "domain": "Healthcare" + } + ], + "domain": "Healthcare", + "namespace": "http://example.org/healthcare#" +} + +**Example 2 (Education Domain):** +Scenario: "A university offers courses taught by professors. Students enroll in programs, attend lectures, and complete assignments to earn degrees." 
+ +Output: +{ + "classes": [ + { + "name": "Student", + "name_chinese": "学生", + "description": "在教育机构注册学习的人", + "examples": ["本科生", "研究生", "在职学生"], + "parent_class": null, + "entity_type": "Role", + "domain": "Education" + }, + { + "name": "Course", + "name_chinese": "课程", + "description": "涵盖特定学科或主题的结构化教育课程", + "examples": ["计算机科学导论", "微积分I", "世界历史"], + "parent_class": null, + "entity_type": "Concept", + "domain": "Education" + }, + { + "name": "Professor", + "name_chinese": "教授", + "description": "教授课程并进行研究的学术教师", + "examples": ["助理教授", "副教授", "正教授"], + "parent_class": null, + "entity_type": "Role", + "domain": "Education" + }, + { + "name": "AcademicProgram", + "name_chinese": "学术项目", + "description": "通向学位或证书的结构化课程体系", + "examples": ["理学学士", "文学硕士", "博士项目"], + "parent_class": null, + "entity_type": "Concept", + "domain": "Education" + }, + { + "name": "Assignment", + "name_chinese": "作业", + "description": "分配给学生以评估学习成果的任务或项目", + "examples": ["论文", "习题集", "研究报告", "实验报告"], + "parent_class": null, + "entity_type": "Object", + "domain": "Education" + }, + { + "name": "Lecture", + "name_chinese": "讲座", + "description": "由教师进行的教育性演讲或讲座", + "examples": ["入门讲座", "客座讲座", "在线讲座"], + "parent_class": null, + "entity_type": "Event", + "domain": "Education" + } + ], + "domain": "Education", + "namespace": "http://example.org/education#" +} + +===Output Format=== + +**JSON Requirements:** +- Use only ASCII double quotes (") for JSON structure +- Never use Chinese quotation marks ("") or Unicode quotes +- Escape quotation marks in text with backslashes (\") +- Ensure proper string closure and comma separation +- No line breaks within JSON string values +- All class names must be in PascalCase format +- All class names must be unique (case-insensitive) +- Extract between 5 and {{ max_classes }} classes + +{{ json_schema }} diff --git a/api/app/core/memory/utils/validation/__init__.py b/api/app/core/memory/utils/validation/__init__.py new file mode 100644 index 
"""String validation for ontology class names and descriptions.

This module provides the OntologyValidator class for validating and sanitizing
ontology class names according to OWL standards and naming conventions.

Classes:
    OntologyValidator: Validates class names, removes duplicates, and truncates descriptions
"""

import logging
import re
from typing import TYPE_CHECKING, List, Tuple

if TYPE_CHECKING:
    # OntologyClass is referenced only in annotations, so it is imported for
    # type checkers alone; the validator itself never instantiates it.
    from app.core.memory.models.ontology_models import OntologyClass


logger = logging.getLogger(__name__)


class OntologyValidator:
    """Validator for ontology class names and descriptions.

    This validator performs string-level validation including:
    - PascalCase naming convention validation
    - OWL reserved word checking
    - Duplicate class name removal
    - Description length truncation

    Attributes:
        OWL_RESERVED_WORDS: Set of OWL reserved words that cannot be used as class names
    """

    # OWL reserved words that cannot be used as class names
    OWL_RESERVED_WORDS = {
        'Thing', 'Nothing', 'Class', 'Property',
        'ObjectProperty', 'DatatypeProperty', 'FunctionalProperty',
        'InverseFunctionalProperty', 'TransitiveProperty', 'SymmetricProperty',
        'AsymmetricProperty', 'ReflexiveProperty', 'IrreflexiveProperty',
        'Restriction', 'Ontology', 'Individual', 'NamedIndividual',
        'Annotation', 'AnnotationProperty', 'Axiom',
        'AllDifferent', 'AllDisjointClasses', 'AllDisjointProperties',
        'Datatype', 'DataRange', 'Literal',
        'DeprecatedClass', 'DeprecatedProperty',
        'Imports', 'IncompatibleWith', 'PriorVersion', 'VersionInfo',
        'BackwardCompatibleWith', 'OntologyProperty',
    }

    # A valid name consists solely of ASCII letters, digits and underscores.
    _VALID_NAME_RE = re.compile(r'[A-Za-z0-9_]+')
    # Separators that delimit words when a phrase is turned into PascalCase.
    _WORD_SEPARATOR_RE = re.compile(r'[\s\-_]+')
    # Characters stripped from each word during sanitization.
    _INVALID_CHAR_RE = re.compile(r'[^A-Za-z0-9_]')
    # Marker appended to a truncated description.
    _ELLIPSIS = "..."

    @staticmethod
    def _reject(error_msg: str) -> Tuple[bool, str]:
        """Log a validation failure and return it as ``(False, error_msg)``."""
        logger.warning("Validation failed: %s", error_msg)
        return False, error_msg

    def validate_class_name(self, name: str) -> Tuple[bool, str]:
        """Validate that a class name follows OWL naming conventions.

        The checks run in this order and the first failure is reported:
        1. Must not be empty (after stripping whitespace)
        2. Cannot be an OWL reserved word
        3. Must start with an uppercase letter (PascalCase)
        4. Cannot contain spaces
        5. Can only contain alphanumeric characters and underscores

        Args:
            name: The class name to validate

        Returns:
            Tuple of (is_valid, error_message)
            - is_valid: True if the name is valid, False otherwise
            - error_message: Empty string if valid, error description if invalid

        Examples:
            >>> validator = OntologyValidator()
            >>> validator.validate_class_name("MedicalProcedure")
            (True, '')
            >>> validator.validate_class_name("Medical Procedure")
            (False, "Class name 'Medical Procedure' cannot contain spaces")
            >>> validator.validate_class_name("Thing")
            (False, "Class name 'Thing' is an OWL reserved word")
        """
        logger.debug("Validating class name: '%s'", name)

        if not name or not name.strip():
            return self._reject("Class name cannot be empty")

        name = name.strip()

        if name in self.OWL_RESERVED_WORDS:
            return self._reject(f"Class name '{name}' is an OWL reserved word")

        if not name[0].isupper():
            return self._reject(
                f"Class name '{name}' must start with an uppercase letter (PascalCase)"
            )

        if ' ' in name:
            return self._reject(f"Class name '{name}' cannot contain spaces")

        if not self._VALID_NAME_RE.fullmatch(name):
            return self._reject(
                f"Class name '{name}' contains invalid characters. "
                "Only alphanumeric characters and underscores are allowed"
            )

        logger.debug("Class name '%s' is valid", name)
        return True, ""

    def sanitize_class_name(self, name: str) -> str:
        """Attempt to sanitize an invalid class name into a valid format.

        Sanitization steps:
        1. Strip surrounding whitespace
        2. Split into words on whitespace, hyphens and underscores
        3. Remove invalid characters from each word
        4. Upper-case the first character of each word (the rest is kept as is)
           and join the words without separators
        5. If the result is empty or starts with a digit, prefix it with 'Class'
        6. If the result is an OWL reserved word, append 'Class'

        Args:
            name: The class name to sanitize

        Returns:
            Sanitized class name; 'UnnamedClass' when the input is empty or
            only whitespace

        Examples:
            >>> validator = OntologyValidator()
            >>> validator.sanitize_class_name("medical procedure")
            'MedicalProcedure'
            >>> validator.sanitize_class_name("patient-record")
            'PatientRecord'
            >>> validator.sanitize_class_name("123invalid")
            'Class123invalid'
        """
        logger.debug("Sanitizing class name: '%s'", name)

        if not name or not name.strip():
            logger.warning("Empty class name provided for sanitization, returning 'UnnamedClass'")
            return "UnnamedClass"

        original_name = name.strip()

        sanitized_words = []
        for word in self._WORD_SEPARATOR_RE.split(original_name):
            clean_word = self._INVALID_CHAR_RE.sub('', word)
            if clean_word:
                # Only the first character changes case, so inner capitals
                # such as "DNA" survive.
                sanitized_words.append(clean_word[0].upper() + clean_word[1:])

        sanitized = ''.join(sanitized_words)

        if not sanitized or sanitized[0].isdigit():
            sanitized = 'Class' + sanitized
            logger.info(
                "Prefixed class name with 'Class': '%s' -> '%s'",
                original_name, sanitized,
            )

        if sanitized in self.OWL_RESERVED_WORDS:
            sanitized = sanitized + 'Class'
            logger.info(
                "Appended 'Class' suffix to reserved word: '%s' -> '%s'",
                original_name, sanitized,
            )

        logger.info("Sanitized class name: '%s' -> '%s'", original_name, sanitized)
        return sanitized

    def remove_duplicates(self, classes: List["OntologyClass"]) -> List["OntologyClass"]:
        """Remove duplicate ontology classes based on case-insensitive name comparison.

        When duplicates are found, keeps the first occurrence and discards subsequent ones.
        Comparison is case-insensitive to catch variations like 'Patient' and 'patient'.

        Args:
            classes: List of OntologyClass objects

        Returns:
            List of OntologyClass objects with duplicates removed, in their
            original order (the input itself when it is empty)

        Examples:
            >>> validator = OntologyValidator()
            >>> classes = [
            ...     OntologyClass(name="Patient", description="A patient", entity_type="Person", domain="Healthcare"),
            ...     OntologyClass(name="patient", description="Another patient", entity_type="Person", domain="Healthcare"),
            ...     OntologyClass(name="Doctor", description="A doctor", entity_type="Person", domain="Healthcare"),
            ... ]
            >>> unique = validator.remove_duplicates(classes)
            >>> [c.name for c in unique]
            ['Patient', 'Doctor']
        """
        if not classes:
            logger.debug("No classes to check for duplicates")
            return classes

        logger.debug("Checking %d classes for duplicates", len(classes))

        seen_names = set()
        unique_classes = []
        duplicates_found = []

        for ontology_class in classes:
            # Use lowercase for comparison
            name_lower = ontology_class.name.lower()

            if name_lower in seen_names:
                duplicates_found.append(ontology_class.name)
                logger.debug("Duplicate class found and removed: '%s'", ontology_class.name)
                continue

            seen_names.add(name_lower)
            unique_classes.append(ontology_class)

        if duplicates_found:
            logger.info(
                "Removed %d duplicate classes: %s",
                len(duplicates_found), duplicates_found,
            )
        else:
            logger.debug("No duplicate classes found")

        return unique_classes

    def truncate_description(self, description: str, max_length: int = 500) -> str:
        """Truncate a description to a maximum length.

        If the description exceeds max_length, it is cut so that the result,
        including a trailing ellipsis (...), is exactly max_length characters
        long. When max_length is smaller than the ellipsis itself there is no
        room for the marker, so the text is simply cut to max_length
        characters (an empty string for zero or negative limits).

        Args:
            description: The description text to truncate
            max_length: Maximum allowed length (default: 500)

        Returns:
            Truncated description string; "" when description is empty or None

        Examples:
            >>> validator = OntologyValidator()
            >>> long_desc = "A" * 600
            >>> truncated = validator.truncate_description(long_desc, max_length=500)
            >>> len(truncated)
            500
            >>> truncated.endswith("...")
            True
            >>> validator.truncate_description("abcdef", max_length=2)
            'ab'
        """
        if not description:
            return ""

        if len(description) <= max_length:
            return description

        ellipsis = self._ELLIPSIS
        if max_length < len(ellipsis):
            # A negative slice end would count from the right and yield a
            # result longer than max_length, so clamp at zero instead.
            truncated = description[:max(max_length, 0)]
        else:
            truncated = description[:max_length - len(ellipsis)] + ellipsis

        logger.debug(
            "Truncated description from %d to %d characters",
            len(description), len(truncated),
        )

        return truncated
+ + Args: + base_namespace: Base URI for the ontology namespace (default: http://example.org/ontology#) + """ + self.base_namespace = base_namespace + + def validate_ontology_classes( + self, + classes: List[OntologyClass], + ) -> Tuple[bool, List[str], Optional[World]]: + """Validate extracted ontology classes against OWL standards. + + This method creates an OWL ontology from the provided classes using Owlready2, + runs consistency checking with the Pellet reasoner, and detects common issues + like circular inheritance. + + Args: + classes: List of OntologyClass objects to validate + + Returns: + Tuple of (is_valid, error_messages, world): + - is_valid: True if ontology is valid and consistent, False otherwise + - error_messages: List of error/warning messages + - world: Owlready2 World object containing the ontology (None if validation failed) + + Examples: + >>> validator = OWLValidator() + >>> classes = [ + ... OntologyClass(name="Patient", description="A patient", entity_type="Person", domain="Healthcare"), + ... OntologyClass(name="Doctor", description="A doctor", entity_type="Person", domain="Healthcare"), + ... 
] + >>> is_valid, errors, world = validator.validate_ontology_classes(classes) + >>> is_valid + True + >>> len(errors) + 0 + """ + if not classes: + return False, ["No classes provided for validation"], None + + errors = [] + + try: + # Create a new world (isolated ontology environment) + world = World() + + # Use a proper ontology IRI + # Owlready2 expects the IRI to end with .owl or similar + onto_iri = self.base_namespace.rstrip('#/') + if not onto_iri.endswith('.owl'): + onto_iri = onto_iri + '.owl' + + # Create ontology + onto = world.get_ontology(onto_iri) + + with onto: + # Dictionary to store created OWL classes for parent reference + owl_classes = {} + + # First pass: Create all classes without parent relationships + for ontology_class in classes: + try: + # Create OWL class dynamically using type() with Thing as base + # The key is to NOT set namespace in the dict, let Owlready2 handle it + owl_class = type( + ontology_class.name, # Class name + (Thing,), # Base classes + {} # Class dict (empty, let Owlready2 manage) + ) + + # Add label (rdfs:label) - include both English and Chinese names + labels = [ontology_class.name] + if ontology_class.name_chinese: + labels.append(ontology_class.name_chinese) + owl_class.label = labels + + # Add comment (rdfs:comment) with description + if ontology_class.description: + owl_class.comment = [ontology_class.description] + + # Store for parent relationship setup + owl_classes[ontology_class.name] = owl_class + + logger.debug( + f"Created OWL class: {ontology_class.name} " + f"(Chinese: {ontology_class.name_chinese}) " + f"IRI: {owl_class.iri if hasattr(owl_class, 'iri') else 'N/A'}" + ) + + except Exception as e: + error_msg = f"Failed to create OWL class '{ontology_class.name}': {str(e)}" + errors.append(error_msg) + logger.error(error_msg, exc_info=True) + + # Second pass: Set up parent relationships + for ontology_class in classes: + if ontology_class.parent_class and ontology_class.name in owl_classes: + 
parent_name = ontology_class.parent_class + + # Check if parent exists + if parent_name in owl_classes: + try: + child_class = owl_classes[ontology_class.name] + parent_class = owl_classes[parent_name] + + # Set parent by modifying is_a + child_class.is_a = [parent_class] + + logger.debug( + f"Set parent relationship: {ontology_class.name} -> {parent_name}" + ) + + except Exception as e: + error_msg = ( + f"Failed to set parent relationship " + f"'{ontology_class.name}' -> '{parent_name}': {str(e)}" + ) + errors.append(error_msg) + logger.warning(error_msg) + else: + warning_msg = ( + f"Parent class '{parent_name}' not found for '{ontology_class.name}'" + ) + errors.append(warning_msg) + logger.warning(warning_msg) + + # Check for circular inheritance + for class_name, owl_class in owl_classes.items(): + if self._has_circular_inheritance(owl_class): + error_msg = f"Circular inheritance detected for class '{class_name}'" + errors.append(error_msg) + logger.error(error_msg) + + # Run consistency checking with Pellet reasoner + try: + logger.info("Running Pellet reasoner for consistency checking...") + sync_reasoner_pellet(world, infer_property_values=True, infer_data_property_values=True) + logger.info("Consistency check passed") + + except OwlReadyInconsistentOntologyError as e: + error_msg = f"Ontology is inconsistent: {str(e)}" + errors.append(error_msg) + logger.error(error_msg) + return False, errors, world + + except Exception as e: + # Reasoner errors are often due to Java not being installed or configured + # Log as warning but don't fail validation - ontology structure is still valid + warning_msg = f"Reasoner check skipped: {str(e)}" + if str(e).strip(): # Only log if there's an actual error message + logger.warning(warning_msg) + else: + logger.warning("Reasoner check skipped: Java may not be installed or configured") + # Continue - ontology structure is valid even without reasoner check + + # If we have errors (excluding warnings), validation failed + 
is_valid = len(errors) == 0 + + return is_valid, errors, world + + except Exception as e: + error_msg = f"OWL validation failed: {str(e)}" + errors.append(error_msg) + logger.error(error_msg, exc_info=True) + return False, errors, None + + def _has_circular_inheritance(self, owl_class) -> bool: + """Check if an OWL class has circular inheritance. + + Circular inheritance occurs when a class inherits from itself through + a chain of parent relationships (e.g., A -> B -> C -> A). + + Args: + owl_class: Owlready2 class object to check + + Returns: + True if circular inheritance is detected, False otherwise + """ + visited = set() + current = owl_class + + while current: + # Get class IRI or name as identifier + class_id = str(current.iri) if hasattr(current, 'iri') else str(current) + + if class_id in visited: + # Found a cycle + return True + + visited.add(class_id) + + # Get parent classes (is_a relationship) + parents = getattr(current, 'is_a', []) + + # Filter out Thing and other base classes + parent_classes = [p for p in parents if p != Thing and hasattr(p, 'is_a')] + + if not parent_classes: + # No more parents, no cycle + break + + # Check first parent (in single inheritance) + current = parent_classes[0] if parent_classes else None + + return False + + def export_to_owl( + self, + world: World, + output_path: Optional[str] = None, + format: str = "rdfxml", + classes: Optional[List] = None + ) -> str: + """Export ontology to OWL file in specified format. 
+ + Supported formats: + - rdfxml: RDF/XML format (default, most compatible) + - turtle: Turtle format (more readable) + - ntriples: N-Triples format (simplest) + - json: JSON format (simplified, human-readable) + + Args: + world: Owlready2 World object containing the ontology + output_path: Optional file path to save the ontology (if None, returns string) + format: Export format - "rdfxml", "turtle", "ntriples", or "json" (default: "rdfxml") + classes: Optional list of OntologyClass objects (required for json format) + + Returns: + String representation of the exported ontology + + Raises: + ValueError: If format is not supported + RuntimeError: If export fails + + Examples: + >>> validator = OWLValidator() + >>> is_valid, errors, world = validator.validate_ontology_classes(classes) + >>> owl_content = validator.export_to_owl(world, "ontology.owl", format="rdfxml") + """ + # Validate format + valid_formats = ["rdfxml", "turtle", "ntriples", "json"] + if format not in valid_formats: + raise ValueError( + f"Unsupported format '{format}'. Must be one of: {', '.join(valid_formats)}" + ) + + # JSON format doesn't need OWL processing + if format == "json": + if not classes: + raise ValueError("Classes list is required for JSON format export") + return self._export_to_json(classes) + + # For OWL formats, world is required + if not world: + raise ValueError("World object is None. 
Cannot export ontology.") + + # Note: Owlready2 has issues with turtle format export + # We'll handle it specially by converting from rdfxml + use_conversion = (format == "turtle") + + try: + # Get all ontologies in the world + ontologies = list(world.ontologies.values()) + + if not ontologies: + raise RuntimeError("No ontologies found in world") + + # Find the ontology with classes (skip anonymous/empty ontologies) + onto = None + for ont in ontologies: + classes_count = len(list(ont.classes())) + logger.debug(f"Checking ontology {ont.base_iri}: {classes_count} classes") + if classes_count > 0: + onto = ont + break + + # If no ontology with classes found, use the last non-anonymous one + if onto is None: + for ont in reversed(ontologies): + if ont.base_iri != "http://anonymous/": + onto = ont + break + + # If still no ontology, use the first one + if onto is None: + onto = ontologies[0] + + # Log ontology contents for debugging + logger.info(f"Ontology IRI: {onto.base_iri}") + logger.info(f"Ontology contains {len(list(onto.classes()))} classes") + + # List all classes in the ontology + all_classes = list(onto.classes()) + for cls in all_classes: + logger.info(f"Class in ontology: {cls.name} (IRI: {cls.iri})") + if hasattr(cls, 'label'): + logger.debug(f" Labels: {cls.label}") + if hasattr(cls, 'comment'): + logger.debug(f" Comments: {cls.comment}") + + if len(all_classes) == 0: + logger.warning("No classes found in ontology! 
This may indicate a problem with class creation.") + + if output_path: + # Save to file + export_format = "rdfxml" if use_conversion else format + logger.info(f"Exporting ontology to {output_path} in {export_format} format") + onto.save(file=output_path, format=export_format) + + # Read back the file content to return + with open(output_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Convert to turtle if needed + if use_conversion: + content = self._convert_to_turtle(content) + + logger.info(f"Successfully exported ontology to {output_path}") + + # Format the content for better readability + content = self._format_owl_content(content, format) + + return content + else: + # Export to string (save to temporary location and read) + import tempfile + import os + + with tempfile.NamedTemporaryFile(mode='w', suffix='.owl', delete=False) as tmp: + tmp_path = tmp.name + + try: + export_format = "rdfxml" if use_conversion else format + onto.save(file=tmp_path, format=export_format) + + with open(tmp_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Convert to turtle if needed + if use_conversion: + content = self._convert_to_turtle(content) + + # Format the content for better readability + content = self._format_owl_content(content, format) + + return content + + finally: + # Clean up temporary file + if os.path.exists(tmp_path): + os.remove(tmp_path) + + except Exception as e: + error_msg = f"Failed to export ontology: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def _export_to_json(self, classes: List) -> str: + """Export ontology classes to simplified JSON format. + + This format is more compact and easier to parse than OWL XML. 
+ + Args: + classes: List of OntologyClass objects + + Returns: + JSON string representation (compact format) + """ + import json + + result = { + "ontology": { + "namespace": self.base_namespace, + "classes": [] + } + } + + for cls in classes: + class_data = { + "name": cls.name, + "name_chinese": cls.name_chinese, + "description": cls.description, + "entity_type": cls.entity_type, + "domain": cls.domain, + "parent_class": cls.parent_class, + "examples": cls.examples if hasattr(cls, 'examples') else [] + } + result["ontology"]["classes"].append(class_data) + + # 使用紧凑格式:无缩进,使用分隔符减少空格 + return json.dumps(result, ensure_ascii=False, separators=(',', ':')) + + def _convert_to_turtle(self, rdfxml_content: str) -> str: + """Convert RDF/XML content to Turtle format using rdflib. + + Args: + rdfxml_content: RDF/XML format content + + Returns: + Turtle format content + """ + try: + from rdflib import Graph + + # Parse RDF/XML + g = Graph() + g.parse(data=rdfxml_content, format="xml") + + # Serialize to Turtle + turtle_content = g.serialize(format="turtle") + + # Handle bytes vs string + if isinstance(turtle_content, bytes): + turtle_content = turtle_content.decode('utf-8') + + return turtle_content + + except ImportError: + logger.warning( + "rdflib is not installed. Cannot convert to Turtle format. " + "Install with: pip install rdflib" + ) + return rdfxml_content + except Exception as e: + logger.error(f"Failed to convert to Turtle format: {e}") + return rdfxml_content + + def _format_owl_content(self, content: str, format: str) -> str: + """Format OWL content for better readability. 
+ + Args: + content: Raw OWL content string + format: Format type (rdfxml, turtle, ntriples) + + Returns: + Formatted OWL content string + """ + if format == "rdfxml": + # Format XML with proper indentation + try: + import xml.dom.minidom as minidom + dom = minidom.parseString(content) + # Pretty print with 2-space indentation + formatted = dom.toprettyxml(indent=" ", encoding="utf-8").decode("utf-8") + + # Remove extra blank lines + lines = [] + prev_blank = False + for line in formatted.split('\n'): + is_blank = not line.strip() + if not (is_blank and prev_blank): # Skip consecutive blank lines + lines.append(line) + prev_blank = is_blank + + formatted = '\n'.join(lines) + + return formatted + except Exception as e: + logger.warning(f"Failed to format XML content: {e}") + return content + + elif format == "turtle": + # Turtle format is already relatively readable + # Just ensure consistent line endings and not empty + if not content or content.strip() == "": + logger.warning("Turtle content is empty, this may indicate an export issue") + return content.strip() + '\n' if content.strip() else content + + elif format == "ntriples": + # N-Triples format is line-based, ensure proper line endings + return content.strip() + '\n' if content.strip() else content + + return content + + def validate_with_protege_compatibility( + self, + classes: List[OntologyClass] + ) -> Tuple[bool, List[str]]: + """Validate that ontology classes are compatible with Protégé editor. 
+ + Protégé compatibility checks: + - Class names are valid OWL identifiers + - No special characters that Protégé cannot handle + - Namespace is properly formatted + - Labels and comments are properly encoded + + Args: + classes: List of OntologyClass objects to validate + + Returns: + Tuple of (is_compatible, warnings): + - is_compatible: True if compatible with Protégé, False otherwise + - warnings: List of compatibility warning messages + + Examples: + >>> validator = OWLValidator() + >>> classes = [OntologyClass(name="Patient", description="A patient", entity_type="Person", domain="Healthcare")] + >>> is_compatible, warnings = validator.validate_with_protege_compatibility(classes) + >>> is_compatible + True + """ + warnings = [] + + # Check namespace format + if not self.base_namespace.startswith(('http://', 'https://')): + warnings.append( + f"Namespace '{self.base_namespace}' should start with http:// or https:// " + "for Protégé compatibility" + ) + + if not self.base_namespace.endswith(('#', '/')): + warnings.append( + f"Namespace '{self.base_namespace}' should end with # or / " + "for Protégé compatibility" + ) + + # Check each class + for ontology_class in classes: + # Check for special characters that might cause issues + if any(char in ontology_class.name for char in ['<', '>', '"', '{', '}', '|', '^', '`']): + warnings.append( + f"Class name '{ontology_class.name}' contains special characters " + "that may cause issues in Protégé" + ) + + # Check description length (Protégé can handle long descriptions but may display poorly) + if ontology_class.description and len(ontology_class.description) > 1000: + warnings.append( + f"Class '{ontology_class.name}' has a very long description ({len(ontology_class.description)} chars) " + "which may display poorly in Protégé" + ) + + # Check for non-ASCII characters (Protégé supports them but encoding issues may occur) + if not ontology_class.name.isascii(): + warnings.append( + f"Class name '{ontology_class.name}' 
contains non-ASCII characters " + "which may cause encoding issues in some Protégé versions" + ) + + # If no warnings, it's compatible + is_compatible = len(warnings) == 0 + + return is_compatible, warnings diff --git a/api/app/models/__init__.py b/api/app/models/__init__.py index a429dd8e..984212de 100644 --- a/api/app/models/__init__.py +++ b/api/app/models/__init__.py @@ -28,6 +28,10 @@ from .tool_model import ( ToolExecution, ToolType, ToolStatus, AuthType, ExecutionStatus ) from .memory_perceptual_model import MemoryPerceptualModel +from .ontology_scene import OntologyScene +from .ontology_class import OntologyClass +from .ontology_scene import OntologyScene +from .ontology_class import OntologyClass __all__ = [ "Tenants", diff --git a/api/app/models/ontology_class.py b/api/app/models/ontology_class.py new file mode 100644 index 00000000..528d934e --- /dev/null +++ b/api/app/models/ontology_class.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +"""本体类型模型 + +本模块定义本体类型的数据模型。 + +Classes: + OntologyClass: 本体类型表模型 +""" + +import datetime +import uuid +from sqlalchemy import Column, String, DateTime, Text, ForeignKey +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import relationship +from app.db import Base + + +class OntologyClass(Base): + """本体类型表 - 用于存储某个场景提取出来的本体类型信息""" + __tablename__ = "ontology_class" + + # 主键 + class_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True, comment="类型ID") + + # 类型信息 + class_name = Column(String(200), nullable=False, comment="类型名称") + class_description = Column(Text, nullable=True, comment="类型描述") + + # 外键:关联到本体场景 + scene_id = Column(UUID(as_uuid=True), ForeignKey("ontology_scene.scene_id", ondelete="CASCADE"), nullable=False, index=True, comment="所属场景ID") + + # 时间戳 + created_at = Column(DateTime, default=datetime.datetime.now, nullable=False, comment="创建时间") + updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now, nullable=False, 
comment="更新时间") + + # 关系:类型属于某个场景 + scene = relationship("OntologyScene", back_populates="classes") + + def __repr__(self): + return f"" diff --git a/api/app/models/ontology_scene.py b/api/app/models/ontology_scene.py new file mode 100644 index 00000000..350bfdd6 --- /dev/null +++ b/api/app/models/ontology_scene.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +"""本体场景模型 + +本模块定义本体场景的数据模型。 + +Classes: + OntologyScene: 本体场景表模型 +""" + +import datetime +import uuid +from sqlalchemy import Column, String, DateTime, Integer, Text, ForeignKey, UniqueConstraint +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import relationship +from app.db import Base + + +class OntologyScene(Base): + """本体场景表 - 用于存储本体场景下不同的类型信息""" + __tablename__ = "ontology_scene" + __table_args__ = ( + UniqueConstraint('workspace_id', 'scene_name', name='uq_workspace_scene_name'), + ) + + # 主键 + scene_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True, comment="场景ID") + + # 场景信息 + scene_name = Column(String(200), nullable=False, comment="场景名称") + scene_description = Column(Text, nullable=True, comment="场景描述") + + # 外键:关联到工作空间 + workspace_id = Column(UUID(as_uuid=True), ForeignKey("workspaces.id", ondelete="CASCADE"), nullable=False, index=True, comment="所属工作空间ID") + + # 时间戳 + created_at = Column(DateTime, default=datetime.datetime.now, nullable=False, comment="创建时间") + updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now, nullable=False, comment="更新时间") + + # 关系:一个场景可以有多个类型 + classes = relationship("OntologyClass", back_populates="scene", cascade="all, delete-orphan") + + def __repr__(self): + return f"" diff --git a/api/app/repositories/ontology_class_repository.py b/api/app/repositories/ontology_class_repository.py new file mode 100644 index 00000000..68f261ff --- /dev/null +++ b/api/app/repositories/ontology_class_repository.py @@ -0,0 +1,404 @@ +# -*- coding: utf-8 -*- +"""本体类型Repository层 + +本模块提供本体类型的数据访问层实现。 + 
+Classes: + OntologyClassRepository: 本体类型数据访问类 +""" + +import logging +from typing import List, Optional +from uuid import UUID + +from sqlalchemy.orm import Session, joinedload + +from app.core.logging_config import get_db_logger +from app.models.ontology_class import OntologyClass +from app.models.ontology_scene import OntologyScene + + +logger = get_db_logger() + + +class OntologyClassRepository: + """本体类型Repository + + 提供本体类型的CRUD操作和权限检查。 + + Attributes: + db: SQLAlchemy数据库会话 + """ + + def __init__(self, db: Session): + """初始化Repository + + Args: + db: SQLAlchemy数据库会话 + """ + self.db = db + + def create(self, class_data: dict, scene_id: UUID) -> OntologyClass: + """创建本体类型 + + Args: + class_data: 类型数据字典,包含class_name和class_description + scene_id: 所属场景ID + + Returns: + OntologyClass: 创建的类型对象 + + Raises: + Exception: 数据库操作失败 + + Examples: + >>> repo = OntologyClassRepository(db) + >>> ontology_class = repo.create( + ... {"class_name": "患者", "class_description": "描述"}, + ... scene_id + ... 
) + """ + try: + logger.info( + f"Creating ontology class - " + f"name={class_data.get('class_name')}, " + f"scene_id={scene_id}" + ) + + ontology_class = OntologyClass( + class_name=class_data.get("class_name"), + class_description=class_data.get("class_description"), + scene_id=scene_id + ) + + self.db.add(ontology_class) + self.db.flush() # 获取ID但不提交 + + logger.info( + f"Ontology class created successfully - " + f"class_id={ontology_class.class_id}" + ) + + return ontology_class + + except Exception as e: + logger.error( + f"Failed to create ontology class: {str(e)}", + exc_info=True + ) + raise + + def get_by_id(self, class_id: UUID) -> Optional[OntologyClass]: + """根据ID获取类型 + + Args: + class_id: 类型ID + + Returns: + Optional[OntologyClass]: 类型对象,不存在则返回None + + Examples: + >>> repo = OntologyClassRepository(db) + >>> ontology_class = repo.get_by_id(class_id) + """ + try: + logger.debug(f"Getting ontology class by ID: {class_id}") + + ontology_class = self.db.query(OntologyClass).filter( + OntologyClass.class_id == class_id + ).first() + + if ontology_class: + logger.debug(f"Ontology class found: {class_id}") + else: + logger.debug(f"Ontology class not found: {class_id}") + + return ontology_class + + except Exception as e: + logger.error( + f"Failed to get ontology class by ID: {str(e)}", + exc_info=True + ) + raise + + def get_by_name(self, class_name: str, scene_id: UUID) -> Optional[OntologyClass]: + """根据类型名称和场景ID获取类型(精确匹配) + + Args: + class_name: 类型名称 + scene_id: 场景ID + + Returns: + Optional[OntologyClass]: 类型对象,不存在则返回None + + Examples: + >>> repo = OntologyClassRepository(db) + >>> ontology_class = repo.get_by_name("患者", scene_id) + """ + try: + logger.debug(f"Getting ontology class by name: {class_name}, scene_id: {scene_id}") + + ontology_class = self.db.query(OntologyClass).filter( + OntologyClass.class_name == class_name, + OntologyClass.scene_id == scene_id + ).first() + + if ontology_class: + logger.debug(f"Ontology class found: {class_name}") + else: 
+ logger.debug(f"Ontology class not found: {class_name}") + + return ontology_class + + except Exception as e: + logger.error( + f"Failed to get ontology class by name: {str(e)}", + exc_info=True + ) + raise + + def search_by_name(self, keyword: str, scene_id: UUID) -> List[OntologyClass]: + """根据关键词模糊搜索类型 + + 使用 LIKE 进行模糊匹配,支持中文和英文。 + + Args: + keyword: 搜索关键词 + scene_id: 场景ID + + Returns: + List[OntologyClass]: 匹配的类型列表 + + Examples: + >>> repo = OntologyClassRepository(db) + >>> classes = repo.search_by_name("患者", scene_id) + """ + try: + logger.debug( + f"Searching ontology classes by keyword - " + f"keyword={keyword}, scene_id={scene_id}" + ) + + # 使用 ilike 进行不区分大小写的模糊匹配 + classes = self.db.query(OntologyClass).filter( + OntologyClass.class_name.ilike(f"%{keyword}%"), + OntologyClass.scene_id == scene_id + ).order_by( + OntologyClass.created_at.desc() + ).all() + + logger.info( + f"Found {len(classes)} ontology classes matching keyword '{keyword}' " + f"in scene {scene_id}" + ) + + return classes + + except Exception as e: + logger.error( + f"Failed to search ontology classes by keyword: {str(e)}", + exc_info=True + ) + raise + + def get_by_scene(self, scene_id: UUID) -> List[OntologyClass]: + """获取场景下的所有类型 + + 按创建时间倒序排列。 + + Args: + scene_id: 场景ID + + Returns: + List[OntologyClass]: 类型列表 + + Examples: + >>> repo = OntologyClassRepository(db) + >>> classes = repo.get_by_scene(scene_id) + """ + try: + logger.debug(f"Getting ontology classes by scene: {scene_id}") + + classes = self.db.query(OntologyClass).filter( + OntologyClass.scene_id == scene_id + ).order_by( + OntologyClass.created_at.desc() + ).all() + + logger.info( + f"Found {len(classes)} ontology classes in scene {scene_id}" + ) + + return classes + + except Exception as e: + logger.error( + f"Failed to get ontology classes by scene: {str(e)}", + exc_info=True + ) + raise + + def update(self, class_id: UUID, update_data: dict) -> Optional[OntologyClass]: + """更新类型信息 + + Args: + class_id: 类型ID + 
update_data: 更新数据字典 + + Returns: + Optional[OntologyClass]: 更新后的类型对象,不存在则返回None + + Raises: + Exception: 数据库操作失败 + + Examples: + >>> repo = OntologyClassRepository(db) + >>> ontology_class = repo.update( + ... class_id, + ... {"class_name": "新名称"} + ... ) + """ + try: + logger.info(f"Updating ontology class: {class_id}") + + ontology_class = self.get_by_id(class_id) + if not ontology_class: + logger.warning(f"Ontology class not found for update: {class_id}") + return None + + # 更新字段 + if "class_name" in update_data and update_data["class_name"] is not None: + ontology_class.class_name = update_data["class_name"] + + if "class_description" in update_data: + ontology_class.class_description = update_data["class_description"] + + self.db.flush() + + logger.info(f"Ontology class updated successfully: {class_id}") + + return ontology_class + + except Exception as e: + logger.error( + f"Failed to update ontology class: {str(e)}", + exc_info=True + ) + raise + + def delete(self, class_id: UUID) -> bool: + """删除类型 + + Args: + class_id: 类型ID + + Returns: + bool: 删除成功返回True,类型不存在返回False + + Raises: + Exception: 数据库操作失败 + + Examples: + >>> repo = OntologyClassRepository(db) + >>> success = repo.delete(class_id) + """ + try: + logger.info(f"Deleting ontology class: {class_id}") + + ontology_class = self.get_by_id(class_id) + if not ontology_class: + logger.warning(f"Ontology class not found for delete: {class_id}") + return False + + self.db.delete(ontology_class) + self.db.flush() + + logger.info(f"Ontology class deleted successfully: {class_id}") + + return True + + except Exception as e: + logger.error( + f"Failed to delete ontology class: {str(e)}", + exc_info=True + ) + raise + + def check_ownership(self, class_id: UUID, workspace_id: UUID) -> bool: + """检查类型是否属于指定工作空间(通过场景关联) + + Args: + class_id: 类型ID + workspace_id: 工作空间ID + + Returns: + bool: 属于返回True,否则返回False + + Examples: + >>> repo = OntologyClassRepository(db) + >>> is_owner = repo.check_ownership(class_id, 
workspace_id) + """ + try: + logger.debug( + f"Checking class ownership - " + f"class_id={class_id}, workspace_id={workspace_id}" + ) + + count = self.db.query(OntologyClass).join( + OntologyScene, + OntologyClass.scene_id == OntologyScene.scene_id + ).filter( + OntologyClass.class_id == class_id, + OntologyScene.workspace_id == workspace_id + ).count() + + is_owner = count > 0 + + logger.debug( + f"Class ownership check result: {is_owner} - " + f"class_id={class_id}" + ) + + return is_owner + + except Exception as e: + logger.error( + f"Failed to check class ownership: {str(e)}", + exc_info=True + ) + raise + + def get_scene_id_by_class(self, class_id: UUID) -> Optional[UUID]: + """根据类型ID获取所属场景ID + + Args: + class_id: 类型ID + + Returns: + Optional[UUID]: 场景ID,类型不存在则返回None + + Examples: + >>> repo = OntologyClassRepository(db) + >>> scene_id = repo.get_scene_id_by_class(class_id) + """ + try: + logger.debug(f"Getting scene ID by class: {class_id}") + + ontology_class = self.get_by_id(class_id) + if not ontology_class: + logger.debug(f"Class not found: {class_id}") + return None + + logger.debug( + f"Found scene ID: {ontology_class.scene_id} for class: {class_id}" + ) + + return ontology_class.scene_id + + except Exception as e: + logger.error( + f"Failed to get scene ID by class: {str(e)}", + exc_info=True + ) + raise diff --git a/api/app/repositories/ontology_scene_repository.py b/api/app/repositories/ontology_scene_repository.py new file mode 100644 index 00000000..322e111c --- /dev/null +++ b/api/app/repositories/ontology_scene_repository.py @@ -0,0 +1,394 @@ +# -*- coding: utf-8 -*- +"""本体场景Repository层 + +本模块提供本体场景的数据访问层实现。 + +Classes: + OntologySceneRepository: 本体场景数据访问类 +""" + +import logging +from typing import List, Optional +from uuid import UUID + +from sqlalchemy.orm import Session, joinedload + +from app.core.logging_config import get_db_logger +from app.models.ontology_scene import OntologyScene + + +logger = get_db_logger() + + +class 
OntologySceneRepository: + """本体场景Repository + + 提供本体场景的CRUD操作和权限检查。 + + Attributes: + db: SQLAlchemy数据库会话 + """ + + def __init__(self, db: Session): + """初始化Repository + + Args: + db: SQLAlchemy数据库会话 + """ + self.db = db + + def create(self, scene_data: dict, workspace_id: UUID) -> OntologyScene: + """创建本体场景 + + Args: + scene_data: 场景数据字典,包含scene_name和scene_description + workspace_id: 所属工作空间ID + + Returns: + OntologyScene: 创建的场景对象 + + Raises: + Exception: 数据库操作失败 + + Examples: + >>> repo = OntologySceneRepository(db) + >>> scene = repo.create( + ... {"scene_name": "医疗场景", "scene_description": "描述"}, + ... workspace_id + ... ) + """ + try: + logger.info( + f"Creating ontology scene - " + f"name={scene_data.get('scene_name')}, " + f"workspace_id={workspace_id}" + ) + + scene = OntologyScene( + scene_name=scene_data.get("scene_name"), + scene_description=scene_data.get("scene_description"), + workspace_id=workspace_id + ) + + self.db.add(scene) + self.db.flush() # 获取ID但不提交 + + logger.info( + f"Ontology scene created successfully - " + f"scene_id={scene.scene_id}" + ) + + return scene + + except Exception as e: + logger.error( + f"Failed to create ontology scene: {str(e)}", + exc_info=True + ) + raise + + def get_by_id(self, scene_id: UUID) -> Optional[OntologyScene]: + """根据ID获取场景 + + Args: + scene_id: 场景ID + + Returns: + Optional[OntologyScene]: 场景对象,不存在则返回None + + Examples: + >>> repo = OntologySceneRepository(db) + >>> scene = repo.get_by_id(scene_id) + """ + try: + logger.debug(f"Getting ontology scene by ID: {scene_id}") + + scene = self.db.query(OntologyScene).filter( + OntologyScene.scene_id == scene_id + ).first() + + if scene: + logger.debug(f"Ontology scene found: {scene_id}") + else: + logger.debug(f"Ontology scene not found: {scene_id}") + + return scene + + except Exception as e: + logger.error( + f"Failed to get ontology scene by ID: {str(e)}", + exc_info=True + ) + raise + + def get_by_name(self, scene_name: str, workspace_id: UUID) -> 
Optional[OntologyScene]: + """根据场景名称和工作空间ID获取场景(精确匹配) + + Args: + scene_name: 场景名称 + workspace_id: 工作空间ID + + Returns: + Optional[OntologyScene]: 场景对象,不存在则返回None + + Examples: + >>> repo = OntologySceneRepository(db) + >>> scene = repo.get_by_name("医疗场景", workspace_id) + """ + try: + logger.debug( + f"Getting ontology scene by name - " + f"scene_name={scene_name}, workspace_id={workspace_id}" + ) + + scene = self.db.query(OntologyScene).options( + joinedload(OntologyScene.classes) + ).filter( + OntologyScene.scene_name == scene_name, + OntologyScene.workspace_id == workspace_id + ).first() + + if scene: + logger.debug(f"Ontology scene found: {scene_name}") + else: + logger.debug(f"Ontology scene not found: {scene_name}") + + return scene + + except Exception as e: + logger.error( + f"Failed to get ontology scene by name: {str(e)}", + exc_info=True + ) + raise + + def search_by_name(self, keyword: str, workspace_id: UUID) -> List[OntologyScene]: + """根据关键词模糊搜索场景 + + 使用 LIKE 进行模糊匹配,支持中文和英文。 + + Args: + keyword: 搜索关键词 + workspace_id: 工作空间ID + + Returns: + List[OntologyScene]: 匹配的场景列表 + + Examples: + >>> repo = OntologySceneRepository(db) + >>> scenes = repo.search_by_name("医疗", workspace_id) + """ + try: + logger.debug( + f"Searching ontology scenes by keyword - " + f"keyword={keyword}, workspace_id={workspace_id}" + ) + + # 使用 ilike 进行不区分大小写的模糊匹配 + scenes = self.db.query(OntologyScene).options( + joinedload(OntologyScene.classes) + ).filter( + OntologyScene.scene_name.ilike(f"%{keyword}%"), + OntologyScene.workspace_id == workspace_id + ).order_by( + OntologyScene.updated_at.desc() + ).all() + + logger.info( + f"Found {len(scenes)} ontology scenes matching keyword '{keyword}' " + f"in workspace {workspace_id}" + ) + + return scenes + + except Exception as e: + logger.error( + f"Failed to search ontology scenes by keyword: {str(e)}", + exc_info=True + ) + raise + + def get_by_workspace(self, workspace_id: UUID, page: Optional[int] = None, page_size: Optional[int] = 
None) -> tuple: + """获取工作空间下的所有场景(支持分页) + + 使用joinedload预加载classes关系以统计数量。 + + Args: + workspace_id: 工作空间ID + page: 页码(可选,从1开始) + page_size: 每页数量(可选) + + Returns: + tuple: (场景列表, 总数量) + + Examples: + >>> repo = OntologySceneRepository(db) + >>> scenes, total = repo.get_by_workspace(workspace_id) + >>> scenes, total = repo.get_by_workspace(workspace_id, page=1, page_size=10) + """ + try: + logger.debug(f"Getting ontology scenes by workspace: {workspace_id}, page={page}, page_size={page_size}") + + # 构建基础查询 + query = self.db.query(OntologyScene).options( + joinedload(OntologyScene.classes) + ).filter( + OntologyScene.workspace_id == workspace_id + ).order_by( + OntologyScene.updated_at.desc() + ) + + # 获取总数 + total = query.count() + + # 如果提供了分页参数,应用分页 + if page is not None and page_size is not None: + offset = (page - 1) * page_size + query = query.offset(offset).limit(page_size) + logger.debug(f"Applying pagination: offset={offset}, limit={page_size}") + + scenes = query.all() + + logger.info( + f"Found {len(scenes)} ontology scenes (total: {total}) in workspace {workspace_id}" + ) + + return scenes, total + + except Exception as e: + logger.error( + f"Failed to get ontology scenes by workspace: {str(e)}", + exc_info=True + ) + raise + + def update(self, scene_id: UUID, update_data: dict) -> Optional[OntologyScene]: + """更新场景信息 + + Args: + scene_id: 场景ID + update_data: 更新数据字典 + + Returns: + Optional[OntologyScene]: 更新后的场景对象,不存在则返回None + + Raises: + Exception: 数据库操作失败 + + Examples: + >>> repo = OntologySceneRepository(db) + >>> scene = repo.update( + ... scene_id, + ... {"scene_name": "新名称"} + ... 
) + """ + try: + logger.info(f"Updating ontology scene: {scene_id}") + + scene = self.get_by_id(scene_id) + if not scene: + logger.warning(f"Ontology scene not found for update: {scene_id}") + return None + + # 更新字段 + if "scene_name" in update_data and update_data["scene_name"] is not None: + scene.scene_name = update_data["scene_name"] + + if "scene_description" in update_data: + scene.scene_description = update_data["scene_description"] + + self.db.flush() + + logger.info(f"Ontology scene updated successfully: {scene_id}") + + return scene + + except Exception as e: + logger.error( + f"Failed to update ontology scene: {str(e)}", + exc_info=True + ) + raise + + def delete(self, scene_id: UUID) -> bool: + """删除场景(级联删除类型) + + 依赖数据库级联删除配置(ondelete="CASCADE")。 + + Args: + scene_id: 场景ID + + Returns: + bool: 删除成功返回True,场景不存在返回False + + Raises: + Exception: 数据库操作失败 + + Examples: + >>> repo = OntologySceneRepository(db) + >>> success = repo.delete(scene_id) + """ + try: + logger.info(f"Deleting ontology scene: {scene_id}") + + scene = self.get_by_id(scene_id) + if not scene: + logger.warning(f"Ontology scene not found for delete: {scene_id}") + return False + + self.db.delete(scene) + self.db.flush() + + logger.info( + f"Ontology scene deleted successfully (cascade): {scene_id}" + ) + + return True + + except Exception as e: + logger.error( + f"Failed to delete ontology scene: {str(e)}", + exc_info=True + ) + raise + + def check_ownership(self, scene_id: UUID, workspace_id: UUID) -> bool: + """检查场景是否属于指定工作空间 + + Args: + scene_id: 场景ID + workspace_id: 工作空间ID + + Returns: + bool: 属于返回True,否则返回False + + Examples: + >>> repo = OntologySceneRepository(db) + >>> is_owner = repo.check_ownership(scene_id, workspace_id) + """ + try: + logger.debug( + f"Checking scene ownership - " + f"scene_id={scene_id}, workspace_id={workspace_id}" + ) + + count = self.db.query(OntologyScene).filter( + OntologyScene.scene_id == scene_id, + OntologyScene.workspace_id == workspace_id + ).count() 
+ + is_owner = count > 0 + + logger.debug( + f"Scene ownership check result: {is_owner} - " + f"scene_id={scene_id}" + ) + + return is_owner + + except Exception as e: + logger.error( + f"Failed to check scene ownership: {str(e)}", + exc_info=True + ) + raise diff --git a/api/app/schemas/ontology_schemas.py b/api/app/schemas/ontology_schemas.py new file mode 100644 index 00000000..5a88f84d --- /dev/null +++ b/api/app/schemas/ontology_schemas.py @@ -0,0 +1,461 @@ +"""本体提取API的请求和响应模型 + +本模块定义了本体提取系统的所有API请求和响应的Pydantic模型。 + +Classes: + ExtractionRequest: 本体提取请求模型 + ExtractionResponse: 本体提取响应模型 + ExportRequest: OWL文件导出请求模型 + ExportResponse: OWL文件导出响应模型 + OntologyResultResponse: 本体提取结果响应模型(带毫秒时间戳) + SceneCreateRequest: 场景创建请求模型 + SceneUpdateRequest: 场景更新请求模型 + SceneResponse: 场景响应模型 + SceneListResponse: 场景列表响应模型 + ClassCreateRequest: 类型创建请求模型 + ClassUpdateRequest: 类型更新请求模型 + ClassResponse: 类型响应模型 + ClassListResponse: 类型列表响应模型 +""" + +from typing import List, Optional +import datetime +from uuid import UUID + +from pydantic import BaseModel, Field, field_serializer, ConfigDict + +from app.core.memory.models.ontology_models import OntologyClass + + +class ExtractionRequest(BaseModel): + """本体提取请求模型 + + 用于POST /api/ontology/extract端点的请求体。 + + Attributes: + scenario: 场景描述文本,不能为空 + domain: 可选的领域提示(如Healthcare, Education等) + llm_id: LLM模型ID,必须提供 + scene_id: 场景ID,必须提供,用于将提取的类保存到指定场景 + + Examples: + >>> request = ExtractionRequest( + ... scenario="医院管理患者记录...", + ... domain="Healthcare", + ... llm_id="550e8400-e29b-41d4-a716-446655440000", + ... scene_id="660e8400-e29b-41d4-a716-446655440000" + ... 
) + """ + scenario: str = Field(..., description="场景描述文本", min_length=1) + domain: Optional[str] = Field(None, description="可选的领域提示") + llm_id: str = Field(..., description="LLM模型ID") + scene_id: UUID = Field(..., description="场景ID,用于将提取的类保存到指定场景") + + +class ExtractionResponse(BaseModel): + """本体提取响应模型 + + 用于POST /api/ontology/extract端点的响应体。 + + Attributes: + classes: 提取的本体类列表 + domain: 识别的领域 + extracted_count: 提取的类数量 + + Examples: + >>> response = ExtractionResponse( + ... classes=[...], + ... domain="Healthcare", + ... extracted_count=7 + ... ) + """ + classes: List[OntologyClass] = Field(default_factory=list, description="提取的本体类列表") + domain: str = Field(..., description="识别的领域") + extracted_count: int = Field(..., description="提取的类数量") + + +class ExportRequest(BaseModel): + """OWL文件导出请求模型 + + 用于POST /api/ontology/export端点的请求体。 + + Attributes: + classes: 要导出的本体类列表 + format: 导出格式,可选值: rdfxml, turtle, ntriples, json + include_metadata: 是否包含完整的OWL元数据(命名空间等),默认True + + Examples: + >>> request = ExportRequest( + ... classes=[...], + ... format="rdfxml", + ... include_metadata=True + ... ) + """ + classes: List[OntologyClass] = Field(..., description="要导出的本体类列表", min_length=1) + format: str = Field("rdfxml", description="导出格式: rdfxml, turtle, ntriples, json") + include_metadata: bool = Field(True, description="是否包含完整的OWL元数据") + + +class ExportResponse(BaseModel): + """OWL文件导出响应模型 + + 用于POST /api/ontology/export端点的响应体。 + + Attributes: + owl_content: OWL文件内容 + format: 导出格式 + classes_count: 导出的类数量 + + Examples: + >>> response = ExportResponse( + ... owl_content="...", + ... format="rdfxml", + ... classes_count=7 + ... 
) + """ + owl_content: str = Field(..., description="OWL文件内容") + format: str = Field(..., description="导出格式") + classes_count: int = Field(..., description="导出的类数量") + + +class OntologyResultResponse(BaseModel): + """本体提取结果响应模型 + + 用于返回数据库中存储的提取结果,时间戳为毫秒级。 + + Attributes: + id: 结果ID (UUID) + scenario: 场景描述文本 + domain: 领域 + classes_json: 提取的本体类数据(JSON格式) + extracted_count: 提取的类数量 + user_id: 用户ID + created_at: 创建时间(毫秒时间戳) + + Examples: + >>> response = OntologyResultResponse( + ... id=uuid.uuid4(), + ... scenario="医院管理患者记录...", + ... domain="Healthcare", + ... classes_json={"classes": [...]}, + ... extracted_count=7, + ... user_id=123, + ... created_at=datetime.now() + ... ) + """ + id: UUID = Field(..., description="结果ID") + scenario: str = Field(..., description="场景描述文本") + domain: Optional[str] = Field(None, description="领域") + classes_json: dict = Field(..., description="提取的本体类数据(JSON格式)") + extracted_count: int = Field(..., description="提取的类数量") + user_id: Optional[int] = Field(None, description="用户ID") + created_at: datetime.datetime = Field(..., description="创建时间") + + @field_serializer("created_at", when_used="json") + def _serialize_created_at(self, dt: datetime.datetime): + """将创建时间序列化为毫秒时间戳""" + return int(dt.timestamp() * 1000) if dt else None + + class Config: + from_attributes = True + + + +# ==================== 本体场景相关 Schema ==================== + +class SceneCreateRequest(BaseModel): + """场景创建请求模型 + + 用于创建新的本体场景。 + + Attributes: + scene_name: 场景名称,必填,1-200字符 + scene_description: 场景描述,可选 + + Examples: + >>> request = SceneCreateRequest( + ... scene_name="医疗场景", + ... scene_description="用于医疗领域的本体建模" + ... 
) + """ + scene_name: str = Field(..., min_length=1, max_length=200, description="场景名称") + scene_description: Optional[str] = Field(None, description="场景描述") + + +class SceneUpdateRequest(BaseModel): + """场景更新请求模型 + + 用于更新已有本体场景信息。 + + Attributes: + scene_name: 场景名称,可选,1-200字符 + scene_description: 场景描述,可选 + + Examples: + >>> request = SceneUpdateRequest( + ... scene_name="更新后的场景名称", + ... scene_description="更新后的描述" + ... ) + """ + scene_name: Optional[str] = Field(None, min_length=1, max_length=200, description="场景名称") + scene_description: Optional[str] = Field(None, description="场景描述") + + +class SceneResponse(BaseModel): + """场景响应模型 + + 用于返回本体场景信息。 + + Attributes: + scene_id: 场景ID + scene_name: 场景名称 + scene_description: 场景描述 + type_num: 类型数量 + workspace_id: 所属工作空间ID + created_at: 创建时间(毫秒时间戳) + updated_at: 更新时间(毫秒时间戳) + classes_count: 类型数量 + + Examples: + >>> response = SceneResponse( + ... scene_id=uuid.uuid4(), + ... scene_name="医疗场景", + ... scene_description="用于医疗领域的本体建模", + ... type_num=0, + ... workspace_id=uuid.uuid4(), + ... created_at=datetime.now(), + ... updated_at=datetime.now(), + ... classes_count=5 + ... 
) + """ + scene_id: UUID = Field(..., description="场景ID") + scene_name: str = Field(..., description="场景名称") + scene_description: Optional[str] = Field(None, description="场景描述") + type_num: int = Field(..., description="类型数量") + entity_type: Optional[List[str]] = Field(None, description="实体类型列表(最多3个class_name)") + workspace_id: UUID = Field(..., description="所属工作空间ID") + created_at: datetime.datetime = Field(..., description="创建时间(毫秒时间戳)") + updated_at: datetime.datetime = Field(..., description="更新时间(毫秒时间戳)") + classes_count: int = Field(0, description="类型数量") + + @field_serializer("created_at", when_used="json") + def _serialize_created_at(self, dt: datetime.datetime): + """将创建时间序列化为毫秒时间戳""" + return int(dt.timestamp() * 1000) if dt else None + + @field_serializer("updated_at", when_used="json") + def _serialize_updated_at(self, dt: datetime.datetime): + """将更新时间序列化为毫秒时间戳""" + return int(dt.timestamp() * 1000) if dt else None + + model_config = ConfigDict(from_attributes=True) + + +class PaginationInfo(BaseModel): + """分页信息模型 + + Attributes: + page: 当前页码 + pagesize: 每页数量 + total: 总数量 + hasnext: 是否有下一页 + """ + page: int = Field(..., description="当前页码") + pagesize: int = Field(..., description="每页数量") + total: int = Field(..., description="总数量") + hasnext: bool = Field(..., description="是否有下一页") + + +class SceneListResponse(BaseModel): + """场景列表响应模型(支持分页) + + 用于返回本体场景列表。 + + Attributes: + items: 场景列表 + page: 分页信息(可选,分页时返回) + + Examples: + >>> # 不分页 + >>> response = SceneListResponse( + ... items=[scene1, scene2] + ... ) + >>> # 分页 + >>> response = SceneListResponse( + ... items=[scene1, scene2, ...], + ... page=PaginationInfo(page=1, pagesize=100, total=150, hasnext=True) + ... 
) + """ + items: List[SceneResponse] = Field(..., description="场景列表") + page: Optional[PaginationInfo] = Field(None, description="分页信息") + + +# ==================== 本体类型相关 Schema ==================== + +class ClassItem(BaseModel): + """单个类型信息模型 + + Attributes: + class_name: 类型名称,必填,1-200字符 + class_description: 类型描述,可选 + + Examples: + >>> item = ClassItem( + ... class_name="患者", + ... class_description="医院患者信息" + ... ) + """ + class_name: str = Field(..., min_length=1, max_length=200, description="类型名称") + class_description: Optional[str] = Field(None, description="类型描述") + + +class ClassCreateRequest(BaseModel): + """类型创建请求模型(统一使用列表形式) + + 通过列表中元素数量决定创建模式: + - 列表包含 1 个元素:单个创建 + - 列表包含多个元素:批量创建 + + Attributes: + scene_id: 所属场景ID,必填 + classes: 类型列表,必填,至少包含 1 个元素 + + Examples: + # 单个创建(列表中 1 个元素) + >>> request = ClassCreateRequest( + ... scene_id=uuid.uuid4(), + ... classes=[ + ... ClassItem(class_name="患者", class_description="医院患者信息") + ... ] + ... ) + + # 批量创建(列表中多个元素) + >>> request = ClassCreateRequest( + ... scene_id=uuid.uuid4(), + ... classes=[ + ... ClassItem(class_name="患者", class_description="医院患者信息"), + ... ClassItem(class_name="医生", class_description="医院医生信息"), + ... ClassItem(class_name="药品", class_description="医院药品信息") + ... ] + ... ) + """ + scene_id: UUID = Field(..., description="所属场景ID") + classes: List[ClassItem] = Field(..., min_length=1, description="类型列表,至少包含 1 个元素") + + +class ClassUpdateRequest(BaseModel): + """类型更新请求模型 + + 用于更新已有本体类型信息。 + + Attributes: + class_name: 类型名称,可选,1-200字符 + class_description: 类型描述,可选 + + Examples: + >>> request = ClassUpdateRequest( + ... class_name="更新后的类型名称", + ... class_description="更新后的描述" + ... 
) + """ + class_name: Optional[str] = Field(None, min_length=1, max_length=200, description="类型名称") + class_description: Optional[str] = Field(None, description="类型描述") + + +class ClassResponse(BaseModel): + """类型响应模型 + + 用于返回本体类型信息。 + + Attributes: + class_id: 类型ID + class_name: 类型名称 + class_description: 类型描述 + scene_id: 所属场景ID + created_at: 创建时间(毫秒时间戳) + updated_at: 更新时间(毫秒时间戳) + + Examples: + >>> response = ClassResponse( + ... class_id=uuid.uuid4(), + ... class_name="患者", + ... class_description="医院患者信息", + ... scene_id=uuid.uuid4(), + ... created_at=datetime.now(), + ... updated_at=datetime.now() + ... ) + """ + class_id: UUID = Field(..., description="类型ID") + class_name: str = Field(..., description="类型名称") + class_description: Optional[str] = Field(None, description="类型描述") + scene_id: UUID = Field(..., description="所属场景ID") + created_at: datetime.datetime = Field(..., description="创建时间(毫秒时间戳)") + updated_at: datetime.datetime = Field(..., description="更新时间(毫秒时间戳)") + + @field_serializer("created_at", when_used="json") + def _serialize_created_at(self, dt: datetime.datetime): + """将创建时间序列化为毫秒时间戳""" + return int(dt.timestamp() * 1000) if dt else None + + @field_serializer("updated_at", when_used="json") + def _serialize_updated_at(self, dt: datetime.datetime): + """将更新时间序列化为毫秒时间戳""" + return int(dt.timestamp() * 1000) if dt else None + + model_config = ConfigDict(from_attributes=True) + + +class ClassBatchCreateResponse(BaseModel): + """批量创建类型响应模型 + + 用于返回批量创建的结果统计和详情。 + + Attributes: + total: 总共尝试创建的数量 + success_count: 成功创建的数量 + failed_count: 失败的数量 + items: 成功创建的类型列表 + errors: 失败的错误信息列表(可选) + + Examples: + >>> response = ClassBatchCreateResponse( + ... total=3, + ... success_count=2, + ... failed_count=1, + ... items=[class1, class2], + ... errors=["创建类型 '药品' 失败: 类型名称已存在"] + ... 
) + """ + total: int = Field(..., description="总共尝试创建的数量") + success_count: int = Field(..., description="成功创建的数量") + failed_count: int = Field(0, description="失败的数量") + items: List[ClassResponse] = Field(..., description="成功创建的类型列表") + errors: Optional[List[str]] = Field(None, description="失败的错误信息列表") + + +class ClassListResponse(BaseModel): + """类型列表响应模型 + + 用于返回本体类型列表。 + + Attributes: + total: 总数量 + scene_id: 所属场景ID + scene_name: 场景名称 + scene_description: 场景描述 + items: 类型列表 + + Examples: + >>> response = ClassListResponse( + ... total=3, + ... scene_id=uuid.uuid4(), + ... scene_name="医疗场景", + ... scene_description="用于医疗领域的本体建模", + ... items=[class1, class2, class3] + ... ) + """ + total: int = Field(..., description="总数量") + scene_id: UUID = Field(..., description="所属场景ID") + scene_name: str = Field(..., description="场景名称") + scene_description: Optional[str] = Field(None, description="场景描述") + items: List[ClassResponse] = Field(..., description="类型列表") diff --git a/api/app/services/ontology_service.py b/api/app/services/ontology_service.py new file mode 100644 index 00000000..c832b0cc --- /dev/null +++ b/api/app/services/ontology_service.py @@ -0,0 +1,1162 @@ +"""本体提取服务层 + +本模块提供本体提取的业务逻辑封装,协调OntologyExtractor和OWLValidator。 +包括本体提取、OWL文件导出等功能。 + +Classes: + OntologyService: 本体提取服务类,封装业务逻辑 +""" + +import logging +import time +from typing import Any, Dict, List, Optional + +from sqlalchemy.orm import Session + +from app.core.memory.llm_tools.openai_client import OpenAIClient +from app.core.memory.models.ontology_models import ( + OntologyClass, + OntologyExtractionResponse, +) +from app.core.memory.storage_services.extraction_engine.knowledge_extraction.ontology_extraction import ( + OntologyExtractor, +) +from app.core.memory.utils.validation.owl_validator import OWLValidator + + +logger = logging.getLogger(__name__) + + +class OntologyService: + """本体提取服务层 + + 封装本体提取的业务逻辑,协调各个组件: + - OntologyExtractor: 执行LLM驱动的本体提取 + - OWLValidator: OWL语义验证 + + Attributes: + 
extractor: 本体提取器实例 + owl_validator: OWL验证器实例 + db: 数据库会话 + """ + + # 默认配置参数 + DEFAULT_MAX_CLASSES = 15 + DEFAULT_MIN_CLASSES = 5 + DEFAULT_MAX_DESCRIPTION_LENGTH = 500 + DEFAULT_LLM_TEMPERATURE = 0.3 + DEFAULT_LLM_MAX_TOKENS = 2000 + DEFAULT_LLM_TIMEOUT = 30.0 + DEFAULT_ENABLE_OWL_VALIDATION = True + + def __init__( + self, + llm_client: OpenAIClient, + db: Session + ): + """初始化本体提取服务 + + Args: + llm_client: OpenAI客户端实例 + db: SQLAlchemy数据库会话 + """ + self.extractor = OntologyExtractor(llm_client) + self.owl_validator = OWLValidator() + self.db = db + + # 初始化Repository + from app.repositories.ontology_scene_repository import OntologySceneRepository + from app.repositories.ontology_class_repository import OntologyClassRepository + + self.scene_repo = OntologySceneRepository(db) + self.class_repo = OntologyClassRepository(db) + + logger.info("OntologyService initialized") + + async def extract_ontology( + self, + scenario: str, + domain: Optional[str] = None, + scene_id: Optional[Any] = None, + workspace_id: Optional[Any] = None + ) -> OntologyExtractionResponse: + """执行本体提取 + + 使用默认配置参数调用OntologyExtractor执行提取。 + 提取结果仅返回给前端,不会自动保存到数据库。 + 前端需要调用 /class 接口来保存选中的类型。 + + Args: + scenario: 场景描述文本 + domain: 可选的领域提示 + scene_id: 可选的场景ID,用于权限验证(不再用于自动保存) + workspace_id: 可选的工作空间ID,用于权限验证 + + Returns: + OntologyExtractionResponse: 提取结果 + + Raises: + ValueError: 场景描述为空、场景不存在或无权限 + RuntimeError: 提取过程失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> response = await service.extract_ontology( + ... scenario="医院管理患者记录...", + ... domain="Healthcare", + ... scene_id=scene_uuid, + ... workspace_id=workspace_uuid + ... 
) + >>> len(response.classes) + 7 + """ + # 开始计时 + start_time = time.time() + + # 验证输入 + if not scenario or not scenario.strip(): + logger.error("Scenario description is empty") + raise ValueError("Scenario description cannot be empty") + + # 如果提供了scene_id,验证场景是否存在且有权限 + if scene_id and workspace_id: + logger.info(f"Validating scene access - scene_id={scene_id}, workspace_id={workspace_id}") + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("场景不存在") + + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限在该场景下创建类型") + + logger.info( + f"Starting ontology extraction service - " + f"scenario_length={len(scenario)}, " + f"domain={domain}, " + f"scene_id={scene_id}" + ) + + try: + # 调用提取器执行提取(使用默认配置) + logger.info("Calling OntologyExtractor with default config") + extraction_start_time = time.time() + + response = await self.extractor.extract_ontology_classes( + scenario=scenario, + domain=domain, + max_classes=self.DEFAULT_MAX_CLASSES, + min_classes=self.DEFAULT_MIN_CLASSES, + enable_owl_validation=self.DEFAULT_ENABLE_OWL_VALIDATION, + llm_temperature=self.DEFAULT_LLM_TEMPERATURE, + llm_max_tokens=self.DEFAULT_LLM_MAX_TOKENS, + max_description_length=self.DEFAULT_MAX_DESCRIPTION_LENGTH, + timeout=self.DEFAULT_LLM_TIMEOUT, + ) + + extraction_duration = time.time() - extraction_start_time + + # 检查是否成功提取到类 + if not response.classes: + logger.error("Ontology extraction failed: No classes extracted (structured output may have failed)") + raise RuntimeError("本体提取失败:结构化输出失败,未能提取到任何本体类") + + # 注释:提取结果仅返回给前端,不保存到数据库 + # 前端将从返回结果中选择需要的类型,然后调用 /class 接口创建 + logger.info( + f"Extraction completed. 
Classes will be saved to ontology_class " + f"via /class endpoint based on user selection" + ) + + total_duration = time.time() - start_time + + # 记录提取统计 + logger.info( + f"Ontology extraction service completed - " + f"extracted_classes={len(response.classes)}, " + f"domain={response.domain}, " + f"extraction_duration={extraction_duration:.2f}s, " + f"total_duration={total_duration:.2f}s" + ) + + return response + + except ValueError: + # 重新抛出验证错误 + total_duration = time.time() - start_time + logger.error( + f"Validation error after {total_duration:.2f}s", + exc_info=True + ) + raise + except Exception as e: + total_duration = time.time() - start_time + error_msg = f"Ontology extraction failed after {total_duration:.2f}s: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + async def export_owl_file( + self, + classes: List[OntologyClass], + output_path: str, + format: str = "rdfxml", + ) -> str: + """导出OWL文件 + + 将提取的本体类导出为OWL文件,支持多种格式。 + + Args: + classes: 本体类列表 + output_path: 输出文件路径 + format: 导出格式,可选值: "rdfxml", "turtle", "ntriples" (默认: "rdfxml") + + Returns: + str: 导出的OWL文件内容 + + Raises: + ValueError: 类列表为空或格式不支持 + RuntimeError: 导出失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> owl_content = await service.export_owl_file( + ... classes=response.classes, + ... output_path="ontology.owl", + ... format="rdfxml" + ... ) + """ + # 验证输入 + if not classes: + logger.error("Classes list is empty") + raise ValueError("Classes list cannot be empty") + + valid_formats = ["rdfxml", "turtle", "ntriples"] + if format not in valid_formats: + error_msg = f"Unsupported format '{format}'. 
Must be one of: {', '.join(valid_formats)}" + logger.error(error_msg) + raise ValueError(error_msg) + + logger.info( + f"Starting OWL export - " + f"classes_count={len(classes)}, " + f"output_path={output_path}, " + f"format={format}" + ) + + try: + # 步骤1: 验证本体类 + logger.debug("Validating ontology classes") + is_valid, errors, world = self.owl_validator.validate_ontology_classes( + classes=classes, + ) + + if not is_valid: + logger.warning( + f"OWL validation found {len(errors)} issues during export: {errors}" + ) + # 继续导出,但记录警告 + + if not world: + error_msg = "Failed to create OWL world for export" + logger.error(error_msg) + raise RuntimeError(error_msg) + + # 步骤2: 导出OWL文件 + logger.info(f"Exporting to {format} format") + owl_content = self.owl_validator.export_to_owl( + world=world, + output_path=output_path, + format=format + ) + + logger.info( + f"OWL export completed - " + f"output_path={output_path}, " + f"content_length={len(owl_content)}" + ) + + return owl_content + + except Exception as e: + error_msg = f"OWL export failed: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + + # ==================== 本体场景管理方法 ==================== + + def create_scene( + self, + scene_name: str, + scene_description: Optional[str], + workspace_id: Any + ): + """创建本体场景 + + Args: + scene_name: 场景名称 + scene_description: 场景描述 + workspace_id: 所属工作空间ID + + Returns: + OntologyScene: 创建的场景对象 + + Raises: + ValueError: 场景名称为空 + RuntimeError: 创建失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> scene = service.create_scene( + ... "医疗场景", + ... "用于医疗领域的本体建模", + ... workspace_id + ... 
) + """ + # 验证输入 + if not scene_name or not scene_name.strip(): + logger.error("Scene name is empty") + raise ValueError("场景名称不能为空") + + logger.info( + f"Creating scene - " + f"name={scene_name}, workspace_id={workspace_id}" + ) + + try: + scene_data = { + "scene_name": scene_name.strip(), + "scene_description": scene_description + } + + scene = self.scene_repo.create(scene_data, workspace_id) + self.db.commit() + + logger.info(f"Scene created successfully: {scene.scene_id}") + + return scene + + except ValueError: + raise + except Exception as e: + self.db.rollback() + error_msg = f"Failed to create scene: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def update_scene( + self, + scene_id: Any, + scene_name: Optional[str], + scene_description: Optional[str], + workspace_id: Any + ): + """更新本体场景 + + Args: + scene_id: 场景ID + scene_name: 场景名称(可选) + scene_description: 场景描述(可选) + workspace_id: 工作空间ID(用于权限验证) + + Returns: + OntologyScene: 更新后的场景对象 + + Raises: + ValueError: 场景不存在或无权限 + RuntimeError: 更新失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> scene = service.update_scene( + ... scene_id, + ... "新名称", + ... "新描述", + ... workspace_id + ... 
) + """ + logger.info(f"Updating scene: {scene_id}") + + try: + # 检查场景是否存在 + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("场景不存在") + + # 检查权限 + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限操作该场景") + + # 准备更新数据 + update_data = {} + if scene_name is not None: + if not scene_name.strip(): + raise ValueError("场景名称不能为空") + update_data["scene_name"] = scene_name.strip() + + if scene_description is not None: + update_data["scene_description"] = scene_description + + # 如果没有更新数据,直接返回 + if not update_data: + logger.info("No update data provided, returning existing scene") + return scene + + # 执行更新 + updated_scene = self.scene_repo.update(scene_id, update_data) + self.db.commit() + + logger.info(f"Scene updated successfully: {scene_id}") + + return updated_scene + + except ValueError: + raise + except Exception as e: + self.db.rollback() + error_msg = f"Failed to update scene: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def delete_scene( + self, + scene_id: Any, + workspace_id: Any + ) -> bool: + """删除本体场景 + + Args: + scene_id: 场景ID + workspace_id: 工作空间ID(用于权限验证) + + Returns: + bool: 删除成功返回True + + Raises: + ValueError: 场景不存在或无权限 + RuntimeError: 删除失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> success = service.delete_scene(scene_id, workspace_id) + """ + logger.info(f"Deleting scene: {scene_id}") + + try: + # 检查场景是否存在 + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("场景不存在") + + # 检查权限 + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限操作该场景") + + # 执行删除 + 
success = self.scene_repo.delete(scene_id) + self.db.commit() + + logger.info(f"Scene deleted successfully: {scene_id}") + + return success + + except ValueError: + raise + except Exception as e: + self.db.rollback() + error_msg = f"Failed to delete scene: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def get_scene_by_id( + self, + scene_id: Any, + workspace_id: Any + ): + """获取单个场景 + + Args: + scene_id: 场景ID + workspace_id: 工作空间ID(用于权限验证) + + Returns: + Optional[OntologyScene]: 场景对象 + + Raises: + ValueError: 场景不存在或无权限 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> scene = service.get_scene_by_id(scene_id, workspace_id) + """ + logger.debug(f"Getting scene by ID: {scene_id}") + + try: + # 获取场景 + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("场景不存在") + + # 检查权限 + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限访问该场景") + + return scene + + except ValueError: + raise + except Exception as e: + error_msg = f"Failed to get scene: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def get_scene_by_name( + self, + scene_name: str, + workspace_id: Any + ): + """根据场景名称获取场景(精确匹配) + + Args: + scene_name: 场景名称 + workspace_id: 工作空间ID + + Returns: + Optional[OntologyScene]: 场景对象 + + Raises: + ValueError: 场景不存在 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> scene = service.get_scene_by_name("医疗场景", workspace_id) + """ + logger.debug(f"Getting scene by name: {scene_name}, workspace_id: {workspace_id}") + + try: + # 获取场景 + scene = self.scene_repo.get_by_name(scene_name, workspace_id) + if not scene: + logger.warning(f"Scene not found: {scene_name} in workspace {workspace_id}") + raise ValueError("场景不存在") + + return scene + + except 
ValueError: + raise + except Exception as e: + error_msg = f"Failed to get scene by name: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def search_scenes_by_name( + self, + keyword: str, + workspace_id: Any + ) -> List: + """根据关键词模糊搜索场景 + + Args: + keyword: 搜索关键词 + workspace_id: 工作空间ID + + Returns: + List[OntologyScene]: 匹配的场景列表 + + Raises: + RuntimeError: 搜索失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> scenes = service.search_scenes_by_name("医疗", workspace_id) + """ + logger.debug(f"Searching scenes by keyword: {keyword}, workspace_id: {workspace_id}") + + try: + scenes = self.scene_repo.search_by_name(keyword, workspace_id) + + logger.info( + f"Found {len(scenes)} scenes matching keyword '{keyword}' " + f"in workspace {workspace_id}" + ) + + return scenes + + except Exception as e: + error_msg = f"Failed to search scenes by keyword: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def list_scenes( + self, + workspace_id: Any, + page: Optional[int] = None, + page_size: Optional[int] = None + ) -> tuple: + """获取工作空间下的所有场景(支持分页) + + Args: + workspace_id: 工作空间ID + page: 页码(可选,从1开始) + page_size: 每页数量(可选) + + Returns: + tuple: (场景列表, 总数量) + + Raises: + RuntimeError: 查询失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> scenes, total = service.list_scenes(workspace_id) + >>> scenes, total = service.list_scenes(workspace_id, page=1, page_size=10) + """ + logger.debug(f"Listing scenes for workspace: {workspace_id}, page={page}, page_size={page_size}") + + try: + scenes, total = self.scene_repo.get_by_workspace(workspace_id, page, page_size) + + logger.info(f"Found {len(scenes)} scenes (total: {total}) in workspace {workspace_id}") + + return scenes, total + + except Exception as e: + error_msg = f"Failed to list scenes: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + # ==================== 
本体类型管理方法 ==================== + + def create_class( + self, + scene_id: Any, + class_name: str, + class_description: Optional[str], + workspace_id: Any + ): + """创建本体类型 + + Args: + scene_id: 所属场景ID + class_name: 类型名称 + class_description: 类型描述 + workspace_id: 工作空间ID(用于权限验证) + + Returns: + OntologyClass: 创建的类型对象 + + Raises: + ValueError: 类型名称为空、场景不存在或无权限 + RuntimeError: 创建失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> ontology_class = service.create_class( + ... scene_id, + ... "患者", + ... "医院患者信息", + ... workspace_id + ... ) + """ + # 验证输入 + if not class_name or not class_name.strip(): + logger.error("Class name is empty") + raise ValueError("类型名称不能为空") + + logger.info( + f"Creating class - " + f"name={class_name}, scene_id={scene_id}" + ) + + try: + # 检查场景是否存在且属于当前工作空间 + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("所属场景不存在") + + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限在该场景下创建类型") + + # 创建类型 + class_data = { + "class_name": class_name.strip(), + "class_description": class_description + } + + ontology_class = self.class_repo.create(class_data, scene_id) + self.db.commit() + + logger.info(f"Class created successfully: {ontology_class.class_id}") + + return ontology_class + + except ValueError: + raise + except Exception as e: + self.db.rollback() + error_msg = f"Failed to create class: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def create_classes_batch( + self, + scene_id: Any, + classes: List[Dict[str, Optional[str]]], + workspace_id: Any + ): + """批量创建本体类型 + + Args: + scene_id: 所属场景ID + classes: 类型列表,每个元素包含 class_name 和 class_description + workspace_id: 工作空间ID(用于权限验证) + + Returns: + Tuple[List, List[str]]: (成功创建的类型列表, 错误信息列表) + + Raises: + ValueError: 场景不存在或无权限 + 
+ Examples: + >>> service = OntologyService(llm_client, db) + >>> classes_data = [ + ... {"class_name": "患者", "class_description": "医院患者信息"}, + ... {"class_name": "医生", "class_description": "医院医生信息"} + ... ] + >>> created_classes, errors = service.create_classes_batch( + ... scene_id, + ... classes_data, + ... workspace_id + ... ) + """ + logger.info( + f"Batch creating classes - " + f"count={len(classes)}, scene_id={scene_id}" + ) + + # 检查场景是否存在且属于当前工作空间(只检查一次) + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("所属场景不存在") + + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限在该场景下创建类型") + + created_classes = [] + errors = [] + + for idx, class_data in enumerate(classes): + class_name = class_data.get("class_name", "").strip() + class_description = class_data.get("class_description") + + if not class_name: + error_msg = f"第 {idx + 1} 个类型名称为空,已跳过" + logger.warning(error_msg) + errors.append(error_msg) + continue + + try: + # 创建类型(不需要再次检查权限) + create_data = { + "class_name": class_name, + "class_description": class_description + } + + ontology_class = self.class_repo.create(create_data, scene_id) + created_classes.append(ontology_class) + logger.info(f"Class created successfully: {class_name}") + + except Exception as e: + error_msg = f"创建类型 '{class_name}' 失败: {str(e)}" + logger.error(error_msg) + errors.append(error_msg) + + # 统一提交所有成功的创建 + try: + self.db.commit() + logger.info( + f"Batch creation completed - " + f"success={len(created_classes)}, failed={len(errors)}" + ) + except Exception as e: + self.db.rollback() + error_msg = f"批量创建提交失败: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + return created_classes, errors + + def update_class( + self, + class_id: Any, + class_name: Optional[str], + 
class_description: Optional[str], + workspace_id: Any + ): + """更新本体类型 + + Args: + class_id: 类型ID + class_name: 类型名称(可选) + class_description: 类型描述(可选) + workspace_id: 工作空间ID(用于权限验证) + + Returns: + OntologyClass: 更新后的类型对象 + + Raises: + ValueError: 类型不存在或无权限 + RuntimeError: 更新失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> ontology_class = service.update_class( + ... class_id, + ... "新名称", + ... "新描述", + ... workspace_id + ... ) + """ + logger.info(f"Updating class: {class_id}") + + try: + # 检查类型是否存在 + ontology_class = self.class_repo.get_by_id(class_id) + if not ontology_class: + logger.warning(f"Class not found: {class_id}") + raise ValueError("类型不存在") + + # 检查权限(通过场景关联) + if not self.class_repo.check_ownership(class_id, workspace_id): + logger.warning( + f"Permission denied - class_id={class_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限操作该类型") + + # 准备更新数据 + update_data = {} + if class_name is not None: + if not class_name.strip(): + raise ValueError("类型名称不能为空") + update_data["class_name"] = class_name.strip() + + if class_description is not None: + update_data["class_description"] = class_description + + # 如果没有更新数据,直接返回 + if not update_data: + logger.info("No update data provided, returning existing class") + return ontology_class + + # 执行更新 + updated_class = self.class_repo.update(class_id, update_data) + self.db.commit() + + logger.info(f"Class updated successfully: {class_id}") + + return updated_class + + except ValueError: + raise + except Exception as e: + self.db.rollback() + error_msg = f"Failed to update class: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def delete_class( + self, + class_id: Any, + workspace_id: Any + ) -> bool: + """删除本体类型 + + Args: + class_id: 类型ID + workspace_id: 工作空间ID(用于权限验证) + + Returns: + bool: 删除成功返回True + + Raises: + ValueError: 类型不存在或无权限 + RuntimeError: 删除失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> success = 
service.delete_class(class_id, workspace_id) + """ + logger.info(f"Deleting class: {class_id}") + + try: + # 检查类型是否存在 + ontology_class = self.class_repo.get_by_id(class_id) + if not ontology_class: + logger.warning(f"Class not found: {class_id}") + raise ValueError("类型不存在") + + # 检查权限(通过场景关联) + if not self.class_repo.check_ownership(class_id, workspace_id): + logger.warning( + f"Permission denied - class_id={class_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限操作该类型") + + # 执行删除 + success = self.class_repo.delete(class_id) + self.db.commit() + + logger.info(f"Class deleted successfully: {class_id}") + + return success + + except ValueError: + raise + except Exception as e: + self.db.rollback() + error_msg = f"Failed to delete class: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def get_class_by_id( + self, + class_id: Any, + workspace_id: Any + ): + """获取单个类型 + + Args: + class_id: 类型ID + workspace_id: 工作空间ID(用于权限验证) + + Returns: + Optional[OntologyClass]: 类型对象 + + Raises: + ValueError: 类型不存在或无权限 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> ontology_class = service.get_class_by_id(class_id, workspace_id) + """ + logger.debug(f"Getting class by ID: {class_id}") + + try: + # 获取类型 + ontology_class = self.class_repo.get_by_id(class_id) + if not ontology_class: + logger.warning(f"Class not found: {class_id}") + raise ValueError("类型不存在") + + # 检查权限(通过场景关联) + if not self.class_repo.check_ownership(class_id, workspace_id): + logger.warning( + f"Permission denied - class_id={class_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限访问该类型") + + return ontology_class + + except ValueError: + raise + except Exception as e: + error_msg = f"Failed to get class: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def get_class_by_name( + self, + class_name: str, + scene_id: Any, + workspace_id: Any + ): + """根据类型名称获取类型(精确匹配) + + Args: 
+ class_name: 类型名称 + scene_id: 场景ID + workspace_id: 工作空间ID(用于权限验证) + + Returns: + Optional[OntologyClass]: 类型对象 + + Raises: + ValueError: 类型不存在或无权限 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> ontology_class = service.get_class_by_name("患者", scene_id, workspace_id) + """ + logger.debug(f"Getting class by name: {class_name}, scene_id: {scene_id}") + + try: + # 检查场景是否存在且属于当前工作空间 + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("场景不存在") + + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限访问该场景") + + # 获取类型 + ontology_class = self.class_repo.get_by_name(class_name, scene_id) + if not ontology_class: + logger.warning(f"Class not found: {class_name} in scene {scene_id}") + raise ValueError("类型不存在") + + return ontology_class + + except ValueError: + raise + except Exception as e: + error_msg = f"Failed to get class by name: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def search_classes_by_name( + self, + keyword: str, + scene_id: Any, + workspace_id: Any + ) -> List: + """根据关键词模糊搜索类型 + + Args: + keyword: 搜索关键词 + scene_id: 场景ID + workspace_id: 工作空间ID(用于权限验证) + + Returns: + List[OntologyClass]: 匹配的类型列表 + + Raises: + ValueError: 场景不存在或无权限 + RuntimeError: 搜索失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> classes = service.search_classes_by_name("患者", scene_id, workspace_id) + """ + logger.debug( + f"Searching classes by keyword: {keyword}, " + f"scene_id: {scene_id}, workspace_id: {workspace_id}" + ) + + try: + # 检查场景是否存在且属于当前工作空间 + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("场景不存在") + + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + 
f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限访问该场景") + + # 搜索类型 + classes = self.class_repo.search_by_name(keyword, scene_id) + + logger.info( + f"Found {len(classes)} classes matching keyword '{keyword}' " + f"in scene {scene_id}" + ) + + return classes + + except ValueError: + raise + except Exception as e: + error_msg = f"Failed to search classes by keyword: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e + + def list_classes_by_scene( + self, + scene_id: Any, + workspace_id: Any + ) -> List: + """获取场景下的所有类型 + + Args: + scene_id: 场景ID + workspace_id: 工作空间ID(用于权限验证) + + Returns: + List[OntologyClass]: 类型列表 + + Raises: + ValueError: 场景不存在或无权限 + RuntimeError: 查询失败 + + Examples: + >>> service = OntologyService(llm_client, db) + >>> classes = service.list_classes_by_scene(scene_id, workspace_id) + """ + logger.debug(f"Listing classes for scene: {scene_id}") + + try: + # 检查场景是否存在且属于当前工作空间 + scene = self.scene_repo.get_by_id(scene_id) + if not scene: + logger.warning(f"Scene not found: {scene_id}") + raise ValueError("场景不存在") + + if not self.scene_repo.check_ownership(scene_id, workspace_id): + logger.warning( + f"Permission denied - scene_id={scene_id}, " + f"workspace_id={workspace_id}" + ) + raise ValueError("无权限访问该场景的类型") + + # 获取类型列表 + classes = self.class_repo.get_by_scene(scene_id) + + logger.info(f"Found {len(classes)} classes in scene {scene_id}") + + return classes + + except ValueError: + raise + except Exception as e: + error_msg = f"Failed to list classes: {str(e)}" + logger.error(error_msg, exc_info=True) + raise RuntimeError(error_msg) from e diff --git a/api/pyproject.toml b/api/pyproject.toml index 29597409..6d23a3b9 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -140,6 +140,7 @@ dependencies = [ "oss2>=2.19.1", "flower>=2.0.1", "aiofiles>=23.0.0", + "owlready2>=0.46", ] [tool.pytest.ini_options]