diff --git a/api/app/controllers/chunk_controller.py b/api/app/controllers/chunk_controller.py index 509ad442..620d8a1a 100644 --- a/api/app/controllers/chunk_controller.py +++ b/api/app/controllers/chunk_controller.py @@ -1,27 +1,28 @@ import os from typing import Any, Optional import uuid + from fastapi import APIRouter, Depends, HTTPException, status, Query +from fastapi.encoders import jsonable_encoder from sqlalchemy.orm import Session -from sqlalchemy import func from app.core.config import settings -from app.db import get_db -from app.core.rag.llm.cv_model import QWenCV -from app.dependencies import get_current_user -from app.models.user_model import User -from app.models.document_model import Document -from app.models import knowledge_model, knowledgeshare_model -from app.core.rag.models.chunk import DocumentChunk -from app.schemas import chunk_schema -from app.schemas.response_schema import ApiResponse -from app.core.response_utils import success -from app.services import knowledge_service, document_service, file_service, knowledgeshare_service -from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory +from app.core.logging_config import get_api_logger from app.core.rag.common.settings import kg_retriever from app.core.rag.llm.chat_model import Base +from app.core.rag.llm.cv_model import QWenCV from app.core.rag.llm.embedding_model import OpenAIEmbed -from app.core.logging_config import get_api_logger +from app.core.rag.models.chunk import DocumentChunk +from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory +from app.core.response_utils import success +from app.db import get_db +from app.dependencies import get_current_user +from app.models import knowledge_model, knowledgeshare_model +from app.models.document_model import Document +from app.models.user_model import User +from app.schemas import chunk_schema +from app.schemas.response_schema import ApiResponse +from app.services import knowledge_service, document_service, file_service, knowledgeshare_service # Obtain a dedicated API logger api_logger = get_api_logger() @@ -144,7 +145,7 @@ async def get_preview_chunks( } } api_logger.info(f"Querying the document block preview list successful: total={total}, returned={len(chunks)} records") - return success(data=result, msg="Querying the document block preview list succeeded") + return success(data=jsonable_encoder(result), msg="Querying the document block preview list succeeded") @router.get("/{kb_id}/{document_id}/chunks", response_model=ApiResponse) @@ -202,7 +203,7 @@ async def get_chunks( "has_next": True if page * pagesize < total else False } } - return success(data=result, msg="Query of document chunk list succeeded") + return success(data=jsonable_encoder(result), msg="Query of document chunk list succeeded") @router.post("/{kb_id}/{document_id}/chunk", response_model=ApiResponse) @@ -263,7 +264,7 @@ async def create_chunk( db_document.chunk_num += 1 db.commit() - return success(data=chunk, msg="Document chunk creation successful") + return success(data=jsonable_encoder(chunk), msg="Document chunk creation successful") @router.get("/{kb_id}/{document_id}/{doc_id}", response_model=ApiResponse) @@ -290,7 +291,7 @@ async def get_chunk( vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge) total, items = vector_service.get_by_segment(doc_id=doc_id) if total: - return success(data=items[0], msg="Document chunk query successful") + return success(data=jsonable_encoder(items[0]), msg="Document chunk query successful") else: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -327,7 +328,7 @@ async def update_chunk( chunk = items[0] chunk.page_content = content vector_service.update_by_segment(chunk) - return success(data=chunk, msg="The document chunk has been successfully updated") + return success(data=jsonable_encoder(chunk), msg="The document chunk has been successfully updated") else: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -392,7 +393,7 @@ async def retrieve_chunks( knowledge_model.Knowledge.chunk_num > 0, knowledge_model.Knowledge.status == 1 ] - private_items = knowledge_service.get_chunded_knowledgeids( + private_items = knowledge_service.get_chunked_knowledgeids( db=db, filters=filters, current_user=current_user @@ -405,7 +406,7 @@ async def retrieve_chunks( knowledge_model.Knowledge.chunk_num > 0, knowledge_model.Knowledge.status == 1 ] - items = knowledge_service.get_chunded_knowledgeids( + items = knowledge_service.get_chunked_knowledgeids( db=db, filters=filters, current_user=current_user @@ -473,4 +474,4 @@ async def retrieve_chunks( doc = kg_retriever.retrieval(question=retrieve_data.query, workspace_ids=workspace_ids, kb_ids= kb_ids, emb_mdl=embedding_model, llm=chat_model) if doc: rs.insert(0, doc) - return success(data=rs, msg="retrieval successful") \ No newline at end of file + return success(data=jsonable_encoder(rs), msg="retrieval successful") \ No newline at end of file diff --git a/api/app/controllers/document_controller.py b/api/app/controllers/document_controller.py index 39a690f9..72f9cb8f 100644 --- a/api/app/controllers/document_controller.py +++ b/api/app/controllers/document_controller.py @@ -1,23 +1,26 @@ +import datetime import os from typing import Optional -import datetime import uuid + from fastapi import APIRouter, Depends, HTTPException, status, Query +from fastapi.encoders import jsonable_encoder from sqlalchemy.orm import Session +from app.celery_app import celery_app +from app.controllers import file_controller from app.core.config import settings +from app.core.logging_config import get_api_logger +from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory +from app.core.response_utils import success from app.db import get_db from app.dependencies import get_current_user -from app.models.user_model import User from app.models import document_model +from app.models.user_model import User from app.schemas import document_schema from app.schemas.response_schema import ApiResponse -from app.core.response_utils import success from app.services import document_service, file_service, knowledge_service -from app.controllers import file_controller -from app.celery_app import celery_app -from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory -from app.core.logging_config import get_api_logger + # Obtain a dedicated API logger api_logger = get_api_logger() @@ -106,7 +109,7 @@ async def get_documents( "has_next": True if page * pagesize < total else False } } - return success(data=result, msg="Query of document list succeeded") + return success(data=jsonable_encoder(result), msg="Query of document list succeeded") @router.post("/document", response_model=ApiResponse) @@ -124,7 +127,7 @@ async def create_document( api_logger.debug(f"Start creating a document: {create_data.file_name}") db_document = document_service.create_document(db=db, document=create_data, current_user=current_user) api_logger.info(f"Document created successfully: {db_document.file_name} (ID: {db_document.id})") - return success(data=document_schema.Document.model_validate(db_document), msg="Document creation successful") + return success(data=jsonable_encoder(document_schema.Document.model_validate(db_document)), msg="Document creation successful") except Exception as e: api_logger.error(f"Document creation failed: {create_data.file_name} - {str(e)}") raise @@ -153,7 +156,7 @@ async def get_document( ) api_logger.info(f"Document query successful: {db_document.file_name} (ID: {db_document.id})") - return success(data=document_schema.Document.model_validate(db_document), msg="Successfully obtained document information") + return success(data=jsonable_encoder(document_schema.Document.model_validate(db_document)), msg="Successfully obtained document information") except HTTPException: raise except Exception as e: @@ -221,7 +224,7 @@ async def update_document( ) # 5. Return the updated document - return success(data=document_schema.Document.model_validate(db_document), msg="Document information updated successfully") + return success(data=jsonable_encoder(document_schema.Document.model_validate(db_document)), msg="Document information updated successfully") @router.delete("/{document_id}", response_model=ApiResponse) diff --git a/api/app/controllers/file_controller.py b/api/app/controllers/file_controller.py index a020adbb..f7bd0e7a 100644 --- a/api/app/controllers/file_controller.py +++ b/api/app/controllers/file_controller.py @@ -1,22 +1,25 @@ import os -from typing import Any, Optional from pathlib import Path import shutil +from typing import Any, Optional import uuid + from fastapi import APIRouter, Depends, HTTPException, status, File, UploadFile, Query +from fastapi.encoders import jsonable_encoder from fastapi.responses import FileResponse from sqlalchemy.orm import Session from app.core.config import settings +from app.core.logging_config import get_api_logger +from app.core.response_utils import success from app.db import get_db from app.dependencies import get_current_user -from app.models.user_model import User from app.models import file_model +from app.models.user_model import User from app.schemas import file_schema, document_schema from app.schemas.response_schema import ApiResponse -from app.core.response_utils import success from app.services import file_service, document_service -from app.core.logging_config import get_api_logger + # Obtain a dedicated API logger api_logger = get_api_logger() @@ -93,11 +96,11 @@ async def get_files( "has_next": True if page * pagesize < total else False } } - return success(data=result, msg="Query of file list succeeded") + return success(data=jsonable_encoder(result), msg="Query of file list succeeded") @router.post("/folder", response_model=ApiResponse) -def create_folder( +async def create_folder( kb_id: uuid.UUID, parent_id: uuid.UUID, folder_name: str = '/', @@ -121,7 +124,7 @@ def create_folder( ) db_file = file_service.create_file(db=db, file=create_folder, current_user=current_user) api_logger.info(f"Folder created successfully: {db_file.file_name} (ID: {db_file.id})") - return success(data=file_schema.File.model_validate(db_file), msg="Folder creation successful") + return success(data=jsonable_encoder(file_schema.File.model_validate(db_file)), msg="Folder creation successful") except Exception as e: api_logger.error(f"Folder creation failed: {folder_name} - {str(e)}") raise @@ -207,7 +210,7 @@ async def upload_file( db_document = document_service.create_document(db=db, document=create_data, current_user=current_user) api_logger.info(f"File upload successfully: {file.filename} (file_id: {db_file.id}, document_id: {db_document.id})") - return success(data=document_schema.Document.model_validate(db_document), msg="File upload successful") + return success(data=jsonable_encoder(document_schema.Document.model_validate(db_document)), msg="File upload successful") @router.post("/customtext", response_model=ApiResponse) @@ -288,7 +291,7 @@ async def custom_text( db_document = document_service.create_document(db=db, document=create_document_data, current_user=current_user) api_logger.info(f"custom text upload successfully: {create_data.title} (file_id: {db_file.id}, document_id: {db_document.id})") - return success(data=document_schema.Document.model_validate(db_document), msg="custom text upload successful") + return success(data=jsonable_encoder(document_schema.Document.model_validate(db_document)), msg="custom text upload successful") @router.get("/{file_id}", response_model=Any) @@ -362,7 +365,7 @@ async def update_file( # 2. Update fields (only update non-null fields) api_logger.debug(f"Start updating the file fields: {file_id}") updated_fields = [] - for field, value in update_data.items(): + for field, value in update_data.dict(exclude_unset=True).items(): if hasattr(db_file, field): old_value = getattr(db_file, field) if old_value != value: @@ -387,7 +390,7 @@ async def update_file( ) # 4. Return the updated file - return success(data=file_schema.File.model_validate(db_file), msg="File information updated successfully") + return success(data=jsonable_encoder(file_schema.File.model_validate(db_file)), msg="File information updated successfully") @router.delete("/{file_id}", response_model=ApiResponse) diff --git a/api/app/controllers/knowledge_controller.py b/api/app/controllers/knowledge_controller.py index deed5723..901208ba 100644 --- a/api/app/controllers/knowledge_controller.py +++ b/api/app/controllers/knowledge_controller.py @@ -4,6 +4,7 @@ from typing import Optional import uuid from fastapi import APIRouter, Depends, HTTPException, status, Query +from fastapi.encoders import jsonable_encoder from sqlalchemy import or_ from sqlalchemy.orm import Session @@ -17,8 +18,8 @@ from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVec from app.core.response_utils import success from app.db import get_db from app.dependencies import get_current_user +from app.models import knowledge_model from app.models.user_model import User -from app.models import knowledge_model, document_model, file_model from app.schemas import knowledge_schema from app.schemas.response_schema import ApiResponse from app.services import knowledge_service, document_service @@ -171,7 +172,7 @@ async def get_knowledges( "has_next": True if page*pagesize < total else False } } - return success(data=result, msg="Query of knowledge base list successful") + return success(data=jsonable_encoder(result), msg="Query of knowledge base list successful") @router.post("/knowledge", response_model=ApiResponse) @@ -197,7 +198,7 @@ async def create_knowledge( ) db_knowledge = knowledge_service.create_knowledge(db=db, knowledge=create_data, current_user=current_user) api_logger.info(f"The knowledge base has been successfully created: {db_knowledge.name} (ID: {db_knowledge.id})") - return success(data=knowledge_schema.Knowledge.model_validate(db_knowledge), msg="The knowledge base has been successfully created") + return success(data=jsonable_encoder(knowledge_schema.Knowledge.model_validate(db_knowledge)), msg="The knowledge base has been successfully created") except Exception as e: api_logger.error(f"The creation of the knowledge base failed: {create_data.name} - {str(e)}") raise @@ -226,7 +227,7 @@ async def get_knowledge( ) api_logger.info(f"Knowledge base query successful: {db_knowledge.name} (ID: {db_knowledge.id})") - return success(data=knowledge_schema.Knowledge.model_validate(db_knowledge), msg="Successfully obtained knowledge base information") + return success(data=jsonable_encoder(knowledge_schema.Knowledge.model_validate(db_knowledge)), msg="Successfully obtained knowledge base information") except HTTPException: raise except Exception as e: @@ -243,7 +244,7 @@ async def update_knowledge( ): api_logger.info(f"Update knowledge base request: knowledge_id={knowledge_id}, username: {current_user.username}") db_knowledge = await _update_knowledge(knowledge_id=knowledge_id, update_data=update_data, db=db, current_user=current_user) - return success(data=knowledge_schema.Knowledge.model_validate(db_knowledge), msg="The knowledge base information has been successfully updated") + return success(data=jsonable_encoder(knowledge_schema.Knowledge.model_validate(db_knowledge)), msg="The knowledge base information has been successfully updated") async def _update_knowledge( diff --git a/api/app/controllers/service/__init__.py b/api/app/controllers/service/__init__.py index 00f056e7..8c679c1f 100644 --- a/api/app/controllers/service/__init__.py +++ b/api/app/controllers/service/__init__.py @@ -4,14 +4,17 @@ 认证方式: API Key """ from fastapi import APIRouter -from . import app_api_controller, rag_api_controller, memory_api_controller +from . import app_api_controller, rag_api_knowledge_controller, rag_api_document_controller, rag_api_file_controller, rag_api_chunk_controller, memory_api_controller # 创建 V1 API 路由器 service_router = APIRouter() # 注册子路由 service_router.include_router(app_api_controller.router) -service_router.include_router(rag_api_controller.router) +service_router.include_router(rag_api_knowledge_controller.router) +service_router.include_router(rag_api_document_controller.router) +service_router.include_router(rag_api_file_controller.router) +service_router.include_router(rag_api_chunk_controller.router) service_router.include_router(memory_api_controller.router) __all__ = ["service_router"] diff --git a/api/app/controllers/service/rag_api_chunk_controller.py b/api/app/controllers/service/rag_api_chunk_controller.py new file mode 100644 index 00000000..a4d9a20c --- /dev/null +++ b/api/app/controllers/service/rag_api_chunk_controller.py @@ -0,0 +1,221 @@ +"""RAG 服务接口 - 基于 API Key 认证""" + +from typing import Any, Optional, Union +import uuid + +from fastapi import APIRouter, Body, Depends, Request, status, Query +from sqlalchemy.orm import Session + +from app.controllers import chunk_controller +from app.core.api_key_auth import require_api_key +from app.core.logging_config import get_business_logger +from app.core.rag.models.chunk import QAChunk +from app.core.response_utils import success +from app.db import get_db +from app.schemas import chunk_schema +from app.schemas.api_key_schema import ApiKeyAuth +from app.schemas.response_schema import ApiResponse +from app.services import api_key_service + + +router = APIRouter(prefix="/chunks", tags=["V1 - RAG API"]) +api_logger = get_business_logger() + + +@router.get("/{kb_id}/{document_id}/previewchunks", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_preview_chunks( + kb_id: uuid.UUID, + document_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + page: int = Query(1, gt=0), # Default: 1, which must be greater than 0 + pagesize: int = Query(20, gt=0, le=100), # Default: 20 items per page, maximum: 100 items + keywords: Optional[str] = Query(None, description="The keywords used to match chunk content") +): + """ + Paged query document block preview list + - Support filtering by document_id + - Support keyword search for segmented content + - Return paging metadata + file list + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await chunk_controller.get_preview_chunks(kb_id=kb_id, + document_id=document_id, + page=page, + pagesize=pagesize, + keywords=keywords, + db=db, + current_user=current_user) + + +@router.get("/{kb_id}/{document_id}/chunks", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_chunks( + kb_id: uuid.UUID, + document_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + page: int = Query(1, gt=0), # Default: 1, which must be greater than 0 + pagesize: int = Query(20, gt=0, le=100), # Default: 20 items per page, maximum: 100 items + keywords: Optional[str] = Query(None, description="The keywords used to match chunk content") +): + """ + Paged query document chunk list + - Support filtering by document_id + - Support keyword search for segmented content + - Return paging metadata + file list + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await chunk_controller.get_chunks(kb_id=kb_id, + document_id=document_id, + page=page, + pagesize=pagesize, + keywords=keywords, + db=db, + current_user=current_user) + + +@router.post("/{kb_id}/{document_id}/chunk", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def create_chunk( + kb_id: uuid.UUID, + document_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + content: Union[str, QAChunk] = Body(..., description="Content can be either a string or a QAChunk object"), +): + """ + create chunk + """ + body = await request.json() + create_data = chunk_schema.ChunkCreate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await chunk_controller.create_chunk(kb_id=kb_id, + document_id=document_id, + create_data=create_data, + db=db, + current_user=current_user) + + +@router.get("/{kb_id}/{document_id}/{doc_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_chunk( + kb_id: uuid.UUID, + document_id: uuid.UUID, + doc_id: str, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + Retrieve document chunk information based on doc_id + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await chunk_controller.get_chunk(kb_id=kb_id, + document_id=document_id, + doc_id=doc_id, + db=db, + current_user=current_user) + + +@router.put("/{kb_id}/{document_id}/{doc_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def update_chunk( + kb_id: uuid.UUID, + document_id: uuid.UUID, + doc_id: str, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + content: Union[str, QAChunk] = Body(..., description="Content can be either a string or a QAChunk object"), +): + """ + Update document chunk content + """ + body = await request.json() + update_data = chunk_schema.ChunkUpdate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await chunk_controller.update_chunk(kb_id=kb_id, + document_id=document_id, + doc_id=doc_id, + update_data=update_data, + db=db, + current_user=current_user) + + +@router.delete("/{kb_id}/{document_id}/{doc_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def delete_chunk( + kb_id: uuid.UUID, + document_id: uuid.UUID, + doc_id: str, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + delete document chunk + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await chunk_controller.delete_chunk(kb_id=kb_id, + document_id=document_id, + doc_id=doc_id, + db=db, + current_user=current_user) + + +@router.get("/retrieve_type", response_model=ApiResponse) +def get_retrieve_types(): + return success(msg="Successfully obtained the retrieval type", data=list(chunk_schema.RetrieveType)) + + +@router.post("/retrieval", response_model=Any, status_code=status.HTTP_200_OK) +@require_api_key(scopes=["rag"]) +async def retrieve_chunks( + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + query: str = Body(..., description="question"), +): + """ + retrieve chunk + """ + body = await request.json() + retrieve_data = chunk_schema.ChunkRetrieve(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await chunk_controller.retrieve_chunks(retrieve_data=retrieve_data, + db=db, + current_user=current_user) + diff --git a/api/app/controllers/service/rag_api_controller.py b/api/app/controllers/service/rag_api_controller.py deleted file mode 100644 index ecd1dd23..00000000 --- a/api/app/controllers/service/rag_api_controller.py +++ /dev/null @@ -1,16 +0,0 @@ -"""RAG 服务接口 - 基于 API Key 认证""" -from fastapi import APIRouter, Depends -from sqlalchemy.orm import Session - -from app.db import get_db -from app.core.response_utils import success -from app.core.logging_config import get_business_logger - -router = APIRouter(prefix="/knowledge", tags=["V1 - RAG API"]) -logger = get_business_logger() - - -@router.get("") -async def list_knowledge(): - """列出可访问的知识库(占位)""" - return success(data=[], msg="RAG API - Coming Soon") diff --git a/api/app/controllers/service/rag_api_document_controller.py b/api/app/controllers/service/rag_api_document_controller.py new file mode 100644 index 00000000..13ba72d2 --- /dev/null +++ b/api/app/controllers/service/rag_api_document_controller.py @@ -0,0 +1,172 @@ +"""RAG 服务接口 - 基于 API Key 认证""" + +from typing import Optional +import uuid + +from fastapi import APIRouter, Body, Depends, Request, Query +from sqlalchemy.orm import Session + +from app.controllers import document_controller +from app.core.api_key_auth import require_api_key +from app.core.logging_config import get_business_logger +from app.db import get_db +from app.schemas import document_schema +from app.schemas.api_key_schema import ApiKeyAuth +from app.schemas.response_schema import ApiResponse +from app.services import api_key_service + + +router = APIRouter(prefix="/documents", tags=["V1 - RAG API"]) +api_logger = get_business_logger() + + +@router.get("/{kb_id}/documents", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_documents( + kb_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + parent_id: Optional[uuid.UUID] = Query(None, description="parent folder id when type is Folder"), + page: int = Query(1, gt=0), # Default: 1, which must be greater than 0 + pagesize: int = Query(20, gt=0, le=100), # Default: 20 items per page, maximum: 100 items + orderby: Optional[str] = Query(None, description="Sort fields, such as: created_at,updated_at"), + desc: Optional[bool] = Query(False, description="Is it descending order"), + keywords: Optional[str] = Query(None, description="Search keywords (file name)"), + document_ids: Optional[str] = Query(None, description="document ids, separated by commas") +): + """ + Paged query document list + - Support filtering by kb_id and parent_id + - Support keyword search for file names + - Support dynamic sorting + - Return paging metadata + file list + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await document_controller.get_documents(kb_id=kb_id, + parent_id=parent_id, + page=page, + pagesize=pagesize, + orderby=orderby, + desc=desc, + keywords=keywords, + document_ids=document_ids, + db=db, + current_user=current_user) + + +@router.post("/document", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def create_document( + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + kb_id: uuid.UUID = Body(..., description="kb id"), + file_name: str = Body(..., description="file name"), +): + """ + create document + """ + body = await request.json() + create_data = document_schema.DocumentCreate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await document_controller.create_document(create_data=create_data, + db=db, + current_user=current_user) + + +@router.get("/{document_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_document( + document_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + Retrieve document information based on document_id + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await document_controller.get_document(document_id=document_id, + db=db, + current_user=current_user) + + +@router.put("/{document_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def update_document( + document_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + file_name: str = Body(None, description="file name (optional)"), +): + """ + Update document information + """ + body = await request.json() + update_data = document_schema.DocumentUpdate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await document_controller.update_document(document_id=document_id, + update_data=update_data, + db=db, + current_user=current_user) + + +@router.delete("/{document_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def delete_document( + document_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + Delete document + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await document_controller.delete_document(document_id=document_id, + db=db, + current_user=current_user) + + +@router.post("/{document_id}/chunks", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def parse_documents( + document_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + parse document + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await document_controller.parse_documents(document_id=document_id, + db=db, + current_user=current_user) + diff --git a/api/app/controllers/service/rag_api_file_controller.py b/api/app/controllers/service/rag_api_file_controller.py new file mode 100644 index 00000000..6ca5839f --- /dev/null +++ b/api/app/controllers/service/rag_api_file_controller.py @@ -0,0 +1,198 @@ +"""RAG 服务接口 - 基于 API Key 认证""" + +from typing import Any, Optional +import uuid + +from fastapi import APIRouter, Body, Depends, Request, Query, File, UploadFile +from sqlalchemy.orm import Session + +from app.controllers import file_controller +from app.core.api_key_auth import require_api_key +from app.core.logging_config import get_business_logger +from app.db import get_db +from app.schemas import file_schema +from app.schemas.api_key_schema import ApiKeyAuth +from app.schemas.response_schema import ApiResponse +from app.services import api_key_service + + +router = APIRouter(prefix="/files", tags=["V1 - RAG API"]) +api_logger = get_business_logger() + + +@router.get("/{kb_id}/{parent_id}/files", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_files( + kb_id: uuid.UUID, + parent_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + page: int = Query(1, gt=0), # Default: 1, which must be greater than 0 + pagesize: int = Query(20, gt=0, le=100), # Default: 20 items per page, maximum: 100 items + orderby: Optional[str] = Query(None, description="Sort fields, such as: created_at"), + desc: Optional[bool] = Query(False, description="Is it descending order"), + keywords: Optional[str] = Query(None, description="Search keywords (file name)"), +): + """ + Paged query file list + - Support filtering by kb_id and parent_id + - Support keyword search for file names + - Support dynamic sorting + - Return paging metadata + file list + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id=api_key_auth.workspace_id + + return await file_controller.get_files(kb_id=kb_id, + parent_id=parent_id, + page=page, + pagesize=pagesize, + orderby=orderby, + desc=desc, + keywords=keywords, + db=db, + current_user=current_user) + + +@router.post("/folder", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def create_folder( + kb_id: uuid.UUID, + parent_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + folder_name: str = '/' +): + """ + Create a new folder + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await file_controller.create_folder(kb_id=kb_id, + parent_id=parent_id, + folder_name=folder_name, + db=db, + current_user=current_user) + + +@router.post("/file", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def upload_file( + kb_id: uuid.UUID, + parent_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + file: UploadFile = File(...), +): + """ + upload file + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await file_controller.upload_file(kb_id=kb_id, + parent_id=parent_id, + file=file, + db=db, + current_user=current_user) + + +@router.post("/customtext", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def custom_text( + kb_id: uuid.UUID, + parent_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + title: str = Body(..., description="title"), + content: str = Body(..., description="content"), +): + """ + custom text + """ + body = await request.json() + create_data = file_schema.CustomTextFileCreate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await file_controller.custom_text(kb_id=kb_id, + parent_id=parent_id, + create_data=create_data, + db=db, + current_user=current_user) + + +@router.get("/{file_id}", response_model=Any) +async def get_file( + file_id: uuid.UUID, + db: Session = Depends(get_db) +) -> Any: + """ + Download the file based on the file_id + - Query file information from the database + - Construct the file path and check if it exists + - Return a FileResponse to download the file + """ + return await file_controller.get_file(file_id=file_id, + db=db) + + +@router.put("/{file_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def update_file( + file_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + file_name: str = Body(None, description="file name (optional)"), +): + """ + Update file information (such as file name) + - Only specified fields such as file_name are allowed to be modified + """ + body = await request.json() + update_data = file_schema.FileUpdate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await file_controller.update_file(file_id=file_id, + update_data=update_data, + db=db, + current_user=current_user) + + +@router.delete("/{file_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def delete_file( + file_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + Delete a file or folder + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await file_controller.delete_file(file_id=file_id, + db=db, + current_user=current_user) + diff --git a/api/app/controllers/service/rag_api_knowledge_controller.py b/api/app/controllers/service/rag_api_knowledge_controller.py new file mode 100644 index 00000000..72c4a1b5 --- /dev/null +++ b/api/app/controllers/service/rag_api_knowledge_controller.py @@ -0,0 +1,248 @@ +"""RAG 服务接口 - 基于 API Key 认证""" + +from typing import Optional, Dict +import uuid + +from fastapi import APIRouter, Body, Depends, Request, Query +from sqlalchemy.orm import Session + +from app.controllers import knowledge_controller +from app.core.api_key_auth import require_api_key +from app.core.logging_config import get_business_logger +from app.core.response_utils import success +from app.db import get_db +from app.models import knowledge_model +from app.schemas import knowledge_schema +from app.schemas.api_key_schema import ApiKeyAuth +from app.schemas.response_schema import ApiResponse +from app.services import api_key_service + + +router = APIRouter(prefix="/knowledges", tags=["V1 - RAG API"]) +api_logger = get_business_logger() + + +@router.get("/knowledgetype", response_model=ApiResponse) +def get_knowledge_types(): + return success(msg="Successfully obtained the knowledge type", data=list(knowledge_model.KnowledgeType)) + + +@router.get("/permissiontype", response_model=ApiResponse) +def get_permission_types(): + return success(msg="Successfully obtained the knowledge permission type", data=list(knowledge_model.PermissionType)) + + +@router.get("/parsertype", response_model=ApiResponse) +def get_parser_types(): + return success(msg="Successfully obtained the knowledge parser type", data=list(knowledge_model.ParserType)) + + +@router.get("/knowledge_graph_entity_types", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_knowledge_graph_entity_types( + llm_id: uuid.UUID, + scenario: str, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + get knowledge graph entity types based on llm_id + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.get_knowledge_graph_entity_types(llm_id=llm_id, + scenario=scenario, + db=db, + current_user=current_user) + + +@router.get("/knowledges", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_knowledges( + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + parent_id: Optional[uuid.UUID] = Query(None, description="parent folder id"), + page: int = Query(1, gt=0), # Default: 1, which must be greater than 0 + pagesize: int = Query(20, gt=0, le=100), # Default: 20 items per page, maximum: 100 items + orderby: Optional[str] = Query(None, description="Sort fields, such as: created_at,updated_at"), + desc: Optional[bool] = Query(False, description="Is it descending order"), + keywords: Optional[str] = Query(None, description="Search keywords (knowledge base name)"), + kb_ids: Optional[str] = Query(None, description="Knowledge base ids, separated by commas") +): + """ + Query the knowledge base list in pages + - Support filtering by parent_id + - Support keyword search for knowledge base names + - Support dynamic sorting + - Return paging metadata + file list + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.get_knowledges(parent_id=parent_id, + page=page, + pagesize=pagesize, + orderby=orderby, + desc=desc, + keywords=keywords, + kb_ids=kb_ids, + db=db, + current_user=current_user) + + +@router.post("/knowledge", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def create_knowledge( + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + name: str = Body(..., description="KB name"), +): + """ + create knowledge + """ + body = await request.json() + create_data = knowledge_schema.KnowledgeCreate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.create_knowledge(create_data=create_data, + db=db, + current_user=current_user) + + +@router.get("/{knowledge_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_knowledge( + knowledge_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + Retrieve knowledge base information based on knowledge_id + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.get_knowledge(knowledge_id=knowledge_id, + db=db, + current_user=current_user) + + +@router.put("/{knowledge_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def update_knowledge( + knowledge_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + name: str = Body(None, description="KB name (optional)"), +): + body = await request.json() + update_data = knowledge_schema.KnowledgeUpdate(**body) + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.update_knowledge(knowledge_id=knowledge_id, + update_data=update_data, + db=db, + current_user=current_user) + + +@router.delete("/{knowledge_id}", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def delete_knowledge( + knowledge_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + Soft-delete knowledge base + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.delete_knowledge(knowledge_id=knowledge_id, + db=db, + current_user=current_user) + + +@router.get("/{knowledge_id}/knowledge_graph", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def get_knowledge_graph( + knowledge_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + Retrieve knowledge_graph base information based on knowledge_id + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.get_knowledge_graph(knowledge_id=knowledge_id, + db=db, + current_user=current_user) + + +@router.delete("/{knowledge_id}/knowledge_graph", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def delete_knowledge_graph( + knowledge_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + delete knowledge graph + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.delete_knowledge_graph(knowledge_id=knowledge_id, + db=db, + current_user=current_user) + + +@router.post("/{knowledge_id}/knowledge_graph", response_model=ApiResponse) +@require_api_key(scopes=["rag"]) +async def rebuild_knowledge_graph( + knowledge_id: uuid.UUID, + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """ + rebuild knowledge graph + """ + # 0. Obtain the creator of the api key + api_key = api_key_service.ApiKeyService.get_api_key(db, api_key_auth.api_key_id, api_key_auth.workspace_id) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + + return await knowledge_controller.rebuild_knowledge_graph(knowledge_id=knowledge_id, + db=db, + current_user=current_user) + diff --git a/api/app/repositories/knowledge_repository.py b/api/app/repositories/knowledge_repository.py index 0766c447..681d1c10 100644 --- a/api/app/repositories/knowledge_repository.py +++ b/api/app/repositories/knowledge_repository.py @@ -52,7 +52,7 @@ def get_knowledges_paginated( raise -def get_chunded_knowledgeids( +def get_chunked_knowledgeids( db: Session, filters: list ) -> list: diff --git a/api/app/services/knowledge_service.py b/api/app/services/knowledge_service.py index cf47fd4f..bac02e96 100644 --- a/api/app/services/knowledge_service.py +++ b/api/app/services/knowledge_service.py @@ -37,7 +37,7 @@ def get_knowledges_paginated( raise -def get_chunded_knowledgeids( +def get_chunked_knowledgeids( db: Session, current_user: User, filters: list