diff --git a/api/app/controllers/chunk_controller.py b/api/app/controllers/chunk_controller.py index a0b985bf..cfe36a3a 100644 --- a/api/app/controllers/chunk_controller.py +++ b/api/app/controllers/chunk_controller.py @@ -546,6 +546,7 @@ async def delete_chunk( kb_id: uuid.UUID, document_id: uuid.UUID, doc_id: str, + force_refresh: bool = Query(False, description="Force Elasticsearch refresh after deletion"), db: Session = Depends(get_db), current_user: User = Depends(get_current_user) ): @@ -563,7 +564,7 @@ async def delete_chunk( vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge) if vector_service.text_exists(doc_id): - vector_service.delete_by_ids([doc_id]) + vector_service.delete_by_ids([doc_id], refresh=force_refresh) # 更新 chunk_num db_document = db.query(Document).filter(Document.id == document_id).first() db_document.chunk_num -= 1 diff --git a/api/app/controllers/service/rag_api_chunk_controller.py b/api/app/controllers/service/rag_api_chunk_controller.py index a4d9a20c..c9f5e7de 100644 --- a/api/app/controllers/service/rag_api_chunk_controller.py +++ b/api/app/controllers/service/rag_api_chunk_controller.py @@ -176,6 +176,7 @@ async def delete_chunk( request: Request, api_key_auth: ApiKeyAuth = None, db: Session = Depends(get_db), + force_refresh: bool = Query(False, description="Force Elasticsearch refresh after deletion"), ): """ delete document chunk @@ -188,6 +189,7 @@ async def delete_chunk( return await chunk_controller.delete_chunk(kb_id=kb_id, document_id=document_id, doc_id=doc_id, + force_refresh=force_refresh, db=db, current_user=current_user) diff --git a/api/app/core/rag/vdb/elasticsearch/elasticsearch_vector.py b/api/app/core/rag/vdb/elasticsearch/elasticsearch_vector.py index cd52550b..5f9e86c5 100644 --- a/api/app/core/rag/vdb/elasticsearch/elasticsearch_vector.py +++ b/api/app/core/rag/vdb/elasticsearch/elasticsearch_vector.py @@ -142,7 +142,7 @@ class ElasticSearchVector(BaseVector): return True - def delete_by_ids(self, ids: list[str]): + def delete_by_ids(self, ids: list[str], *, refresh: bool = False): if not ids: return if not self._client.indices.exists(index=self._collection_name): @@ -163,6 +163,8 @@ class ElasticSearchVector(BaseVector): actions = [{"_op_type": "delete", "_index": self._collection_name, "_id": es_id} for es_id in actual_ids] try: helpers.bulk(self._client, actions) + if refresh: + self._client.indices.refresh(index=self._collection_name) except BulkIndexError as e: for error in e.errors: delete_error = error.get('delete', {}) @@ -182,7 +184,7 @@ class ElasticSearchVector(BaseVector): else: return None - def delete_by_metadata_field(self, key: str, value: str): + def delete_by_metadata_field(self, key: str, value: str, *, refresh: bool = False): if not self._client.indices.exists(index=self._collection_name): return False actual_ids = self.get_ids_by_metadata_field(key, value) @@ -191,6 +193,8 @@ class ElasticSearchVector(BaseVector): actions = [{"_op_type": "delete", "_index": self._collection_name, "_id": es_id} for es_id in actual_ids] try: helpers.bulk(self._client, actions) + if refresh: + self._client.indices.refresh(index=self._collection_name) except BulkIndexError as e: for error in e.errors: delete_error = error.get('delete', {}) diff --git a/api/app/core/rag/vdb/vector_base.py b/api/app/core/rag/vdb/vector_base.py index df3ac7d8..266a3f40 100644 --- a/api/app/core/rag/vdb/vector_base.py +++ b/api/app/core/rag/vdb/vector_base.py @@ -27,14 +27,14 @@ class BaseVector(ABC): raise NotImplementedError @abstractmethod - def delete_by_ids(self, ids: list[str]): + def delete_by_ids(self, ids: list[str], *, refresh: bool = False): raise NotImplementedError def get_ids_by_metadata_field(self, key: str, value: str): raise NotImplementedError @abstractmethod - def delete_by_metadata_field(self, key: str, value: str): + def delete_by_metadata_field(self, key: str, value: str, *, refresh: bool = False): raise NotImplementedError @abstractmethod