[ADD] Knowledge base retrieval supports file set retrieval
This commit is contained in:
@@ -441,14 +441,14 @@ async def retrieve_chunks(
|
|||||||
# 1 participle search, 2 semantic search, 3 hybrid search
|
# 1 participle search, 2 semantic search, 3 hybrid search
|
||||||
match retrieve_data.retrieve_type:
|
match retrieve_data.retrieve_type:
|
||||||
case chunk_schema.RetrieveType.PARTICIPLE:
|
case chunk_schema.RetrieveType.PARTICIPLE:
|
||||||
rs = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold)
|
rs = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold, file_names_filter=retrieve_data.file_names_filter)
|
||||||
return success(data=rs, msg="retrieval successful")
|
return success(data=rs, msg="retrieval successful")
|
||||||
case chunk_schema.RetrieveType.SEMANTIC:
|
case chunk_schema.RetrieveType.SEMANTIC:
|
||||||
rs = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight)
|
rs = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight, file_names_filter=retrieve_data.file_names_filter)
|
||||||
return success(data=rs, msg="retrieval successful")
|
return success(data=rs, msg="retrieval successful")
|
||||||
case _:
|
case _:
|
||||||
rs1 = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight)
|
rs1 = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight, file_names_filter=retrieve_data.file_names_filter)
|
||||||
rs2 = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold)
|
rs2 = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold, file_names_filter=retrieve_data.file_names_filter)
|
||||||
# Efficient deduplication
|
# Efficient deduplication
|
||||||
seen_ids = set()
|
seen_ids = set()
|
||||||
unique_rs = []
|
unique_rs = []
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ class ChunkUpdate(BaseModel):
|
|||||||
class ChunkRetrieve(BaseModel):
|
class ChunkRetrieve(BaseModel):
|
||||||
query: str
|
query: str
|
||||||
kb_ids: list[uuid.UUID]
|
kb_ids: list[uuid.UUID]
|
||||||
|
file_names_filter: list[str] | None = Field(None)
|
||||||
similarity_threshold: float | None = Field(None)
|
similarity_threshold: float | None = Field(None)
|
||||||
vector_similarity_weight: float | None = Field(None)
|
vector_similarity_weight: float | None = Field(None)
|
||||||
top_k: int | None = Field(None)
|
top_k: int | None = Field(None)
|
||||||
|
|||||||
Reference in New Issue
Block a user