[ADD]Support graph search
This commit is contained in:
@@ -18,6 +18,9 @@ from app.schemas.response_schema import ApiResponse
|
||||
from app.core.response_utils import success
|
||||
from app.services import knowledge_service, document_service, file_service, knowledgeshare_service
|
||||
from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory
|
||||
from app.core.rag.common.settings import kg_retriever
|
||||
from app.core.rag.llm.chat_model import Base
|
||||
from app.core.rag.llm.embedding_model import OpenAIEmbed
|
||||
from app.core.logging_config import get_api_logger
|
||||
|
||||
# Obtain a dedicated API logger
|
||||
@@ -389,36 +392,41 @@ async def retrieve_chunks(
|
||||
knowledge_model.Knowledge.chunk_num > 0,
|
||||
knowledge_model.Knowledge.status == 1
|
||||
]
|
||||
existing_ids = knowledge_service.get_chunded_knowledgeids(
|
||||
private_items = knowledge_service.get_chunded_knowledgeids(
|
||||
db=db,
|
||||
filters=filters,
|
||||
current_user=current_user
|
||||
)
|
||||
private_kb_ids = [item[0] for item in private_items]
|
||||
private_workspace_ids = [item[1] for item in private_items]
|
||||
filters = [
|
||||
knowledge_model.Knowledge.id.in_(retrieve_data.kb_ids),
|
||||
knowledge_model.Knowledge.permission_id == knowledge_model.PermissionType.Share,
|
||||
knowledge_model.Knowledge.chunk_num > 0,
|
||||
knowledge_model.Knowledge.status == 1
|
||||
]
|
||||
share_ids = knowledge_service.get_chunded_knowledgeids(
|
||||
items = knowledge_service.get_chunded_knowledgeids(
|
||||
db=db,
|
||||
filters=filters,
|
||||
current_user=current_user
|
||||
)
|
||||
if share_ids:
|
||||
if items:
|
||||
filters = [
|
||||
knowledgeshare_model.KnowledgeShare.target_kb_id.in_(retrieve_data.kb_ids)
|
||||
]
|
||||
items = knowledgeshare_service.get_source_kb_ids_by_target_kb_id(
|
||||
share_items = knowledgeshare_service.get_source_kb_ids_by_target_kb_id(
|
||||
db=db,
|
||||
filters=filters,
|
||||
current_user=current_user
|
||||
)
|
||||
existing_ids.extend(items)
|
||||
if not existing_ids:
|
||||
share_kb_ids = [item[0] for item in share_items]
|
||||
share_workspace_ids = [item[1] for item in share_items]
|
||||
private_kb_ids.extend(share_kb_ids)
|
||||
private_workspace_ids.extend(share_workspace_ids)
|
||||
if not private_kb_ids:
|
||||
return success(data=[], msg="retrieval successful")
|
||||
kb_id = existing_ids[0]
|
||||
uuid_strs = [f"Vector_index_{kb_id}_Node".lower() for kb_id in existing_ids]
|
||||
kb_id = private_kb_ids[0]
|
||||
uuid_strs = [f"Vector_index_{kb_id}_Node".lower() for kb_id in private_kb_ids]
|
||||
indices = ",".join(uuid_strs)
|
||||
db_knowledge = knowledge_service.get_knowledge_by_id(db, knowledge_id=kb_id, current_user=current_user)
|
||||
if not db_knowledge:
|
||||
@@ -448,4 +456,21 @@ async def retrieve_chunks(
|
||||
seen_ids.add(doc.metadata["doc_id"])
|
||||
unique_rs.append(doc)
|
||||
rs = vector_service.rerank(query=retrieve_data.query, docs=unique_rs, top_k=retrieve_data.top_k)
|
||||
if retrieve_data.retrieve_type == chunk_schema.RetrieveType.Graph:
|
||||
kb_ids = [str(kb_id) for kb_id in private_kb_ids]
|
||||
workspace_ids = [str(workspace_id) for workspace_id in private_workspace_ids]
|
||||
# Prepare to configure chat_mdl、embedding_model、vision_model information
|
||||
chat_model = Base(
|
||||
key=db_knowledge.llm.api_keys[0].api_key,
|
||||
model_name=db_knowledge.llm.api_keys[0].model_name,
|
||||
base_url=db_knowledge.llm.api_keys[0].api_base
|
||||
)
|
||||
embedding_model = OpenAIEmbed(
|
||||
key=db_knowledge.embedding.api_keys[0].api_key,
|
||||
model_name=db_knowledge.embedding.api_keys[0].model_name,
|
||||
base_url=db_knowledge.embedding.api_keys[0].api_base
|
||||
)
|
||||
doc = kg_retriever.retrieval(question=retrieve_data.query, workspace_ids=workspace_ids, kb_ids= kb_ids, emb_mdl=embedding_model, llm=chat_model)
|
||||
if doc:
|
||||
rs.insert(0, doc)
|
||||
return success(data=rs, msg="retrieval successful")
|
||||
@@ -4,6 +4,7 @@ from collections import defaultdict
|
||||
from copy import deepcopy
|
||||
import json_repair
|
||||
import pandas as pd
|
||||
import time
|
||||
import trio
|
||||
|
||||
from app.core.rag.common.misc_utils import get_uuid
|
||||
@@ -262,21 +263,21 @@ class KGSearch(Dealer):
|
||||
relas = ""
|
||||
|
||||
return {
|
||||
"chunk_id": get_uuid(),
|
||||
"content_ltks": "",
|
||||
"page_content": ents + relas + self._community_retrieval_([n for n, _ in ents_from_query], filters, kb_ids, idxnms,
|
||||
comm_topn, max_token),
|
||||
"page_content": ents + relas + self._community_retrieval_([n for n, _ in ents_from_query], filters, kb_ids, idxnms, comm_topn, max_token),
|
||||
"vector": None,
|
||||
"metadata": {
|
||||
"doc_id": get_uuid(),
|
||||
"file_id": "",
|
||||
"file_name": "Related content in Knowledge Graph",
|
||||
"file_created_at": int(time.time() * 1000),
|
||||
"document_id": "",
|
||||
"docnm_kwd": "Related content in Knowledge Graph",
|
||||
"kb_id": kb_ids,
|
||||
"important_kwd": [],
|
||||
"image_id": "",
|
||||
"similarity": 1.,
|
||||
"vector_similarity": 1.,
|
||||
"term_similarity": 0,
|
||||
"vector": [],
|
||||
"positions": [],
|
||||
}
|
||||
"knowledge_id": kb_ids,
|
||||
"sort_id": 0,
|
||||
"status": 1,
|
||||
"score": 1
|
||||
},
|
||||
"children": None
|
||||
}
|
||||
|
||||
def _community_retrieval_(self, entities, condition, kb_ids, idxnms, topn, max_token):
|
||||
## Community retrieval
|
||||
|
||||
@@ -213,7 +213,7 @@ class ESConnection(DocStoreConnection):
|
||||
m.topn * 2,
|
||||
query_vector=list(m.embedding_data),
|
||||
filter=bqry.to_dict(),
|
||||
similarity=similarity,
|
||||
# similarity=similarity
|
||||
)
|
||||
|
||||
if bqry and rank_feature:
|
||||
|
||||
@@ -52,19 +52,19 @@ def get_knowledges_paginated(
|
||||
raise
|
||||
|
||||
|
||||
def get_chunked_knowledgeids(
|
||||
def get_chunded_knowledgeids(
|
||||
db: Session,
|
||||
filters: list
|
||||
) -> list:
|
||||
"""
|
||||
Query the list of vectorized knowledge base IDs
|
||||
Return: list[UUID] - List of knowledge base IDs
|
||||
Return: list[(id,workspace_id)] - List of knowledge base id and workspace_id
|
||||
"""
|
||||
db_logger.debug(f"Query the list of vectorized knowledge base IDs: filters_count={len(filters)}")
|
||||
|
||||
try:
|
||||
# Only query the id field
|
||||
query = db.query(Knowledge.id)
|
||||
query = db.query(Knowledge.id, Knowledge.workspace_id)
|
||||
|
||||
# Apply filter conditions
|
||||
for filter_cond in filters:
|
||||
@@ -74,8 +74,8 @@ def get_chunked_knowledgeids(
|
||||
items = query.all()
|
||||
db_logger.info(f"Querying the vectorized knowledge base id list succeeded: count={len(items)}")
|
||||
|
||||
# Return the list of IDs directly. Since only the ID field is queried, the returned data is a single column
|
||||
return [item[0] for item in items]
|
||||
# Return the list of ID and workspace_id directly. Since only the ID and workspace_id field is queried
|
||||
return items
|
||||
except Exception as e:
|
||||
db_logger.error(f"Querying the vectorized knowledge base id list failed: {str(e)}")
|
||||
raise
|
||||
|
||||
@@ -61,14 +61,14 @@ def get_source_kb_ids_by_target_kb_id(
|
||||
) -> list:
|
||||
"""
|
||||
Query the original knowledge base ID list by sharing the knowledge base
|
||||
Return: list[UUID] - List of knowledge base IDs
|
||||
Return: list[(source_kb_id,source_workspace_id)] - List of knowledge base source_kb_id and source_workspace_id
|
||||
"""
|
||||
db_logger.debug(
|
||||
f"Query the original knowledge base id list by sharing the knowledge base: filters_count={len(filters)}")
|
||||
|
||||
try:
|
||||
# Only query the id field
|
||||
query = db.query(KnowledgeShare.source_kb_id)
|
||||
query = db.query(KnowledgeShare.source_kb_id, KnowledgeShare.source_workspace_id)
|
||||
|
||||
# Apply filter conditions
|
||||
for filter_cond in filters:
|
||||
@@ -78,8 +78,8 @@ def get_source_kb_ids_by_target_kb_id(
|
||||
items = query.all()
|
||||
db_logger.info(f"Successfully queried the original knowledge base ID list by sharing the knowledge base: count={len(items)}")
|
||||
|
||||
# Return the list of IDs directly. Since only the ID field is queried, the returned data is a single column
|
||||
return [item[0] for item in items]
|
||||
# Return the list of source_kb_id and source_workspace_id directly. Since only the source_kb_id and source_workspace_id field is queried
|
||||
return items
|
||||
except Exception as e:
|
||||
db_logger.error(f"Failed to query the original knowledge base ID list through knowledge base sharing: {str(e)}")
|
||||
raise
|
||||
|
||||
@@ -10,6 +10,7 @@ class RetrieveType(StrEnum):
|
||||
PARTICIPLE = "participle"
|
||||
SEMANTIC = "semantic"
|
||||
HYBRID = "hybrid"
|
||||
Graph = "graph"
|
||||
|
||||
|
||||
class ChunkCreate(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user