[ADD]Support graph search

This commit is contained in:
lixiangcheng1
2025-12-30 11:53:16 +08:00
parent 6defcaf982
commit 0078028992
6 changed files with 59 additions and 32 deletions

View File

@@ -18,6 +18,9 @@ from app.schemas.response_schema import ApiResponse
from app.core.response_utils import success
from app.services import knowledge_service, document_service, file_service, knowledgeshare_service
from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory
from app.core.rag.common.settings import kg_retriever
from app.core.rag.llm.chat_model import Base
from app.core.rag.llm.embedding_model import OpenAIEmbed
from app.core.logging_config import get_api_logger
# Obtain a dedicated API logger
@@ -389,36 +392,41 @@ async def retrieve_chunks(
knowledge_model.Knowledge.chunk_num > 0,
knowledge_model.Knowledge.status == 1
]
existing_ids = knowledge_service.get_chunded_knowledgeids(
private_items = knowledge_service.get_chunded_knowledgeids(
db=db,
filters=filters,
current_user=current_user
)
private_kb_ids = [item[0] for item in private_items]
private_workspace_ids = [item[1] for item in private_items]
filters = [
knowledge_model.Knowledge.id.in_(retrieve_data.kb_ids),
knowledge_model.Knowledge.permission_id == knowledge_model.PermissionType.Share,
knowledge_model.Knowledge.chunk_num > 0,
knowledge_model.Knowledge.status == 1
]
share_ids = knowledge_service.get_chunded_knowledgeids(
items = knowledge_service.get_chunded_knowledgeids(
db=db,
filters=filters,
current_user=current_user
)
if share_ids:
if items:
filters = [
knowledgeshare_model.KnowledgeShare.target_kb_id.in_(retrieve_data.kb_ids)
]
items = knowledgeshare_service.get_source_kb_ids_by_target_kb_id(
share_items = knowledgeshare_service.get_source_kb_ids_by_target_kb_id(
db=db,
filters=filters,
current_user=current_user
)
existing_ids.extend(items)
if not existing_ids:
share_kb_ids = [item[0] for item in share_items]
share_workspace_ids = [item[1] for item in share_items]
private_kb_ids.extend(share_kb_ids)
private_workspace_ids.extend(share_workspace_ids)
if not private_kb_ids:
return success(data=[], msg="retrieval successful")
kb_id = existing_ids[0]
uuid_strs = [f"Vector_index_{kb_id}_Node".lower() for kb_id in existing_ids]
kb_id = private_kb_ids[0]
uuid_strs = [f"Vector_index_{kb_id}_Node".lower() for kb_id in private_kb_ids]
indices = ",".join(uuid_strs)
db_knowledge = knowledge_service.get_knowledge_by_id(db, knowledge_id=kb_id, current_user=current_user)
if not db_knowledge:
@@ -448,4 +456,21 @@ async def retrieve_chunks(
seen_ids.add(doc.metadata["doc_id"])
unique_rs.append(doc)
rs = vector_service.rerank(query=retrieve_data.query, docs=unique_rs, top_k=retrieve_data.top_k)
if retrieve_data.retrieve_type == chunk_schema.RetrieveType.Graph:
kb_ids = [str(kb_id) for kb_id in private_kb_ids]
workspace_ids = [str(workspace_id) for workspace_id in private_workspace_ids]
# Prepare to configure chat_mdl、embedding_model、vision_model information
chat_model = Base(
key=db_knowledge.llm.api_keys[0].api_key,
model_name=db_knowledge.llm.api_keys[0].model_name,
base_url=db_knowledge.llm.api_keys[0].api_base
)
embedding_model = OpenAIEmbed(
key=db_knowledge.embedding.api_keys[0].api_key,
model_name=db_knowledge.embedding.api_keys[0].model_name,
base_url=db_knowledge.embedding.api_keys[0].api_base
)
doc = kg_retriever.retrieval(question=retrieve_data.query, workspace_ids=workspace_ids, kb_ids= kb_ids, emb_mdl=embedding_model, llm=chat_model)
if doc:
rs.insert(0, doc)
return success(data=rs, msg="retrieval successful")

View File

@@ -4,6 +4,7 @@ from collections import defaultdict
from copy import deepcopy
import json_repair
import pandas as pd
import time
import trio
from app.core.rag.common.misc_utils import get_uuid
@@ -262,21 +263,21 @@ class KGSearch(Dealer):
relas = ""
return {
"chunk_id": get_uuid(),
"content_ltks": "",
"page_content": ents + relas + self._community_retrieval_([n for n, _ in ents_from_query], filters, kb_ids, idxnms,
comm_topn, max_token),
"page_content": ents + relas + self._community_retrieval_([n for n, _ in ents_from_query], filters, kb_ids, idxnms, comm_topn, max_token),
"vector": None,
"metadata": {
"doc_id": get_uuid(),
"file_id": "",
"file_name": "Related content in Knowledge Graph",
"file_created_at": int(time.time() * 1000),
"document_id": "",
"docnm_kwd": "Related content in Knowledge Graph",
"kb_id": kb_ids,
"important_kwd": [],
"image_id": "",
"similarity": 1.,
"vector_similarity": 1.,
"term_similarity": 0,
"vector": [],
"positions": [],
}
"knowledge_id": kb_ids,
"sort_id": 0,
"status": 1,
"score": 1
},
"children": None
}
def _community_retrieval_(self, entities, condition, kb_ids, idxnms, topn, max_token):
## Community retrieval

View File

@@ -213,7 +213,7 @@ class ESConnection(DocStoreConnection):
m.topn * 2,
query_vector=list(m.embedding_data),
filter=bqry.to_dict(),
similarity=similarity,
# similarity=similarity
)
if bqry and rank_feature:

View File

@@ -52,19 +52,19 @@ def get_knowledges_paginated(
raise
def get_chunked_knowledgeids(
def get_chunded_knowledgeids(
db: Session,
filters: list
) -> list:
"""
Query the list of vectorized knowledge base IDs
Return: list[UUID] - List of knowledge base IDs
Return: list[(id,workspace_id)] - List of knowledge base id and workspace_id
"""
db_logger.debug(f"Query the list of vectorized knowledge base IDs: filters_count={len(filters)}")
try:
# Only query the id field
query = db.query(Knowledge.id)
query = db.query(Knowledge.id, Knowledge.workspace_id)
# Apply filter conditions
for filter_cond in filters:
@@ -74,8 +74,8 @@ def get_chunked_knowledgeids(
items = query.all()
db_logger.info(f"Querying the vectorized knowledge base id list succeeded: count={len(items)}")
# Return the list of IDs directly. Since only the ID field is queried, the returned data is a single column
return [item[0] for item in items]
# Return the list of ID and workspace_id directly. Since only the ID and workspace_id field is queried
return items
except Exception as e:
db_logger.error(f"Querying the vectorized knowledge base id list failed: {str(e)}")
raise

View File

@@ -61,14 +61,14 @@ def get_source_kb_ids_by_target_kb_id(
) -> list:
"""
Query the original knowledge base ID list by sharing the knowledge base
Return: list[UUID] - List of knowledge base IDs
Return: list[(source_kb_id,source_workspace_id)] - List of knowledge base source_kb_id and source_workspace_id
"""
db_logger.debug(
f"Query the original knowledge base id list by sharing the knowledge base: filters_count={len(filters)}")
try:
# Only query the id field
query = db.query(KnowledgeShare.source_kb_id)
query = db.query(KnowledgeShare.source_kb_id, KnowledgeShare.source_workspace_id)
# Apply filter conditions
for filter_cond in filters:
@@ -78,8 +78,8 @@ def get_source_kb_ids_by_target_kb_id(
items = query.all()
db_logger.info(f"Successfully queried the original knowledge base ID list by sharing the knowledge base: count={len(items)}")
# Return the list of IDs directly. Since only the ID field is queried, the returned data is a single column
return [item[0] for item in items]
# Return the list of source_kb_id and source_workspace_id directly. Since only the source_kb_id and source_workspace_id field is queried
return items
except Exception as e:
db_logger.error(f"Failed to query the original knowledge base ID list through knowledge base sharing: {str(e)}")
raise

View File

@@ -10,6 +10,7 @@ class RetrieveType(StrEnum):
PARTICIPLE = "participle"
SEMANTIC = "semantic"
HYBRID = "hybrid"
Graph = "graph"
class ChunkCreate(BaseModel):