Merge branch 'feature/20251219_lxc' into develop
This commit is contained in:
@@ -18,6 +18,9 @@ from app.schemas.response_schema import ApiResponse
|
|||||||
from app.core.response_utils import success
|
from app.core.response_utils import success
|
||||||
from app.services import knowledge_service, document_service, file_service, knowledgeshare_service
|
from app.services import knowledge_service, document_service, file_service, knowledgeshare_service
|
||||||
from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory
|
from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory
|
||||||
|
from app.core.rag.common.settings import kg_retriever
|
||||||
|
from app.core.rag.llm.chat_model import Base
|
||||||
|
from app.core.rag.llm.embedding_model import OpenAIEmbed
|
||||||
from app.core.logging_config import get_api_logger
|
from app.core.logging_config import get_api_logger
|
||||||
|
|
||||||
# Obtain a dedicated API logger
|
# Obtain a dedicated API logger
|
||||||
@@ -389,36 +392,41 @@ async def retrieve_chunks(
|
|||||||
knowledge_model.Knowledge.chunk_num > 0,
|
knowledge_model.Knowledge.chunk_num > 0,
|
||||||
knowledge_model.Knowledge.status == 1
|
knowledge_model.Knowledge.status == 1
|
||||||
]
|
]
|
||||||
existing_ids = knowledge_service.get_chunded_knowledgeids(
|
private_items = knowledge_service.get_chunded_knowledgeids(
|
||||||
db=db,
|
db=db,
|
||||||
filters=filters,
|
filters=filters,
|
||||||
current_user=current_user
|
current_user=current_user
|
||||||
)
|
)
|
||||||
|
private_kb_ids = [item[0] for item in private_items]
|
||||||
|
private_workspace_ids = [item[1] for item in private_items]
|
||||||
filters = [
|
filters = [
|
||||||
knowledge_model.Knowledge.id.in_(retrieve_data.kb_ids),
|
knowledge_model.Knowledge.id.in_(retrieve_data.kb_ids),
|
||||||
knowledge_model.Knowledge.permission_id == knowledge_model.PermissionType.Share,
|
knowledge_model.Knowledge.permission_id == knowledge_model.PermissionType.Share,
|
||||||
knowledge_model.Knowledge.chunk_num > 0,
|
knowledge_model.Knowledge.chunk_num > 0,
|
||||||
knowledge_model.Knowledge.status == 1
|
knowledge_model.Knowledge.status == 1
|
||||||
]
|
]
|
||||||
share_ids = knowledge_service.get_chunded_knowledgeids(
|
items = knowledge_service.get_chunded_knowledgeids(
|
||||||
db=db,
|
db=db,
|
||||||
filters=filters,
|
filters=filters,
|
||||||
current_user=current_user
|
current_user=current_user
|
||||||
)
|
)
|
||||||
if share_ids:
|
if items:
|
||||||
filters = [
|
filters = [
|
||||||
knowledgeshare_model.KnowledgeShare.target_kb_id.in_(retrieve_data.kb_ids)
|
knowledgeshare_model.KnowledgeShare.target_kb_id.in_(retrieve_data.kb_ids)
|
||||||
]
|
]
|
||||||
items = knowledgeshare_service.get_source_kb_ids_by_target_kb_id(
|
share_items = knowledgeshare_service.get_source_kb_ids_by_target_kb_id(
|
||||||
db=db,
|
db=db,
|
||||||
filters=filters,
|
filters=filters,
|
||||||
current_user=current_user
|
current_user=current_user
|
||||||
)
|
)
|
||||||
existing_ids.extend(items)
|
share_kb_ids = [item[0] for item in share_items]
|
||||||
if not existing_ids:
|
share_workspace_ids = [item[1] for item in share_items]
|
||||||
|
private_kb_ids.extend(share_kb_ids)
|
||||||
|
private_workspace_ids.extend(share_workspace_ids)
|
||||||
|
if not private_kb_ids:
|
||||||
return success(data=[], msg="retrieval successful")
|
return success(data=[], msg="retrieval successful")
|
||||||
kb_id = existing_ids[0]
|
kb_id = private_kb_ids[0]
|
||||||
uuid_strs = [f"Vector_index_{kb_id}_Node".lower() for kb_id in existing_ids]
|
uuid_strs = [f"Vector_index_{kb_id}_Node".lower() for kb_id in private_kb_ids]
|
||||||
indices = ",".join(uuid_strs)
|
indices = ",".join(uuid_strs)
|
||||||
db_knowledge = knowledge_service.get_knowledge_by_id(db, knowledge_id=kb_id, current_user=current_user)
|
db_knowledge = knowledge_service.get_knowledge_by_id(db, knowledge_id=kb_id, current_user=current_user)
|
||||||
if not db_knowledge:
|
if not db_knowledge:
|
||||||
@@ -448,4 +456,21 @@ async def retrieve_chunks(
|
|||||||
seen_ids.add(doc.metadata["doc_id"])
|
seen_ids.add(doc.metadata["doc_id"])
|
||||||
unique_rs.append(doc)
|
unique_rs.append(doc)
|
||||||
rs = vector_service.rerank(query=retrieve_data.query, docs=unique_rs, top_k=retrieve_data.top_k)
|
rs = vector_service.rerank(query=retrieve_data.query, docs=unique_rs, top_k=retrieve_data.top_k)
|
||||||
|
if retrieve_data.retrieve_type == chunk_schema.RetrieveType.Graph:
|
||||||
|
kb_ids = [str(kb_id) for kb_id in private_kb_ids]
|
||||||
|
workspace_ids = [str(workspace_id) for workspace_id in private_workspace_ids]
|
||||||
|
# Build the chat model and embedding model clients from the knowledge base's first configured API key
|
||||||
|
chat_model = Base(
|
||||||
|
key=db_knowledge.llm.api_keys[0].api_key,
|
||||||
|
model_name=db_knowledge.llm.api_keys[0].model_name,
|
||||||
|
base_url=db_knowledge.llm.api_keys[0].api_base
|
||||||
|
)
|
||||||
|
embedding_model = OpenAIEmbed(
|
||||||
|
key=db_knowledge.embedding.api_keys[0].api_key,
|
||||||
|
model_name=db_knowledge.embedding.api_keys[0].model_name,
|
||||||
|
base_url=db_knowledge.embedding.api_keys[0].api_base
|
||||||
|
)
|
||||||
|
doc = kg_retriever.retrieval(question=retrieve_data.query, workspace_ids=workspace_ids, kb_ids= kb_ids, emb_mdl=embedding_model, llm=chat_model)
|
||||||
|
if doc:
|
||||||
|
rs.insert(0, doc)
|
||||||
return success(data=rs, msg="retrieval successful")
|
return success(data=rs, msg="retrieval successful")
|
||||||
@@ -4,6 +4,7 @@ from collections import defaultdict
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
import json_repair
|
import json_repair
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import time
|
||||||
import trio
|
import trio
|
||||||
|
|
||||||
from app.core.rag.common.misc_utils import get_uuid
|
from app.core.rag.common.misc_utils import get_uuid
|
||||||
@@ -262,21 +263,21 @@ class KGSearch(Dealer):
|
|||||||
relas = ""
|
relas = ""
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"chunk_id": get_uuid(),
|
"page_content": ents + relas + self._community_retrieval_([n for n, _ in ents_from_query], filters, kb_ids, idxnms, comm_topn, max_token),
|
||||||
"content_ltks": "",
|
"vector": None,
|
||||||
"page_content": ents + relas + self._community_retrieval_([n for n, _ in ents_from_query], filters, kb_ids, idxnms,
|
"metadata": {
|
||||||
comm_topn, max_token),
|
"doc_id": get_uuid(),
|
||||||
|
"file_id": "",
|
||||||
|
"file_name": "Related content in Knowledge Graph",
|
||||||
|
"file_created_at": int(time.time() * 1000),
|
||||||
"document_id": "",
|
"document_id": "",
|
||||||
"docnm_kwd": "Related content in Knowledge Graph",
|
"knowledge_id": kb_ids,
|
||||||
"kb_id": kb_ids,
|
"sort_id": 0,
|
||||||
"important_kwd": [],
|
"status": 1,
|
||||||
"image_id": "",
|
"score": 1
|
||||||
"similarity": 1.,
|
},
|
||||||
"vector_similarity": 1.,
|
"children": None
|
||||||
"term_similarity": 0,
|
}
|
||||||
"vector": [],
|
|
||||||
"positions": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
def _community_retrieval_(self, entities, condition, kb_ids, idxnms, topn, max_token):
|
def _community_retrieval_(self, entities, condition, kb_ids, idxnms, topn, max_token):
|
||||||
## Community retrieval
|
## Community retrieval
|
||||||
|
|||||||
@@ -213,7 +213,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
m.topn * 2,
|
m.topn * 2,
|
||||||
query_vector=list(m.embedding_data),
|
query_vector=list(m.embedding_data),
|
||||||
filter=bqry.to_dict(),
|
filter=bqry.to_dict(),
|
||||||
similarity=similarity,
|
# similarity=similarity
|
||||||
)
|
)
|
||||||
|
|
||||||
if bqry and rank_feature:
|
if bqry and rank_feature:
|
||||||
|
|||||||
@@ -52,19 +52,19 @@ def get_knowledges_paginated(
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def get_chunked_knowledgeids(
|
def get_chunded_knowledgeids(
|
||||||
db: Session,
|
db: Session,
|
||||||
filters: list
|
filters: list
|
||||||
) -> list:
|
) -> list:
|
||||||
"""
|
"""
|
||||||
Query the list of vectorized knowledge base IDs
|
Query the list of vectorized knowledge base IDs
|
||||||
Return: list[UUID] - List of knowledge base IDs
|
Return: list[(id,workspace_id)] - List of knowledge base id and workspace_id
|
||||||
"""
|
"""
|
||||||
db_logger.debug(f"Query the list of vectorized knowledge base IDs: filters_count={len(filters)}")
|
db_logger.debug(f"Query the list of vectorized knowledge base IDs: filters_count={len(filters)}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Only query the id field
|
# Only query the id and workspace_id fields
|
||||||
query = db.query(Knowledge.id)
|
query = db.query(Knowledge.id, Knowledge.workspace_id)
|
||||||
|
|
||||||
# Apply filter conditions
|
# Apply filter conditions
|
||||||
for filter_cond in filters:
|
for filter_cond in filters:
|
||||||
@@ -74,8 +74,8 @@ def get_chunked_knowledgeids(
|
|||||||
items = query.all()
|
items = query.all()
|
||||||
db_logger.info(f"Querying the vectorized knowledge base id list succeeded: count={len(items)}")
|
db_logger.info(f"Querying the vectorized knowledge base id list succeeded: count={len(items)}")
|
||||||
|
|
||||||
# Return the list of IDs directly. Since only the ID field is queried, the returned data is a single column
|
# Return the rows directly: only id and workspace_id are queried, so each row is an (id, workspace_id) pair
|
||||||
return [item[0] for item in items]
|
return items
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
db_logger.error(f"Querying the vectorized knowledge base id list failed: {str(e)}")
|
db_logger.error(f"Querying the vectorized knowledge base id list failed: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -61,14 +61,14 @@ def get_source_kb_ids_by_target_kb_id(
|
|||||||
) -> list:
|
) -> list:
|
||||||
"""
|
"""
|
||||||
Query the original knowledge base ID list by sharing the knowledge base
|
Query the original knowledge base ID list by sharing the knowledge base
|
||||||
Return: list[UUID] - List of knowledge base IDs
|
Return: list[(source_kb_id,source_workspace_id)] - List of knowledge base source_kb_id and source_workspace_id
|
||||||
"""
|
"""
|
||||||
db_logger.debug(
|
db_logger.debug(
|
||||||
f"Query the original knowledge base id list by sharing the knowledge base: filters_count={len(filters)}")
|
f"Query the original knowledge base id list by sharing the knowledge base: filters_count={len(filters)}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Only query the id field
|
# Only query the source_kb_id and source_workspace_id fields
|
||||||
query = db.query(KnowledgeShare.source_kb_id)
|
query = db.query(KnowledgeShare.source_kb_id, KnowledgeShare.source_workspace_id)
|
||||||
|
|
||||||
# Apply filter conditions
|
# Apply filter conditions
|
||||||
for filter_cond in filters:
|
for filter_cond in filters:
|
||||||
@@ -78,8 +78,8 @@ def get_source_kb_ids_by_target_kb_id(
|
|||||||
items = query.all()
|
items = query.all()
|
||||||
db_logger.info(f"Successfully queried the original knowledge base ID list by sharing the knowledge base: count={len(items)}")
|
db_logger.info(f"Successfully queried the original knowledge base ID list by sharing the knowledge base: count={len(items)}")
|
||||||
|
|
||||||
# Return the list of IDs directly. Since only the ID field is queried, the returned data is a single column
|
# Return the rows directly: only source_kb_id and source_workspace_id are queried, so each row is a (source_kb_id, source_workspace_id) pair
|
||||||
return [item[0] for item in items]
|
return items
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
db_logger.error(f"Failed to query the original knowledge base ID list through knowledge base sharing: {str(e)}")
|
db_logger.error(f"Failed to query the original knowledge base ID list through knowledge base sharing: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ class RetrieveType(StrEnum):
|
|||||||
PARTICIPLE = "participle"
|
PARTICIPLE = "participle"
|
||||||
SEMANTIC = "semantic"
|
SEMANTIC = "semantic"
|
||||||
HYBRID = "hybrid"
|
HYBRID = "hybrid"
|
||||||
|
Graph = "graph"
|
||||||
|
|
||||||
|
|
||||||
class ChunkCreate(BaseModel):
|
class ChunkCreate(BaseModel):
|
||||||
|
|||||||
Reference in New Issue
Block a user