refactor(rag/nlp): standardize knowledge graph retrieval to use DocumentChunk and add debug logging

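The knowledge graph retrieval logic in `search.py` was updated so that each knowledge graph result is wrapped in a `DocumentChunk` instance (built from the result's `page_content` and `metadata` keys) before being inserted at the front of the result list, instead of inserting the raw dictionary. This improves type safety and keeps the results aligned with the data structure the RAG pipeline expects. Additionally, the logging in `draft_run_service.py` was changed to log the full `retrieve_chunks_result` (at warning level) before the page content is extracted, which aids troubleshooting.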
This commit is contained in:
Timebomb2018
2026-04-09 19:07:53 +08:00
parent 62e0b2730b
commit 130684cac0
2 changed files with 9 additions and 3 deletions

View File

@@ -133,7 +133,10 @@ def knowledge_retrieval(
from app.core.rag.common.settings import kg_retriever
doc = kg_retriever.retrieval(question=query, workspace_ids=workspace_ids, kb_ids=kb_ids, emb_mdl=embedding_model, llm=chat_model)
if doc:
all_results.insert(0, doc)
all_results.insert(0, DocumentChunk(
page_content=doc.get("page_content", ""),
metadata=doc.get("metadata", {})
))
except Exception as graph_error:
print(f"Failed to retrieve from knowledge graph: {str(graph_error)}")
@@ -262,7 +265,10 @@ def _retrieve_for_knowledge(
llm=chat_model,
)
if graph_doc:
rs.insert(0, graph_doc)
rs.insert(0, DocumentChunk(
page_content=graph_doc.get("page_content", ""),
metadata=graph_doc.get("metadata", {})
))
except Exception as graph_error:
logger.warning(f"Graph retrieval failed for kb {db_knowledge.id}: {graph_error}")

View File

@@ -223,8 +223,8 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec
retrieve_chunks_result = knowledge_retrieval(query, kb_config)
if retrieve_chunks_result:
logger.warning(f"检索知识结果:{retrieve_chunks_result}")
retrieval_knowledge = [i.page_content for i in retrieve_chunks_result]
logger.warning(f"检索知识结果:{retrieval_knowledge}")
context = '\n\n'.join(retrieval_knowledge)
logger.info(
"知识库检索成功",