From 130684cac0b008a2726ea41a65884b68ca6168c8 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 9 Apr 2026 19:07:53 +0800 Subject: [PATCH] refactor(rag/nlp): standardize knowledge graph retrieval to use DocumentChunk and add debug logging The knowledge graph retrieval logic in `search.py` was updated to consistently return `DocumentChunk` instances instead of raw dictionaries, improving type safety and alignment with the RAG pipeline's expected data structure. Additionally, debug logging was enhanced in `draft_run_service.py` to log the full `retrieve_chunks_result` before extracting page content, aiding troubleshooting. --- api/app/core/rag/nlp/search.py | 10 ++++++++-- api/app/services/draft_run_service.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/api/app/core/rag/nlp/search.py b/api/app/core/rag/nlp/search.py index 1a84b8a7..371facb4 100644 --- a/api/app/core/rag/nlp/search.py +++ b/api/app/core/rag/nlp/search.py @@ -133,7 +133,10 @@ def knowledge_retrieval( from app.core.rag.common.settings import kg_retriever doc = kg_retriever.retrieval(question=query, workspace_ids=workspace_ids, kb_ids=kb_ids, emb_mdl=embedding_model, llm=chat_model) if doc: - all_results.insert(0, doc) + all_results.insert(0, DocumentChunk( + page_content=doc.get("page_content", ""), + metadata=doc.get("metadata", {}) + )) except Exception as graph_error: print(f"Failed to retrieve from knowledge graph: {str(graph_error)}") @@ -262,7 +265,10 @@ def _retrieve_for_knowledge( llm=chat_model, ) if graph_doc: - rs.insert(0, graph_doc) + rs.insert(0, DocumentChunk( + page_content=graph_doc.get("page_content", ""), + metadata=graph_doc.get("metadata", {}) + )) except Exception as graph_error: logger.warning(f"Graph retrieval failed for kb {db_knowledge.id}: {graph_error}") diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 461ee0c4..8a381c8f 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -223,8 +223,8 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec retrieve_chunks_result = knowledge_retrieval(query, kb_config) if retrieve_chunks_result: + logger.warning(f"检索知识结果:{retrieve_chunks_result}") retrieval_knowledge = [i.page_content for i in retrieve_chunks_result] - logger.warning(f"检索知识结果:{retrieval_knowledge}") context = '\n\n'.join(retrieval_knowledge) logger.info( "知识库检索成功",