From 0dc8d8cbebf1672b5ed8b8fc07f9324a4e7c0898 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 7 May 2026 18:34:16 +0800 Subject: [PATCH] feat(workflow): support doc_id in citation metadata and unify document_id handling --- api/app/core/workflow/nodes/knowledge/node.py | 9 +++++---- api/app/schemas/app_schema.py | 1 + api/app/services/draft_run_service.py | 9 +++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/api/app/core/workflow/nodes/knowledge/node.py b/api/app/core/workflow/nodes/knowledge/node.py index c3fda4e2..ebbdd357 100644 --- a/api/app/core/workflow/nodes/knowledge/node.py +++ b/api/app/core/workflow/nodes/knowledge/node.py @@ -363,11 +363,12 @@ class KnowledgeRetrievalNode(BaseNode): seen_doc_ids = set() for chunk in final_rs: meta = chunk.metadata or {} - doc_id = meta.get("document_id") or meta.get("doc_id") - if doc_id and doc_id not in seen_doc_ids: - seen_doc_ids.add(doc_id) + document_id = meta.get("document_id") + if document_id and document_id not in seen_doc_ids: + seen_doc_ids.add(document_id) citations.append({ - "document_id": str(doc_id), + "document_id": str(document_id), + "doc_id": meta.get("doc_id", ""), "file_name": meta.get("file_name", ""), "knowledge_id": str(meta.get("knowledge_id", kb_config.kb_id)), "score": meta.get("score", 0.0), diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index 40d927d7..521bad09 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -205,6 +205,7 @@ class CitationConfig(BaseModel): class Citation(BaseModel): document_id: str + doc_id: str file_name: str knowledge_id: str score: float diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 16d856ca..d37d7691 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -242,11 +242,12 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec seen_doc_ids = {c.get("document_id") for c in citations_collector} for chunk in retrieve_chunks_result: meta = chunk.metadata or {} - doc_id = meta.get("document_id") or meta.get("doc_id") - if doc_id and doc_id not in seen_doc_ids: - seen_doc_ids.add(doc_id) + document_id = meta.get("document_id") + if document_id and document_id not in seen_doc_ids: + seen_doc_ids.add(document_id) citations_collector.append(Citation( - document_id=doc_id, + document_id=str(document_id), + doc_id=meta.get("doc_id", ""), file_name=meta.get("file_name", ""), knowledge_id=str(meta.get("knowledge_id", "")), score=meta.get("score", 0)