From 0dc8d8cbebf1672b5ed8b8fc07f9324a4e7c0898 Mon Sep 17 00:00:00 2001
From: Timebomb2018 <18868801967@163.com>
Date: Thu, 7 May 2026 18:34:16 +0800
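Subject: [PATCH] feat(workflow): support doc_id in citation metadata and unify
 document_id handling

Citations now carry the chunk's doc_id as a separate field alongside
document_id. document_id is read only from metadata["document_id"]; the
previous fallback to metadata["doc_id"] is removed, so a chunk without a
document_id no longer produces a citation.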

---
 api/app/core/workflow/nodes/knowledge/node.py | 9 +++++----
 api/app/schemas/app_schema.py                 | 1 +
 api/app/services/draft_run_service.py         | 9 +++++----
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/api/app/core/workflow/nodes/knowledge/node.py b/api/app/core/workflow/nodes/knowledge/node.py
index c3fda4e2..ebbdd357 100644
--- a/api/app/core/workflow/nodes/knowledge/node.py
+++ b/api/app/core/workflow/nodes/knowledge/node.py
@@ -363,11 +363,12 @@ class KnowledgeRetrievalNode(BaseNode):
             seen_doc_ids = set()
             for chunk in final_rs:
                 meta = chunk.metadata or {}
-                doc_id = meta.get("document_id") or meta.get("doc_id")
-                if doc_id and doc_id not in seen_doc_ids:
-                    seen_doc_ids.add(doc_id)
+                document_id = meta.get("document_id")
+                if document_id and document_id not in seen_doc_ids:
+                    seen_doc_ids.add(document_id)
                     citations.append({
-                        "document_id": str(doc_id),
+                        "document_id": str(document_id),
+                        "doc_id": meta.get("doc_id", ""),
                         "file_name": meta.get("file_name", ""),
                         "knowledge_id": str(meta.get("knowledge_id", kb_config.kb_id)),
                         "score": meta.get("score", 0.0),
diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py
index 40d927d7..521bad09 100644
--- a/api/app/schemas/app_schema.py
+++ b/api/app/schemas/app_schema.py
@@ -205,6 +205,7 @@ class CitationConfig(BaseModel):
 
 class Citation(BaseModel):
     document_id: str
+    doc_id: str
     file_name: str
     knowledge_id: str
     score: float
diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py
index 16d856ca..d37d7691 100644
--- a/api/app/services/draft_run_service.py
+++ b/api/app/services/draft_run_service.py
@@ -242,11 +242,12 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec
                     seen_doc_ids = {c.get("document_id") for c in citations_collector}
                     for chunk in retrieve_chunks_result:
                         meta = chunk.metadata or {}
-                        doc_id = meta.get("document_id") or meta.get("doc_id")
-                        if doc_id and doc_id not in seen_doc_ids:
-                            seen_doc_ids.add(doc_id)
+                        document_id = meta.get("document_id")
+                        if document_id and document_id not in seen_doc_ids:
+                            seen_doc_ids.add(document_id)
                             citations_collector.append(Citation(
-                                document_id=doc_id,
+                                document_id=str(document_id),
+                                doc_id=meta.get("doc_id", ""),
                                 file_name=meta.get("file_name", ""),
                                 knowledge_id=str(meta.get("knowledge_id", "")),
                                 score=meta.get("score", 0)