feat(workflow): support doc_id in citation metadata and unify document_id handling

This commit is contained in:
Timebomb2018
2026-05-07 18:34:16 +08:00
parent 8967b00303
commit 0dc8d8cbeb
3 changed files with 11 additions and 8 deletions

View File

@@ -363,11 +363,12 @@ class KnowledgeRetrievalNode(BaseNode):
seen_doc_ids = set()
for chunk in final_rs:
meta = chunk.metadata or {}
doc_id = meta.get("document_id") or meta.get("doc_id")
if doc_id and doc_id not in seen_doc_ids:
seen_doc_ids.add(doc_id)
document_id = meta.get("document_id")
if document_id and document_id not in seen_doc_ids:
seen_doc_ids.add(document_id)
citations.append({
"document_id": str(doc_id),
"document_id": str(document_id),
"doc_id": meta.get("doc_id", ""),
"file_name": meta.get("file_name", ""),
"knowledge_id": str(meta.get("knowledge_id", kb_config.kb_id)),
"score": meta.get("score", 0.0),

View File

@@ -205,6 +205,7 @@ class CitationConfig(BaseModel):
class Citation(BaseModel):
document_id: str
doc_id: str
file_name: str
knowledge_id: str
score: float

View File

@@ -242,11 +242,12 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec
seen_doc_ids = {c.get("document_id") for c in citations_collector}
for chunk in retrieve_chunks_result:
meta = chunk.metadata or {}
doc_id = meta.get("document_id") or meta.get("doc_id")
if doc_id and doc_id not in seen_doc_ids:
seen_doc_ids.add(doc_id)
document_id = meta.get("document_id")
if document_id and document_id not in seen_doc_ids:
seen_doc_ids.add(document_id)
citations_collector.append(Citation(
document_id=doc_id,
document_id=str(document_id),
doc_id=meta.get("doc_id", ""),
file_name=meta.get("file_name", ""),
knowledge_id=str(meta.get("knowledge_id", "")),
score=meta.get("score", 0)