feat(workflow): add doc_id to citation metadata and unify document_id handling (read only from "document_id", no doc_id fallback, always str)

This commit is contained in:
Timebomb2018
2026-05-07 18:34:16 +08:00
parent 8967b00303
commit 0dc8d8cbeb
3 changed files with 11 additions and 8 deletions

View File

@@ -363,11 +363,12 @@ class KnowledgeRetrievalNode(BaseNode):
seen_doc_ids = set() seen_doc_ids = set()
for chunk in final_rs: for chunk in final_rs:
meta = chunk.metadata or {} meta = chunk.metadata or {}
doc_id = meta.get("document_id") or meta.get("doc_id") document_id = meta.get("document_id")
if doc_id and doc_id not in seen_doc_ids: if document_id and document_id not in seen_doc_ids:
seen_doc_ids.add(doc_id) seen_doc_ids.add(document_id)
citations.append({ citations.append({
"document_id": str(doc_id), "document_id": str(document_id),
"doc_id": meta.get("doc_id", ""),
"file_name": meta.get("file_name", ""), "file_name": meta.get("file_name", ""),
"knowledge_id": str(meta.get("knowledge_id", kb_config.kb_id)), "knowledge_id": str(meta.get("knowledge_id", kb_config.kb_id)),
"score": meta.get("score", 0.0), "score": meta.get("score", 0.0),

View File

@@ -205,6 +205,7 @@ class CitationConfig(BaseModel):
class Citation(BaseModel): class Citation(BaseModel):
document_id: str document_id: str
doc_id: str
file_name: str file_name: str
knowledge_id: str knowledge_id: str
score: float score: float

View File

@@ -242,11 +242,12 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec
seen_doc_ids = {c.get("document_id") for c in citations_collector} seen_doc_ids = {c.get("document_id") for c in citations_collector}
for chunk in retrieve_chunks_result: for chunk in retrieve_chunks_result:
meta = chunk.metadata or {} meta = chunk.metadata or {}
doc_id = meta.get("document_id") or meta.get("doc_id") document_id = meta.get("document_id")
if doc_id and doc_id not in seen_doc_ids: if document_id and document_id not in seen_doc_ids:
seen_doc_ids.add(doc_id) seen_doc_ids.add(document_id)
citations_collector.append(Citation( citations_collector.append(Citation(
document_id=doc_id, document_id=str(document_id),
doc_id=meta.get("doc_id", ""),
file_name=meta.get("file_name", ""), file_name=meta.get("file_name", ""),
knowledge_id=str(meta.get("knowledge_id", "")), knowledge_id=str(meta.get("knowledge_id", "")),
score=meta.get("score", 0) score=meta.get("score", 0)