feat(workflow): support doc_id in citation metadata and unify document_id handling
This commit is contained in:
@@ -363,11 +363,12 @@ class KnowledgeRetrievalNode(BaseNode):
|
|||||||
seen_doc_ids = set()
|
seen_doc_ids = set()
|
||||||
for chunk in final_rs:
|
for chunk in final_rs:
|
||||||
meta = chunk.metadata or {}
|
meta = chunk.metadata or {}
|
||||||
doc_id = meta.get("document_id") or meta.get("doc_id")
|
document_id = meta.get("document_id")
|
||||||
if doc_id and doc_id not in seen_doc_ids:
|
if document_id and document_id not in seen_doc_ids:
|
||||||
seen_doc_ids.add(doc_id)
|
seen_doc_ids.add(document_id)
|
||||||
citations.append({
|
citations.append({
|
||||||
"document_id": str(doc_id),
|
"document_id": str(document_id),
|
||||||
|
"doc_id": meta.get("doc_id", ""),
|
||||||
"file_name": meta.get("file_name", ""),
|
"file_name": meta.get("file_name", ""),
|
||||||
"knowledge_id": str(meta.get("knowledge_id", kb_config.kb_id)),
|
"knowledge_id": str(meta.get("knowledge_id", kb_config.kb_id)),
|
||||||
"score": meta.get("score", 0.0),
|
"score": meta.get("score", 0.0),
|
||||||
|
|||||||
@@ -205,6 +205,7 @@ class CitationConfig(BaseModel):
|
|||||||
|
|
||||||
class Citation(BaseModel):
|
class Citation(BaseModel):
|
||||||
document_id: str
|
document_id: str
|
||||||
|
doc_id: str
|
||||||
file_name: str
|
file_name: str
|
||||||
knowledge_id: str
|
knowledge_id: str
|
||||||
score: float
|
score: float
|
||||||
|
|||||||
@@ -242,11 +242,12 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec
|
|||||||
seen_doc_ids = {c.get("document_id") for c in citations_collector}
|
seen_doc_ids = {c.get("document_id") for c in citations_collector}
|
||||||
for chunk in retrieve_chunks_result:
|
for chunk in retrieve_chunks_result:
|
||||||
meta = chunk.metadata or {}
|
meta = chunk.metadata or {}
|
||||||
doc_id = meta.get("document_id") or meta.get("doc_id")
|
document_id = meta.get("document_id")
|
||||||
if doc_id and doc_id not in seen_doc_ids:
|
if document_id and document_id not in seen_doc_ids:
|
||||||
seen_doc_ids.add(doc_id)
|
seen_doc_ids.add(document_id)
|
||||||
citations_collector.append(Citation(
|
citations_collector.append(Citation(
|
||||||
document_id=doc_id,
|
document_id=str(document_id),
|
||||||
|
doc_id=meta.get("doc_id", ""),
|
||||||
file_name=meta.get("file_name", ""),
|
file_name=meta.get("file_name", ""),
|
||||||
knowledge_id=str(meta.get("knowledge_id", "")),
|
knowledge_id=str(meta.get("knowledge_id", "")),
|
||||||
score=meta.get("score", 0)
|
score=meta.get("score", 0)
|
||||||
|
|||||||
Reference in New Issue
Block a user