fix(rag): fix pdfplumber concurrency issue and add debug logging

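The pdfplumber parser now acquires a global lock while opening the PDF and rendering page images, preventing concurrent-access issues. Additionally, a warning-level log was added to trace knowledge retrieval results for debugging purposes. The match case in the knowledge node was changed from the or-pattern `RetrieveType.HYBRID | RetrieveType.Graph` to the tuple pattern `(RetrieveType.HYBRID, RetrieveType.Graph)`, with the intent of ensuring correct pattern matching behavior. Review note: in Python, a parenthesized tuple pattern is a sequence pattern that matches a two-element sequence, not either single value, so confirm that the subject being matched is actually a pair.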

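BREAKING CHANGE: The pdfplumber parser now requires a lock object to be registered in sys.modules under the key LOCK_KEY_pdfplumber; the parser acquires it while opening the PDF and rendering page images for thread safety.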

Closes #841
This commit is contained in:
Timebomb2018
2026-04-09 17:48:16 +08:00
parent 0516822d42
commit a7b8ba0c66
3 changed files with 6 additions and 4 deletions

View File

@@ -292,9 +292,10 @@ class MinerUParser(RAGPdfParser):
self.page_from = page_from
self.page_to = page_to
try:
with pdfplumber.open(fnm) if isinstance(fnm, (str, PathLike)) else pdfplumber.open(BytesIO(fnm)) as pdf:
self.pdf = pdf
self.page_images = [p.to_image(resolution=72 * zoomin, antialias=True).original for _, p in enumerate(self.pdf.pages[page_from:page_to])]
with sys.modules[LOCK_KEY_pdfplumber]: # ← 加这一行,获取全局锁
with pdfplumber.open(fnm) if isinstance(fnm, (str, PathLike)) else pdfplumber.open(BytesIO(fnm)) as pdf:
self.pdf = pdf
self.page_images = [p.to_image(resolution=72 * zoomin, antialias=True).original for _, p in enumerate(self.pdf.pages[page_from:page_to])]
except Exception as e:
self.page_images = None
self.total_page = 0

View File

@@ -233,7 +233,7 @@ class KnowledgeRetrievalNode(BaseNode):
}
)
)
case RetrieveType.HYBRID | RetrieveType.Graph:
case (RetrieveType.HYBRID, RetrieveType.Graph):
rs1_task = asyncio.to_thread(
vector_service.search_by_vector, **{
"query": query,

View File

@@ -224,6 +224,7 @@ def create_knowledge_retrieval_tool(kb_config, kb_ids, user_id, citations_collec
retrieve_chunks_result = knowledge_retrieval(query, kb_config)
if retrieve_chunks_result:
retrieval_knowledge = [i.page_content for i in retrieve_chunks_result]
logger.warning(f"检索知识结果:{retrieval_knowledge}")
context = '\n\n'.join(retrieval_knowledge)
logger.info(
"知识库检索成功",