[fix] Fix the display issue of semantic chunking for streaming output

This commit is contained in:
lanceyq
2026-02-28 18:19:44 +08:00
parent e6aa0e0e10
commit 035464c0ac

View File

@@ -200,18 +200,19 @@ async def run_pilot_extraction(
# 进度回调:输出每个分块的结果 # 进度回调:输出每个分块的结果
if progress_callback: if progress_callback:
for dlg in chunked_dialogs: for dlg in chunked_dialogs:
for i, chunk in enumerate(dlg.chunks): if hasattr(dlg, 'chunks') and dlg.chunks:
chunk_result = { for i, chunk in enumerate(dlg.chunks):
"chunk_index": i + 1, chunk_result = {
"content": chunk.content[:200] + "..." if len(chunk.content) > 200 else chunk.content, "chunk_index": i + 1,
"full_length": len(chunk.content), "content": chunk.content[:200] + "..." if len(chunk.content) > 200 else chunk.content,
"dialog_id": dlg.id, "full_length": len(chunk.content),
"chunker_strategy": memory_config.chunker_strategy, "dialog_id": dlg.id,
} "chunker_strategy": memory_config.chunker_strategy,
await progress_callback("text_preprocessing_chunking", f"分块 {i + 1} 处理完成", chunk_result) }
await progress_callback("text_preprocessing_result", f"分块 {i + 1} 处理完成", chunk_result)
preprocessing_summary = { preprocessing_summary = {
"total_chunks": sum(len(dlg.chunks) for dlg in chunked_dialogs), "total_chunks": sum(len(dlg.chunks) for dlg in chunked_dialogs if hasattr(dlg, 'chunks') and dlg.chunks),
"total_dialogs": len(chunked_dialogs), "total_dialogs": len(chunked_dialogs),
"chunker_strategy": memory_config.chunker_strategy, "chunker_strategy": memory_config.chunker_strategy,
} }