[add]Complete the interface integration for the display of semantic pruning for streaming output.
This commit is contained in:
@@ -106,6 +106,7 @@ async def run_pilot_extraction(
|
|||||||
# ========== 步骤 2.1: 语义剪枝 ==========
|
# ========== 步骤 2.1: 语义剪枝 ==========
|
||||||
pruned_dialogs = [dialog]
|
pruned_dialogs = [dialog]
|
||||||
deleted_messages = [] # 记录被删除的消息
|
deleted_messages = [] # 记录被删除的消息
|
||||||
|
pruning_stats = None # 保存剪枝统计信息,用于最终汇总
|
||||||
|
|
||||||
if memory_config.pruning_enabled:
|
if memory_config.pruning_enabled:
|
||||||
try:
|
try:
|
||||||
@@ -147,13 +148,17 @@ async def run_pilot_extraction(
|
|||||||
if msg["content"] not in remaining_contents
|
if msg["content"] not in remaining_contents
|
||||||
]
|
]
|
||||||
|
|
||||||
pruning_result = {
|
# 保存剪枝统计信息(用于最终汇总,只保留deleted_count)
|
||||||
|
pruning_stats = {
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"scene": config.pruning_scene,
|
"scene": config.pruning_scene,
|
||||||
"threshold": config.pruning_threshold,
|
"threshold": config.pruning_threshold,
|
||||||
"original_count": original_msg_count,
|
|
||||||
"remaining_count": remaining_msg_count,
|
|
||||||
"deleted_count": deleted_msg_count,
|
"deleted_count": deleted_msg_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
# 输出剪枝结果(显示删除的消息详情)
|
||||||
|
pruning_result = {
|
||||||
|
"type": "pruning",
|
||||||
"deleted_messages": deleted_messages,
|
"deleted_messages": deleted_messages,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,7 +168,7 @@ async def run_pilot_extraction(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
await progress_callback("text_preprocessing_pruning", "语义剪枝完成", pruning_result)
|
await progress_callback("text_preprocessing_result", "语义剪枝完成", pruning_result)
|
||||||
else:
|
else:
|
||||||
logger.warning("[PILOT_RUN] 剪枝后对话为空,使用原始对话")
|
logger.warning("[PILOT_RUN] 剪枝后对话为空,使用原始对话")
|
||||||
pruned_dialogs = [dialog]
|
pruned_dialogs = [dialog]
|
||||||
@@ -173,19 +178,16 @@ async def run_pilot_extraction(
|
|||||||
pruned_dialogs = [dialog]
|
pruned_dialogs = [dialog]
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
error_result = {
|
error_result = {
|
||||||
"enabled": True,
|
"type": "pruning",
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
"fallback": "使用原始对话"
|
"fallback": "使用原始对话"
|
||||||
}
|
}
|
||||||
await progress_callback("text_preprocessing_pruning", "语义剪枝失败", error_result)
|
await progress_callback("text_preprocessing_result", "语义剪枝失败", error_result)
|
||||||
else:
|
else:
|
||||||
logger.info("[PILOT_RUN] 语义剪枝已关闭,跳过")
|
logger.info("[PILOT_RUN] 语义剪枝已关闭,跳过")
|
||||||
if progress_callback:
|
pruning_stats = {
|
||||||
pruning_result = {
|
"enabled": False,
|
||||||
"enabled": False,
|
}
|
||||||
"message": "语义剪枝已关闭"
|
|
||||||
}
|
|
||||||
await progress_callback("text_preprocessing_pruning", "语义剪枝已关闭", pruning_result)
|
|
||||||
|
|
||||||
# ========== 步骤 2.2: 语义分块 ==========
|
# ========== 步骤 2.2: 语义分块 ==========
|
||||||
chunked_dialogs = await get_chunked_dialogs_from_preprocessed(
|
chunked_dialogs = await get_chunked_dialogs_from_preprocessed(
|
||||||
@@ -203,6 +205,7 @@ async def run_pilot_extraction(
|
|||||||
if hasattr(dlg, 'chunks') and dlg.chunks:
|
if hasattr(dlg, 'chunks') and dlg.chunks:
|
||||||
for i, chunk in enumerate(dlg.chunks):
|
for i, chunk in enumerate(dlg.chunks):
|
||||||
chunk_result = {
|
chunk_result = {
|
||||||
|
"type": "chunking",
|
||||||
"chunk_index": i + 1,
|
"chunk_index": i + 1,
|
||||||
"content": chunk.content[:200] + "..." if len(chunk.content) > 200 else chunk.content,
|
"content": chunk.content[:200] + "..." if len(chunk.content) > 200 else chunk.content,
|
||||||
"full_length": len(chunk.content),
|
"full_length": len(chunk.content),
|
||||||
@@ -211,11 +214,17 @@ async def run_pilot_extraction(
|
|||||||
}
|
}
|
||||||
await progress_callback("text_preprocessing_result", f"分块 {i + 1} 处理完成", chunk_result)
|
await progress_callback("text_preprocessing_result", f"分块 {i + 1} 处理完成", chunk_result)
|
||||||
|
|
||||||
|
# 构建预处理完成总结(包含剪枝统计)
|
||||||
preprocessing_summary = {
|
preprocessing_summary = {
|
||||||
"total_chunks": sum(len(dlg.chunks) for dlg in chunked_dialogs if hasattr(dlg, 'chunks') and dlg.chunks),
|
"total_chunks": sum(len(dlg.chunks) for dlg in chunked_dialogs if hasattr(dlg, 'chunks') and dlg.chunks),
|
||||||
"total_dialogs": len(chunked_dialogs),
|
"total_dialogs": len(chunked_dialogs),
|
||||||
"chunker_strategy": memory_config.chunker_strategy,
|
"chunker_strategy": memory_config.chunker_strategy,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 添加剪枝统计信息
|
||||||
|
if pruning_stats:
|
||||||
|
preprocessing_summary["pruning"] = pruning_stats
|
||||||
|
|
||||||
await progress_callback("text_preprocessing_complete", "预处理文本完成(剪枝 + 分块)", preprocessing_summary)
|
await progress_callback("text_preprocessing_complete", "预处理文本完成(剪枝 + 分块)", preprocessing_summary)
|
||||||
|
|
||||||
log_time("Data Loading & Chunking", time.time() - step_start, log_file)
|
log_time("Data Loading & Chunking", time.time() - step_start, log_file)
|
||||||
|
|||||||
Reference in New Issue
Block a user