From 8b546b73669c455e7ded0ccc0ecd4cbf2a2a181f Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Sat, 28 Feb 2026 19:26:16 +0800
Subject: [PATCH] [add] Complete the interface integration for displaying
 semantic pruning results in the streaming output.

---
 api/app/services/pilot_run_service.py | 33 +++++++++++++++++----------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/api/app/services/pilot_run_service.py b/api/app/services/pilot_run_service.py
index 4cfa158d..c39d089e 100644
--- a/api/app/services/pilot_run_service.py
+++ b/api/app/services/pilot_run_service.py
@@ -106,6 +106,7 @@ async def run_pilot_extraction(
         # ========== 步骤 2.1: 语义剪枝 ==========
         pruned_dialogs = [dialog]
         deleted_messages = []  # 记录被删除的消息
+        pruning_stats = None  # 保存剪枝统计信息，用于最终汇总
         
         if memory_config.pruning_enabled:
             try:
@@ -147,13 +148,17 @@ async def run_pilot_extraction(
                         if msg["content"] not in remaining_contents
                     ]
                     
-                    pruning_result = {
+                    # 保存剪枝统计信息（用于最终汇总，只保留deleted_count）
+                    pruning_stats = {
                         "enabled": True,
                         "scene": config.pruning_scene,
                         "threshold": config.pruning_threshold,
-                        "original_count": original_msg_count,
-                        "remaining_count": remaining_msg_count,
                         "deleted_count": deleted_msg_count,
+                    }
+                    
+                    # 输出剪枝结果（显示删除的消息详情）
+                    pruning_result = {
+                        "type": "pruning",
                         "deleted_messages": deleted_messages,
                     }
                     
@@ -163,7 +168,7 @@ async def run_pilot_extraction(
                     )
                     
                     if progress_callback:
-                        await progress_callback("text_preprocessing_pruning", "语义剪枝完成", pruning_result)
+                        await progress_callback("text_preprocessing_result", "语义剪枝完成", pruning_result)
                 else:
                     logger.warning("[PILOT_RUN] 剪枝后对话为空，使用原始对话")
                     pruned_dialogs = [dialog]
@@ -173,19 +178,16 @@ async def run_pilot_extraction(
                 pruned_dialogs = [dialog]
                 if progress_callback:
                     error_result = {
-                        "enabled": True,
+                        "type": "pruning",
                         "error": str(e),
                         "fallback": "使用原始对话"
                     }
-                    await progress_callback("text_preprocessing_pruning", "语义剪枝失败", error_result)
+                    await progress_callback("text_preprocessing_result", "语义剪枝失败", error_result)
         else:
             logger.info("[PILOT_RUN] 语义剪枝已关闭，跳过")
-            if progress_callback:
-                pruning_result = {
-                    "enabled": False,
-                    "message": "语义剪枝已关闭"
-                }
-                await progress_callback("text_preprocessing_pruning", "语义剪枝已关闭", pruning_result)
+            pruning_stats = {
+                "enabled": False,
+            }
 
         # ========== 步骤 2.2: 语义分块 ==========
         chunked_dialogs = await get_chunked_dialogs_from_preprocessed(
@@ -203,6 +205,7 @@ async def run_pilot_extraction(
                 if hasattr(dlg, 'chunks') and dlg.chunks:
                     for i, chunk in enumerate(dlg.chunks):
                         chunk_result = {
+                            "type": "chunking",
                             "chunk_index": i + 1,
                             "content": chunk.content[:200] + "..." if len(chunk.content) > 200 else chunk.content,
                             "full_length": len(chunk.content),
@@ -211,11 +214,17 @@ async def run_pilot_extraction(
                         }
                         await progress_callback("text_preprocessing_result", f"分块 {i + 1} 处理完成", chunk_result)
 
+            # 构建预处理完成总结（包含剪枝统计）
             preprocessing_summary = {
                 "total_chunks": sum(len(dlg.chunks) for dlg in chunked_dialogs if hasattr(dlg, 'chunks') and dlg.chunks),
                 "total_dialogs": len(chunked_dialogs),
                 "chunker_strategy": memory_config.chunker_strategy,
             }
+            
+            # 添加剪枝统计信息
+            if pruning_stats:
+                preprocessing_summary["pruning"] = pruning_stats
+            
             await progress_callback("text_preprocessing_complete", "预处理文本完成（剪枝 + 分块）", preprocessing_summary)
 
         log_time("Data Loading & Chunking", time.time() - step_start, log_file)