From 6452733c4eb0ea2c6cdcb6b21ea470ae48cb2183 Mon Sep 17 00:00:00 2001
From: Ke Sun <kesun5@illinois.edu>
Date: Wed, 14 Jan 2026 15:58:24 +0800
Subject: [PATCH] fix(memory): simplify summary tool by removing LLM processing

- Remove template_service extraction and template rendering logic
- Remove LLM client creation via MemoryClientFactory (and the DB context opened only for it)
- Remove the structured LLM response call that used the RetrieveSummaryResponse model
- Replace LLM-based answer generation with direct retrieval information
- Simplify response to use the raw retrieved info, or the default fallback message when nothing was retrieved
- Update logging to reflect the non-LLM quick answer approach and truncate the logged answer to 500 characters
- Reduce unnecessary dependencies and improve performance by eliminating LLM call overhead
---
 .../agent/mcp_server/tools/summary_tools.py   | 34 +++----------------
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/api/app/core/memory/agent/mcp_server/tools/summary_tools.py b/api/app/core/memory/agent/mcp_server/tools/summary_tools.py
index 6d5012f1..0f306572 100644
--- a/api/app/core/memory/agent/mcp_server/tools/summary_tools.py
+++ b/api/app/core/memory/agent/mcp_server/tools/summary_tools.py
@@ -425,15 +425,9 @@ async def Input_Summary(
 
     try:
         # Extract services from context
-        template_service = get_context_resource(ctx, "template_service")
         session_service = get_context_resource(ctx, "session_service")
         search_service = get_context_resource(ctx, "search_service")
 
-        # Get LLM client from memory_config
-        with get_db_context() as db:
-            factory = MemoryClientFactory(db)
-            llm_client = factory.get_llm_client_from_config(memory_config)
-
         # Resolve session ID
         sessionid = Resolve_username(usermessages) or ""
         sessionid = sessionid.replace('call_id_', '')
@@ -539,31 +533,11 @@ async def Input_Summary(
             )
             retrieve_info, question, raw_results = "", query, []
 
+        # Return retrieved information directly without LLM processing
+        # Use the raw retrieved info as the answer
+        aimessages = retrieve_info if retrieve_info else "信息不足，无法回答"
 
-        # Render template
-        system_prompt = await template_service.render_template(
-            template_name='Retrieve_Summary_prompt.jinja2',
-            operation_name='input_summary',
-            query=query,
-            history=history,
-            retrieve_info=retrieve_info
-        )
-
-        # Call LLM with structured response
-        try:
-            structured = await llm_client.response_structured(
-                messages=[{"role": "system", "content": system_prompt}],
-                response_model=RetrieveSummaryResponse
-            )
-            aimessages = structured.data.query_answer or "信息不足，无法回答"
-        except Exception as e:
-            logger.error(
-                f"Input_Summary: response_structured failed, using default answer: {e}",
-                exc_info=True
-            )
-            aimessages = "信息不足，无法回答"
-
-        logger.info(f"Quick answer summary: {storage_type}--{user_rag_memory_id}--{aimessages}")
+        logger.info(f"Quick answer (no LLM): {storage_type}--{user_rag_memory_id}--{aimessages[:500]}...")
 
         # Emit intermediate output for frontend
         return {