From 940c594066ead35917d1b8d8f1a85f2a755c224c Mon Sep 17 00:00:00 2001 From: Ke Sun <33739460+keeees@users.noreply.github.com> Date: Tue, 3 Feb 2026 10:33:39 +0800 Subject: [PATCH] Release/v0.2.3 (#281) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(app and model): token consumption statistics of the cluster * fix(web): prompt history remove pageLoading * fix(prompt): remove hard-coded import of prompt file paths (#279) * Fix/develop memory bug (#274) * 遗漏的历史映射 * 遗漏的历史映射 * fix_timeline_memories * fix(web): update retrieve_type key * Fix/develop memory bug (#276) * 遗漏的历史映射 * 遗漏的历史映射 * fix_timeline_memories * fix_timeline_memories * write_gragp/bug_fix * write_gragp/bug_fix * write_gragp/bug_fix * chore(celery): disable periodic task scheduling * fix(prompt): remove hard-coded import of prompt file paths --------- Co-authored-by: lixinyue11 <94037597+lixinyue11@users.noreply.github.com> Co-authored-by: zhaoying Co-authored-by: yingzhao Co-authored-by: Ke Sun --------- Co-authored-by: Timebomb2018 <18868801967@163.com> Co-authored-by: Mark Co-authored-by: zhaoying Co-authored-by: Eternity <61316157+myhMARS@users.noreply.github.com> Co-authored-by: lixinyue11 <94037597+lixinyue11@users.noreply.github.com> Co-authored-by: yingzhao --- api/app/services/app_chat_service.py | 44 +++++--- api/app/services/conversation_service.py | 7 +- api/app/services/draft_run_service.py | 5 + api/app/services/handoffs_service.py | 18 +++- api/app/services/multi_agent_orchestrator.py | 20 +++- api/app/services/multi_agent_service.py | 106 ++++++++++++++++++- api/app/services/prompt_optimizer_service.py | 6 +- web/src/components/PageScrollList/index.tsx | 5 +- web/src/styles/index.css | 3 + web/src/views/Prompt/History.tsx | 1 + 10 files changed, 191 insertions(+), 24 deletions(-) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index 26abd0f9..bd9106e5 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -427,7 +427,11 @@ class AppChatService: meta_data={ "mode": result.get("mode"), "elapsed_time": result.get("elapsed_time"), - "sub_results": result.get("sub_results") + "usage": result.get("usage", { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0 + }) } ) @@ -469,6 +473,7 @@ class AppChatService: yield f"event: start\ndata: {json.dumps({'conversation_id': str(conversation_id)}, ensure_ascii=False)}\n\n" full_content = "" + total_tokens = 0 # 2. 创建编排器 orchestrator = MultiAgentOrchestrator(self.db, config) @@ -485,16 +490,26 @@ class AppChatService: storage_type=storage_type, user_rag_memory_id=user_rag_memory_id ): - yield event - # 尝试提取内容(用于保存) - if "data:" in event: - try: - data_line = event.split("data: ", 1)[1].strip() - data = json.loads(data_line) - if "content" in data: - full_content += data["content"] - except: - pass + if "sub_usage" in event: + if "data:" in event: + try: + data_line = event.split("data: ", 1)[1].strip() + data = json.loads(data_line) + if "total_tokens" in data: + total_tokens += data["total_tokens"] + except: + pass + else: + yield event + # 尝试提取内容(用于保存) + if "data:" in event: + try: + data_line = event.split("data: ", 1)[1].strip() + data = json.loads(data_line) + if "content" in data: + full_content += data["content"] + except: + pass elapsed_time = time.time() - start_time @@ -510,7 +525,12 @@ class AppChatService: role="assistant", content=full_content, meta_data={ - "elapsed_time": elapsed_time + "elapsed_time": elapsed_time, + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": total_tokens + } } ) diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index 526e0fe2..553aefc4 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -1,4 +1,5 @@ """会话服务""" +import os import uuid from datetime import datetime, timedelta from typing import Annotated @@ -529,12 +530,12 @@ class ConversationService: takeaways=[], info_score=0, ) - - with open('app/services/prompt/conversation_summary_system.jinja2', 'r', encoding='utf-8') as f: + prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt') + with open(os.path.join(prompt_path, 'conversation_summary_system.jinja2'), 'r', encoding='utf-8') as f: system_prompt = f.read() rendered_system_message = Template(system_prompt).render() - with open('app/services/prompt/conversation_summary_user.jinja2', 'r', encoding='utf-8') as f: + with open(os.path.join(prompt_path, 'conversation_summary_user.jinja2'), 'r', encoding='utf-8') as f: user_prompt = f.read() rendered_user_message = Template(user_prompt).render( language=language, diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index dc01e541..9a3e1d37 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -678,6 +678,11 @@ class DraftRunService: elapsed_time = time.time() - start_time + if sub_agent: + yield self._format_sse_event("sub_usage", { + "total_tokens": total_tokens + }) + # 10. 保存会话消息 if not sub_agent and agent_config.memory and agent_config.memory.get("enabled"): await self._save_conversation_message( diff --git a/api/app/services/handoffs_service.py b/api/app/services/handoffs_service.py index 114e9945..10e4d646 100644 --- a/api/app/services/handoffs_service.py +++ b/api/app/services/handoffs_service.py @@ -4,7 +4,7 @@ import uuid from typing import List, Dict, Any, Optional, AsyncGenerator, Annotated from typing_extensions import TypedDict -from langchain_core.messages import HumanMessage, AIMessage, BaseMessage +from langchain_core.messages import HumanMessage, AIMessage, BaseMessage, AIMessageChunk from langgraph.graph import StateGraph, START, END from langgraph.types import Command from langgraph.checkpoint.memory import MemorySaver @@ -727,9 +727,12 @@ class HandoffsService: # 提取响应 response_content = "" + total_tokens = 0 for msg in result.get("messages", []): if isinstance(msg, AIMessage): response_content = msg.content + response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None + total_tokens = response_meta.get("token_usage", {}).get("total_tokens", 0) if response_meta else 0 break return { @@ -737,7 +740,12 @@ class HandoffsService: "active_agent": result.get("active_agent"), "response": response_content, "message_count": len(result.get("messages", [])), - "handoff_count": result.get("handoff_count", 0) + "handoff_count": result.get("handoff_count", 0), + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": total_tokens + } } async def chat_stream( @@ -830,6 +838,12 @@ class HandoffsService: # 捕获 LLM 结束事件,输出收集到的工具调用 elif kind == "on_chat_model_end": + output_message = event.get("data", {}).get("output", {}) + if isinstance(output_message, AIMessageChunk): + response_meta = output_message.response_metadata if hasattr(output_message, 'response_metadata') else None + total_tokens = response_meta.get("token_usage", {}).get("total_tokens", + 0) if response_meta else 0 + yield f"event: sub_usage\ndata: {json.dumps({"total_tokens": total_tokens}, ensure_ascii=False)}\n\n" if collected_tool_calls: # 找到参数最完整的 transfer 工具调用 best_tc = None diff --git a/api/app/services/multi_agent_orchestrator.py b/api/app/services/multi_agent_orchestrator.py index d9062eaf..b28bafbf 100644 --- a/api/app/services/multi_agent_orchestrator.py +++ b/api/app/services/multi_agent_orchestrator.py @@ -280,14 +280,22 @@ class MultiAgentOrchestrator: # 4. 提取子 Agent 的 conversation_id(用于多轮对话) sub_conversation_id = None + total_tokens = 0 + if isinstance(results, dict): sub_conversation_id = results.get("conversation_id") or results.get("result", {}).get("conversation_id") + # 提取 token 信息 + usage = results.get("usage", {}) or results.get("result", {}).get("usage", {}) + total_tokens += usage.get("total_tokens", 0) elif isinstance(results, list) and results: for item in results: if "result" in item: sub_conversation_id = item["result"].get("conversation_id") if sub_conversation_id: break + # 累加每个子 Agent 的 token + usage = item.get("usage", {}) or item.get("result", {}).get("usage", {}) + total_tokens += usage.get("total_tokens", 0) logger.info( "多 Agent 任务完成", @@ -301,9 +309,15 @@ class MultiAgentOrchestrator: return { "message": final_result, "conversation_id": sub_conversation_id, + "mode": OrchestrationMode.SUPERVISOR, "elapsed_time": elapsed_time, "strategy": routing_decision.get("collaboration_strategy", "single"), - "sub_results": results + "sub_results": results, + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": total_tokens + } } except Exception as e: @@ -1552,10 +1566,12 @@ class MultiAgentOrchestrator: return { "message": result.get("response", ""), "conversation_id": result.get("conversation_id"), + "mode": OrchestrationMode.COLLABORATION, "elapsed_time": elapsed_time, "strategy": "collaboration", "active_agent": result.get("active_agent"), - "sub_results": result + "sub_results": result, + "usage": result.get("usage") } except Exception as e: diff --git a/api/app/services/multi_agent_service.py b/api/app/services/multi_agent_service.py index da984d16..c52814ed 100644 --- a/api/app/services/multi_agent_service.py +++ b/api/app/services/multi_agent_service.py @@ -1,5 +1,6 @@ """多 Agent 配置管理服务""" import uuid +import json from typing import Optional, List, Tuple, Any, Annotated from fastapi import Depends @@ -427,6 +428,23 @@ class MultiAgentService: memory=getattr(request, 'memory', True) # 记忆功能参数 ) + await self._save_conversation_message( + conversation_id=request.conversation_id, + user_message=request.message, + assistant_message=result.get("message", ""), + app_id=app_id, + user_id=request.user_id, + meta_data={ + "mode": result.get("mode"), + "elapsed_time": result.get("elapsed_time"), + "usage": result.get("usage", { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0 + }) + } + ) + return result async def run_stream( @@ -451,11 +469,14 @@ class MultiAgentService: raise ResourceNotFoundException("多 Agent 配置", str(app_id)) if not config.is_active: - raise BusinessException("多 Agent 配置已禁用", BizCode.RESOURCE_DISABLED) + raise BusinessException("多 Agent 配置已禁用", BizCode.NOT_FOUND) # 2. 创建编排器 orchestrator = MultiAgentOrchestrator(self.db, config) + full_content = "" + total_tokens = 0 + # 3. 流式执行任务 async for event in orchestrator.execute_stream( message=request.message, @@ -468,7 +489,88 @@ class MultiAgentService: storage_type=storage_type, user_rag_memory_id=user_rag_memory_id ): - yield event + if "sub_usage" in event: + if "data:" in event: + try: + data_line = event.split("data: ", 1)[1].strip() + data = json.loads(data_line) + if "total_tokens" in data: + total_tokens += data["total_tokens"] + except: + pass + else: + yield event + if "data:" in event: + try: + data_line = event.split("data: ", 1)[1].strip() + data = json.loads(data_line) + if "content" in data: + full_content += data["content"] + except: + pass + + await self._save_conversation_message( + conversation_id=request.conversation_id, + user_message=request.message, + assistant_message=full_content, + app_id=app_id, + user_id=request.user_id, + meta_data={ + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": total_tokens + } + } + ) + + async def _save_conversation_message( + self, + conversation_id: uuid.UUID, + user_message: str, + assistant_message: str, + meta_data: dict, + app_id: Optional[uuid.UUID] = None, + user_id: Optional[str] = None + ) -> None: + """保存会话消息 + + Args: + conversation_id: 会话ID + user_message: 用户消息 + assistant_message: AI 回复消息 + meta_data: 元数据(包括 token 消耗) + app_id: 应用ID + user_id: 用户ID + """ + try: + from app.services.conversation_service import ConversationService + + conversation_service = ConversationService(self.db) + + conversation_service.add_message( + conversation_id=conversation_id, + role="user", + content=user_message + ) + conversation_service.add_message( + conversation_id=conversation_id, + role="assistant", + content=assistant_message, + meta_data=meta_data + ) + + logger.debug( + "保存多 Agent 会话消息", + extra={ + "conversation_id": conversation_id, + "user_message_length": len(user_message), + "assistant_message_length": len(assistant_message) + } + ) + + except Exception as e: + logger.warning("保存会话消息失败", extra={"error": str(e)}) # def add_sub_agent( # self, diff --git a/api/app/services/prompt_optimizer_service.py b/api/app/services/prompt_optimizer_service.py index 81bf899d..2c0b57ac 100644 --- a/api/app/services/prompt_optimizer_service.py +++ b/api/app/services/prompt_optimizer_service.py @@ -1,3 +1,4 @@ +import os import re import uuid from typing import Any, AsyncGenerator @@ -182,11 +183,12 @@ class PromptOptimizerService: base_url=api_config.api_base ), type=ModelType(model_config.type)) try: - with open('app/services/prompt/prompt_optimizer_system.jinja2', 'r', encoding='utf-8') as f: + prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt') + with open(os.path.join(prompt_path, 'prompt_optimizer_system.jinja2'), 'r', encoding='utf-8') as f: opt_system_prompt = f.read() rendered_system_message = Template(opt_system_prompt).render() - with open('app/services/prompt/prompt_optimizer_user.jinja2', 'r', encoding='utf-8') as f: + with open(os.path.join(prompt_path, 'prompt_optimizer_user.jinja2'), 'r', encoding='utf-8') as f: opt_user_prompt = f.read() except FileNotFoundError: raise BusinessException(message="System prompt template not found", code=BizCode.NOT_FOUND) diff --git a/web/src/components/PageScrollList/index.tsx b/web/src/components/PageScrollList/index.tsx index ef413d9f..bea97b04 100644 --- a/web/src/components/PageScrollList/index.tsx +++ b/web/src/components/PageScrollList/index.tsx @@ -26,6 +26,7 @@ interface PageScrollListProps> { query?: Q; column?: number; className?: string; + needLoading?: boolean; } const PageScrollList = forwardRef(>({ renderItem, @@ -33,6 +34,7 @@ const PageScrollList = forwardRef(>({ url, column = 4, className = '', + needLoading = true, }: PageScrollListProps, ref: React.Ref) => { useImperativeHandle(ref, () => ({ refresh, @@ -104,9 +106,10 @@ const PageScrollList = forwardRef(>({ dataLength={data.length} next={loadMoreData} hasMore={hasMore} - loader={} + loader={needLoading ? : undefined} // endMessage={It is all, nothing more 🤐} scrollableTarget="scrollableDiv" + className='rb:h-full!' > {data.length > 0 ? ( body { min-height: 100%; max-height: 100%; +} +#scrollableDiv .infinite-scroll-component__outerdiv { + height: 100%; } \ No newline at end of file diff --git a/web/src/views/Prompt/History.tsx b/web/src/views/Prompt/History.tsx index 712cbf82..fda662b4 100644 --- a/web/src/views/Prompt/History.tsx +++ b/web/src/views/Prompt/History.tsx @@ -50,6 +50,7 @@ const History: React.FC<{ query: HistoryQuery; edit: (item: HistoryItem) => void url={getPromptReleaseListUrl} query={query} column={3} + needLoading={false} renderItem={(item) => { const historyItem = item as unknown as HistoryItem; return (