From e4fb58496b6ef0b92feb34ad9718f06c1b7a1fd4 Mon Sep 17 00:00:00 2001
From: Timebomb2018 <18868801967@163.com>
Date: Mon, 2 Feb 2026 11:49:44 +0800
Subject: [PATCH] feat(app and model): token consumption statistics

---
 api/app/core/agent/langchain_agent.py    | 18 ++++++++++---
 api/app/services/app_chat_service.py     | 21 +++++++++++----
 api/app/services/conversation_service.py |  7 +++--
 api/app/services/draft_run_service.py    | 33 ++++++++++++++++++------
 api/app/services/shared_chat_service.py  | 21 +++++++++++----
 api/app/version_info.json                | 28 ++++++++++++++++++++
 6 files changed, 105 insertions(+), 23 deletions(-)

diff --git a/api/app/core/agent/langchain_agent.py b/api/app/core/agent/langchain_agent.py
index a34c781f..647196c5 100644
--- a/api/app/core/agent/langchain_agent.py
+++ b/api/app/core/agent/langchain_agent.py
@@ -106,7 +106,7 @@ class LangChainAgent:
                 "streaming": streaming,
                 "tool_count": len(self.tools),
                 "tool_names": [tool.name for tool in self.tools] if self.tools else [],
-                "tool_count": len(self.tools)
+                # "tool_count": len(self.tools)
             }
         )
 
@@ -332,9 +332,12 @@ class LangChainAgent:
             # 获取最后的 AI 消息
             output_messages = result.get("messages", [])
             content = ""
+            total_tokens = 0
             for msg in reversed(output_messages):
                 if isinstance(msg, AIMessage):
                     content = msg.content
+                    response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None
+                    total_tokens = response_meta.get("token_usage", {}).get("total_tokens", 0) if response_meta else 0
                     break
 
             elapsed_time = time.time() - start_time
@@ -350,7 +353,7 @@ class LangChainAgent:
                 "usage": {
                     "prompt_tokens": 0,
                     "completion_tokens": 0,
-                    "total_tokens": 0
+                    "total_tokens": total_tokens
                 }
             }
 
@@ -444,7 +447,7 @@ class LangChainAgent:
 
             # 统一使用 agent 的 astream_events 实现流式输出
             logger.debug("使用 Agent astream_events 实现流式输出")
-            full_content=''
+            full_content = ''
             try:
                 async for event in self.agent.astream_events(
                     {"messages": messages},
@@ -481,6 +484,15 @@ class LangChainAgent:
                         logger.debug(f"工具调用结束: {event.get('name')}")
                 
                 logger.debug(f"Agent 流式完成，共 {chunk_count} 个事件")
+                # Token usage from the last event's output; yielded as an int so callers can tell it from text chunks
+                output_messages = event.get("data", {}).get("output", {}).get("messages", [])
+                for msg in reversed(output_messages):
+                    if isinstance(msg, AIMessage):
+                        response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None
+                        total_tokens = response_meta.get("token_usage", {}).get("total_tokens",
+                                                                                0) if response_meta else 0
+                        yield total_tokens
+                        break
                 if memory_flag:
                     # AI 回复写入（用户消息和 AI 回复配对，一次性写入完整对话）
                     await self.write(storage_type, end_user_id, message_chat, full_content, user_rag_memory_id, end_user_id, actual_config_id)
diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py
index c0a66e03..26abd0f9 100644
--- a/api/app/services/app_chat_service.py
+++ b/api/app/services/app_chat_service.py
@@ -171,7 +171,14 @@ class AppChatService:
         self.conversation_service.save_conversation_messages(
             conversation_id=conversation_id,
             user_message=message,
-            assistant_message=result["content"]
+            assistant_message=result["content"],
+            meta_data={
+                "usage": result.get("usage", {
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "total_tokens": 0
+                })
+            }
         )
 
         elapsed_time = time.time() - start_time
@@ -310,6 +317,7 @@ class AppChatService:
 
             # 流式调用 Agent
             full_content = ""
+            total_tokens = 0
             async for chunk in agent.chat_stream(
                     message=message,
                     history=history,
@@ -320,9 +328,12 @@ class AppChatService:
                     config_id=config_id,
                     memory_flag=memory_flag
             ):
-                full_content += chunk
-                # 发送消息块事件
-                yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
+                if isinstance(chunk, int):
+                    total_tokens = chunk
+                else:
+                    full_content += chunk
+                    # 发送消息块事件
+                    yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
 
             elapsed_time = time.time() - start_time
 
@@ -339,7 +350,7 @@ class AppChatService:
                 content=full_content,
                 meta_data={
                     "model": api_key_obj.model_name,
-                    "usage": {}
+                    "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
                 }
             )
 
diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py
index 275d6413..526e0fe2 100644
--- a/api/app/services/conversation_service.py
+++ b/api/app/services/conversation_service.py
@@ -298,7 +298,8 @@ class ConversationService:
             self,
             conversation_id: uuid.UUID,
             user_message: str,
-            assistant_message: str
+            assistant_message: str,
+            meta_data: Optional[dict] = None
     ):
         """
         Save a pair of user and assistant messages to the conversation.
@@ -307,6 +308,7 @@ class ConversationService:
             conversation_id (uuid.UUID): Conversation UUID.
             user_message (str): User's message content.
             assistant_message (str): Assistant's response content.
+            meta_data (Optional[dict]): Metadata stored on the assistant message only (e.g. token usage).
         """
         self.add_message(
             conversation_id=conversation_id,
@@ -317,7 +319,8 @@ class ConversationService:
         self.add_message(
             conversation_id=conversation_id,
             role="assistant",
-            content=assistant_message
+            content=assistant_message,
+            meta_data=meta_data
         )
 
         logger.debug(
diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py
index 524c9ff6..dc01e541 100644
--- a/api/app/services/draft_run_service.py
+++ b/api/app/services/draft_run_service.py
@@ -442,7 +442,14 @@ class DraftRunService:
                     user_message=message,
                     assistant_message=result["content"],
                     app_id=agent_config.app_id,
-                    user_id=user_id
+                    user_id=user_id,
+                    meta_data={
+                        "usage": result.get("usage", {
+                            "prompt_tokens": 0,
+                            "completion_tokens": 0,
+                            "total_tokens": 0
+                        })
+                    }
                 )
 
             response = {
@@ -649,6 +656,7 @@ class DraftRunService:
 
             # 9. 流式调用 Agent
             full_content = ""
+            total_tokens = 0
             async for chunk in agent.chat_stream(
                 message=message,
                 history=history,
@@ -659,11 +667,14 @@ class DraftRunService:
                 user_rag_memory_id=user_rag_memory_id,
                 memory_flag=memory_flag
             ):
-                full_content += chunk
-                # 发送消息块事件
-                yield self._format_sse_event("message", {
-                    "content": chunk
-                })
+                if isinstance(chunk, int):
+                    total_tokens = chunk
+                else:
+                    full_content += chunk
+                    # 发送消息块事件
+                    yield self._format_sse_event("message", {
+                        "content": chunk
+                    })
 
             elapsed_time = time.time() - start_time
 
@@ -674,7 +685,10 @@ class DraftRunService:
                     user_message=message,
                     assistant_message=full_content,
                     app_id=agent_config.app_id,
-                    user_id=user_id
+                    user_id=user_id,
+                    meta_data={
+                        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
+                    }
                 )
 
             # 11. 发送结束事件
@@ -898,6 +912,7 @@ class DraftRunService:
         conversation_id: str,
         user_message: str,
         assistant_message: str,
+        meta_data: dict,
         app_id: Optional[uuid.UUID] = None,
         user_id: Optional[str] = None
     ) -> None:
@@ -909,6 +924,7 @@ class DraftRunService:
             assistant_message: AI 回复消息
             app_id: 应用ID（未使用，保留用于兼容性）
             user_id: 用户ID（未使用，保留用于兼容性）
+            meta_data: metadata saved with the assistant message (token usage under the "usage" key)
         """
         try:
             from app.services.conversation_service import ConversationService
@@ -927,7 +943,8 @@ class DraftRunService:
             conversation_service.add_message(
                 conversation_id=conv_uuid,
                 role="assistant",
-                content=assistant_message
+                content=assistant_message,
+                meta_data=meta_data
             )
 
             logger.debug(
diff --git a/api/app/services/shared_chat_service.py b/api/app/services/shared_chat_service.py
index 1d012088..a92c2649 100644
--- a/api/app/services/shared_chat_service.py
+++ b/api/app/services/shared_chat_service.py
@@ -282,7 +282,14 @@ class SharedChatService:
         self.conversation_service.save_conversation_messages(
             conversation_id=conversation.id,
             user_message=message,
-            assistant_message=result["content"]
+            assistant_message=result["content"],
+            meta_data={
+                "usage": result.get("usage", {
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "total_tokens": 0
+                })
+            }
         )
         # self.conversation_service.add_message(
         #     conversation_id=conversation.id,
@@ -469,6 +476,7 @@ class SharedChatService:
             
             # 流式调用 Agent
             full_content = ""
+            total_tokens = 0
             async for chunk in agent.chat_stream(
                 message=message,
                 history=history,
@@ -479,9 +487,12 @@ class SharedChatService:
                 config_id=config_id,
                 memory_flag=memory_flag
             ):
-                full_content += chunk
-                # 发送消息块事件
-                yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
+                if isinstance(chunk, int):
+                    total_tokens = chunk
+                else:
+                    full_content += chunk
+                    # 发送消息块事件
+                    yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
             
             elapsed_time = time.time() - start_time
             
@@ -498,7 +509,7 @@ class SharedChatService:
                 content=full_content,
                 meta_data={
                     "model": api_key_obj.model_name,
-                    "usage": {}
+                    "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
                 }
             )
 
diff --git a/api/app/version_info.json b/api/app/version_info.json
index 86a5e33e..e82243a4 100644
--- a/api/app/version_info.json
+++ b/api/app/version_info.json
@@ -1,4 +1,32 @@
 {
+  "v0.2.2": {
+    "introduction": {
+      "codeName": "淬锋（Temper）",
+      "releaseDate": "2026-1-31",
+      "upgradePosition": "本次发布聚焦平台稳定性和性能优化。正如\"淬锋\"之名——千锤百炼，淬火成锋，我们通过严格测试和修复打磨系统品质。引入 Agent 工作流的代码执行能力、改进模型并发管理，并修复了记忆系统的多个关键问题。",
+      "coreUpgrades": [
+        "1. Agent平台增强<br>* 模型并发管理：优化模型广场的并发请求处理和资源分配能力。",
+        "2. 记忆系统优化<br>* Celery 队列修复：解决任务队列问题，提升异步记忆处理的可靠性<br>* 记忆 Agent 优化：提升记忆 Agent 的性能和效率<br>* 接口响应速度优化：优化记忆接口响应时间，加快操作速度。",
+        "3. 情绪记忆与识别升级<br>* 情绪记忆角色识别修复：解决情绪记忆上下文中的角色/人物识别问题<br>* 角色识别增强：提升对话记忆中的角色/人物识别准确性。",
+        "<br>",
+        "MemoryBear 持续致力于为 AI 应用提供类人记忆能力。本次以稳定性为核心的发布，进一步夯实了「感知→精炼→关联→遗忘」范式的基础。",
+        "未来版本将在此坚实基础上，扩展 Agent 能力并深化记忆智能特性。"
+      ]
+    },
+    "introduction_en": {
+      "codeName": "Temper (淬锋)",
+      "releaseDate": "2026-1-31",
+      "upgradePosition": "This release focuses on platform stability and performance optimization — true to its codename \"淬锋\" (tempered blade), we've refined the system through rigorous testing and fixes. It introduces Python code execution for Agent workflows, improves model concurrency management, and delivers critical fixes across the memory system.",
+      "coreUpgrades": [
+        "1. Agent Platform Enhancements<br>* Model Concurrency Management: Enhanced Model Plaza with improved concurrent model request handling and resource allocation.",
+        "2. Memory System Improvements<br>* Celery Queue Fix: Resolved task queue issues for more reliable asynchronous memory processing<br>* Memory Agent Optimization: Improved memory Agent performance and efficiency<br>* API Response Speed: Optimized memory interface response times for faster operations.",
+        "3. Emotional Memory & Recognition Upgrades<br>* Emotion Memory Role Recognition Fix: Resolved issues with role/character identification in emotional memory contexts<br>* Role Recognition Enhancement: Improved character/role identification accuracy in conversation memory.",
+        "<br>",
+        "MemoryBear continues advancing toward human-like memory capabilities for AI applications. This stability-focused release strengthens the foundation for our Perception → Refinement → Association → Forgetting paradigm.",
+        "Future releases will build on this solid base with expanded Agent capabilities and deeper memory intelligence features."
+      ]
+    }
+  },
   "v0.2.1": {
     "introduction": {
       "codeName": "启知",