fix(app):

1. Token consumption of the omni model;
2. Token consumption of the cluster now includes sub-agents.
This commit is contained in:
Timebomb2018
2026-03-30 18:37:09 +08:00
parent ed90405439
commit 876c39b1b0
6 changed files with 92 additions and 21 deletions

View File

@@ -631,13 +631,13 @@ class AppChatService:
storage_type=storage_type,
user_rag_memory_id=user_rag_memory_id
):
if "sub_usage" in event:
# 拦截 sub_usage 事件,累加 token
if "event: sub_usage" in event:
if "data:" in event:
try:
data_line = event.split("data: ", 1)[1].strip()
data = json.loads(data_line)
if "total_tokens" in data:
total_tokens += data["total_tokens"]
total_tokens += data.get("total_tokens", 0)
except:
pass
else:

View File

@@ -403,6 +403,17 @@ class MasterAgentRouter:
response = await llm.ainvoke(prompt)
ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)
# 提取 token 消耗
self._last_routing_tokens = 0
if hasattr(response, 'usage_metadata') and response.usage_metadata:
um = response.usage_metadata
self._last_routing_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0)
elif hasattr(response, 'response_metadata') and response.response_metadata:
token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {})
if isinstance(token_usage, dict):
self._last_routing_tokens = token_usage.get("total_tokens", 0)
logger.info(f"Master Agent 路由 token 消耗: {self._last_routing_tokens}")
# 提取响应内容
if hasattr(response, 'content'):
return response.content

View File

@@ -287,6 +287,11 @@ class MultiAgentOrchestrator:
sub_conversation_id = None
total_tokens = 0
# 累加 Master Agent 路由决策消耗的 token
total_tokens += task_analysis.get("routing_tokens", 0)
# 累加 Master Agent 整合消耗的 token
total_tokens += getattr(self, '_last_merge_tokens', 0)
if isinstance(results, dict):
sub_conversation_id = results.get("conversation_id") or results.get("result", {}).get("conversation_id")
# 提取 token 信息
@@ -358,12 +363,16 @@ class MultiAgentOrchestrator:
variables=variables
)
# 获取路由决策消耗的 token
routing_tokens = getattr(self.router, '_last_routing_tokens', 0)
logger.info(
"Master Agent 分析完成",
extra={
"selected_agent": routing_decision.get("selected_agent_id"),
"confidence": routing_decision.get("confidence"),
"strategy": routing_decision.get("strategy")
"strategy": routing_decision.get("strategy"),
"routing_tokens": routing_tokens
}
)
@@ -372,7 +381,8 @@ class MultiAgentOrchestrator:
"variables": variables or {},
"sub_agents": self.config.sub_agents,
"initial_context": variables or {},
"routing_decision": routing_decision
"routing_decision": routing_decision,
"routing_tokens": routing_tokens
}
async def _execute_sequential(
@@ -1032,6 +1042,11 @@ class MultiAgentOrchestrator:
# 5. 流式执行子 Agent
sub_conversation_id = None
# Master Agent 路由决策消耗的 token,通过 sub_usage 事件发送给上层
routing_tokens = task_analysis.get("routing_tokens", 0)
if routing_tokens > 0:
yield self._format_sse_event("sub_usage", {"total_tokens": routing_tokens})
async for event in self._execute_sub_agent_stream(
agent_data["config"],
message,
@@ -1054,6 +1069,7 @@ class MultiAgentOrchestrator:
except:
pass
# 直接透传所有事件(包括 sub_usage),累加统一由上层处理
yield event
# 6. 如果有会话 ID,发送一个包含它的事件
@@ -2612,6 +2628,17 @@ class MultiAgentOrchestrator:
ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)
# 提取整合消耗的 token
merge_tokens = 0
if hasattr(response, 'usage_metadata') and response.usage_metadata:
um = response.usage_metadata
merge_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0)
elif hasattr(response, 'response_metadata') and response.response_metadata:
token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {})
if isinstance(token_usage, dict):
merge_tokens = token_usage.get("total_tokens", 0)
self._last_merge_tokens = merge_tokens
# 提取响应内容
if hasattr(response, 'content'):
merged_response = response.content
@@ -2621,7 +2648,8 @@ class MultiAgentOrchestrator:
logger.info(
"Master Agent 整合完成",
extra={
"merged_length": len(merged_response)
"merged_length": len(merged_response),
"merge_tokens": merge_tokens
}
)