feat(app and model): token consumption statistics
This commit is contained in:
@@ -171,7 +171,14 @@ class AppChatService:
|
||||
self.conversation_service.save_conversation_messages(
|
||||
conversation_id=conversation_id,
|
||||
user_message=message,
|
||||
assistant_message=result["content"]
|
||||
assistant_message=result["content"],
|
||||
meta_data={
|
||||
"usage": result.get("usage", {
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"total_tokens": 0
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
@@ -310,6 +317,7 @@ class AppChatService:
|
||||
|
||||
# 流式调用 Agent
|
||||
full_content = ""
|
||||
total_tokens = 0
|
||||
async for chunk in agent.chat_stream(
|
||||
message=message,
|
||||
history=history,
|
||||
@@ -320,9 +328,12 @@ class AppChatService:
|
||||
config_id=config_id,
|
||||
memory_flag=memory_flag
|
||||
):
|
||||
full_content += chunk
|
||||
# 发送消息块事件
|
||||
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
|
||||
if isinstance(chunk, int):
|
||||
total_tokens = chunk
|
||||
else:
|
||||
full_content += chunk
|
||||
# 发送消息块事件
|
||||
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
|
||||
@@ -339,7 +350,7 @@ class AppChatService:
|
||||
content=full_content,
|
||||
meta_data={
|
||||
"model": api_key_obj.model_name,
|
||||
"usage": {}
|
||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -298,7 +298,8 @@ class ConversationService:
|
||||
self,
|
||||
conversation_id: uuid.UUID,
|
||||
user_message: str,
|
||||
assistant_message: str
|
||||
assistant_message: str,
|
||||
meta_data: Optional[dict] = None
|
||||
):
|
||||
"""
|
||||
Save a pair of user and assistant messages to the conversation.
|
||||
@@ -307,6 +308,7 @@ class ConversationService:
|
||||
conversation_id (uuid.UUID): Conversation UUID.
|
||||
user_message (str): User's message content.
|
||||
assistant_message (str): Assistant's response content.
|
||||
meta_data (Optional[dict]): Optional metadata for the messages.
|
||||
"""
|
||||
self.add_message(
|
||||
conversation_id=conversation_id,
|
||||
@@ -317,7 +319,8 @@ class ConversationService:
|
||||
self.add_message(
|
||||
conversation_id=conversation_id,
|
||||
role="assistant",
|
||||
content=assistant_message
|
||||
content=assistant_message,
|
||||
meta_data=meta_data
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
|
||||
@@ -442,7 +442,14 @@ class DraftRunService:
|
||||
user_message=message,
|
||||
assistant_message=result["content"],
|
||||
app_id=agent_config.app_id,
|
||||
user_id=user_id
|
||||
user_id=user_id,
|
||||
meta_data={
|
||||
"usage": result.get("usage", {
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"total_tokens": 0
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
response = {
|
||||
@@ -649,6 +656,7 @@ class DraftRunService:
|
||||
|
||||
# 9. 流式调用 Agent
|
||||
full_content = ""
|
||||
total_tokens = 0
|
||||
async for chunk in agent.chat_stream(
|
||||
message=message,
|
||||
history=history,
|
||||
@@ -659,11 +667,14 @@ class DraftRunService:
|
||||
user_rag_memory_id=user_rag_memory_id,
|
||||
memory_flag=memory_flag
|
||||
):
|
||||
full_content += chunk
|
||||
# 发送消息块事件
|
||||
yield self._format_sse_event("message", {
|
||||
"content": chunk
|
||||
})
|
||||
if isinstance(chunk, int):
|
||||
total_tokens = chunk
|
||||
else:
|
||||
full_content += chunk
|
||||
# 发送消息块事件
|
||||
yield self._format_sse_event("message", {
|
||||
"content": chunk
|
||||
})
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
|
||||
@@ -674,7 +685,10 @@ class DraftRunService:
|
||||
user_message=message,
|
||||
assistant_message=full_content,
|
||||
app_id=agent_config.app_id,
|
||||
user_id=user_id
|
||||
user_id=user_id,
|
||||
meta_data={
|
||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
|
||||
}
|
||||
)
|
||||
|
||||
# 11. 发送结束事件
|
||||
@@ -898,6 +912,7 @@ class DraftRunService:
|
||||
conversation_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str,
|
||||
meta_data: dict,
|
||||
app_id: Optional[uuid.UUID] = None,
|
||||
user_id: Optional[str] = None
|
||||
) -> None:
|
||||
@@ -909,6 +924,7 @@ class DraftRunService:
|
||||
assistant_message: AI 回复消息
|
||||
app_id: 应用ID(未使用,保留用于兼容性)
|
||||
user_id: 用户ID(未使用,保留用于兼容性)
|
||||
meta_data: token消耗
|
||||
"""
|
||||
try:
|
||||
from app.services.conversation_service import ConversationService
|
||||
@@ -927,7 +943,8 @@ class DraftRunService:
|
||||
conversation_service.add_message(
|
||||
conversation_id=conv_uuid,
|
||||
role="assistant",
|
||||
content=assistant_message
|
||||
content=assistant_message,
|
||||
meta_data=meta_data
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
|
||||
@@ -282,7 +282,14 @@ class SharedChatService:
|
||||
self.conversation_service.save_conversation_messages(
|
||||
conversation_id=conversation.id,
|
||||
user_message=message,
|
||||
assistant_message=result["content"]
|
||||
assistant_message=result["content"],
|
||||
meta_data={
|
||||
"usage": result.get("usage", {
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"total_tokens": 0
|
||||
})
|
||||
}
|
||||
)
|
||||
# self.conversation_service.add_message(
|
||||
# conversation_id=conversation.id,
|
||||
@@ -469,6 +476,7 @@ class SharedChatService:
|
||||
|
||||
# 流式调用 Agent
|
||||
full_content = ""
|
||||
total_tokens = 0
|
||||
async for chunk in agent.chat_stream(
|
||||
message=message,
|
||||
history=history,
|
||||
@@ -479,9 +487,12 @@ class SharedChatService:
|
||||
config_id=config_id,
|
||||
memory_flag=memory_flag
|
||||
):
|
||||
full_content += chunk
|
||||
# 发送消息块事件
|
||||
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
|
||||
if isinstance(chunk, int):
|
||||
total_tokens = chunk
|
||||
else:
|
||||
full_content += chunk
|
||||
# 发送消息块事件
|
||||
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
|
||||
@@ -498,7 +509,7 @@ class SharedChatService:
|
||||
content=full_content,
|
||||
meta_data={
|
||||
"model": api_key_obj.model_name,
|
||||
"usage": {}
|
||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user