feat(models): support reasoning_content streaming

This commit is contained in:
Timebomb2018
2026-04-01 15:47:43 +08:00
parent 9561578a2a
commit 264183cec2
28 changed files with 495 additions and 109 deletions

View File

@@ -117,7 +117,9 @@ class AppChatService:
max_tokens=model_parameters.get("max_tokens", 2000),
system_prompt=system_prompt,
tools=tools,
deep_thinking=model_parameters.get("deep_thinking", False),
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
capability=api_key_obj.capability or [],
)
model_info = ModelInfo(
@@ -205,7 +207,8 @@ class AppChatService:
"model": api_key_obj.model_name,
"usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
"audio_url": None,
"citations": filtered_citations
"citations": filtered_citations,
"reasoning_content": result.get("reasoning_content")
}
if files:
for f in files:
@@ -258,6 +261,7 @@ class AppChatService:
"conversation_id": conversation_id,
"message_id": str(message_id),
"message": result["content"],
"reasoning_content": result.get("reasoning_content"),
"usage": result.get("usage", {
"prompt_tokens": 0,
"completion_tokens": 0,
@@ -354,7 +358,10 @@ class AppChatService:
max_tokens=model_parameters.get("max_tokens", 2000),
system_prompt=system_prompt,
tools=tools,
streaming=True
streaming=True,
deep_thinking=model_parameters.get("deep_thinking", False),
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
capability=api_key_obj.capability or [],
)
model_info = ModelInfo(
@@ -403,6 +410,7 @@ class AppChatService:
# 流式调用 Agent支持多模态同时并行启动 TTS
full_content = ""
full_reasoning = ""
total_tokens = 0
text_queue: asyncio.Queue = asyncio.Queue()
@@ -426,6 +434,9 @@ class AppChatService:
):
if isinstance(chunk, int):
total_tokens = chunk
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
full_reasoning += chunk['content']
yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
else:
full_content += chunk
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
@@ -472,7 +483,8 @@ class AppChatService:
"model": api_key_obj.model_name,
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
"audio_url": None,
"citations": filtered_citations
"citations": filtered_citations,
"reasoning_content": full_reasoning or None
}
if files:

View File

@@ -534,6 +534,7 @@ class ConversationService:
api_key = api_config.api_key
api_base = api_config.api_base
is_omni = api_config.is_omni
capability = api_config.capability
model_type = config.type
llm = RedBearLLM(
@@ -542,7 +543,8 @@ class ConversationService:
provider=provider,
api_key=api_key,
base_url=api_base,
is_omni=is_omni
is_omni=is_omni,
support_thinking="thinking" in (capability or []),
),
type=ModelType(model_type)
)

View File

@@ -458,7 +458,7 @@ class AgentRunService:
statement = opening["statement"]
suggested_questions = opening["suggested_questions"]
# 如果有变量,进行替换(仅支持 {{var_name}} 格式)
if variables:
for var_name, var_value in variables.items():
@@ -595,6 +595,9 @@ class AgentRunService:
max_tokens=effective_params.get("max_tokens", 2000),
system_prompt=system_prompt,
tools=tools,
deep_thinking=effective_params.get("deep_thinking", False),
thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
capability=api_key_config.get("capability", []),
)
# 5. 处理会话ID创建或验证新会话时写入开场白
@@ -689,7 +692,8 @@ class AgentRunService:
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0
})
}),
"reasoning_content": result.get("reasoning_content")
},
files=files,
processed_files=processed_files,
@@ -701,6 +705,7 @@ class AgentRunService:
response = {
"message": result["content"],
"reasoning_content": result.get("reasoning_content"),
"conversation_id": conversation_id,
"usage": result.get("usage", {
"prompt_tokens": 0,
@@ -838,7 +843,10 @@ class AgentRunService:
max_tokens=effective_params.get("max_tokens", 2000),
system_prompt=system_prompt,
tools=tools,
streaming=True
streaming=True,
deep_thinking=effective_params.get("deep_thinking", False),
thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
capability=api_key_config.get("capability", []),
)
# 5. 处理会话ID创建或验证新会话时写入开场白
@@ -898,6 +906,7 @@ class AgentRunService:
# 9. 流式调用 Agent支持多模态同时并行启动 TTS
full_content = ""
full_reasoning = ""
total_tokens = 0
# 启动流式 TTS文本边输出边合成
@@ -916,6 +925,9 @@ class AgentRunService:
):
if isinstance(chunk, int):
total_tokens = chunk
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
full_reasoning += chunk["content"]
yield self._format_sse_event("reasoning", {"content": chunk["content"]})
else:
full_content += chunk
yield self._format_sse_event("message", {"content": chunk})
@@ -944,7 +956,8 @@ class AgentRunService:
app_id=agent_config.app_id,
user_id=user_id,
meta_data={
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
"reasoning_content": full_reasoning or None
},
files=files,
processed_files=processed_files,
@@ -1665,7 +1678,7 @@ class AgentRunService:
"""从 text_queue 取文本按句子切分后喂给 synthesizer"""
import re
buf = ""
sentence_end = re.compile(r'[\u3002\uff01\uff1f\.!?\n]')
sentence_end = re.compile(r'[\u3002\uff01\uff1f.!?\n]')
while True:
chunk = await text_queue.get()
if chunk is None:
@@ -1894,6 +1907,7 @@ class AgentRunService:
"conversation_id": result['conversation_id'],
"parameters_used": model_info["parameters"],
"message": result.get("message"),
"reasoning_content": result.get("reasoning_content"),
"usage": usage,
"elapsed_time": elapsed,
"tokens_per_second": (
@@ -2012,7 +2026,7 @@ class AgentRunService:
# 需要从 ModelApiKey 获取实际的模型名称,或者在 ModelConfig 中添加 model 字段
return None
def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> AgentConfig:
def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> tuple[AgentConfig, Any]:
"""创建一个带有覆盖参数的 agent_config浅拷贝只修改 model_parameters
Args:
@@ -2110,6 +2124,7 @@ class AgentRunService:
start_time = time.time()
full_content = ""
full_reasoning = ""
returned_conversation_id = model_conversation_id
audio_url = None
audio_status = None
@@ -2168,6 +2183,18 @@ class AgentRunService:
"content": chunk
}))
# 转发深度思考事件(带模型标识)
if event_type == "reasoning" and event_data:
reasoning_chunk = event_data.get("content", "")
full_reasoning += reasoning_chunk
await event_queue.put(self._format_sse_event("model_reasoning", {
"model_index": idx,
"model_config_id": model_config_id,
"label": model_label,
"conversation_id": returned_conversation_id,
"content": event_data.get("content", "")
}))
# 从 end 事件中提取 features 输出字段
if event_type == "end" and event_data:
audio_url = event_data.get("audio_url")
@@ -2199,6 +2226,7 @@ class AgentRunService:
"conversation_id": returned_conversation_id,
"parameters_used": model_info["parameters"],
"message": full_content,
"reasoning_content": full_reasoning or None,
"elapsed_time": elapsed,
"audio_url": audio_url,
"audio_status": audio_status,
@@ -2351,6 +2379,7 @@ class AgentRunService:
"label": r["label"],
"conversation_id": r.get("conversation_id"),
"message": r.get("message"),
"reasoning_content": r.get("reasoning_content"),
"elapsed_time": r.get("elapsed_time", 0),
"audio_url": r.get("audio_url"),
"audio_status": r.get("audio_status"),

View File

@@ -415,6 +415,7 @@ class LLMRouter:
api_key=api_key_config.api_key,
base_url=api_key_config.api_base,
is_omni=api_key_config.is_omni,
support_thinking="thinking" in (api_key_config.capability or []),
temperature=0.3,
max_tokens=500
)

View File

@@ -393,6 +393,7 @@ class MasterAgentRouter:
api_key=api_key_config.api_key,
base_url=api_key_config.api_base,
is_omni=api_key_config.is_omni,
support_thinking="thinking" in (api_key_config.capability or []),
extra_params = extra_params
)

View File

@@ -232,7 +232,8 @@ class MemoryPerceptualService:
provider=model_config.provider,
api_key=model_config.api_key,
base_url=model_config.api_base,
is_omni=model_config.is_omni
is_omni=model_config.is_omni,
support_thinking="thinking" in (model_config.capability or []),
)
)
return llm, model_config

View File

@@ -45,12 +45,20 @@ class ModelParameterMerger:
"frequency_penalty": 0.0,
"presence_penalty": 0.0,
"n": 1,
"stop": None
"stop": None,
"deep_thinking": False,
"thinking_budget_tokens": None
}
# 合并参数:默认值 -> 模型配置 -> Agent 配置
merged = default_params.copy()
# Pydantic 对象转为 dict
if model_config_params and hasattr(model_config_params, 'model_dump'):
model_config_params = model_config_params.model_dump()
if agent_config_params and hasattr(agent_config_params, 'model_dump'):
agent_config_params = agent_config_params.model_dump()
# 应用模型配置参数
if model_config_params:
for key in default_params:

View File

@@ -85,15 +85,16 @@ class ModelConfigService:
@staticmethod
async def validate_model_config(
db: Session,
*,
model_name: str,
provider: str,
api_key: str,
api_base: Optional[str] = None,
model_type: str = "llm",
test_message: str = "Hello",
is_omni: bool = False
db: Session,
*,
model_name: str,
provider: str,
api_key: str,
api_base: Optional[str] = None,
model_type: str = "llm",
test_message: str = "Hello",
is_omni: bool = False,
capability: Optional[list] = None
) -> Dict[str, Any]:
"""验证模型配置是否有效
@@ -124,6 +125,7 @@ class ModelConfigService:
api_key=api_key,
base_url=api_base,
is_omni=is_omni,
support_thinking="thinking" in (capability or []),
temperature=0.7,
max_tokens=100
)
@@ -320,7 +322,8 @@ class ModelConfigService:
api_base=api_key_data.api_base,
model_type=model_data.type,
test_message="Hello",
is_omni=model_data.is_omni
is_omni=model_data.is_omni,
capability=model_data.capability
)
if not validation_result["valid"]:
raise BusinessException(
@@ -590,7 +593,8 @@ class ModelApiKeyService:
api_base=data.api_base,
model_type=model_config.type,
test_message="Hello",
is_omni=data.is_omni
is_omni=data.is_omni,
capability=model_config.capability
)
if not validation_result["valid"]:
# 记录验证失败的模型,但不抛出异常
@@ -675,7 +679,8 @@ class ModelApiKeyService:
api_base=api_key_data.api_base,
model_type=model_config.type,
test_message="Hello",
is_omni=api_key_data.is_omni
is_omni=api_key_data.is_omni,
capability=model_config.capability
)
if not validation_result["valid"]:
raise BusinessException(
@@ -707,7 +712,8 @@ class ModelApiKeyService:
api_base=api_key_data.api_base or existing_api_key.api_base,
model_type=model_config.type,
test_message="Hello",
is_omni=model_config.is_omni
is_omni=model_config.is_omni,
capability=model_config.capability
)
if not validation_result["valid"]:
raise BusinessException(

View File

@@ -2616,6 +2616,7 @@ class MultiAgentOrchestrator:
api_key=api_key_config.api_key,
base_url=api_key_config.api_base,
is_omni=api_key_config.is_omni,
support_thinking="thinking" in (api_key_config.capability or []),
temperature=0.7, # 整合任务使用中等温度
max_tokens=2000
)
@@ -2794,6 +2795,7 @@ class MultiAgentOrchestrator:
api_key=api_key_config.api_key,
base_url=api_key_config.api_base,
is_omni=api_key_config.is_omni,
support_thinking="thinking" in (api_key_config.capability or []),
temperature=0.7,
max_tokens=2000,
extra_params={"streaming": True} # 启用流式输出

View File

@@ -185,7 +185,8 @@ class PromptOptimizerService:
provider=api_config.provider,
api_key=api_config.api_key,
base_url=api_config.api_base,
is_omni=api_config.is_omni
is_omni=api_config.is_omni,
support_thinking="thinking" in (api_config.capability or []),
), type=ModelType(model_config.type))
try:
prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt')

View File

@@ -248,7 +248,9 @@ class SharedChatService:
max_tokens=model_parameters.get("max_tokens", 2000),
system_prompt=system_prompt,
tools=tools,
deep_thinking=model_parameters.get("deep_thinking", False),
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
capability=api_key_obj.capability or [],
)
# 加载历史消息
@@ -450,7 +452,10 @@ class SharedChatService:
max_tokens=model_parameters.get("max_tokens", 2000),
system_prompt=system_prompt,
tools=tools,
streaming=True
streaming=True,
deep_thinking=model_parameters.get("deep_thinking", False),
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
capability=api_key_obj.capability or [],
)
# 加载历史消息
@@ -479,6 +484,8 @@ class SharedChatService:
):
if isinstance(chunk, int):
total_tokens = chunk
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
else:
full_content += chunk
# 发送消息块事件