feat(models): support reasoning_content streaming
This commit is contained in:
@@ -117,7 +117,9 @@ class AppChatService:
|
||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
|
||||
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||
capability=api_key_obj.capability or [],
|
||||
)
|
||||
|
||||
model_info = ModelInfo(
|
||||
@@ -205,7 +207,8 @@ class AppChatService:
|
||||
"model": api_key_obj.model_name,
|
||||
"usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
|
||||
"audio_url": None,
|
||||
"citations": filtered_citations
|
||||
"citations": filtered_citations,
|
||||
"reasoning_content": result.get("reasoning_content")
|
||||
}
|
||||
if files:
|
||||
for f in files:
|
||||
@@ -258,6 +261,7 @@ class AppChatService:
|
||||
"conversation_id": conversation_id,
|
||||
"message_id": str(message_id),
|
||||
"message": result["content"],
|
||||
"reasoning_content": result.get("reasoning_content"),
|
||||
"usage": result.get("usage", {
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
@@ -354,7 +358,10 @@ class AppChatService:
|
||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
streaming=True
|
||||
streaming=True,
|
||||
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||
capability=api_key_obj.capability or [],
|
||||
)
|
||||
|
||||
model_info = ModelInfo(
|
||||
@@ -403,6 +410,7 @@ class AppChatService:
|
||||
|
||||
# 流式调用 Agent(支持多模态),同时并行启动 TTS
|
||||
full_content = ""
|
||||
full_reasoning = ""
|
||||
total_tokens = 0
|
||||
|
||||
text_queue: asyncio.Queue = asyncio.Queue()
|
||||
@@ -426,6 +434,9 @@ class AppChatService:
|
||||
):
|
||||
if isinstance(chunk, int):
|
||||
total_tokens = chunk
|
||||
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
|
||||
full_reasoning += chunk['content']
|
||||
yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
|
||||
else:
|
||||
full_content += chunk
|
||||
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
|
||||
@@ -472,7 +483,8 @@ class AppChatService:
|
||||
"model": api_key_obj.model_name,
|
||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
|
||||
"audio_url": None,
|
||||
"citations": filtered_citations
|
||||
"citations": filtered_citations,
|
||||
"reasoning_content": full_reasoning or None
|
||||
}
|
||||
|
||||
if files:
|
||||
|
||||
@@ -534,6 +534,7 @@ class ConversationService:
|
||||
api_key = api_config.api_key
|
||||
api_base = api_config.api_base
|
||||
is_omni = api_config.is_omni
|
||||
capability = api_config.capability
|
||||
model_type = config.type
|
||||
|
||||
llm = RedBearLLM(
|
||||
@@ -542,7 +543,8 @@ class ConversationService:
|
||||
provider=provider,
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
is_omni=is_omni
|
||||
is_omni=is_omni,
|
||||
support_thinking="thinking" in (capability or []),
|
||||
),
|
||||
type=ModelType(model_type)
|
||||
)
|
||||
|
||||
@@ -458,7 +458,7 @@ class AgentRunService:
|
||||
|
||||
statement = opening["statement"]
|
||||
suggested_questions = opening["suggested_questions"]
|
||||
|
||||
|
||||
# 如果有变量,进行替换(仅支持 {{var_name}} 格式)
|
||||
if variables:
|
||||
for var_name, var_value in variables.items():
|
||||
@@ -595,6 +595,9 @@ class AgentRunService:
|
||||
max_tokens=effective_params.get("max_tokens", 2000),
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
deep_thinking=effective_params.get("deep_thinking", False),
|
||||
thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
|
||||
capability=api_key_config.get("capability", []),
|
||||
)
|
||||
|
||||
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
||||
@@ -689,7 +692,8 @@ class AgentRunService:
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"total_tokens": 0
|
||||
})
|
||||
}),
|
||||
"reasoning_content": result.get("reasoning_content")
|
||||
},
|
||||
files=files,
|
||||
processed_files=processed_files,
|
||||
@@ -701,6 +705,7 @@ class AgentRunService:
|
||||
|
||||
response = {
|
||||
"message": result["content"],
|
||||
"reasoning_content": result.get("reasoning_content"),
|
||||
"conversation_id": conversation_id,
|
||||
"usage": result.get("usage", {
|
||||
"prompt_tokens": 0,
|
||||
@@ -838,7 +843,10 @@ class AgentRunService:
|
||||
max_tokens=effective_params.get("max_tokens", 2000),
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
streaming=True
|
||||
streaming=True,
|
||||
deep_thinking=effective_params.get("deep_thinking", False),
|
||||
thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
|
||||
capability=api_key_config.get("capability", []),
|
||||
)
|
||||
|
||||
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
||||
@@ -898,6 +906,7 @@ class AgentRunService:
|
||||
|
||||
# 9. 流式调用 Agent(支持多模态),同时并行启动 TTS
|
||||
full_content = ""
|
||||
full_reasoning = ""
|
||||
total_tokens = 0
|
||||
|
||||
# 启动流式 TTS(文本边输出边合成)
|
||||
@@ -916,6 +925,9 @@ class AgentRunService:
|
||||
):
|
||||
if isinstance(chunk, int):
|
||||
total_tokens = chunk
|
||||
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
|
||||
full_reasoning += chunk["content"]
|
||||
yield self._format_sse_event("reasoning", {"content": chunk["content"]})
|
||||
else:
|
||||
full_content += chunk
|
||||
yield self._format_sse_event("message", {"content": chunk})
|
||||
@@ -944,7 +956,8 @@ class AgentRunService:
|
||||
app_id=agent_config.app_id,
|
||||
user_id=user_id,
|
||||
meta_data={
|
||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
|
||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
|
||||
"reasoning_content": full_reasoning or None
|
||||
},
|
||||
files=files,
|
||||
processed_files=processed_files,
|
||||
@@ -1665,7 +1678,7 @@ class AgentRunService:
|
||||
"""从 text_queue 取文本按句子切分后喂给 synthesizer"""
|
||||
import re
|
||||
buf = ""
|
||||
sentence_end = re.compile(r'[\u3002\uff01\uff1f\.!?\n]')
|
||||
sentence_end = re.compile(r'[\u3002\uff01\uff1f.!?\n]')
|
||||
while True:
|
||||
chunk = await text_queue.get()
|
||||
if chunk is None:
|
||||
@@ -1894,6 +1907,7 @@ class AgentRunService:
|
||||
"conversation_id": result['conversation_id'],
|
||||
"parameters_used": model_info["parameters"],
|
||||
"message": result.get("message"),
|
||||
"reasoning_content": result.get("reasoning_content"),
|
||||
"usage": usage,
|
||||
"elapsed_time": elapsed,
|
||||
"tokens_per_second": (
|
||||
@@ -2012,7 +2026,7 @@ class AgentRunService:
|
||||
# 需要从 ModelApiKey 获取实际的模型名称,或者在 ModelConfig 中添加 model 字段
|
||||
return None
|
||||
|
||||
def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> AgentConfig:
|
||||
def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> tuple[AgentConfig, Any]:
|
||||
"""创建一个带有覆盖参数的 agent_config(浅拷贝,只修改 model_parameters)
|
||||
|
||||
Args:
|
||||
@@ -2110,6 +2124,7 @@ class AgentRunService:
|
||||
|
||||
start_time = time.time()
|
||||
full_content = ""
|
||||
full_reasoning = ""
|
||||
returned_conversation_id = model_conversation_id
|
||||
audio_url = None
|
||||
audio_status = None
|
||||
@@ -2168,6 +2183,18 @@ class AgentRunService:
|
||||
"content": chunk
|
||||
}))
|
||||
|
||||
# 转发深度思考事件(带模型标识)
|
||||
if event_type == "reasoning" and event_data:
|
||||
reasoning_chunk = event_data.get("content", "")
|
||||
full_reasoning += reasoning_chunk
|
||||
await event_queue.put(self._format_sse_event("model_reasoning", {
|
||||
"model_index": idx,
|
||||
"model_config_id": model_config_id,
|
||||
"label": model_label,
|
||||
"conversation_id": returned_conversation_id,
|
||||
"content": event_data.get("content", "")
|
||||
}))
|
||||
|
||||
# 从 end 事件中提取 features 输出字段
|
||||
if event_type == "end" and event_data:
|
||||
audio_url = event_data.get("audio_url")
|
||||
@@ -2199,6 +2226,7 @@ class AgentRunService:
|
||||
"conversation_id": returned_conversation_id,
|
||||
"parameters_used": model_info["parameters"],
|
||||
"message": full_content,
|
||||
"reasoning_content": full_reasoning or None,
|
||||
"elapsed_time": elapsed,
|
||||
"audio_url": audio_url,
|
||||
"audio_status": audio_status,
|
||||
@@ -2351,6 +2379,7 @@ class AgentRunService:
|
||||
"label": r["label"],
|
||||
"conversation_id": r.get("conversation_id"),
|
||||
"message": r.get("message"),
|
||||
"reasoning_content": r.get("reasoning_content"),
|
||||
"elapsed_time": r.get("elapsed_time", 0),
|
||||
"audio_url": r.get("audio_url"),
|
||||
"audio_status": r.get("audio_status"),
|
||||
|
||||
@@ -415,6 +415,7 @@ class LLMRouter:
|
||||
api_key=api_key_config.api_key,
|
||||
base_url=api_key_config.api_base,
|
||||
is_omni=api_key_config.is_omni,
|
||||
support_thinking="thinking" in (api_key_config.capability or []),
|
||||
temperature=0.3,
|
||||
max_tokens=500
|
||||
)
|
||||
|
||||
@@ -393,6 +393,7 @@ class MasterAgentRouter:
|
||||
api_key=api_key_config.api_key,
|
||||
base_url=api_key_config.api_base,
|
||||
is_omni=api_key_config.is_omni,
|
||||
support_thinking="thinking" in (api_key_config.capability or []),
|
||||
extra_params = extra_params
|
||||
)
|
||||
|
||||
|
||||
@@ -232,7 +232,8 @@ class MemoryPerceptualService:
|
||||
provider=model_config.provider,
|
||||
api_key=model_config.api_key,
|
||||
base_url=model_config.api_base,
|
||||
is_omni=model_config.is_omni
|
||||
is_omni=model_config.is_omni,
|
||||
support_thinking="thinking" in (model_config.capability or []),
|
||||
)
|
||||
)
|
||||
return llm, model_config
|
||||
|
||||
@@ -45,12 +45,20 @@ class ModelParameterMerger:
|
||||
"frequency_penalty": 0.0,
|
||||
"presence_penalty": 0.0,
|
||||
"n": 1,
|
||||
"stop": None
|
||||
"stop": None,
|
||||
"deep_thinking": False,
|
||||
"thinking_budget_tokens": None
|
||||
}
|
||||
|
||||
# 合并参数:默认值 -> 模型配置 -> Agent 配置
|
||||
merged = default_params.copy()
|
||||
|
||||
# Pydantic 对象转为 dict
|
||||
if model_config_params and hasattr(model_config_params, 'model_dump'):
|
||||
model_config_params = model_config_params.model_dump()
|
||||
if agent_config_params and hasattr(agent_config_params, 'model_dump'):
|
||||
agent_config_params = agent_config_params.model_dump()
|
||||
|
||||
# 应用模型配置参数
|
||||
if model_config_params:
|
||||
for key in default_params:
|
||||
|
||||
@@ -85,15 +85,16 @@ class ModelConfigService:
|
||||
|
||||
@staticmethod
|
||||
async def validate_model_config(
|
||||
db: Session,
|
||||
*,
|
||||
model_name: str,
|
||||
provider: str,
|
||||
api_key: str,
|
||||
api_base: Optional[str] = None,
|
||||
model_type: str = "llm",
|
||||
test_message: str = "Hello",
|
||||
is_omni: bool = False
|
||||
db: Session,
|
||||
*,
|
||||
model_name: str,
|
||||
provider: str,
|
||||
api_key: str,
|
||||
api_base: Optional[str] = None,
|
||||
model_type: str = "llm",
|
||||
test_message: str = "Hello",
|
||||
is_omni: bool = False,
|
||||
capability: Optional[list] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""验证模型配置是否有效
|
||||
|
||||
@@ -124,6 +125,7 @@ class ModelConfigService:
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
is_omni=is_omni,
|
||||
support_thinking="thinking" in (capability or []),
|
||||
temperature=0.7,
|
||||
max_tokens=100
|
||||
)
|
||||
@@ -320,7 +322,8 @@ class ModelConfigService:
|
||||
api_base=api_key_data.api_base,
|
||||
model_type=model_data.type,
|
||||
test_message="Hello",
|
||||
is_omni=model_data.is_omni
|
||||
is_omni=model_data.is_omni,
|
||||
capability=model_data.capability
|
||||
)
|
||||
if not validation_result["valid"]:
|
||||
raise BusinessException(
|
||||
@@ -590,7 +593,8 @@ class ModelApiKeyService:
|
||||
api_base=data.api_base,
|
||||
model_type=model_config.type,
|
||||
test_message="Hello",
|
||||
is_omni=data.is_omni
|
||||
is_omni=data.is_omni,
|
||||
capability=model_config.capability
|
||||
)
|
||||
if not validation_result["valid"]:
|
||||
# 记录验证失败的模型,但不抛出异常
|
||||
@@ -675,7 +679,8 @@ class ModelApiKeyService:
|
||||
api_base=api_key_data.api_base,
|
||||
model_type=model_config.type,
|
||||
test_message="Hello",
|
||||
is_omni=api_key_data.is_omni
|
||||
is_omni=api_key_data.is_omni,
|
||||
capability=model_config.capability
|
||||
)
|
||||
if not validation_result["valid"]:
|
||||
raise BusinessException(
|
||||
@@ -707,7 +712,8 @@ class ModelApiKeyService:
|
||||
api_base=api_key_data.api_base or existing_api_key.api_base,
|
||||
model_type=model_config.type,
|
||||
test_message="Hello",
|
||||
is_omni=model_config.is_omni
|
||||
is_omni=model_config.is_omni,
|
||||
capability=model_config.capability
|
||||
)
|
||||
if not validation_result["valid"]:
|
||||
raise BusinessException(
|
||||
|
||||
@@ -2616,6 +2616,7 @@ class MultiAgentOrchestrator:
|
||||
api_key=api_key_config.api_key,
|
||||
base_url=api_key_config.api_base,
|
||||
is_omni=api_key_config.is_omni,
|
||||
support_thinking="thinking" in (api_key_config.capability or []),
|
||||
temperature=0.7, # 整合任务使用中等温度
|
||||
max_tokens=2000
|
||||
)
|
||||
@@ -2794,6 +2795,7 @@ class MultiAgentOrchestrator:
|
||||
api_key=api_key_config.api_key,
|
||||
base_url=api_key_config.api_base,
|
||||
is_omni=api_key_config.is_omni,
|
||||
support_thinking="thinking" in (api_key_config.capability or []),
|
||||
temperature=0.7,
|
||||
max_tokens=2000,
|
||||
extra_params={"streaming": True} # 启用流式输出
|
||||
|
||||
@@ -185,7 +185,8 @@ class PromptOptimizerService:
|
||||
provider=api_config.provider,
|
||||
api_key=api_config.api_key,
|
||||
base_url=api_config.api_base,
|
||||
is_omni=api_config.is_omni
|
||||
is_omni=api_config.is_omni,
|
||||
support_thinking="thinking" in (api_config.capability or []),
|
||||
), type=ModelType(model_config.type))
|
||||
try:
|
||||
prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt')
|
||||
|
||||
@@ -248,7 +248,9 @@ class SharedChatService:
|
||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
|
||||
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||
capability=api_key_obj.capability or [],
|
||||
)
|
||||
|
||||
# 加载历史消息
|
||||
@@ -450,7 +452,10 @@ class SharedChatService:
|
||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
streaming=True
|
||||
streaming=True,
|
||||
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||
capability=api_key_obj.capability or [],
|
||||
)
|
||||
|
||||
# 加载历史消息
|
||||
@@ -479,6 +484,8 @@ class SharedChatService:
|
||||
):
|
||||
if isinstance(chunk, int):
|
||||
total_tokens = chunk
|
||||
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
|
||||
yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
|
||||
else:
|
||||
full_content += chunk
|
||||
# 发送消息块事件
|
||||
|
||||
Reference in New Issue
Block a user