feat(models): support reasoning_content streaming

This commit is contained in:
Timebomb2018
2026-04-01 15:47:43 +08:00
parent 9561578a2a
commit 264183cec2
28 changed files with 495 additions and 109 deletions

View File

@@ -37,7 +37,10 @@ class LangChainAgent:
tools: Optional[Sequence[BaseTool]] = None,
streaming: bool = False,
max_iterations: Optional[int] = None, # 最大迭代次数None 表示自动计算)
max_tool_consecutive_calls: int = 3 # 单个工具最大连续调用次数
max_tool_consecutive_calls: int = 3, # 单个工具最大连续调用次数
deep_thinking: bool = False, # 是否启用深度思考模式
thinking_budget_tokens: Optional[int] = None, # 深度思考 token 预算
capability: Optional[List[str]] = None # 模型能力列表,用于校验是否支持深度思考
):
"""初始化 LangChain Agent
@@ -60,6 +63,7 @@ class LangChainAgent:
self.streaming = streaming
self.is_omni = is_omni
self.max_tool_consecutive_calls = max_tool_consecutive_calls
self.deep_thinking = deep_thinking and ("thinking" in (capability or []))
# 工具调用计数器:记录每个工具的连续调用次数
self.tool_call_counter: Dict[str, int] = {}
@@ -82,6 +86,13 @@ class LangChainAgent:
f"auto_calculated={max_iterations is None}"
)
# 根据 capability 校验是否真正支持深度思考
actual_deep_thinking = self.deep_thinking
if deep_thinking and not actual_deep_thinking:
logger.warning(
f"模型 {model_name} 不支持深度思考capability 中无 'thinking'),已自动关闭 deep_thinking"
)
# 创建 RedBearLLM支持多提供商
model_config = RedBearModelConfig(
model_name=model_name,
@@ -89,10 +100,13 @@ class LangChainAgent:
api_key=api_key,
base_url=api_base,
is_omni=is_omni,
deep_thinking=actual_deep_thinking,
thinking_budget_tokens=thinking_budget_tokens if actual_deep_thinking else None,
support_thinking="thinking" in (capability or []),
extra_params={
"temperature": temperature,
"max_tokens": max_tokens,
"streaming": streaming # 使用参数控制流式
"streaming": streaming
}
)
@@ -310,6 +324,17 @@ class LangChainAgent:
return content_parts
@staticmethod
def _extract_reasoning_content(msg) -> str:
"""从 AIMessage 中提取深度思考内容reasoning_content
所有 provider 统一通过 additional_kwargs.reasoning_content 传递:
- DeepSeek-R1 / QwQ: 原生字段
- Volcano (Doubao-thinking): 由 VolcanoChatOpenAI 从 delta.reasoning_content 注入
"""
additional = getattr(msg, "additional_kwargs", None) or {}
return additional.get("reasoning_content") or additional.get("reasoning", "")
async def chat(
self,
message: str,
@@ -375,6 +400,7 @@ class LangChainAgent:
logger.debug(f"输出消息数量: {len(output_messages)}")
total_tokens = 0
reasoning_content = ""
for msg in reversed(output_messages):
if isinstance(msg, AIMessage):
logger.debug(f"找到 AI 消息content 类型: {type(msg.content)}")
@@ -410,6 +436,7 @@ class LangChainAgent:
content = str(msg.content)
logger.debug(f"转换为字符串: {content[:100]}...")
total_tokens = self._extract_tokens_from_message(msg)
reasoning_content = self._extract_reasoning_content(msg) if self.deep_thinking else ""
break
logger.info(f"最终提取的内容长度: {len(content)}")
@@ -425,6 +452,8 @@ class LangChainAgent:
"total_tokens": total_tokens
}
}
if reasoning_content:
response["reasoning_content"] = reasoning_content
logger.debug(
"Agent 调用完成",
@@ -457,6 +486,8 @@ class LangChainAgent:
Yields:
str: 消息内容块
int: token 统计
Dict: 深度思考内容 {"type": "reasoning", "content": "..."}
"""
logger.info("=" * 80)
logger.info(" chat_stream 方法开始执行")
@@ -477,6 +508,7 @@ class LangChainAgent:
# 统一使用 agent 的 astream_events 实现流式输出
logger.debug("使用 Agent astream_events 实现流式输出")
full_content = ''
full_reasoning = ''
try:
last_event = {}
async for event in self.agent.astream_events(
@@ -493,6 +525,13 @@ class LangChainAgent:
# LLM 流式输出
chunk = event.get("data", {}).get("chunk")
if chunk and hasattr(chunk, "content"):
# 提取深度思考内容(仅在启用深度思考时)
if self.deep_thinking:
reasoning_chunk = self._extract_reasoning_content(chunk)
if reasoning_chunk:
full_reasoning += reasoning_chunk
yield {"type": "reasoning", "content": reasoning_chunk}
# 处理多模态响应content 可能是字符串或列表
chunk_content = chunk.content
if isinstance(chunk_content, str) and chunk_content:
@@ -523,6 +562,13 @@ class LangChainAgent:
chunk = event.get("data", {}).get("chunk")
if chunk:
if hasattr(chunk, "content"):
# 提取深度思考内容(仅在启用深度思考时)
if self.deep_thinking:
reasoning_chunk = self._extract_reasoning_content(chunk)
if reasoning_chunk:
full_reasoning += reasoning_chunk
yield {"type": "reasoning", "content": reasoning_chunk}
chunk_content = chunk.content
if isinstance(chunk_content, str) and chunk_content:
full_content += chunk_content