diff --git a/api/app/controllers/ontology_controller.py b/api/app/controllers/ontology_controller.py index 3d2a1bdb..fe6b3598 100644 --- a/api/app/controllers/ontology_controller.py +++ b/api/app/controllers/ontology_controller.py @@ -163,6 +163,7 @@ def _get_ontology_service( api_key=api_key_config.api_key, base_url=api_key_config.api_base, is_omni=api_key_config.is_omni, + support_thinking="thinking" in (api_key_config.capability or []), max_retries=3, timeout=60.0 ) diff --git a/api/app/controllers/public_share_controller.py b/api/app/controllers/public_share_controller.py index c10ad14b..ddd31071 100644 --- a/api/app/controllers/public_share_controller.py +++ b/api/app/controllers/public_share_controller.py @@ -453,6 +453,9 @@ async def chat( # 流式返回 agent_config = agent_config_4_app_release(release) + if not (agent_config.model_parameters.get("deep_thinking", False) and payload.thinking): + agent_config.model_parameters["deep_thinking"] = False + if payload.stream: async def event_generator(): async for event in app_chat_service.agnet_chat_stream( @@ -634,7 +637,8 @@ async def config_query( "app_type": release.app.type, "variables": release.config.get("variables"), "memory": release.config.get("memory", {}).get("enabled"), - "features": release.config.get("features") + "features": release.config.get("features"), + "model_parameters": release.config.get("model_parameters") } elif release.app.type == AppType.MULTI_AGENT: content = { diff --git a/api/app/controllers/service/app_api_controller.py b/api/app/controllers/service/app_api_controller.py index d4573464..93caa200 100644 --- a/api/app/controllers/service/app_api_controller.py +++ b/api/app/controllers/service/app_api_controller.py @@ -144,6 +144,11 @@ async def chat( # print(app.current_release.default_model_config_id) agent_config = agent_config_4_app_release(app.current_release) # print(agent_config.default_model_config_id) + + # thinking 开关:仅当 agent 配置了 deep_thinking 且请求 thinking=True 时才启用 + if not (agent_config.model_parameters.get("deep_thinking", False) and payload.thinking): + agent_config.model_parameters["deep_thinking"] = False + # 流式返回 if payload.stream: async def event_generator(): diff --git a/api/app/controllers/service/end_user_api_controller.py b/api/app/controllers/service/end_user_api_controller.py index 9d410bd2..df9996c2 100644 --- a/api/app/controllers/service/end_user_api_controller.py +++ b/api/app/controllers/service/end_user_api_controller.py @@ -42,7 +42,7 @@ async def create_end_user( payload = CreateEndUserRequest(**body) workspace_id = api_key_auth.workspace_id - logger.info(f"Create end user request - other_id: {payload.other_id}, workspace_id: {workspace_id}") + logger.info("Create end user request - other_id: %s, workspace_id: %s", payload.other_id, workspace_id) # Resolve memory_config_id: explicit > workspace default memory_config_id = None diff --git a/api/app/core/agent/langchain_agent.py b/api/app/core/agent/langchain_agent.py index 38821313..044e7cc9 100644 --- a/api/app/core/agent/langchain_agent.py +++ b/api/app/core/agent/langchain_agent.py @@ -37,7 +37,10 @@ class LangChainAgent: tools: Optional[Sequence[BaseTool]] = None, streaming: bool = False, max_iterations: Optional[int] = None, # 最大迭代次数(None 表示自动计算) - max_tool_consecutive_calls: int = 3 # 单个工具最大连续调用次数 + max_tool_consecutive_calls: int = 3, # 单个工具最大连续调用次数 + deep_thinking: bool = False, # 是否启用深度思考模式 + thinking_budget_tokens: Optional[int] = None, # 深度思考 token 预算 + capability: Optional[List[str]] = None # 模型能力列表,用于校验是否支持深度思考 ): """初始化 LangChain Agent @@ -60,6 +63,7 @@ class LangChainAgent: self.streaming = streaming self.is_omni = is_omni self.max_tool_consecutive_calls = max_tool_consecutive_calls + self.deep_thinking = deep_thinking and ("thinking" in (capability or [])) # 工具调用计数器:记录每个工具的连续调用次数 self.tool_call_counter: Dict[str, int] = {} @@ -82,6 +86,13 @@ class LangChainAgent: f"auto_calculated={max_iterations is None}" ) + # 根据 capability 校验是否真正支持深度思考 + actual_deep_thinking = self.deep_thinking + if deep_thinking and not actual_deep_thinking: + logger.warning( + f"模型 {model_name} 不支持深度思考(capability 中无 'thinking'),已自动关闭 deep_thinking" + ) + # 创建 RedBearLLM(支持多提供商) model_config = RedBearModelConfig( model_name=model_name, @@ -89,10 +100,13 @@ class LangChainAgent: api_key=api_key, base_url=api_base, is_omni=is_omni, + deep_thinking=actual_deep_thinking, + thinking_budget_tokens=thinking_budget_tokens if actual_deep_thinking else None, + support_thinking="thinking" in (capability or []), extra_params={ "temperature": temperature, "max_tokens": max_tokens, - "streaming": streaming # 使用参数控制流式 + "streaming": streaming } ) @@ -249,6 +263,33 @@ class LangChainAgent: return messages + @staticmethod + def _extract_tokens_from_message(msg) -> int: + """从 AIMessage 或类似对象中提取 total_tokens,兼容多种 provider 格式 + + 支持的格式: + - response_metadata.token_usage.total_tokens (OpenAI/ChatOpenAI) + - response_metadata.usage.total_tokens (部分 provider) + - usage_metadata.total_tokens (LangChain 新版) + """ + total = 0 + # 1. response_metadata + response_meta = getattr(msg, "response_metadata", None) + if response_meta and isinstance(response_meta, dict): + # 尝试 token_usage 路径 + token_usage = response_meta.get("token_usage") or response_meta.get("usage", {}) + if isinstance(token_usage, dict): + total = token_usage.get("total_tokens", 0) + # 2. usage_metadata(LangChain 新版 AIMessage 属性) + if not total: + usage_meta = getattr(msg, "usage_metadata", None) + if usage_meta: + if isinstance(usage_meta, dict): + total = usage_meta.get("total_tokens", 0) + else: + total = getattr(usage_meta, "total_tokens", 0) + return total or 0 + def _build_multimodal_content(self, text: str, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ 构建多模态消息内容 @@ -283,6 +324,17 @@ class LangChainAgent: return content_parts + @staticmethod + def _extract_reasoning_content(msg) -> str: + """从 AIMessage 中提取深度思考内容(reasoning_content) + + 所有 provider 统一通过 additional_kwargs.reasoning_content 传递: + - DeepSeek-R1 / QwQ: 原生字段 + - Volcano (Doubao-thinking): 由 VolcanoChatOpenAI 从 delta.reasoning_content 注入 + """ + additional = getattr(msg, "additional_kwargs", None) or {} + return additional.get("reasoning_content") or additional.get("reasoning", "") + async def chat( self, message: str, @@ -348,6 +400,7 @@ class LangChainAgent: logger.debug(f"输出消息数量: {len(output_messages)}") total_tokens = 0 + reasoning_content = "" for msg in reversed(output_messages): if isinstance(msg, AIMessage): logger.debug(f"找到 AI 消息,content 类型: {type(msg.content)}") @@ -382,8 +435,8 @@ class LangChainAgent: else: content = str(msg.content) logger.debug(f"转换为字符串: {content[:100]}...") - response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None - total_tokens = response_meta.get("token_usage", {}).get("total_tokens", 0) if response_meta else 0 + total_tokens = self._extract_tokens_from_message(msg) + reasoning_content = self._extract_reasoning_content(msg) if self.deep_thinking else "" break logger.info(f"最终提取的内容长度: {len(content)}") @@ -399,6 +452,8 @@ class LangChainAgent: "total_tokens": total_tokens } } + if reasoning_content: + response["reasoning_content"] = reasoning_content logger.debug( "Agent 调用完成", @@ -420,7 +475,7 @@ class LangChainAgent: history: Optional[List[Dict[str, str]]] = None, context: Optional[str] = None, files: Optional[List[Dict[str, Any]]] = None - ) -> AsyncGenerator[str, None]: + ) -> AsyncGenerator[str | int, None]: """执行流式对话 Args: @@ -431,6 +486,8 @@ class LangChainAgent: Yields: str: 消息内容块 + int: token 统计 + Dict: 深度思考内容 {"type": "reasoning", "content": "..."} """ logger.info("=" * 80) logger.info(" chat_stream 方法开始执行") @@ -451,6 +508,7 @@ class LangChainAgent: # 统一使用 agent 的 astream_events 实现流式输出 logger.debug("使用 Agent astream_events 实现流式输出") full_content = '' + full_reasoning = '' try: last_event = {} async for event in self.agent.astream_events( @@ -467,6 +525,13 @@ class LangChainAgent: # LLM 流式输出 chunk = event.get("data", {}).get("chunk") if chunk and hasattr(chunk, "content"): + # 提取深度思考内容(仅在启用深度思考时) + if self.deep_thinking: + reasoning_chunk = self._extract_reasoning_content(chunk) + if reasoning_chunk: + full_reasoning += reasoning_chunk + yield {"type": "reasoning", "content": reasoning_chunk} + # 处理多模态响应:content 可能是字符串或列表 chunk_content = chunk.content if isinstance(chunk_content, str) and chunk_content: @@ -497,6 +562,13 @@ class LangChainAgent: chunk = event.get("data", {}).get("chunk") if chunk: if hasattr(chunk, "content"): + # 提取深度思考内容(仅在启用深度思考时) + if self.deep_thinking: + reasoning_chunk = self._extract_reasoning_content(chunk) + if reasoning_chunk: + full_reasoning += reasoning_chunk + yield {"type": "reasoning", "content": reasoning_chunk} + chunk_content = chunk.content if isinstance(chunk_content, str) and chunk_content: full_content += chunk_content @@ -535,12 +607,9 @@ class LangChainAgent: output_messages = last_event.get("data", {}).get("output", {}).get("messages", []) for msg in reversed(output_messages): if isinstance(msg, AIMessage): - response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None - total_tokens = response_meta.get("token_usage", {}).get( - "total_tokens", - 0 - ) if response_meta else 0 - yield total_tokens + stream_total_tokens = self._extract_tokens_from_message(msg) + logger.info(f"流式 token 统计: total_tokens={stream_total_tokens}") + yield stream_total_tokens break except Exception as e: diff --git a/api/app/core/memory/src/search.py b/api/app/core/memory/src/search.py index e4f0d4d0..a3c40dcd 100644 --- a/api/app/core/memory/src/search.py +++ b/api/app/core/memory/src/search.py @@ -758,8 +758,7 @@ async def run_hybrid_search( model_name=embedder_config_dict["model_name"], provider=embedder_config_dict["provider"], api_key=embedder_config_dict["api_key"], - base_url=embedder_config_dict["base_url"], - type="llm" + base_url=embedder_config_dict["base_url"] ) config_load_time = time.time() - config_load_start logger.info(f"[PERF] Config loading took {config_load_time:.4f}s") diff --git a/api/app/core/models/base.py b/api/app/core/models/base.py index 80117f27..c7d8cfed 100644 --- a/api/app/core/models/base.py +++ b/api/app/core/models/base.py @@ -14,6 +14,7 @@ from pydantic import BaseModel, Field from app.core.error_codes import BizCode from app.core.exceptions import BusinessException from app.models.models_model import ModelProvider, ModelType +from app.core.models.volcano_chat import VolcanoChatOpenAI T = TypeVar("T") @@ -25,6 +26,9 @@ class RedBearModelConfig(BaseModel): api_key: str base_url: Optional[str] = None is_omni: bool = False # 是否为 Omni 模型 + deep_thinking: bool = False # 是否启用深度思考模式 + thinking_budget_tokens: Optional[int] = None # 深度思考 token 预算 + support_thinking: bool = False # 模型是否支持 enable_thinking 参数(capability 含 thinking) # 请求超时时间(秒)- 默认120秒以支持复杂的LLM调用,可通过环境变量 LLM_TIMEOUT 配置 timeout: float = Field(default_factory=lambda: float(os.getenv("LLM_TIMEOUT", "120.0"))) # 最大重试次数 - 默认2次以避免过长等待,可通过环境变量 LLM_MAX_RETRIES 配置 @@ -44,7 +48,7 @@ class RedBearModelFactory: # 打印供应商信息用于调试 from app.core.logging_config import get_business_logger logger = get_business_logger() - logger.debug(f"获取模型参数 - Provider: {provider}, Model: {config.model_name}, is_omni: {config.is_omni}") + logger.debug(f"获取模型参数 - Provider: {provider}, Model: {config.model_name}, is_omni: {config.is_omni}, deep_thinking: {config.deep_thinking}") # dashscope 的 omni 模型使用 OpenAI 兼容模式 if provider == ModelProvider.DASHSCOPE and config.is_omni: @@ -58,7 +62,7 @@ class RedBearModelFactory: write=60.0, pool=10.0, ) - return { + params: Dict[str, Any] = { "model": config.model_name, "base_url": config.base_url, "api_key": config.api_key, @@ -66,6 +70,21 @@ class RedBearModelFactory: "max_retries": config.max_retries, **config.extra_params } + # 流式模式下启用 stream_usage 以获取 token 统计 + is_streaming = bool(config.extra_params.get("streaming")) + if is_streaming: + params["stream_usage"] = True + # 只有支持 thinking 的模型才传 enable_thinking + if config.support_thinking: + model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {}) + if is_streaming: + model_kwargs["enable_thinking"] = config.deep_thinking + if config.deep_thinking and config.thinking_budget_tokens: + model_kwargs["thinking_budget"] = config.thinking_budget_tokens + else: + model_kwargs["enable_thinking"] = False + params["model_kwargs"] = model_kwargs + return params if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.OLLAMA, ModelProvider.VOLCANO]: # 使用 httpx.Timeout 对象来设置详细的超时配置 @@ -78,7 +97,7 @@ class RedBearModelFactory: write=60.0, # 写入超时:60秒 pool=10.0, # 连接池超时:10秒 ) - return { + params: Dict[str, Any] = { "model": config.model_name, "base_url": config.base_url, "api_key": config.api_key, @@ -86,16 +105,47 @@ class RedBearModelFactory: "max_retries": config.max_retries, **config.extra_params } + # 流式模式下启用 stream_usage 以获取 token 统计 + if config.extra_params.get("streaming"): + params["stream_usage"] = True + # 深度思考模式 + is_streaming = bool(config.extra_params.get("streaming")) + if is_streaming: + if provider == ModelProvider.VOLCANO: + # 火山引擎深度思考仅流式调用支持,非流式时不传 thinking 参数 + thinking_config: Dict[str, Any] = { + "type": "enabled" if config.deep_thinking else "disabled" + } + if config.deep_thinking and config.thinking_budget_tokens: + thinking_config["budget_tokens"] = config.thinking_budget_tokens + params["extra_body"] = {"thinking": thinking_config} + else: + # 始终显式传递 enable_thinking,不支持该参数的模型(如 DeepSeek-R1)会直接忽略 + model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {}) + model_kwargs["enable_thinking"] = config.deep_thinking + if config.deep_thinking and config.thinking_budget_tokens: + model_kwargs["thinking_budget"] = config.thinking_budget_tokens + params["model_kwargs"] = model_kwargs + return params elif provider == ModelProvider.DASHSCOPE: - # DashScope (通义千问) 使用自己的参数格式 - # 注意: DashScopeEmbeddings 不支持 timeout 和 base_url 参数 - # 只支持: model, dashscope_api_key, max_retries, client - return { + params = { "model": config.model_name, "dashscope_api_key": config.api_key, "max_retries": config.max_retries, **config.extra_params } + # 只有支持 thinking 的模型才传 enable_thinking + if config.support_thinking: + is_streaming = bool(config.extra_params.get("streaming")) + model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {}) + if is_streaming: + model_kwargs["enable_thinking"] = config.deep_thinking + if config.deep_thinking and config.thinking_budget_tokens: + model_kwargs["thinking_budget"] = config.thinking_budget_tokens + else: + model_kwargs["enable_thinking"] = False + params["model_kwargs"] = model_kwargs + return params elif provider == ModelProvider.BEDROCK: # Bedrock 使用 AWS 凭证 # api_key 格式: "access_key_id:secret_access_key" 或只是 access_key_id @@ -134,6 +184,13 @@ class RedBearModelFactory: elif "region_name" not in params: params["region_name"] = "us-east-1" # 默认区域 + # 深度思考模式:Claude 3.7 Sonnet 等支持思考的模型 + # 通过 additional_model_request_fields 传递 thinking 块,关闭时不传(Bedrock 无 disabled 选项) + if config.deep_thinking: + budget = config.thinking_budget_tokens or 10000 + params["additional_model_request_fields"] = { + "thinking": {"type": "enabled", "budget_tokens": budget} + } return params else: raise BusinessException(f"不支持的提供商: {provider}", code=BizCode.PROVIDER_NOT_SUPPORTED) @@ -160,7 +217,9 @@ def get_provider_llm_class(config: RedBearModelConfig, type: ModelType = ModelTy # dashscope 的 omni 模型使用 OpenAI 兼容模式 if provider == ModelProvider.DASHSCOPE and config.is_omni: return ChatOpenAI - if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.VOLCANO]: + if provider == ModelProvider.VOLCANO: + return VolcanoChatOpenAI + if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK]: if type == ModelType.LLM: return OpenAI elif type == ModelType.CHAT: diff --git a/api/app/core/models/scripts/bedrock_models.yaml b/api/app/core/models/scripts/bedrock_models.yaml index 2c0ab757..5b3a2f64 100644 --- a/api/app/core/models/scripts/bedrock_models.yaml +++ b/api/app/core/models/scripts/bedrock_models.yaml @@ -11,6 +11,7 @@ models: tags: - 大语言模型 logo: bedrock + - name: amazon nova type: llm provider: bedrock @@ -27,6 +28,7 @@ models: - stream-tool-call - vision logo: bedrock + - name: anthropic claude type: llm provider: bedrock @@ -35,6 +37,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -44,6 +47,7 @@ models: - stream-tool-call - document logo: bedrock + - name: cohere type: llm provider: bedrock @@ -58,6 +62,7 @@ models: - tool-call - stream-tool-call logo: bedrock + - name: deepseek type: llm provider: bedrock @@ -66,6 +71,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -74,6 +80,7 @@ models: - tool-call - stream-tool-call logo: bedrock + - name: meta type: llm provider: bedrock @@ -87,6 +94,7 @@ models: - agent-thought - tool-call logo: bedrock + - name: mistral type: llm provider: bedrock @@ -100,6 +108,7 @@ models: - agent-thought - tool-call logo: bedrock + - name: openai type: llm provider: bedrock @@ -114,6 +123,7 @@ models: - tool-call - stream-tool-call logo: bedrock + - name: qwen type: llm provider: bedrock @@ -128,6 +138,7 @@ models: - tool-call - stream-tool-call logo: bedrock + - name: amazon.rerank-v1:0 type: rerank provider: bedrock @@ -139,6 +150,7 @@ models: tags: - 重排序模型 logo: bedrock + - name: cohere.rerank-v3-5:0 type: rerank provider: bedrock @@ -150,6 +162,7 @@ models: tags: - 重排序模型 logo: bedrock + - name: amazon.nova-2-multimodal-embeddings-v1:0 type: embedding provider: bedrock @@ -163,6 +176,7 @@ models: - 文本嵌入模型 - vision logo: bedrock + - name: amazon.titan-embed-text-v1 type: embedding provider: bedrock @@ -174,6 +188,7 @@ models: tags: - 文本嵌入模型 logo: bedrock + - name: amazon.titan-embed-text-v2:0 type: embedding provider: bedrock @@ -185,6 +200,7 @@ models: tags: - 文本嵌入模型 logo: bedrock + - name: cohere.embed-english-v3 type: embedding provider: bedrock @@ -196,6 +212,7 @@ models: tags: - 文本嵌入模型 logo: bedrock + - name: cohere.embed-multilingual-v3 type: embedding provider: bedrock diff --git a/api/app/core/models/scripts/dashscope_models.yaml b/api/app/core/models/scripts/dashscope_models.yaml index 89a16966..d1b604e0 100644 --- a/api/app/core/models/scripts/dashscope_models.yaml +++ b/api/app/core/models/scripts/dashscope_models.yaml @@ -6,36 +6,42 @@ models: description: DeepSeek-R1-Distill-Qwen-14B大语言模型,支持智能体思考,32000上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - agent-thought logo: dashscope + - name: deepseek-r1-distill-qwen-32b type: llm provider: dashscope description: DeepSeek-R1-Distill-Qwen-32B大语言模型,支持智能体思考,32000上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - agent-thought logo: dashscope + - name: deepseek-r1 type: llm provider: dashscope description: DeepSeek-R1大语言模型,支持智能体思考,131072超大上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - agent-thought logo: dashscope + - name: deepseek-v3.1 type: llm provider: dashscope @@ -48,6 +54,7 @@ models: - 大语言模型 - agent-thought logo: dashscope + - name: deepseek-v3.2-exp type: llm provider: dashscope @@ -60,6 +67,7 @@ models: - 大语言模型 - agent-thought logo: dashscope + - name: deepseek-v3.2 type: llm provider: dashscope @@ -72,6 +80,7 @@ models: - 大语言模型 - agent-thought logo: dashscope + - name: deepseek-v3 type: llm provider: dashscope @@ -84,6 +93,7 @@ models: - 大语言模型 - agent-thought logo: dashscope + - name: farui-plus type: llm provider: dashscope @@ -98,6 +108,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: glm-4.7 type: llm provider: dashscope @@ -112,6 +123,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qvq-max-latest type: llm provider: dashscope @@ -119,7 +131,8 @@ models: is_deprecated: false is_official: true capability: - - vision + - vision + - thinking is_omni: false tags: - 大语言模型 @@ -127,6 +140,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qvq-max type: llm provider: dashscope @@ -134,7 +148,8 @@ models: is_deprecated: false is_official: true capability: - - vision + - vision + - thinking is_omni: false tags: - 大语言模型 @@ -142,6 +157,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-coder-turbo-0919 type: llm provider: dashscope @@ -155,13 +171,15 @@ models: - 代码模型 - agent-thought logo: dashscope + - name: qwen-max-latest type: llm provider: dashscope description: qwen-max-latest大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式,支持联网搜索 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -169,6 +187,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-max-longcontext type: llm provider: dashscope @@ -183,13 +202,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-max type: llm provider: dashscope description: qwen-max大语言模型,支持多工具调用、智能体思考、流式工具调用,32768上下文窗口,对话模式,支持联网搜索 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -197,6 +218,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-mt-plus type: llm provider: dashscope @@ -210,6 +232,7 @@ models: - 翻译模型 - agent-thought logo: dashscope + - name: qwen-mt-turbo type: llm provider: dashscope @@ -223,6 +246,7 @@ models: - 翻译模型 - agent-thought logo: dashscope + - name: qwen-plus-0112 type: llm provider: dashscope @@ -237,6 +261,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-plus-0125 type: llm provider: dashscope @@ -251,6 +276,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-plus-0723 type: llm provider: dashscope @@ -265,6 +291,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-plus-0806 type: llm provider: dashscope @@ -279,6 +306,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-plus-0919 type: llm provider: dashscope @@ -293,6 +321,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-plus-1125 type: llm provider: dashscope @@ -307,6 +336,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-plus-1127 type: llm provider: dashscope @@ -321,6 +351,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-plus-1220 type: llm provider: dashscope @@ -335,6 +366,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen-vl-max type: chat provider: dashscope @@ -342,8 +374,8 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video is_omni: false tags: - 大语言模型 @@ -352,6 +384,7 @@ models: - agent-thought - video logo: dashscope + - name: qwen-vl-plus-0809 type: chat provider: dashscope @@ -359,8 +392,8 @@ models: is_deprecated: true is_official: true capability: - - vision - - video + - vision + - video is_omni: false tags: - 大语言模型 @@ -369,6 +402,7 @@ models: - agent-thought - video logo: dashscope + - name: qwen-vl-plus-2025-01-02 type: chat provider: dashscope @@ -376,8 +410,8 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video is_omni: false tags: - 大语言模型 @@ -386,6 +420,7 @@ models: - agent-thought - video logo: dashscope + - name: qwen-vl-plus-2025-01-25 type: chat provider: dashscope @@ -393,8 +428,8 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video is_omni: false tags: - 大语言模型 @@ -403,6 +438,7 @@ models: - agent-thought - video logo: dashscope + - name: qwen-vl-plus-latest type: chat provider: dashscope @@ -410,8 +446,8 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video is_omni: false tags: - 大语言模型 @@ -420,6 +456,7 @@ models: - agent-thought - video logo: dashscope + - name: qwen-vl-plus type: chat provider: dashscope @@ -427,8 +464,8 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video is_omni: false tags: - 大语言模型 @@ -437,6 +474,7 @@ models: - agent-thought - video logo: dashscope + - name: qwen2.5-0.5b-instruct type: llm provider: dashscope @@ -451,13 +489,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-14b type: llm provider: dashscope description: qwen3-14b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -465,13 +505,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-235b-a22b-instruct-2507 type: llm provider: dashscope description: qwen3-235b-a22b-instruct-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -479,13 +521,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-235b-a22b-thinking-2507 type: llm provider: dashscope description: qwen3-235b-a22b-thinking-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -493,13 +537,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-235b-a22b type: llm provider: dashscope description: qwen3-235b-a22b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -507,13 +553,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-30b-a3b-instruct-2507 type: llm provider: dashscope description: qwen3-30b-a3b-instruct-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -521,13 +569,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-30b-a3b type: llm provider: dashscope description: qwen3-30b-a3b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -535,13 +585,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-32b type: llm provider: dashscope description: qwen3-32b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -549,13 +601,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-4b type: llm provider: dashscope description: qwen3-4b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -563,13 +617,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-8b type: llm provider: dashscope description: qwen3-8b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -577,65 +633,75 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-coder-30b-a3b-instruct type: llm provider: dashscope description: qwen3-coder-30b-a3b-instruct大语言模型,支持智能体思考,262144上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - 代码模型 - agent-thought logo: dashscope + - name: qwen3-coder-480b-a35b-instruct type: llm provider: dashscope description: qwen3-coder-480b-a35b-instruct大语言模型,支持智能体思考,262144上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - 代码模型 - agent-thought logo: dashscope + - name: qwen3-coder-plus-2025-09-23 type: llm provider: dashscope description: qwen3-coder-plus-2025-09-23大语言模型,支持智能体思考,1000000上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - 代码模型 - agent-thought logo: dashscope + - name: qwen3-coder-plus type: llm provider: dashscope description: qwen3-coder-plus大语言模型,支持智能体思考,1000000上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - 代码模型 - agent-thought logo: dashscope + - name: qwen3-max-2025-09-23 type: llm provider: dashscope description: qwen3-max-2025-09-23大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -644,13 +710,15 @@ models: - stream-tool-call - 联网搜索 logo: dashscope + - name: qwen3-max-2026-01-23 type: llm provider: dashscope description: qwen3-max-2026-01-23大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -659,13 +727,15 @@ models: - stream-tool-call - 联网搜索 logo: dashscope + - name: qwen3-max-preview type: llm provider: dashscope description: qwen3-max-preview大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -673,13 +743,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-max type: llm provider: dashscope description: qwen3-max大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -688,13 +760,15 @@ models: - stream-tool-call - 联网搜索 logo: dashscope + - name: qwen3-next-80b-a3b-instruct type: llm provider: dashscope description: qwen3-next-80b-a3b-instruct大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -702,13 +776,15 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-next-80b-a3b-thinking type: llm provider: dashscope description: qwen3-next-80b-a3b-thinking大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -716,6 +792,7 @@ models: - agent-thought - stream-tool-call logo: dashscope + - name: qwen3-omni-flash-2025-12-01 type: llm provider: dashscope @@ -723,9 +800,10 @@ models: is_deprecated: false is_official: true capability: - - vision - - video - - audio + - vision + - video + - audio + - thinking is_omni: true tags: - 大语言模型 @@ -735,6 +813,7 @@ models: - video - audio logo: dashscope + - name: qwen3-vl-235b-a22b-instruct type: chat provider: dashscope @@ -742,8 +821,9 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video + - thinking is_omni: false tags: - 大语言模型 @@ -754,6 +834,7 @@ models: - vision - video logo: dashscope + - name: qwen3-vl-235b-a22b-thinking type: chat provider: dashscope @@ -761,8 +842,9 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video + - thinking is_omni: false tags: - 大语言模型 @@ -773,6 +855,7 @@ models: - vision - video logo: dashscope + - name: qwen3-vl-30b-a3b-instruct type: chat provider: dashscope @@ -780,8 +863,9 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video + - thinking is_omni: false tags: - 大语言模型 @@ -792,6 +876,7 @@ models: - vision - video logo: dashscope + - name: qwen3-vl-30b-a3b-thinking type: chat provider: dashscope @@ -799,8 +884,9 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video + - thinking is_omni: false tags: - 大语言模型 @@ -811,6 +897,7 @@ models: - vision - video logo: dashscope + - name: qwen3-vl-flash type: chat provider: dashscope @@ -818,8 +905,9 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video + - thinking is_omni: false tags: - 大语言模型 @@ -830,6 +918,7 @@ models: - vision - video logo: dashscope + - name: qwen3-vl-plus-2025-09-23 type: chat provider: dashscope @@ -837,8 +926,9 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video + - thinking is_omni: false tags: - 大语言模型 @@ -847,6 +937,7 @@ models: - agent-thought - video logo: dashscope + - name: qwen3-vl-plus type: chat provider: dashscope @@ -854,8 +945,9 @@ models: is_deprecated: false is_official: true capability: - - vision - - video + - vision + - video + - thinking is_omni: false tags: - 大语言模型 @@ -864,45 +956,52 @@ models: - agent-thought - video logo: dashscope + - name: qwq-32b type: llm provider: dashscope description: qwq-32b大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - agent-thought - stream-tool-call logo: dashscope + - name: qwq-plus-0305 type: llm provider: dashscope description: qwq-plus-0305大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - agent-thought - stream-tool-call logo: dashscope + - name: qwq-plus type: llm provider: dashscope description: qwq-plus大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 - agent-thought - stream-tool-call logo: dashscope + - name: gte-rerank-v2 type: rerank provider: dashscope @@ -914,6 +1013,7 @@ models: tags: - 重排序模型 logo: dashscope + - name: gte-rerank type: rerank provider: dashscope @@ -925,6 +1025,7 @@ models: tags: - 重排序模型 logo: dashscope + - name: multimodal-embedding-v1 type: embedding provider: dashscope @@ -932,13 +1033,14 @@ models: is_deprecated: false is_official: true capability: - - vision + - vision is_omni: false tags: - 嵌入模型 - 多模态模型 - vision logo: dashscope + - name: text-embedding-v1 type: embedding provider: dashscope @@ -951,6 +1053,7 @@ models: - 嵌入模型 - 文本嵌入 logo: dashscope + - name: text-embedding-v2 type: embedding provider: dashscope @@ -963,6 +1066,7 @@ models: - 嵌入模型 - 文本嵌入 logo: dashscope + - name: text-embedding-v3 type: embedding provider: dashscope @@ -975,6 +1079,7 @@ models: - 嵌入模型 - 文本嵌入 logo: dashscope + - name: text-embedding-v4 type: embedding provider: dashscope @@ -986,4 +1091,4 @@ models: tags: - 嵌入模型 - 文本嵌入 - logo: dashscope \ No newline at end of file + logo: dashscope diff --git a/api/app/core/models/scripts/openai_models.yaml b/api/app/core/models/scripts/openai_models.yaml index 7f6d3a51..08b81008 100644 --- a/api/app/core/models/scripts/openai_models.yaml +++ b/api/app/core/models/scripts/openai_models.yaml @@ -20,6 +20,7 @@ models: - audio - video logo: openai + - name: gpt-3.5-turbo-0125 type: llm provider: openai @@ -34,6 +35,7 @@ models: - agent-thought - stream-tool-call logo: openai + - name: gpt-3.5-turbo-1106 type: llm provider: openai @@ -48,6 +50,7 @@ models: - agent-thought - stream-tool-call logo: openai + - name: gpt-3.5-turbo-16k type: llm provider: openai @@ -62,6 +65,7 @@ models: - agent-thought - stream-tool-call logo: openai + - name: gpt-3.5-turbo-instruct type: llm provider: openai @@ -73,6 +77,7 @@ models: tags: - 大语言模型 logo: openai + - name: gpt-3.5-turbo type: llm provider: openai @@ -87,6 +92,7 @@ models: - agent-thought - stream-tool-call logo: openai + - name: gpt-4-0125-preview type: llm provider: openai @@ -101,6 +107,7 @@ models: - agent-thought - stream-tool-call logo: openai + - name: gpt-4-1106-preview type: llm provider: openai @@ -115,6 +122,7 @@ models: - agent-thought - stream-tool-call logo: openai + - name: gpt-4-turbo-2024-04-09 type: llm provider: openai @@ -131,6 +139,7 @@ models: - stream-tool-call - vision logo: openai + - name: gpt-4-turbo-preview type: llm provider: openai @@ -145,6 +154,7 @@ models: - agent-thought - stream-tool-call logo: openai + - name: gpt-4-turbo type: llm provider: openai @@ -161,6 +171,7 @@ models: - stream-tool-call - vision logo: openai + - name: o1-preview type: llm provider: openai @@ -173,6 +184,7 @@ models: - 大语言模型 - agent-thought logo: openai + - name: o1 type: llm provider: openai @@ -181,6 +193,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -190,6 +203,7 @@ models: - vision - structured-output logo: openai + - name: o3-2025-04-16 type: llm provider: openai @@ -198,6 +212,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -207,13 +222,15 @@ models: - stream-tool-call - structured-output logo: openai + - name: o3-mini-2025-01-31 type: llm provider: openai description: o3-mini-2025-01-31大语言模型,支持智能体思考、工具调用、流式工具调用、结构化输出,200000上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -222,13 +239,15 @@ models: - stream-tool-call - structured-output logo: openai + - name: o3-mini type: llm provider: openai description: o3-mini大语言模型,支持智能体思考、工具调用、流式工具调用、结构化输出,200000上下文窗口,对话模式 is_deprecated: false is_official: true - capability: [] + capability: + - thinking is_omni: false tags: - 大语言模型 @@ -237,6 +256,7 @@ models: - stream-tool-call - structured-output logo: openai + - name: o3-pro-2025-06-10 type: llm provider: openai @@ -245,6 +265,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -253,6 +274,7 @@ models: - vision - structured-output logo: openai + - name: o3-pro type: llm provider: openai @@ -261,6 +283,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -269,6 +292,7 @@ models: - vision - structured-output logo: openai + - name: o3 type: llm provider: openai @@ -277,6 +301,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -286,6 +311,7 @@ models: - stream-tool-call - structured-output logo: openai + - name: o4-mini-2025-04-16 type: llm provider: openai @@ -294,6 +320,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -303,6 +330,7 @@ models: - stream-tool-call - structured-output logo: openai + - name: o4-mini type: llm provider: openai @@ -311,6 +339,7 @@ models: is_official: true capability: - vision + - thinking is_omni: false tags: - 大语言模型 @@ -320,6 +349,7 @@ models: - stream-tool-call - structured-output logo: openai + - name: text-embedding-3-large type: embedding provider: openai @@ -331,6 +361,7 @@ models: tags: - 文本向量模型 logo: openai + - name: text-embedding-3-small type: embedding provider: openai @@ -342,6 +373,7 @@ models: tags: - 文本向量模型 logo: openai + - name: text-embedding-ada-002 type: embedding provider: openai diff --git a/api/app/core/models/scripts/volcano_models.yaml b/api/app/core/models/scripts/volcano_models.yaml index 24609f5a..c86d41ac 100644 --- a/api/app/core/models/scripts/volcano_models.yaml +++ b/api/app/core/models/scripts/volcano_models.yaml @@ -10,6 +10,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -24,6 +25,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -38,6 +40,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -52,6 +55,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -82,6 +86,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -96,6 +101,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -110,6 +116,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -124,6 +131,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 @@ -139,6 +147,7 @@ models: capability: - vision - video + - thinking is_omni: false tags: - 大语言模型 diff --git a/api/app/core/models/volcano_chat.py b/api/app/core/models/volcano_chat.py new file mode 100644 index 00000000..d86484a5 --- /dev/null +++ b/api/app/core/models/volcano_chat.py @@ -0,0 +1,38 @@ +""" +火山引擎 ChatOpenAI 扩展 + +ChatOpenAI 在解析流式 SSE 时只取 delta.content,会丢弃 delta.reasoning_content。 +此类仅重写 _convert_chunk_to_generation_chunk,将 reasoning_content 补入 additional_kwargs。 +""" +from __future__ import annotations + +from typing import Any, Optional + +from langchain_core.outputs import ChatGenerationChunk +from langchain_openai import ChatOpenAI + + +class VolcanoChatOpenAI(ChatOpenAI): + """火山引擎 Chat 模型,支持深度思考内容(reasoning_content)的流式透传。""" + + def _convert_chunk_to_generation_chunk( + self, + chunk: dict, + default_chunk_class: type, + base_generation_info: Optional[dict], + ) -> Optional[ChatGenerationChunk]: + gen_chunk = super()._convert_chunk_to_generation_chunk( + chunk, default_chunk_class, base_generation_info + ) + if gen_chunk is None: + return None + + # 从原始 chunk 中提取 reasoning_content + choices = chunk.get("choices") or chunk.get("chunk", {}).get("choices", []) + if choices: + delta = choices[0].get("delta") or {} + reasoning: Any = delta.get("reasoning_content") + if reasoning: + gen_chunk.message.additional_kwargs["reasoning_content"] = reasoning + + return gen_chunk diff --git a/api/app/core/tools/mcp/client.py b/api/app/core/tools/mcp/client.py index b437d021..3539d33a 100644 --- a/api/app/core/tools/mcp/client.py +++ b/api/app/core/tools/mcp/client.py @@ -99,7 +99,7 @@ class SimpleMCPClient: # 建立 SSE 连接 response = await self._session.get(self.server_url) - if response.status not in (200, 202): + if not (200 <= response.status < 300): error_text = await response.text() raise MCPConnectionError(f"SSE 连接失败 {response.status}: {error_text}") @@ -190,9 +190,7 @@ class SimpleMCPClient: try: async with self._session.post(self._endpoint_url, json=request) as response: - # MCP SSE 协议:POST 请求返回 200 或 202 均为正常 - # 202 Accepted 表示请求已接受,结果通过 SSE 流异步返回 - if response.status not in (200, 202): + if not (200 <= response.status < 300): error_text = await response.text() raise MCPConnectionError(f"请求失败 {response.status}: {error_text}") @@ -207,7 +205,7 @@ class SimpleMCPClient: raise MCPConnectionError("endpoint URL 未初始化") async with self._session.post(self._endpoint_url, json=notification) as response: - if response.status not in (200, 202): + if not (200 <= response.status < 300): logger.warning(f"通知发送失败: {response.status}") async def _initialize_modelscope_session(self): @@ -225,7 +223,7 @@ class SimpleMCPClient: try: async with self._session.post(self.server_url, json=init_request) as response: - if response.status != 200: + if not (200 <= response.status < 300): error_text = await response.text() raise MCPConnectionError(f"初始化失败 {response.status}: {error_text}") diff --git a/api/app/core/workflow/nodes/llm/node.py b/api/app/core/workflow/nodes/llm/node.py index a691001f..e3c68420 100644 --- a/api/app/core/workflow/nodes/llm/node.py +++ b/api/app/core/workflow/nodes/llm/node.py @@ -135,7 +135,8 @@ class LLMNode(BaseNode): api_key=model_info.api_key, base_url=model_info.api_base, extra_params=extra_params, - is_omni=model_info.is_omni + is_omni=model_info.is_omni, + support_thinking="thinking" in (model_info.capability or []), ), type=model_info.model_type ) diff --git a/api/app/core/workflow/nodes/parameter_extractor/node.py b/api/app/core/workflow/nodes/parameter_extractor/node.py index 3dc5fcc3..d7a2a501 100644 --- a/api/app/core/workflow/nodes/parameter_extractor/node.py +++ b/api/app/core/workflow/nodes/parameter_extractor/node.py @@ -109,6 +109,7 @@ class ParameterExtractorNode(BaseNode): api_key = api_config.api_key api_base = api_config.api_base is_omni = api_config.is_omni + capability = api_config.capability model_type = config.type llm = RedBearLLM( @@ -117,7 +118,8 @@ class ParameterExtractorNode(BaseNode): provider=provider, api_key=api_key, base_url=api_base, - is_omni=is_omni + is_omni=is_omni, + support_thinking="thinking" in (capability or []), ), type=ModelType(model_type) ) diff --git a/api/app/core/workflow/nodes/question_classifier/node.py b/api/app/core/workflow/nodes/question_classifier/node.py index 31fadaf6..ddae1ced 100644 --- a/api/app/core/workflow/nodes/question_classifier/node.py +++ b/api/app/core/workflow/nodes/question_classifier/node.py @@ -62,6 +62,7 @@ class QuestionClassifierNode(BaseNode): api_key = api_config.api_key base_url = api_config.api_base is_omni = api_config.is_omni + capability = api_config.capability model_type = config.type return RedBearLLM( @@ -70,7 +71,8 @@ class QuestionClassifierNode(BaseNode): provider=provider, api_key=api_key, base_url=base_url, - is_omni=is_omni + is_omni=is_omni, + support_thinking="thinking" in (capability or []), ), type=ModelType(model_type) ) diff --git a/api/app/models/models_model.py b/api/app/models/models_model.py index 69bedc3d..fab85ea6 100644 --- a/api/app/models/models_model.py +++ b/api/app/models/models_model.py @@ -81,7 +81,7 @@ class ModelConfig(BaseModel): # 模型配置参数 capability = Column(ARRAY(String), default=list, nullable=False, server_default=text("'{}'::varchar[]"), - comment="模型能力列表(如['vision', 'audio', 'video'])") + comment="模型能力列表(如['vision', 'audio', 'video', 'thinking'])") is_omni = Column(Boolean, default=False, nullable=False, server_default="false", comment="是否为Omni模型(使用特殊API调用)") config = Column(JSON, comment="模型配置参数") # - temperature : 控制生成文本的随机性。值越高,输出越随机、越有创造性;值越低,输出越确定、越保守。 diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index f1e9132f..4ca3e7de 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -241,6 +241,8 @@ class ModelParameters(BaseModel): presence_penalty: float = Field(default=0.0, ge=-2.0, le=2.0, description="存在惩罚") n: int = Field(default=1, ge=1, le=10, description="生成的回复数量") stop: Optional[List[str]] = Field(default=None, description="停止序列") + deep_thinking: bool = Field(default=False, description="是否启用深度思考模式(需模型支持,如 DeepSeek-R1、QwQ 等)") + thinking_budget_tokens: Optional[int] = Field(default=None, ge=1024, le=131072, description="深度思考 token 预算(仅部分模型支持)") class VariableDefinition(BaseModel): @@ -612,6 +614,7 @@ class AppChatRequest(BaseModel): user_id: Optional[str] = Field(default=None, description="用户ID(用于会话管理)") variables: Optional[Dict[str, Any]] = Field(default=None, description="自定义变量参数值") stream: bool = Field(default=False, description="是否流式返回") + thinking: bool = Field(default=False, description="是否启用深度思考(需Agent配置支持)") files: List[FileInput] = Field(default_factory=list, description="附件列表(支持多文件)") diff --git a/api/app/schemas/conversation_schema.py b/api/app/schemas/conversation_schema.py index 98715612..b2f565ef 100644 --- a/api/app/schemas/conversation_schema.py +++ b/api/app/schemas/conversation_schema.py @@ -31,6 +31,7 @@ class ChatRequest(BaseModel): stream: bool = Field(default=False, description="是否流式返回") web_search: bool = Field(default=False, description="是否启用网络搜索") memory: bool = Field(default=True, description="是否启用记忆功能") + thinking: bool = Field(default=False, description="是否启用深度思考(需Agent配置支持)") files: Optional[List[FileInput]] = Field(default=None, description="附件列表(支持多文件)") diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index df81568f..53ac577a 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -117,7 +117,9 @@ class AppChatService: max_tokens=model_parameters.get("max_tokens", 2000), system_prompt=system_prompt, tools=tools, - + deep_thinking=model_parameters.get("deep_thinking", False), + thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"), + capability=api_key_obj.capability or [], ) model_info = ModelInfo( @@ -205,7 +207,8 @@ class AppChatService: "model": api_key_obj.model_name, "usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}), "audio_url": None, - "citations": filtered_citations + "citations": filtered_citations, + "reasoning_content": result.get("reasoning_content") } if files: for f in files: @@ -258,6 +261,7 @@ class AppChatService: "conversation_id": conversation_id, "message_id": str(message_id), "message": result["content"], + "reasoning_content": result.get("reasoning_content"), "usage": result.get("usage", { "prompt_tokens": 0, "completion_tokens": 0, @@ -354,7 +358,10 @@ class AppChatService: max_tokens=model_parameters.get("max_tokens", 2000), system_prompt=system_prompt, tools=tools, - streaming=True + streaming=True, + deep_thinking=model_parameters.get("deep_thinking", False), + thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"), + capability=api_key_obj.capability or [], ) model_info = ModelInfo( @@ -403,6 +410,7 @@ class AppChatService: # 流式调用 Agent(支持多模态),同时并行启动 TTS full_content = "" + full_reasoning = "" total_tokens = 0 text_queue: asyncio.Queue = asyncio.Queue() @@ -426,6 +434,9 @@ class AppChatService: ): if isinstance(chunk, int): total_tokens = chunk + elif isinstance(chunk, dict) and chunk.get("type") == "reasoning": + full_reasoning += chunk['content'] + yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n" else: full_content += chunk yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n" @@ -472,7 +483,8 @@ class AppChatService: "model": api_key_obj.model_name, "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}, "audio_url": None, - "citations": filtered_citations + "citations": filtered_citations, + "reasoning_content": full_reasoning or None } if files: @@ -652,13 +664,13 @@ class AppChatService: storage_type=storage_type, user_rag_memory_id=user_rag_memory_id ): - if "sub_usage" in event: + # 拦截 sub_usage 事件,累加 token + if "event: sub_usage" in event: if "data:" in event: try: data_line = event.split("data: ", 1)[1].strip() data = json.loads(data_line) - if "total_tokens" in data: - total_tokens += data["total_tokens"] + total_tokens += data.get("total_tokens", 0) except: pass else: diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index bd7f7496..6e9f3544 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -534,6 +534,7 @@ class ConversationService: api_key = api_config.api_key api_base = api_config.api_base is_omni = api_config.is_omni + capability = api_config.capability model_type = config.type llm = RedBearLLM( @@ -542,7 +543,8 @@ class ConversationService: provider=provider, api_key=api_key, base_url=api_base, - is_omni=is_omni + is_omni=is_omni, + support_thinking="thinking" in (capability or []), ), type=ModelType(model_type) ) diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 4b503f2b..978dfdab 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -458,7 +458,7 @@ class AgentRunService: statement = opening["statement"] suggested_questions = opening["suggested_questions"] - + # 如果有变量,进行替换(仅支持 {{var_name}} 格式) if variables: for var_name, var_value in variables.items(): @@ -595,6 +595,9 @@ class AgentRunService: max_tokens=effective_params.get("max_tokens", 2000), system_prompt=system_prompt, tools=tools, + deep_thinking=effective_params.get("deep_thinking", False), + thinking_budget_tokens=effective_params.get("thinking_budget_tokens"), + capability=api_key_config.get("capability", []), ) # 5. 处理会话ID(创建或验证),新会话时写入开场白 @@ -689,7 +692,8 @@ class AgentRunService: "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0 - }) + }), + "reasoning_content": result.get("reasoning_content") }, files=files, processed_files=processed_files, @@ -701,6 +705,7 @@ class AgentRunService: response = { "message": result["content"], + "reasoning_content": result.get("reasoning_content"), "conversation_id": conversation_id, "usage": result.get("usage", { "prompt_tokens": 0, @@ -838,7 +843,10 @@ class AgentRunService: max_tokens=effective_params.get("max_tokens", 2000), system_prompt=system_prompt, tools=tools, - streaming=True + streaming=True, + deep_thinking=effective_params.get("deep_thinking", False), + thinking_budget_tokens=effective_params.get("thinking_budget_tokens"), + capability=api_key_config.get("capability", []), ) # 5. 处理会话ID(创建或验证),新会话时写入开场白 @@ -898,6 +906,7 @@ class AgentRunService: # 9. 流式调用 Agent(支持多模态),同时并行启动 TTS full_content = "" + full_reasoning = "" total_tokens = 0 # 启动流式 TTS(文本边输出边合成) @@ -916,6 +925,9 @@ class AgentRunService: ): if isinstance(chunk, int): total_tokens = chunk + elif isinstance(chunk, dict) and chunk.get("type") == "reasoning": + full_reasoning += chunk["content"] + yield self._format_sse_event("reasoning", {"content": chunk["content"]}) else: full_content += chunk yield self._format_sse_event("message", {"content": chunk}) @@ -944,7 +956,8 @@ class AgentRunService: app_id=agent_config.app_id, user_id=user_id, meta_data={ - "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens} + "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}, + "reasoning_content": full_reasoning or None }, files=files, processed_files=processed_files, @@ -1665,7 +1678,7 @@ class AgentRunService: """从 text_queue 取文本按句子切分后喂给 synthesizer""" import re buf = "" - sentence_end = re.compile(r'[\u3002\uff01\uff1f\.!?\n]') + sentence_end = re.compile(r'[\u3002\uff01\uff1f.!?\n]') while True: chunk = await text_queue.get() if chunk is None: @@ -1894,6 +1907,7 @@ class AgentRunService: "conversation_id": result['conversation_id'], "parameters_used": model_info["parameters"], "message": result.get("message"), + "reasoning_content": result.get("reasoning_content"), "usage": usage, "elapsed_time": elapsed, "tokens_per_second": ( @@ -2012,7 +2026,7 @@ class AgentRunService: # 需要从 ModelApiKey 获取实际的模型名称,或者在 ModelConfig 中添加 model 字段 return None - def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> AgentConfig: + def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> tuple[AgentConfig, Any]: """创建一个带有覆盖参数的 agent_config(浅拷贝,只修改 model_parameters) Args: @@ -2110,6 +2124,7 @@ class AgentRunService: start_time = time.time() full_content = "" + full_reasoning = "" returned_conversation_id = model_conversation_id audio_url = None audio_status = None @@ -2168,6 +2183,18 @@ class AgentRunService: "content": chunk })) + # 转发深度思考事件(带模型标识) + if event_type == "reasoning" and event_data: + reasoning_chunk = event_data.get("content", "") + full_reasoning += reasoning_chunk + await event_queue.put(self._format_sse_event("model_reasoning", { + "model_index": idx, + "model_config_id": model_config_id, + "label": model_label, + "conversation_id": returned_conversation_id, + "content": event_data.get("content", "") + })) + # 从 end 事件中提取 features 输出字段 if event_type == "end" and event_data: audio_url = event_data.get("audio_url") @@ -2199,6 +2226,7 @@ class AgentRunService: "conversation_id": returned_conversation_id, "parameters_used": model_info["parameters"], "message": full_content, + "reasoning_content": full_reasoning or None, "elapsed_time": elapsed, "audio_url": audio_url, "audio_status": audio_status, @@ -2351,6 +2379,7 @@ class AgentRunService: "label": r["label"], "conversation_id": r.get("conversation_id"), "message": r.get("message"), + "reasoning_content": r.get("reasoning_content"), "elapsed_time": r.get("elapsed_time", 0), "audio_url": r.get("audio_url"), "audio_status": r.get("audio_status"), diff --git a/api/app/services/llm_router.py b/api/app/services/llm_router.py index 02895d6b..7087415e 100644 --- a/api/app/services/llm_router.py +++ b/api/app/services/llm_router.py @@ -415,6 +415,7 @@ class LLMRouter: api_key=api_key_config.api_key, base_url=api_key_config.api_base, is_omni=api_key_config.is_omni, + support_thinking="thinking" in (api_key_config.capability or []), temperature=0.3, max_tokens=500 ) diff --git a/api/app/services/master_agent_router.py b/api/app/services/master_agent_router.py index b0f43b51..206443bd 100644 --- a/api/app/services/master_agent_router.py +++ b/api/app/services/master_agent_router.py @@ -393,6 +393,7 @@ class MasterAgentRouter: api_key=api_key_config.api_key, base_url=api_key_config.api_base, is_omni=api_key_config.is_omni, + support_thinking="thinking" in (api_key_config.capability or []), extra_params = extra_params ) @@ -403,6 +404,17 @@ class MasterAgentRouter: response = await llm.ainvoke(prompt) ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id) + # 提取 token 消耗 + self._last_routing_tokens = 0 + if hasattr(response, 'usage_metadata') and response.usage_metadata: + um = response.usage_metadata + self._last_routing_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0) + elif hasattr(response, 'response_metadata') and response.response_metadata: + token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {}) + if isinstance(token_usage, dict): + self._last_routing_tokens = token_usage.get("total_tokens", 0) + logger.info(f"Master Agent 路由 token 消耗: {self._last_routing_tokens}") + # 提取响应内容 if hasattr(response, 'content'): return response.content diff --git a/api/app/services/memory_perceptual_service.py b/api/app/services/memory_perceptual_service.py index 7cf94a1a..7d6d1092 100644 --- a/api/app/services/memory_perceptual_service.py +++ b/api/app/services/memory_perceptual_service.py @@ -232,7 +232,8 @@ class MemoryPerceptualService: provider=model_config.provider, api_key=model_config.api_key, base_url=model_config.api_base, - is_omni=model_config.is_omni + is_omni=model_config.is_omni, + support_thinking="thinking" in (model_config.capability or []), ) ) return llm, model_config diff --git a/api/app/services/model_parameter_merger.py b/api/app/services/model_parameter_merger.py index 262e3d49..4be83851 100644 --- a/api/app/services/model_parameter_merger.py +++ b/api/app/services/model_parameter_merger.py @@ -45,12 +45,20 @@ class ModelParameterMerger: "frequency_penalty": 0.0, "presence_penalty": 0.0, "n": 1, - "stop": None + "stop": None, + "deep_thinking": False, + "thinking_budget_tokens": None } # 合并参数:默认值 -> 模型配置 -> Agent 配置 merged = default_params.copy() + # Pydantic 对象转为 dict + if model_config_params and hasattr(model_config_params, 'model_dump'): + model_config_params = model_config_params.model_dump() + if agent_config_params and hasattr(agent_config_params, 'model_dump'): + agent_config_params = agent_config_params.model_dump() + # 应用模型配置参数 if model_config_params: for key in default_params: diff --git a/api/app/services/model_service.py b/api/app/services/model_service.py index c9266667..4cbb3509 100644 --- a/api/app/services/model_service.py +++ b/api/app/services/model_service.py @@ -85,15 +85,16 @@ class ModelConfigService: @staticmethod async def validate_model_config( - db: Session, - *, - model_name: str, - provider: str, - api_key: str, - api_base: Optional[str] = None, - model_type: str = "llm", - test_message: str = "Hello", - is_omni: bool = False + db: Session, + *, + model_name: str, + provider: str, + api_key: str, + api_base: Optional[str] = None, + model_type: str = "llm", + test_message: str = "Hello", + is_omni: bool = False, + capability: Optional[list] = None ) -> Dict[str, Any]: """验证模型配置是否有效 @@ -124,6 +125,7 @@ class ModelConfigService: api_key=api_key, base_url=api_base, is_omni=is_omni, + support_thinking="thinking" in (capability or []), temperature=0.7, max_tokens=100 ) @@ -320,7 +322,8 @@ class ModelConfigService: api_base=api_key_data.api_base, model_type=model_data.type, test_message="Hello", - is_omni=model_data.is_omni + is_omni=model_data.is_omni, + capability=model_data.capability ) if not validation_result["valid"]: raise BusinessException( @@ -590,7 +593,8 @@ class ModelApiKeyService: api_base=data.api_base, model_type=model_config.type, test_message="Hello", - is_omni=data.is_omni + is_omni=data.is_omni, + capability=model_config.capability ) if not validation_result["valid"]: # 记录验证失败的模型,但不抛出异常 @@ -675,7 +679,8 @@ class ModelApiKeyService: api_base=api_key_data.api_base, model_type=model_config.type, test_message="Hello", - is_omni=api_key_data.is_omni + is_omni=api_key_data.is_omni, + capability=model_config.capability ) if not validation_result["valid"]: raise BusinessException( @@ -707,7 +712,8 @@ class ModelApiKeyService: api_base=api_key_data.api_base or existing_api_key.api_base, model_type=model_config.type, test_message="Hello", - is_omni=model_config.is_omni + is_omni=model_config.is_omni, + capability=model_config.capability ) if not validation_result["valid"]: raise BusinessException( diff --git a/api/app/services/multi_agent_orchestrator.py b/api/app/services/multi_agent_orchestrator.py index 60a3b5b8..216aeb6e 100644 --- a/api/app/services/multi_agent_orchestrator.py +++ b/api/app/services/multi_agent_orchestrator.py @@ -287,6 +287,11 @@ class MultiAgentOrchestrator: sub_conversation_id = None total_tokens = 0 + # 累加 Master Agent 路由决策消耗的 token + total_tokens += task_analysis.get("routing_tokens", 0) + # 累加 Master Agent 整合消耗的 token + total_tokens += getattr(self, '_last_merge_tokens', 0) + if isinstance(results, dict): sub_conversation_id = results.get("conversation_id") or results.get("result", {}).get("conversation_id") # 提取 token 信息 @@ -358,12 +363,16 @@ class MultiAgentOrchestrator: variables=variables ) + # 获取路由决策消耗的 token + routing_tokens = getattr(self.router, '_last_routing_tokens', 0) + logger.info( "Master Agent 分析完成", extra={ "selected_agent": routing_decision.get("selected_agent_id"), "confidence": routing_decision.get("confidence"), - "strategy": routing_decision.get("strategy") + "strategy": routing_decision.get("strategy"), + "routing_tokens": routing_tokens } ) @@ -372,7 +381,8 @@ class MultiAgentOrchestrator: "variables": variables or {}, "sub_agents": self.config.sub_agents, "initial_context": variables or {}, - "routing_decision": routing_decision + "routing_decision": routing_decision, + "routing_tokens": routing_tokens } async def _execute_sequential( @@ -1032,6 +1042,11 @@ class MultiAgentOrchestrator: # 5. 流式执行子 Agent sub_conversation_id = None + # Master Agent 路由决策消耗的 token,通过 sub_usage 事件发送给上层 + routing_tokens = task_analysis.get("routing_tokens", 0) + if routing_tokens > 0: + yield self._format_sse_event("sub_usage", {"total_tokens": routing_tokens}) + async for event in self._execute_sub_agent_stream( agent_data["config"], message, @@ -1054,6 +1069,7 @@ class MultiAgentOrchestrator: except: pass + # 直接透传所有事件(包括 sub_usage),累加统一由上层处理 yield event # 6. 如果有会话 ID,发送一个包含它的事件 @@ -2600,6 +2616,7 @@ class MultiAgentOrchestrator: api_key=api_key_config.api_key, base_url=api_key_config.api_base, is_omni=api_key_config.is_omni, + support_thinking="thinking" in (api_key_config.capability or []), temperature=0.7, # 整合任务使用中等温度 max_tokens=2000 ) @@ -2612,6 +2629,17 @@ class MultiAgentOrchestrator: ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id) + # 提取整合消耗的 token + merge_tokens = 0 + if hasattr(response, 'usage_metadata') and response.usage_metadata: + um = response.usage_metadata + merge_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0) + elif hasattr(response, 'response_metadata') and response.response_metadata: + token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {}) + if isinstance(token_usage, dict): + merge_tokens = token_usage.get("total_tokens", 0) + self._last_merge_tokens = merge_tokens + # 提取响应内容 if hasattr(response, 'content'): merged_response = response.content @@ -2621,7 +2649,8 @@ class MultiAgentOrchestrator: logger.info( "Master Agent 整合完成", extra={ - "merged_length": len(merged_response) + "merged_length": len(merged_response), + "merge_tokens": merge_tokens } ) @@ -2766,6 +2795,7 @@ class MultiAgentOrchestrator: api_key=api_key_config.api_key, base_url=api_key_config.api_base, is_omni=api_key_config.is_omni, + support_thinking="thinking" in (api_key_config.capability or []), temperature=0.7, max_tokens=2000, extra_params={"streaming": True} # 启用流式输出 diff --git a/api/app/services/prompt_optimizer_service.py b/api/app/services/prompt_optimizer_service.py index 184220a8..fde8c4f9 100644 --- a/api/app/services/prompt_optimizer_service.py +++ b/api/app/services/prompt_optimizer_service.py @@ -185,7 +185,8 @@ class PromptOptimizerService: provider=api_config.provider, api_key=api_config.api_key, base_url=api_config.api_base, - is_omni=api_config.is_omni + is_omni=api_config.is_omni, + support_thinking="thinking" in (api_config.capability or []), ), type=ModelType(model_config.type)) try: prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt') diff --git a/api/app/services/shared_chat_service.py b/api/app/services/shared_chat_service.py index c74604a5..b1e40a2d 100644 --- a/api/app/services/shared_chat_service.py +++ b/api/app/services/shared_chat_service.py @@ -248,7 +248,9 @@ class SharedChatService: max_tokens=model_parameters.get("max_tokens", 2000), system_prompt=system_prompt, tools=tools, - + deep_thinking=model_parameters.get("deep_thinking", False), + thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"), + capability=api_key_obj.capability or [], ) # 加载历史消息 @@ -450,7 +452,10 @@ class SharedChatService: max_tokens=model_parameters.get("max_tokens", 2000), system_prompt=system_prompt, tools=tools, - streaming=True + streaming=True, + deep_thinking=model_parameters.get("deep_thinking", False), + thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"), + capability=api_key_obj.capability or [], ) # 加载历史消息 @@ -479,6 +484,8 @@ class SharedChatService: ): if isinstance(chunk, int): total_tokens = chunk + elif isinstance(chunk, dict) and chunk.get("type") == "reasoning": + yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n" else: full_content += chunk # 发送消息块事件