Merge pull request #741 from SuanmoSuanyangTechnology/feature/agent-tool_xjn
fix(app)
This commit is contained in:
@@ -163,6 +163,7 @@ def _get_ontology_service(
|
|||||||
api_key=api_key_config.api_key,
|
api_key=api_key_config.api_key,
|
||||||
base_url=api_key_config.api_base,
|
base_url=api_key_config.api_base,
|
||||||
is_omni=api_key_config.is_omni,
|
is_omni=api_key_config.is_omni,
|
||||||
|
support_thinking="thinking" in (api_key_config.capability or []),
|
||||||
max_retries=3,
|
max_retries=3,
|
||||||
timeout=60.0
|
timeout=60.0
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -453,6 +453,9 @@ async def chat(
|
|||||||
# 流式返回
|
# 流式返回
|
||||||
agent_config = agent_config_4_app_release(release)
|
agent_config = agent_config_4_app_release(release)
|
||||||
|
|
||||||
|
if not (agent_config.model_parameters.get("deep_thinking", False) and payload.thinking):
|
||||||
|
agent_config.model_parameters["deep_thinking"] = False
|
||||||
|
|
||||||
if payload.stream:
|
if payload.stream:
|
||||||
async def event_generator():
|
async def event_generator():
|
||||||
async for event in app_chat_service.agnet_chat_stream(
|
async for event in app_chat_service.agnet_chat_stream(
|
||||||
@@ -634,7 +637,8 @@ async def config_query(
|
|||||||
"app_type": release.app.type,
|
"app_type": release.app.type,
|
||||||
"variables": release.config.get("variables"),
|
"variables": release.config.get("variables"),
|
||||||
"memory": release.config.get("memory", {}).get("enabled"),
|
"memory": release.config.get("memory", {}).get("enabled"),
|
||||||
"features": release.config.get("features")
|
"features": release.config.get("features"),
|
||||||
|
"model_parameters": release.config.get("model_parameters")
|
||||||
}
|
}
|
||||||
elif release.app.type == AppType.MULTI_AGENT:
|
elif release.app.type == AppType.MULTI_AGENT:
|
||||||
content = {
|
content = {
|
||||||
|
|||||||
@@ -144,6 +144,11 @@ async def chat(
|
|||||||
# print(app.current_release.default_model_config_id)
|
# print(app.current_release.default_model_config_id)
|
||||||
agent_config = agent_config_4_app_release(app.current_release)
|
agent_config = agent_config_4_app_release(app.current_release)
|
||||||
# print(agent_config.default_model_config_id)
|
# print(agent_config.default_model_config_id)
|
||||||
|
|
||||||
|
# thinking 开关:仅当 agent 配置了 deep_thinking 且请求 thinking=True 时才启用
|
||||||
|
if not (agent_config.model_parameters.get("deep_thinking", False) and payload.thinking):
|
||||||
|
agent_config.model_parameters["deep_thinking"] = False
|
||||||
|
|
||||||
# 流式返回
|
# 流式返回
|
||||||
if payload.stream:
|
if payload.stream:
|
||||||
async def event_generator():
|
async def event_generator():
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ async def create_end_user(
|
|||||||
payload = CreateEndUserRequest(**body)
|
payload = CreateEndUserRequest(**body)
|
||||||
workspace_id = api_key_auth.workspace_id
|
workspace_id = api_key_auth.workspace_id
|
||||||
|
|
||||||
logger.info(f"Create end user request - other_id: {payload.other_id}, workspace_id: {workspace_id}")
|
logger.info("Create end user request - other_id: %s, workspace_id: %s", payload.other_id, workspace_id)
|
||||||
|
|
||||||
# Resolve memory_config_id: explicit > workspace default
|
# Resolve memory_config_id: explicit > workspace default
|
||||||
memory_config_id = None
|
memory_config_id = None
|
||||||
|
|||||||
@@ -37,7 +37,10 @@ class LangChainAgent:
|
|||||||
tools: Optional[Sequence[BaseTool]] = None,
|
tools: Optional[Sequence[BaseTool]] = None,
|
||||||
streaming: bool = False,
|
streaming: bool = False,
|
||||||
max_iterations: Optional[int] = None, # 最大迭代次数(None 表示自动计算)
|
max_iterations: Optional[int] = None, # 最大迭代次数(None 表示自动计算)
|
||||||
max_tool_consecutive_calls: int = 3 # 单个工具最大连续调用次数
|
max_tool_consecutive_calls: int = 3, # 单个工具最大连续调用次数
|
||||||
|
deep_thinking: bool = False, # 是否启用深度思考模式
|
||||||
|
thinking_budget_tokens: Optional[int] = None, # 深度思考 token 预算
|
||||||
|
capability: Optional[List[str]] = None # 模型能力列表,用于校验是否支持深度思考
|
||||||
):
|
):
|
||||||
"""初始化 LangChain Agent
|
"""初始化 LangChain Agent
|
||||||
|
|
||||||
@@ -60,6 +63,7 @@ class LangChainAgent:
|
|||||||
self.streaming = streaming
|
self.streaming = streaming
|
||||||
self.is_omni = is_omni
|
self.is_omni = is_omni
|
||||||
self.max_tool_consecutive_calls = max_tool_consecutive_calls
|
self.max_tool_consecutive_calls = max_tool_consecutive_calls
|
||||||
|
self.deep_thinking = deep_thinking and ("thinking" in (capability or []))
|
||||||
|
|
||||||
# 工具调用计数器:记录每个工具的连续调用次数
|
# 工具调用计数器:记录每个工具的连续调用次数
|
||||||
self.tool_call_counter: Dict[str, int] = {}
|
self.tool_call_counter: Dict[str, int] = {}
|
||||||
@@ -82,6 +86,13 @@ class LangChainAgent:
|
|||||||
f"auto_calculated={max_iterations is None}"
|
f"auto_calculated={max_iterations is None}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 根据 capability 校验是否真正支持深度思考
|
||||||
|
actual_deep_thinking = self.deep_thinking
|
||||||
|
if deep_thinking and not actual_deep_thinking:
|
||||||
|
logger.warning(
|
||||||
|
f"模型 {model_name} 不支持深度思考(capability 中无 'thinking'),已自动关闭 deep_thinking"
|
||||||
|
)
|
||||||
|
|
||||||
# 创建 RedBearLLM(支持多提供商)
|
# 创建 RedBearLLM(支持多提供商)
|
||||||
model_config = RedBearModelConfig(
|
model_config = RedBearModelConfig(
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
@@ -89,10 +100,13 @@ class LangChainAgent:
|
|||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=api_base,
|
base_url=api_base,
|
||||||
is_omni=is_omni,
|
is_omni=is_omni,
|
||||||
|
deep_thinking=actual_deep_thinking,
|
||||||
|
thinking_budget_tokens=thinking_budget_tokens if actual_deep_thinking else None,
|
||||||
|
support_thinking="thinking" in (capability or []),
|
||||||
extra_params={
|
extra_params={
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"max_tokens": max_tokens,
|
"max_tokens": max_tokens,
|
||||||
"streaming": streaming # 使用参数控制流式
|
"streaming": streaming
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -249,6 +263,33 @@ class LangChainAgent:
|
|||||||
|
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
@staticmethod
def _extract_tokens_from_message(msg) -> int:
    """Return the ``total_tokens`` count reported on a chat message.

    Providers expose usage in different places, so the lookup tries the
    following locations in order and returns the first positive count:

    1. ``msg.response_metadata["token_usage"]["total_tokens"]``
       (OpenAI / ChatOpenAI)
    2. ``msg.response_metadata["usage"]["total_tokens"]``
       (some other providers)
    3. ``msg.usage_metadata`` - either a dict key or an attribute named
       ``total_tokens`` (newer LangChain ``AIMessage``)

    Args:
        msg: An ``AIMessage``-like object. Any of the attributes above may
            be missing, ``None`` or of an unexpected type.

    Returns:
        The token total as an ``int``; ``0`` when no usable count is found.
    """

    def _as_count(value) -> int:
        # Normalise whatever the provider sent (None, str, float, ...) to a
        # non-negative int so the declared return type always holds.
        if value is None:
            return 0
        try:
            count = int(value)
        except (TypeError, ValueError):
            return 0
        return count if count > 0 else 0

    # 1 + 2: response_metadata. Both keys are inspected independently so that
    # a "token_usage" dict without a total does not hide a usable "usage" dict.
    response_meta = getattr(msg, "response_metadata", None)
    if isinstance(response_meta, dict):
        for key in ("token_usage", "usage"):
            usage = response_meta.get(key)
            if isinstance(usage, dict):
                count = _as_count(usage.get("total_tokens"))
                if count:
                    return count

    # 3: usage_metadata, which may be a plain dict or an object with attributes.
    usage_meta = getattr(msg, "usage_metadata", None)
    if isinstance(usage_meta, dict):
        return _as_count(usage_meta.get("total_tokens"))
    return _as_count(getattr(usage_meta, "total_tokens", None))
|
||||||
|
|
||||||
def _build_multimodal_content(self, text: str, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
def _build_multimodal_content(self, text: str, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
构建多模态消息内容
|
构建多模态消息内容
|
||||||
@@ -283,6 +324,17 @@ class LangChainAgent:
|
|||||||
|
|
||||||
return content_parts
|
return content_parts
|
||||||
|
|
||||||
|
@staticmethod
def _extract_reasoning_content(msg) -> str:
    """Return the deep-thinking (reasoning) text carried by a message or chunk.

    Reads ``msg.additional_kwargs["reasoning_content"]`` and falls back to
    ``msg.additional_kwargs["reasoning"]``. According to the original notes,
    every provider is expected to deliver reasoning through
    ``additional_kwargs.reasoning_content``:

    - DeepSeek-R1 / QwQ: native field
    - Volcano (Doubao-thinking): injected by ``VolcanoChatOpenAI`` from
      ``delta.reasoning_content``

    Args:
        msg: An ``AIMessage``/chunk-like object; ``additional_kwargs`` may be
            missing or ``None``.

    Returns:
        The reasoning text, or ``""`` when none is present. The result is
        always a ``str`` so callers can concatenate chunks safely.
    """
    additional = getattr(msg, "additional_kwargs", None)
    if not isinstance(additional, dict):
        return ""
    for key in ("reasoning_content", "reasoning"):
        value = additional.get(key)
        # Only non-empty strings count: some providers place None or a
        # structured object under these keys, which must not leak to callers
        # that do ``full_reasoning += chunk``.
        if isinstance(value, str) and value:
            return value
    return ""
|
||||||
|
|
||||||
async def chat(
|
async def chat(
|
||||||
self,
|
self,
|
||||||
message: str,
|
message: str,
|
||||||
@@ -348,6 +400,7 @@ class LangChainAgent:
|
|||||||
|
|
||||||
logger.debug(f"输出消息数量: {len(output_messages)}")
|
logger.debug(f"输出消息数量: {len(output_messages)}")
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
|
reasoning_content = ""
|
||||||
for msg in reversed(output_messages):
|
for msg in reversed(output_messages):
|
||||||
if isinstance(msg, AIMessage):
|
if isinstance(msg, AIMessage):
|
||||||
logger.debug(f"找到 AI 消息,content 类型: {type(msg.content)}")
|
logger.debug(f"找到 AI 消息,content 类型: {type(msg.content)}")
|
||||||
@@ -382,8 +435,8 @@ class LangChainAgent:
|
|||||||
else:
|
else:
|
||||||
content = str(msg.content)
|
content = str(msg.content)
|
||||||
logger.debug(f"转换为字符串: {content[:100]}...")
|
logger.debug(f"转换为字符串: {content[:100]}...")
|
||||||
response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None
|
total_tokens = self._extract_tokens_from_message(msg)
|
||||||
total_tokens = response_meta.get("token_usage", {}).get("total_tokens", 0) if response_meta else 0
|
reasoning_content = self._extract_reasoning_content(msg) if self.deep_thinking else ""
|
||||||
break
|
break
|
||||||
|
|
||||||
logger.info(f"最终提取的内容长度: {len(content)}")
|
logger.info(f"最终提取的内容长度: {len(content)}")
|
||||||
@@ -399,6 +452,8 @@ class LangChainAgent:
|
|||||||
"total_tokens": total_tokens
|
"total_tokens": total_tokens
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if reasoning_content:
|
||||||
|
response["reasoning_content"] = reasoning_content
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Agent 调用完成",
|
"Agent 调用完成",
|
||||||
@@ -420,7 +475,7 @@ class LangChainAgent:
|
|||||||
history: Optional[List[Dict[str, str]]] = None,
|
history: Optional[List[Dict[str, str]]] = None,
|
||||||
context: Optional[str] = None,
|
context: Optional[str] = None,
|
||||||
files: Optional[List[Dict[str, Any]]] = None
|
files: Optional[List[Dict[str, Any]]] = None
|
||||||
) -> AsyncGenerator[str, None]:
|
) -> AsyncGenerator[str | int, None]:
|
||||||
"""执行流式对话
|
"""执行流式对话
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -431,6 +486,8 @@ class LangChainAgent:
|
|||||||
|
|
||||||
Yields:
|
Yields:
|
||||||
str: 消息内容块
|
str: 消息内容块
|
||||||
|
int: token 统计
|
||||||
|
Dict: 深度思考内容 {"type": "reasoning", "content": "..."}
|
||||||
"""
|
"""
|
||||||
logger.info("=" * 80)
|
logger.info("=" * 80)
|
||||||
logger.info(" chat_stream 方法开始执行")
|
logger.info(" chat_stream 方法开始执行")
|
||||||
@@ -451,6 +508,7 @@ class LangChainAgent:
|
|||||||
# 统一使用 agent 的 astream_events 实现流式输出
|
# 统一使用 agent 的 astream_events 实现流式输出
|
||||||
logger.debug("使用 Agent astream_events 实现流式输出")
|
logger.debug("使用 Agent astream_events 实现流式输出")
|
||||||
full_content = ''
|
full_content = ''
|
||||||
|
full_reasoning = ''
|
||||||
try:
|
try:
|
||||||
last_event = {}
|
last_event = {}
|
||||||
async for event in self.agent.astream_events(
|
async for event in self.agent.astream_events(
|
||||||
@@ -467,6 +525,13 @@ class LangChainAgent:
|
|||||||
# LLM 流式输出
|
# LLM 流式输出
|
||||||
chunk = event.get("data", {}).get("chunk")
|
chunk = event.get("data", {}).get("chunk")
|
||||||
if chunk and hasattr(chunk, "content"):
|
if chunk and hasattr(chunk, "content"):
|
||||||
|
# 提取深度思考内容(仅在启用深度思考时)
|
||||||
|
if self.deep_thinking:
|
||||||
|
reasoning_chunk = self._extract_reasoning_content(chunk)
|
||||||
|
if reasoning_chunk:
|
||||||
|
full_reasoning += reasoning_chunk
|
||||||
|
yield {"type": "reasoning", "content": reasoning_chunk}
|
||||||
|
|
||||||
# 处理多模态响应:content 可能是字符串或列表
|
# 处理多模态响应:content 可能是字符串或列表
|
||||||
chunk_content = chunk.content
|
chunk_content = chunk.content
|
||||||
if isinstance(chunk_content, str) and chunk_content:
|
if isinstance(chunk_content, str) and chunk_content:
|
||||||
@@ -497,6 +562,13 @@ class LangChainAgent:
|
|||||||
chunk = event.get("data", {}).get("chunk")
|
chunk = event.get("data", {}).get("chunk")
|
||||||
if chunk:
|
if chunk:
|
||||||
if hasattr(chunk, "content"):
|
if hasattr(chunk, "content"):
|
||||||
|
# 提取深度思考内容(仅在启用深度思考时)
|
||||||
|
if self.deep_thinking:
|
||||||
|
reasoning_chunk = self._extract_reasoning_content(chunk)
|
||||||
|
if reasoning_chunk:
|
||||||
|
full_reasoning += reasoning_chunk
|
||||||
|
yield {"type": "reasoning", "content": reasoning_chunk}
|
||||||
|
|
||||||
chunk_content = chunk.content
|
chunk_content = chunk.content
|
||||||
if isinstance(chunk_content, str) and chunk_content:
|
if isinstance(chunk_content, str) and chunk_content:
|
||||||
full_content += chunk_content
|
full_content += chunk_content
|
||||||
@@ -535,12 +607,9 @@ class LangChainAgent:
|
|||||||
output_messages = last_event.get("data", {}).get("output", {}).get("messages", [])
|
output_messages = last_event.get("data", {}).get("output", {}).get("messages", [])
|
||||||
for msg in reversed(output_messages):
|
for msg in reversed(output_messages):
|
||||||
if isinstance(msg, AIMessage):
|
if isinstance(msg, AIMessage):
|
||||||
response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None
|
stream_total_tokens = self._extract_tokens_from_message(msg)
|
||||||
total_tokens = response_meta.get("token_usage", {}).get(
|
logger.info(f"流式 token 统计: total_tokens={stream_total_tokens}")
|
||||||
"total_tokens",
|
yield stream_total_tokens
|
||||||
0
|
|
||||||
) if response_meta else 0
|
|
||||||
yield total_tokens
|
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -758,8 +758,7 @@ async def run_hybrid_search(
|
|||||||
model_name=embedder_config_dict["model_name"],
|
model_name=embedder_config_dict["model_name"],
|
||||||
provider=embedder_config_dict["provider"],
|
provider=embedder_config_dict["provider"],
|
||||||
api_key=embedder_config_dict["api_key"],
|
api_key=embedder_config_dict["api_key"],
|
||||||
base_url=embedder_config_dict["base_url"],
|
base_url=embedder_config_dict["base_url"]
|
||||||
type="llm"
|
|
||||||
)
|
)
|
||||||
config_load_time = time.time() - config_load_start
|
config_load_time = time.time() - config_load_start
|
||||||
logger.info(f"[PERF] Config loading took {config_load_time:.4f}s")
|
logger.info(f"[PERF] Config loading took {config_load_time:.4f}s")
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
|
|||||||
from app.core.error_codes import BizCode
|
from app.core.error_codes import BizCode
|
||||||
from app.core.exceptions import BusinessException
|
from app.core.exceptions import BusinessException
|
||||||
from app.models.models_model import ModelProvider, ModelType
|
from app.models.models_model import ModelProvider, ModelType
|
||||||
|
from app.core.models.volcano_chat import VolcanoChatOpenAI
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
@@ -25,6 +26,9 @@ class RedBearModelConfig(BaseModel):
|
|||||||
api_key: str
|
api_key: str
|
||||||
base_url: Optional[str] = None
|
base_url: Optional[str] = None
|
||||||
is_omni: bool = False # 是否为 Omni 模型
|
is_omni: bool = False # 是否为 Omni 模型
|
||||||
|
deep_thinking: bool = False # 是否启用深度思考模式
|
||||||
|
thinking_budget_tokens: Optional[int] = None # 深度思考 token 预算
|
||||||
|
support_thinking: bool = False # 模型是否支持 enable_thinking 参数(capability 含 thinking)
|
||||||
# 请求超时时间(秒)- 默认120秒以支持复杂的LLM调用,可通过环境变量 LLM_TIMEOUT 配置
|
# 请求超时时间(秒)- 默认120秒以支持复杂的LLM调用,可通过环境变量 LLM_TIMEOUT 配置
|
||||||
timeout: float = Field(default_factory=lambda: float(os.getenv("LLM_TIMEOUT", "120.0")))
|
timeout: float = Field(default_factory=lambda: float(os.getenv("LLM_TIMEOUT", "120.0")))
|
||||||
# 最大重试次数 - 默认2次以避免过长等待,可通过环境变量 LLM_MAX_RETRIES 配置
|
# 最大重试次数 - 默认2次以避免过长等待,可通过环境变量 LLM_MAX_RETRIES 配置
|
||||||
@@ -44,7 +48,7 @@ class RedBearModelFactory:
|
|||||||
# 打印供应商信息用于调试
|
# 打印供应商信息用于调试
|
||||||
from app.core.logging_config import get_business_logger
|
from app.core.logging_config import get_business_logger
|
||||||
logger = get_business_logger()
|
logger = get_business_logger()
|
||||||
logger.debug(f"获取模型参数 - Provider: {provider}, Model: {config.model_name}, is_omni: {config.is_omni}")
|
logger.debug(f"获取模型参数 - Provider: {provider}, Model: {config.model_name}, is_omni: {config.is_omni}, deep_thinking: {config.deep_thinking}")
|
||||||
|
|
||||||
# dashscope 的 omni 模型使用 OpenAI 兼容模式
|
# dashscope 的 omni 模型使用 OpenAI 兼容模式
|
||||||
if provider == ModelProvider.DASHSCOPE and config.is_omni:
|
if provider == ModelProvider.DASHSCOPE and config.is_omni:
|
||||||
@@ -58,7 +62,7 @@ class RedBearModelFactory:
|
|||||||
write=60.0,
|
write=60.0,
|
||||||
pool=10.0,
|
pool=10.0,
|
||||||
)
|
)
|
||||||
return {
|
params: Dict[str, Any] = {
|
||||||
"model": config.model_name,
|
"model": config.model_name,
|
||||||
"base_url": config.base_url,
|
"base_url": config.base_url,
|
||||||
"api_key": config.api_key,
|
"api_key": config.api_key,
|
||||||
@@ -66,6 +70,21 @@ class RedBearModelFactory:
|
|||||||
"max_retries": config.max_retries,
|
"max_retries": config.max_retries,
|
||||||
**config.extra_params
|
**config.extra_params
|
||||||
}
|
}
|
||||||
|
# 流式模式下启用 stream_usage 以获取 token 统计
|
||||||
|
is_streaming = bool(config.extra_params.get("streaming"))
|
||||||
|
if is_streaming:
|
||||||
|
params["stream_usage"] = True
|
||||||
|
# 只有支持 thinking 的模型才传 enable_thinking
|
||||||
|
if config.support_thinking:
|
||||||
|
model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
|
||||||
|
if is_streaming:
|
||||||
|
model_kwargs["enable_thinking"] = config.deep_thinking
|
||||||
|
if config.deep_thinking and config.thinking_budget_tokens:
|
||||||
|
model_kwargs["thinking_budget"] = config.thinking_budget_tokens
|
||||||
|
else:
|
||||||
|
model_kwargs["enable_thinking"] = False
|
||||||
|
params["model_kwargs"] = model_kwargs
|
||||||
|
return params
|
||||||
|
|
||||||
if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.OLLAMA, ModelProvider.VOLCANO]:
|
if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.OLLAMA, ModelProvider.VOLCANO]:
|
||||||
# 使用 httpx.Timeout 对象来设置详细的超时配置
|
# 使用 httpx.Timeout 对象来设置详细的超时配置
|
||||||
@@ -78,7 +97,7 @@ class RedBearModelFactory:
|
|||||||
write=60.0, # 写入超时:60秒
|
write=60.0, # 写入超时:60秒
|
||||||
pool=10.0, # 连接池超时:10秒
|
pool=10.0, # 连接池超时:10秒
|
||||||
)
|
)
|
||||||
return {
|
params: Dict[str, Any] = {
|
||||||
"model": config.model_name,
|
"model": config.model_name,
|
||||||
"base_url": config.base_url,
|
"base_url": config.base_url,
|
||||||
"api_key": config.api_key,
|
"api_key": config.api_key,
|
||||||
@@ -86,16 +105,47 @@ class RedBearModelFactory:
|
|||||||
"max_retries": config.max_retries,
|
"max_retries": config.max_retries,
|
||||||
**config.extra_params
|
**config.extra_params
|
||||||
}
|
}
|
||||||
|
# 流式模式下启用 stream_usage 以获取 token 统计
|
||||||
|
if config.extra_params.get("streaming"):
|
||||||
|
params["stream_usage"] = True
|
||||||
|
# 深度思考模式
|
||||||
|
is_streaming = bool(config.extra_params.get("streaming"))
|
||||||
|
if is_streaming:
|
||||||
|
if provider == ModelProvider.VOLCANO:
|
||||||
|
# 火山引擎深度思考仅流式调用支持,非流式时不传 thinking 参数
|
||||||
|
thinking_config: Dict[str, Any] = {
|
||||||
|
"type": "enabled" if config.deep_thinking else "disabled"
|
||||||
|
}
|
||||||
|
if config.deep_thinking and config.thinking_budget_tokens:
|
||||||
|
thinking_config["budget_tokens"] = config.thinking_budget_tokens
|
||||||
|
params["extra_body"] = {"thinking": thinking_config}
|
||||||
|
else:
|
||||||
|
# 始终显式传递 enable_thinking,不支持该参数的模型(如 DeepSeek-R1)会直接忽略
|
||||||
|
model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
|
||||||
|
model_kwargs["enable_thinking"] = config.deep_thinking
|
||||||
|
if config.deep_thinking and config.thinking_budget_tokens:
|
||||||
|
model_kwargs["thinking_budget"] = config.thinking_budget_tokens
|
||||||
|
params["model_kwargs"] = model_kwargs
|
||||||
|
return params
|
||||||
elif provider == ModelProvider.DASHSCOPE:
|
elif provider == ModelProvider.DASHSCOPE:
|
||||||
# DashScope (通义千问) 使用自己的参数格式
|
params = {
|
||||||
# 注意: DashScopeEmbeddings 不支持 timeout 和 base_url 参数
|
|
||||||
# 只支持: model, dashscope_api_key, max_retries, client
|
|
||||||
return {
|
|
||||||
"model": config.model_name,
|
"model": config.model_name,
|
||||||
"dashscope_api_key": config.api_key,
|
"dashscope_api_key": config.api_key,
|
||||||
"max_retries": config.max_retries,
|
"max_retries": config.max_retries,
|
||||||
**config.extra_params
|
**config.extra_params
|
||||||
}
|
}
|
||||||
|
# 只有支持 thinking 的模型才传 enable_thinking
|
||||||
|
if config.support_thinking:
|
||||||
|
is_streaming = bool(config.extra_params.get("streaming"))
|
||||||
|
model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
|
||||||
|
if is_streaming:
|
||||||
|
model_kwargs["enable_thinking"] = config.deep_thinking
|
||||||
|
if config.deep_thinking and config.thinking_budget_tokens:
|
||||||
|
model_kwargs["thinking_budget"] = config.thinking_budget_tokens
|
||||||
|
else:
|
||||||
|
model_kwargs["enable_thinking"] = False
|
||||||
|
params["model_kwargs"] = model_kwargs
|
||||||
|
return params
|
||||||
elif provider == ModelProvider.BEDROCK:
|
elif provider == ModelProvider.BEDROCK:
|
||||||
# Bedrock 使用 AWS 凭证
|
# Bedrock 使用 AWS 凭证
|
||||||
# api_key 格式: "access_key_id:secret_access_key" 或只是 access_key_id
|
# api_key 格式: "access_key_id:secret_access_key" 或只是 access_key_id
|
||||||
@@ -134,6 +184,13 @@ class RedBearModelFactory:
|
|||||||
elif "region_name" not in params:
|
elif "region_name" not in params:
|
||||||
params["region_name"] = "us-east-1" # 默认区域
|
params["region_name"] = "us-east-1" # 默认区域
|
||||||
|
|
||||||
|
# 深度思考模式:Claude 3.7 Sonnet 等支持思考的模型
|
||||||
|
# 通过 additional_model_request_fields 传递 thinking 块,关闭时不传(Bedrock 无 disabled 选项)
|
||||||
|
if config.deep_thinking:
|
||||||
|
budget = config.thinking_budget_tokens or 10000
|
||||||
|
params["additional_model_request_fields"] = {
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": budget}
|
||||||
|
}
|
||||||
return params
|
return params
|
||||||
else:
|
else:
|
||||||
raise BusinessException(f"不支持的提供商: {provider}", code=BizCode.PROVIDER_NOT_SUPPORTED)
|
raise BusinessException(f"不支持的提供商: {provider}", code=BizCode.PROVIDER_NOT_SUPPORTED)
|
||||||
@@ -160,7 +217,9 @@ def get_provider_llm_class(config: RedBearModelConfig, type: ModelType = ModelTy
|
|||||||
# dashscope 的 omni 模型使用 OpenAI 兼容模式
|
# dashscope 的 omni 模型使用 OpenAI 兼容模式
|
||||||
if provider == ModelProvider.DASHSCOPE and config.is_omni:
|
if provider == ModelProvider.DASHSCOPE and config.is_omni:
|
||||||
return ChatOpenAI
|
return ChatOpenAI
|
||||||
if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.VOLCANO]:
|
if provider == ModelProvider.VOLCANO:
|
||||||
|
return VolcanoChatOpenAI
|
||||||
|
if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK]:
|
||||||
if type == ModelType.LLM:
|
if type == ModelType.LLM:
|
||||||
return OpenAI
|
return OpenAI
|
||||||
elif type == ModelType.CHAT:
|
elif type == ModelType.CHAT:
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: amazon nova
|
- name: amazon nova
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -27,6 +28,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- vision
|
- vision
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: anthropic claude
|
- name: anthropic claude
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -35,6 +37,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -44,6 +47,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- document
|
- document
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: cohere
|
- name: cohere
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -58,6 +62,7 @@ models:
|
|||||||
- tool-call
|
- tool-call
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: deepseek
|
- name: deepseek
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -66,6 +71,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -74,6 +80,7 @@ models:
|
|||||||
- tool-call
|
- tool-call
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: meta
|
- name: meta
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -87,6 +94,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- tool-call
|
- tool-call
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: mistral
|
- name: mistral
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -100,6 +108,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- tool-call
|
- tool-call
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: openai
|
- name: openai
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -114,6 +123,7 @@ models:
|
|||||||
- tool-call
|
- tool-call
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: qwen
|
- name: qwen
|
||||||
type: llm
|
type: llm
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -128,6 +138,7 @@ models:
|
|||||||
- tool-call
|
- tool-call
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: amazon.rerank-v1:0
|
- name: amazon.rerank-v1:0
|
||||||
type: rerank
|
type: rerank
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -139,6 +150,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 重排序模型
|
- 重排序模型
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: cohere.rerank-v3-5:0
|
- name: cohere.rerank-v3-5:0
|
||||||
type: rerank
|
type: rerank
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -150,6 +162,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 重排序模型
|
- 重排序模型
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: amazon.nova-2-multimodal-embeddings-v1:0
|
- name: amazon.nova-2-multimodal-embeddings-v1:0
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -163,6 +176,7 @@ models:
|
|||||||
- 文本嵌入模型
|
- 文本嵌入模型
|
||||||
- vision
|
- vision
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: amazon.titan-embed-text-v1
|
- name: amazon.titan-embed-text-v1
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -174,6 +188,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 文本嵌入模型
|
- 文本嵌入模型
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: amazon.titan-embed-text-v2:0
|
- name: amazon.titan-embed-text-v2:0
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -185,6 +200,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 文本嵌入模型
|
- 文本嵌入模型
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: cohere.embed-english-v3
|
- name: cohere.embed-english-v3
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
@@ -196,6 +212,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 文本嵌入模型
|
- 文本嵌入模型
|
||||||
logo: bedrock
|
logo: bedrock
|
||||||
|
|
||||||
- name: cohere.embed-multilingual-v3
|
- name: cohere.embed-multilingual-v3
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: bedrock
|
provider: bedrock
|
||||||
|
|||||||
@@ -6,36 +6,42 @@ models:
|
|||||||
description: DeepSeek-R1-Distill-Qwen-14B大语言模型,支持智能体思考,32000上下文窗口,对话模式
|
description: DeepSeek-R1-Distill-Qwen-14B大语言模型,支持智能体思考,32000上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: deepseek-r1-distill-qwen-32b
|
- name: deepseek-r1-distill-qwen-32b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: DeepSeek-R1-Distill-Qwen-32B大语言模型,支持智能体思考,32000上下文窗口,对话模式
|
description: DeepSeek-R1-Distill-Qwen-32B大语言模型,支持智能体思考,32000上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: deepseek-r1
|
- name: deepseek-r1
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: DeepSeek-R1大语言模型,支持智能体思考,131072超大上下文窗口,对话模式
|
description: DeepSeek-R1大语言模型,支持智能体思考,131072超大上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: deepseek-v3.1
|
- name: deepseek-v3.1
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -48,6 +54,7 @@ models:
|
|||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: deepseek-v3.2-exp
|
- name: deepseek-v3.2-exp
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -60,6 +67,7 @@ models:
|
|||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: deepseek-v3.2
|
- name: deepseek-v3.2
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -72,6 +80,7 @@ models:
|
|||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: deepseek-v3
|
- name: deepseek-v3
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -84,6 +93,7 @@ models:
|
|||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: farui-plus
|
- name: farui-plus
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -98,6 +108,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: glm-4.7
|
- name: glm-4.7
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -112,6 +123,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qvq-max-latest
|
- name: qvq-max-latest
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -119,7 +131,8 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -127,6 +140,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qvq-max
|
- name: qvq-max
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -134,7 +148,8 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -142,6 +157,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-coder-turbo-0919
|
- name: qwen-coder-turbo-0919
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -155,13 +171,15 @@ models:
|
|||||||
- 代码模型
|
- 代码模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-max-latest
|
- name: qwen-max-latest
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen-max-latest大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式,支持联网搜索
|
description: qwen-max-latest大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式,支持联网搜索
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -169,6 +187,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-max-longcontext
|
- name: qwen-max-longcontext
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -183,13 +202,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-max
|
- name: qwen-max
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen-max大语言模型,支持多工具调用、智能体思考、流式工具调用,32768上下文窗口,对话模式,支持联网搜索
|
description: qwen-max大语言模型,支持多工具调用、智能体思考、流式工具调用,32768上下文窗口,对话模式,支持联网搜索
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -197,6 +218,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-mt-plus
|
- name: qwen-mt-plus
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -210,6 +232,7 @@ models:
|
|||||||
- 翻译模型
|
- 翻译模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-mt-turbo
|
- name: qwen-mt-turbo
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -223,6 +246,7 @@ models:
|
|||||||
- 翻译模型
|
- 翻译模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-0112
|
- name: qwen-plus-0112
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -237,6 +261,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-0125
|
- name: qwen-plus-0125
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -251,6 +276,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-0723
|
- name: qwen-plus-0723
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -265,6 +291,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-0806
|
- name: qwen-plus-0806
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -279,6 +306,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-0919
|
- name: qwen-plus-0919
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -293,6 +321,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-1125
|
- name: qwen-plus-1125
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -307,6 +336,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-1127
|
- name: qwen-plus-1127
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -321,6 +351,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-plus-1220
|
- name: qwen-plus-1220
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -335,6 +366,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-vl-max
|
- name: qwen-vl-max
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -342,8 +374,8 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -352,6 +384,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-vl-plus-0809
|
- name: qwen-vl-plus-0809
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -359,8 +392,8 @@ models:
|
|||||||
is_deprecated: true
|
is_deprecated: true
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -369,6 +402,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-vl-plus-2025-01-02
|
- name: qwen-vl-plus-2025-01-02
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -376,8 +410,8 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -386,6 +420,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-vl-plus-2025-01-25
|
- name: qwen-vl-plus-2025-01-25
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -393,8 +428,8 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -403,6 +438,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-vl-plus-latest
|
- name: qwen-vl-plus-latest
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -410,8 +446,8 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -420,6 +456,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen-vl-plus
|
- name: qwen-vl-plus
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -427,8 +464,8 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -437,6 +474,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen2.5-0.5b-instruct
|
- name: qwen2.5-0.5b-instruct
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -451,13 +489,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-14b
|
- name: qwen3-14b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-14b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-14b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -465,13 +505,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-235b-a22b-instruct-2507
|
- name: qwen3-235b-a22b-instruct-2507
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-235b-a22b-instruct-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-235b-a22b-instruct-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -479,13 +521,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-235b-a22b-thinking-2507
|
- name: qwen3-235b-a22b-thinking-2507
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-235b-a22b-thinking-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-235b-a22b-thinking-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -493,13 +537,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-235b-a22b
|
- name: qwen3-235b-a22b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-235b-a22b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-235b-a22b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -507,13 +553,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-30b-a3b-instruct-2507
|
- name: qwen3-30b-a3b-instruct-2507
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-30b-a3b-instruct-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-30b-a3b-instruct-2507大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -521,13 +569,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-30b-a3b
|
- name: qwen3-30b-a3b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-30b-a3b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-30b-a3b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -535,13 +585,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-32b
|
- name: qwen3-32b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-32b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-32b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -549,13 +601,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-4b
|
- name: qwen3-4b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-4b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-4b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -563,13 +617,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-8b
|
- name: qwen3-8b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-8b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-8b大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -577,65 +633,75 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-coder-30b-a3b-instruct
|
- name: qwen3-coder-30b-a3b-instruct
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-coder-30b-a3b-instruct大语言模型,支持智能体思考,262144上下文窗口,对话模式
|
description: qwen3-coder-30b-a3b-instruct大语言模型,支持智能体思考,262144上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- 代码模型
|
- 代码模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-coder-480b-a35b-instruct
|
- name: qwen3-coder-480b-a35b-instruct
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-coder-480b-a35b-instruct大语言模型,支持智能体思考,262144上下文窗口,对话模式
|
description: qwen3-coder-480b-a35b-instruct大语言模型,支持智能体思考,262144上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- 代码模型
|
- 代码模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-coder-plus-2025-09-23
|
- name: qwen3-coder-plus-2025-09-23
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-coder-plus-2025-09-23大语言模型,支持智能体思考,1000000上下文窗口,对话模式
|
description: qwen3-coder-plus-2025-09-23大语言模型,支持智能体思考,1000000上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- 代码模型
|
- 代码模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-coder-plus
|
- name: qwen3-coder-plus
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-coder-plus大语言模型,支持智能体思考,1000000上下文窗口,对话模式
|
description: qwen3-coder-plus大语言模型,支持智能体思考,1000000上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- 代码模型
|
- 代码模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-max-2025-09-23
|
- name: qwen3-max-2025-09-23
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-max-2025-09-23大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索
|
description: qwen3-max-2025-09-23大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -644,13 +710,15 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- 联网搜索
|
- 联网搜索
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-max-2026-01-23
|
- name: qwen3-max-2026-01-23
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-max-2026-01-23大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索
|
description: qwen3-max-2026-01-23大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -659,13 +727,15 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- 联网搜索
|
- 联网搜索
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-max-preview
|
- name: qwen3-max-preview
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-max-preview大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式
|
description: qwen3-max-preview大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -673,13 +743,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-max
|
- name: qwen3-max
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-max大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索
|
description: qwen3-max大语言模型,支持多工具调用、智能体思考、流式工具调用,262144上下文窗口,对话模式,支持联网搜索
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -688,13 +760,15 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- 联网搜索
|
- 联网搜索
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-next-80b-a3b-instruct
|
- name: qwen3-next-80b-a3b-instruct
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-next-80b-a3b-instruct大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-next-80b-a3b-instruct大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -702,13 +776,15 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-next-80b-a3b-thinking
|
- name: qwen3-next-80b-a3b-thinking
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwen3-next-80b-a3b-thinking大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwen3-next-80b-a3b-thinking大语言模型,支持多工具调用、智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -716,6 +792,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-omni-flash-2025-12-01
|
- name: qwen3-omni-flash-2025-12-01
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -723,9 +800,10 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
- audio
|
- audio
|
||||||
|
- thinking
|
||||||
is_omni: true
|
is_omni: true
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -735,6 +813,7 @@ models:
|
|||||||
- video
|
- video
|
||||||
- audio
|
- audio
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-vl-235b-a22b-instruct
|
- name: qwen3-vl-235b-a22b-instruct
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -742,8 +821,9 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -754,6 +834,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-vl-235b-a22b-thinking
|
- name: qwen3-vl-235b-a22b-thinking
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -761,8 +842,9 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -773,6 +855,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-vl-30b-a3b-instruct
|
- name: qwen3-vl-30b-a3b-instruct
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -780,8 +863,9 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -792,6 +876,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-vl-30b-a3b-thinking
|
- name: qwen3-vl-30b-a3b-thinking
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -799,8 +884,9 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -811,6 +897,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-vl-flash
|
- name: qwen3-vl-flash
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -818,8 +905,9 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -830,6 +918,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-vl-plus-2025-09-23
|
- name: qwen3-vl-plus-2025-09-23
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -837,8 +926,9 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -847,6 +937,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwen3-vl-plus
|
- name: qwen3-vl-plus
|
||||||
type: chat
|
type: chat
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -854,8 +945,9 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -864,45 +956,52 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- video
|
- video
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwq-32b
|
- name: qwq-32b
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwq-32b大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwq-32b大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwq-plus-0305
|
- name: qwq-plus-0305
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwq-plus-0305大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwq-plus-0305大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: qwq-plus
|
- name: qwq-plus
|
||||||
type: llm
|
type: llm
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
description: qwq-plus大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式
|
description: qwq-plus大语言模型,支持智能体思考、流式工具调用,131072上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: gte-rerank-v2
|
- name: gte-rerank-v2
|
||||||
type: rerank
|
type: rerank
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -914,6 +1013,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 重排序模型
|
- 重排序模型
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: gte-rerank
|
- name: gte-rerank
|
||||||
type: rerank
|
type: rerank
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -925,6 +1025,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 重排序模型
|
- 重排序模型
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: multimodal-embedding-v1
|
- name: multimodal-embedding-v1
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -932,13 +1033,14 @@ models:
|
|||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 嵌入模型
|
- 嵌入模型
|
||||||
- 多模态模型
|
- 多模态模型
|
||||||
- vision
|
- vision
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: text-embedding-v1
|
- name: text-embedding-v1
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -951,6 +1053,7 @@ models:
|
|||||||
- 嵌入模型
|
- 嵌入模型
|
||||||
- 文本嵌入
|
- 文本嵌入
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: text-embedding-v2
|
- name: text-embedding-v2
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -963,6 +1066,7 @@ models:
|
|||||||
- 嵌入模型
|
- 嵌入模型
|
||||||
- 文本嵌入
|
- 文本嵌入
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: text-embedding-v3
|
- name: text-embedding-v3
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
@@ -975,6 +1079,7 @@ models:
|
|||||||
- 嵌入模型
|
- 嵌入模型
|
||||||
- 文本嵌入
|
- 文本嵌入
|
||||||
logo: dashscope
|
logo: dashscope
|
||||||
|
|
||||||
- name: text-embedding-v4
|
- name: text-embedding-v4
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: dashscope
|
provider: dashscope
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ models:
|
|||||||
- audio
|
- audio
|
||||||
- video
|
- video
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-3.5-turbo-0125
|
- name: gpt-3.5-turbo-0125
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -34,6 +35,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-3.5-turbo-1106
|
- name: gpt-3.5-turbo-1106
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -48,6 +50,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-3.5-turbo-16k
|
- name: gpt-3.5-turbo-16k
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -62,6 +65,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-3.5-turbo-instruct
|
- name: gpt-3.5-turbo-instruct
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -73,6 +77,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-3.5-turbo
|
- name: gpt-3.5-turbo
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -87,6 +92,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-4-0125-preview
|
- name: gpt-4-0125-preview
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -101,6 +107,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-4-1106-preview
|
- name: gpt-4-1106-preview
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -115,6 +122,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-4-turbo-2024-04-09
|
- name: gpt-4-turbo-2024-04-09
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -131,6 +139,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- vision
|
- vision
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-4-turbo-preview
|
- name: gpt-4-turbo-preview
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -145,6 +154,7 @@ models:
|
|||||||
- agent-thought
|
- agent-thought
|
||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: gpt-4-turbo
|
- name: gpt-4-turbo
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -161,6 +171,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- vision
|
- vision
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o1-preview
|
- name: o1-preview
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -173,6 +184,7 @@ models:
|
|||||||
- 大语言模型
|
- 大语言模型
|
||||||
- agent-thought
|
- agent-thought
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o1
|
- name: o1
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -181,6 +193,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -190,6 +203,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o3-2025-04-16
|
- name: o3-2025-04-16
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -198,6 +212,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -207,13 +222,15 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o3-mini-2025-01-31
|
- name: o3-mini-2025-01-31
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
description: o3-mini-2025-01-31大语言模型,支持智能体思考、工具调用、流式工具调用、结构化输出,200000上下文窗口,对话模式
|
description: o3-mini-2025-01-31大语言模型,支持智能体思考、工具调用、流式工具调用、结构化输出,200000上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -222,13 +239,15 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o3-mini
|
- name: o3-mini
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
description: o3-mini大语言模型,支持智能体思考、工具调用、流式工具调用、结构化输出,200000上下文窗口,对话模式
|
description: o3-mini大语言模型,支持智能体思考、工具调用、流式工具调用、结构化输出,200000上下文窗口,对话模式
|
||||||
is_deprecated: false
|
is_deprecated: false
|
||||||
is_official: true
|
is_official: true
|
||||||
capability: []
|
capability:
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -237,6 +256,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o3-pro-2025-06-10
|
- name: o3-pro-2025-06-10
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -245,6 +265,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -253,6 +274,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o3-pro
|
- name: o3-pro
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -261,6 +283,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -269,6 +292,7 @@ models:
|
|||||||
- vision
|
- vision
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o3
|
- name: o3
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -277,6 +301,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -286,6 +311,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o4-mini-2025-04-16
|
- name: o4-mini-2025-04-16
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -294,6 +320,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -303,6 +330,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: o4-mini
|
- name: o4-mini
|
||||||
type: llm
|
type: llm
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -311,6 +339,7 @@ models:
|
|||||||
is_official: true
|
is_official: true
|
||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -320,6 +349,7 @@ models:
|
|||||||
- stream-tool-call
|
- stream-tool-call
|
||||||
- structured-output
|
- structured-output
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: text-embedding-3-large
|
- name: text-embedding-3-large
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -331,6 +361,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 文本向量模型
|
- 文本向量模型
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: text-embedding-3-small
|
- name: text-embedding-3-small
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: openai
|
provider: openai
|
||||||
@@ -342,6 +373,7 @@ models:
|
|||||||
tags:
|
tags:
|
||||||
- 文本向量模型
|
- 文本向量模型
|
||||||
logo: openai
|
logo: openai
|
||||||
|
|
||||||
- name: text-embedding-ada-002
|
- name: text-embedding-ada-002
|
||||||
type: embedding
|
type: embedding
|
||||||
provider: openai
|
provider: openai
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -24,6 +25,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -38,6 +40,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -52,6 +55,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -82,6 +86,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -96,6 +101,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -110,6 +116,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -124,6 +131,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
@@ -139,6 +147,7 @@ models:
|
|||||||
capability:
|
capability:
|
||||||
- vision
|
- vision
|
||||||
- video
|
- video
|
||||||
|
- thinking
|
||||||
is_omni: false
|
is_omni: false
|
||||||
tags:
|
tags:
|
||||||
- 大语言模型
|
- 大语言模型
|
||||||
|
|||||||
38
api/app/core/models/volcano_chat.py
Normal file
38
api/app/core/models/volcano_chat.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""
|
||||||
|
火山引擎 ChatOpenAI 扩展
|
||||||
|
|
||||||
|
ChatOpenAI 在解析流式 SSE 时只取 delta.content,会丢弃 delta.reasoning_content。
|
||||||
|
此类仅重写 _convert_chunk_to_generation_chunk,将 reasoning_content 补入 additional_kwargs。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from langchain_core.outputs import ChatGenerationChunk
|
||||||
|
from langchain_openai import ChatOpenAI
|
||||||
|
|
||||||
|
|
||||||
|
class VolcanoChatOpenAI(ChatOpenAI):
    """Volcano Engine chat model that passes streamed reasoning text through.

    Only ``_convert_chunk_to_generation_chunk`` is overridden: after the
    parent class has converted a raw streaming chunk, the delta's
    ``reasoning_content`` (when present and non-empty) is copied into the
    resulting message's ``additional_kwargs``.
    """

    def _convert_chunk_to_generation_chunk(
        self,
        chunk: dict,
        default_chunk_class: type,
        base_generation_info: Optional[dict],
    ) -> Optional[ChatGenerationChunk]:
        """Convert a raw stream chunk and attach its ``reasoning_content``.

        Args:
            chunk: Raw chunk dict received from the streaming response.
            default_chunk_class: Forwarded unchanged to the parent method.
            base_generation_info: Forwarded unchanged to the parent method.

        Returns:
            The generation chunk produced by the parent class, with
            ``additional_kwargs["reasoning_content"]`` set when the first
            choice's delta carries a truthy ``reasoning_content``; ``None``
            when the parent class returns ``None``.
        """
        converted = super()._convert_chunk_to_generation_chunk(
            chunk, default_chunk_class, base_generation_info
        )
        if converted is None:
            return None

        # Choices live either at the top level or nested under "chunk".
        choice_list = chunk.get("choices")
        if not choice_list:
            choice_list = chunk.get("chunk", {}).get("choices", [])
        if not choice_list:
            return converted

        # A missing or null delta is treated as an empty mapping.
        first_delta = choice_list[0].get("delta") or {}
        thought: Any = first_delta.get("reasoning_content")
        if thought:
            converted.message.additional_kwargs["reasoning_content"] = thought
        return converted
|
||||||
@@ -99,7 +99,7 @@ class SimpleMCPClient:
|
|||||||
# 建立 SSE 连接
|
# 建立 SSE 连接
|
||||||
response = await self._session.get(self.server_url)
|
response = await self._session.get(self.server_url)
|
||||||
|
|
||||||
if response.status not in (200, 202):
|
if not (200 <= response.status < 300):
|
||||||
error_text = await response.text()
|
error_text = await response.text()
|
||||||
raise MCPConnectionError(f"SSE 连接失败 {response.status}: {error_text}")
|
raise MCPConnectionError(f"SSE 连接失败 {response.status}: {error_text}")
|
||||||
|
|
||||||
@@ -190,9 +190,7 @@ class SimpleMCPClient:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
async with self._session.post(self._endpoint_url, json=request) as response:
|
async with self._session.post(self._endpoint_url, json=request) as response:
|
||||||
# MCP SSE 协议:POST 请求返回 200 或 202 均为正常
|
if not (200 <= response.status < 300):
|
||||||
# 202 Accepted 表示请求已接受,结果通过 SSE 流异步返回
|
|
||||||
if response.status not in (200, 202):
|
|
||||||
error_text = await response.text()
|
error_text = await response.text()
|
||||||
raise MCPConnectionError(f"请求失败 {response.status}: {error_text}")
|
raise MCPConnectionError(f"请求失败 {response.status}: {error_text}")
|
||||||
|
|
||||||
@@ -207,7 +205,7 @@ class SimpleMCPClient:
|
|||||||
raise MCPConnectionError("endpoint URL 未初始化")
|
raise MCPConnectionError("endpoint URL 未初始化")
|
||||||
|
|
||||||
async with self._session.post(self._endpoint_url, json=notification) as response:
|
async with self._session.post(self._endpoint_url, json=notification) as response:
|
||||||
if response.status not in (200, 202):
|
if not (200 <= response.status < 300):
|
||||||
logger.warning(f"通知发送失败: {response.status}")
|
logger.warning(f"通知发送失败: {response.status}")
|
||||||
|
|
||||||
async def _initialize_modelscope_session(self):
|
async def _initialize_modelscope_session(self):
|
||||||
@@ -225,7 +223,7 @@ class SimpleMCPClient:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
async with self._session.post(self.server_url, json=init_request) as response:
|
async with self._session.post(self.server_url, json=init_request) as response:
|
||||||
if response.status != 200:
|
if not (200 <= response.status < 300):
|
||||||
error_text = await response.text()
|
error_text = await response.text()
|
||||||
raise MCPConnectionError(f"初始化失败 {response.status}: {error_text}")
|
raise MCPConnectionError(f"初始化失败 {response.status}: {error_text}")
|
||||||
|
|
||||||
|
|||||||
@@ -135,7 +135,8 @@ class LLMNode(BaseNode):
|
|||||||
api_key=model_info.api_key,
|
api_key=model_info.api_key,
|
||||||
base_url=model_info.api_base,
|
base_url=model_info.api_base,
|
||||||
extra_params=extra_params,
|
extra_params=extra_params,
|
||||||
is_omni=model_info.is_omni
|
is_omni=model_info.is_omni,
|
||||||
|
support_thinking="thinking" in (model_info.capability or []),
|
||||||
),
|
),
|
||||||
type=model_info.model_type
|
type=model_info.model_type
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -109,6 +109,7 @@ class ParameterExtractorNode(BaseNode):
|
|||||||
api_key = api_config.api_key
|
api_key = api_config.api_key
|
||||||
api_base = api_config.api_base
|
api_base = api_config.api_base
|
||||||
is_omni = api_config.is_omni
|
is_omni = api_config.is_omni
|
||||||
|
capability = api_config.capability
|
||||||
model_type = config.type
|
model_type = config.type
|
||||||
|
|
||||||
llm = RedBearLLM(
|
llm = RedBearLLM(
|
||||||
@@ -117,7 +118,8 @@ class ParameterExtractorNode(BaseNode):
|
|||||||
provider=provider,
|
provider=provider,
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=api_base,
|
base_url=api_base,
|
||||||
is_omni=is_omni
|
is_omni=is_omni,
|
||||||
|
support_thinking="thinking" in (capability or []),
|
||||||
),
|
),
|
||||||
type=ModelType(model_type)
|
type=ModelType(model_type)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ class QuestionClassifierNode(BaseNode):
|
|||||||
api_key = api_config.api_key
|
api_key = api_config.api_key
|
||||||
base_url = api_config.api_base
|
base_url = api_config.api_base
|
||||||
is_omni = api_config.is_omni
|
is_omni = api_config.is_omni
|
||||||
|
capability = api_config.capability
|
||||||
model_type = config.type
|
model_type = config.type
|
||||||
|
|
||||||
return RedBearLLM(
|
return RedBearLLM(
|
||||||
@@ -70,7 +71,8 @@ class QuestionClassifierNode(BaseNode):
|
|||||||
provider=provider,
|
provider=provider,
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=base_url,
|
base_url=base_url,
|
||||||
is_omni=is_omni
|
is_omni=is_omni,
|
||||||
|
support_thinking="thinking" in (capability or []),
|
||||||
),
|
),
|
||||||
type=ModelType(model_type)
|
type=ModelType(model_type)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ class ModelConfig(BaseModel):
|
|||||||
|
|
||||||
# 模型配置参数
|
# 模型配置参数
|
||||||
capability = Column(ARRAY(String), default=list, nullable=False, server_default=text("'{}'::varchar[]"),
|
capability = Column(ARRAY(String), default=list, nullable=False, server_default=text("'{}'::varchar[]"),
|
||||||
comment="模型能力列表(如['vision', 'audio', 'video'])")
|
comment="模型能力列表(如['vision', 'audio', 'video', 'thinking'])")
|
||||||
is_omni = Column(Boolean, default=False, nullable=False, server_default="false", comment="是否为Omni模型(使用特殊API调用)")
|
is_omni = Column(Boolean, default=False, nullable=False, server_default="false", comment="是否为Omni模型(使用特殊API调用)")
|
||||||
config = Column(JSON, comment="模型配置参数")
|
config = Column(JSON, comment="模型配置参数")
|
||||||
# - temperature : 控制生成文本的随机性。值越高,输出越随机、越有创造性;值越低,输出越确定、越保守。
|
# - temperature : 控制生成文本的随机性。值越高,输出越随机、越有创造性;值越低,输出越确定、越保守。
|
||||||
|
|||||||
@@ -241,6 +241,8 @@ class ModelParameters(BaseModel):
|
|||||||
presence_penalty: float = Field(default=0.0, ge=-2.0, le=2.0, description="存在惩罚")
|
presence_penalty: float = Field(default=0.0, ge=-2.0, le=2.0, description="存在惩罚")
|
||||||
n: int = Field(default=1, ge=1, le=10, description="生成的回复数量")
|
n: int = Field(default=1, ge=1, le=10, description="生成的回复数量")
|
||||||
stop: Optional[List[str]] = Field(default=None, description="停止序列")
|
stop: Optional[List[str]] = Field(default=None, description="停止序列")
|
||||||
|
deep_thinking: bool = Field(default=False, description="是否启用深度思考模式(需模型支持,如 DeepSeek-R1、QwQ 等)")
|
||||||
|
thinking_budget_tokens: Optional[int] = Field(default=None, ge=1024, le=131072, description="深度思考 token 预算(仅部分模型支持)")
|
||||||
|
|
||||||
|
|
||||||
class VariableDefinition(BaseModel):
|
class VariableDefinition(BaseModel):
|
||||||
@@ -612,6 +614,7 @@ class AppChatRequest(BaseModel):
|
|||||||
user_id: Optional[str] = Field(default=None, description="用户ID(用于会话管理)")
|
user_id: Optional[str] = Field(default=None, description="用户ID(用于会话管理)")
|
||||||
variables: Optional[Dict[str, Any]] = Field(default=None, description="自定义变量参数值")
|
variables: Optional[Dict[str, Any]] = Field(default=None, description="自定义变量参数值")
|
||||||
stream: bool = Field(default=False, description="是否流式返回")
|
stream: bool = Field(default=False, description="是否流式返回")
|
||||||
|
thinking: bool = Field(default=False, description="是否启用深度思考(需Agent配置支持)")
|
||||||
files: List[FileInput] = Field(default_factory=list, description="附件列表(支持多文件)")
|
files: List[FileInput] = Field(default_factory=list, description="附件列表(支持多文件)")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ class ChatRequest(BaseModel):
|
|||||||
stream: bool = Field(default=False, description="是否流式返回")
|
stream: bool = Field(default=False, description="是否流式返回")
|
||||||
web_search: bool = Field(default=False, description="是否启用网络搜索")
|
web_search: bool = Field(default=False, description="是否启用网络搜索")
|
||||||
memory: bool = Field(default=True, description="是否启用记忆功能")
|
memory: bool = Field(default=True, description="是否启用记忆功能")
|
||||||
|
thinking: bool = Field(default=False, description="是否启用深度思考(需Agent配置支持)")
|
||||||
files: Optional[List[FileInput]] = Field(default=None, description="附件列表(支持多文件)")
|
files: Optional[List[FileInput]] = Field(default=None, description="附件列表(支持多文件)")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -117,7 +117,9 @@ class AppChatService:
|
|||||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
|
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||||
|
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||||
|
capability=api_key_obj.capability or [],
|
||||||
)
|
)
|
||||||
|
|
||||||
model_info = ModelInfo(
|
model_info = ModelInfo(
|
||||||
@@ -205,7 +207,8 @@ class AppChatService:
|
|||||||
"model": api_key_obj.model_name,
|
"model": api_key_obj.model_name,
|
||||||
"usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
|
"usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
|
||||||
"audio_url": None,
|
"audio_url": None,
|
||||||
"citations": filtered_citations
|
"citations": filtered_citations,
|
||||||
|
"reasoning_content": result.get("reasoning_content")
|
||||||
}
|
}
|
||||||
if files:
|
if files:
|
||||||
for f in files:
|
for f in files:
|
||||||
@@ -258,6 +261,7 @@ class AppChatService:
|
|||||||
"conversation_id": conversation_id,
|
"conversation_id": conversation_id,
|
||||||
"message_id": str(message_id),
|
"message_id": str(message_id),
|
||||||
"message": result["content"],
|
"message": result["content"],
|
||||||
|
"reasoning_content": result.get("reasoning_content"),
|
||||||
"usage": result.get("usage", {
|
"usage": result.get("usage", {
|
||||||
"prompt_tokens": 0,
|
"prompt_tokens": 0,
|
||||||
"completion_tokens": 0,
|
"completion_tokens": 0,
|
||||||
@@ -354,7 +358,10 @@ class AppChatService:
|
|||||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
streaming=True
|
streaming=True,
|
||||||
|
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||||
|
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||||
|
capability=api_key_obj.capability or [],
|
||||||
)
|
)
|
||||||
|
|
||||||
model_info = ModelInfo(
|
model_info = ModelInfo(
|
||||||
@@ -403,6 +410,7 @@ class AppChatService:
|
|||||||
|
|
||||||
# 流式调用 Agent(支持多模态),同时并行启动 TTS
|
# 流式调用 Agent(支持多模态),同时并行启动 TTS
|
||||||
full_content = ""
|
full_content = ""
|
||||||
|
full_reasoning = ""
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
|
|
||||||
text_queue: asyncio.Queue = asyncio.Queue()
|
text_queue: asyncio.Queue = asyncio.Queue()
|
||||||
@@ -426,6 +434,9 @@ class AppChatService:
|
|||||||
):
|
):
|
||||||
if isinstance(chunk, int):
|
if isinstance(chunk, int):
|
||||||
total_tokens = chunk
|
total_tokens = chunk
|
||||||
|
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
|
||||||
|
full_reasoning += chunk['content']
|
||||||
|
yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
|
||||||
else:
|
else:
|
||||||
full_content += chunk
|
full_content += chunk
|
||||||
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
|
yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
|
||||||
@@ -472,7 +483,8 @@ class AppChatService:
|
|||||||
"model": api_key_obj.model_name,
|
"model": api_key_obj.model_name,
|
||||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
|
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
|
||||||
"audio_url": None,
|
"audio_url": None,
|
||||||
"citations": filtered_citations
|
"citations": filtered_citations,
|
||||||
|
"reasoning_content": full_reasoning or None
|
||||||
}
|
}
|
||||||
|
|
||||||
if files:
|
if files:
|
||||||
@@ -652,13 +664,13 @@ class AppChatService:
|
|||||||
storage_type=storage_type,
|
storage_type=storage_type,
|
||||||
user_rag_memory_id=user_rag_memory_id
|
user_rag_memory_id=user_rag_memory_id
|
||||||
):
|
):
|
||||||
if "sub_usage" in event:
|
# 拦截 sub_usage 事件,累加 token
|
||||||
|
if "event: sub_usage" in event:
|
||||||
if "data:" in event:
|
if "data:" in event:
|
||||||
try:
|
try:
|
||||||
data_line = event.split("data: ", 1)[1].strip()
|
data_line = event.split("data: ", 1)[1].strip()
|
||||||
data = json.loads(data_line)
|
data = json.loads(data_line)
|
||||||
if "total_tokens" in data:
|
total_tokens += data.get("total_tokens", 0)
|
||||||
total_tokens += data["total_tokens"]
|
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -534,6 +534,7 @@ class ConversationService:
|
|||||||
api_key = api_config.api_key
|
api_key = api_config.api_key
|
||||||
api_base = api_config.api_base
|
api_base = api_config.api_base
|
||||||
is_omni = api_config.is_omni
|
is_omni = api_config.is_omni
|
||||||
|
capability = api_config.capability
|
||||||
model_type = config.type
|
model_type = config.type
|
||||||
|
|
||||||
llm = RedBearLLM(
|
llm = RedBearLLM(
|
||||||
@@ -542,7 +543,8 @@ class ConversationService:
|
|||||||
provider=provider,
|
provider=provider,
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=api_base,
|
base_url=api_base,
|
||||||
is_omni=is_omni
|
is_omni=is_omni,
|
||||||
|
support_thinking="thinking" in (capability or []),
|
||||||
),
|
),
|
||||||
type=ModelType(model_type)
|
type=ModelType(model_type)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -595,6 +595,9 @@ class AgentRunService:
|
|||||||
max_tokens=effective_params.get("max_tokens", 2000),
|
max_tokens=effective_params.get("max_tokens", 2000),
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
|
deep_thinking=effective_params.get("deep_thinking", False),
|
||||||
|
thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
|
||||||
|
capability=api_key_config.get("capability", []),
|
||||||
)
|
)
|
||||||
|
|
||||||
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
||||||
@@ -689,7 +692,8 @@ class AgentRunService:
|
|||||||
"prompt_tokens": 0,
|
"prompt_tokens": 0,
|
||||||
"completion_tokens": 0,
|
"completion_tokens": 0,
|
||||||
"total_tokens": 0
|
"total_tokens": 0
|
||||||
})
|
}),
|
||||||
|
"reasoning_content": result.get("reasoning_content")
|
||||||
},
|
},
|
||||||
files=files,
|
files=files,
|
||||||
processed_files=processed_files,
|
processed_files=processed_files,
|
||||||
@@ -701,6 +705,7 @@ class AgentRunService:
|
|||||||
|
|
||||||
response = {
|
response = {
|
||||||
"message": result["content"],
|
"message": result["content"],
|
||||||
|
"reasoning_content": result.get("reasoning_content"),
|
||||||
"conversation_id": conversation_id,
|
"conversation_id": conversation_id,
|
||||||
"usage": result.get("usage", {
|
"usage": result.get("usage", {
|
||||||
"prompt_tokens": 0,
|
"prompt_tokens": 0,
|
||||||
@@ -838,7 +843,10 @@ class AgentRunService:
|
|||||||
max_tokens=effective_params.get("max_tokens", 2000),
|
max_tokens=effective_params.get("max_tokens", 2000),
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
streaming=True
|
streaming=True,
|
||||||
|
deep_thinking=effective_params.get("deep_thinking", False),
|
||||||
|
thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
|
||||||
|
capability=api_key_config.get("capability", []),
|
||||||
)
|
)
|
||||||
|
|
||||||
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
||||||
@@ -898,6 +906,7 @@ class AgentRunService:
|
|||||||
|
|
||||||
# 9. 流式调用 Agent(支持多模态),同时并行启动 TTS
|
# 9. 流式调用 Agent(支持多模态),同时并行启动 TTS
|
||||||
full_content = ""
|
full_content = ""
|
||||||
|
full_reasoning = ""
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
|
|
||||||
# 启动流式 TTS(文本边输出边合成)
|
# 启动流式 TTS(文本边输出边合成)
|
||||||
@@ -916,6 +925,9 @@ class AgentRunService:
|
|||||||
):
|
):
|
||||||
if isinstance(chunk, int):
|
if isinstance(chunk, int):
|
||||||
total_tokens = chunk
|
total_tokens = chunk
|
||||||
|
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
|
||||||
|
full_reasoning += chunk["content"]
|
||||||
|
yield self._format_sse_event("reasoning", {"content": chunk["content"]})
|
||||||
else:
|
else:
|
||||||
full_content += chunk
|
full_content += chunk
|
||||||
yield self._format_sse_event("message", {"content": chunk})
|
yield self._format_sse_event("message", {"content": chunk})
|
||||||
@@ -944,7 +956,8 @@ class AgentRunService:
|
|||||||
app_id=agent_config.app_id,
|
app_id=agent_config.app_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
meta_data={
|
meta_data={
|
||||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
|
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
|
||||||
|
"reasoning_content": full_reasoning or None
|
||||||
},
|
},
|
||||||
files=files,
|
files=files,
|
||||||
processed_files=processed_files,
|
processed_files=processed_files,
|
||||||
@@ -1665,7 +1678,7 @@ class AgentRunService:
|
|||||||
"""从 text_queue 取文本按句子切分后喂给 synthesizer"""
|
"""从 text_queue 取文本按句子切分后喂给 synthesizer"""
|
||||||
import re
|
import re
|
||||||
buf = ""
|
buf = ""
|
||||||
sentence_end = re.compile(r'[\u3002\uff01\uff1f\.!?\n]')
|
sentence_end = re.compile(r'[\u3002\uff01\uff1f.!?\n]')
|
||||||
while True:
|
while True:
|
||||||
chunk = await text_queue.get()
|
chunk = await text_queue.get()
|
||||||
if chunk is None:
|
if chunk is None:
|
||||||
@@ -1894,6 +1907,7 @@ class AgentRunService:
|
|||||||
"conversation_id": result['conversation_id'],
|
"conversation_id": result['conversation_id'],
|
||||||
"parameters_used": model_info["parameters"],
|
"parameters_used": model_info["parameters"],
|
||||||
"message": result.get("message"),
|
"message": result.get("message"),
|
||||||
|
"reasoning_content": result.get("reasoning_content"),
|
||||||
"usage": usage,
|
"usage": usage,
|
||||||
"elapsed_time": elapsed,
|
"elapsed_time": elapsed,
|
||||||
"tokens_per_second": (
|
"tokens_per_second": (
|
||||||
@@ -2012,7 +2026,7 @@ class AgentRunService:
|
|||||||
# 需要从 ModelApiKey 获取实际的模型名称,或者在 ModelConfig 中添加 model 字段
|
# 需要从 ModelApiKey 获取实际的模型名称,或者在 ModelConfig 中添加 model 字段
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> AgentConfig:
|
def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> tuple[AgentConfig, Any]:
|
||||||
"""创建一个带有覆盖参数的 agent_config(浅拷贝,只修改 model_parameters)
|
"""创建一个带有覆盖参数的 agent_config(浅拷贝,只修改 model_parameters)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -2110,6 +2124,7 @@ class AgentRunService:
|
|||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
full_content = ""
|
full_content = ""
|
||||||
|
full_reasoning = ""
|
||||||
returned_conversation_id = model_conversation_id
|
returned_conversation_id = model_conversation_id
|
||||||
audio_url = None
|
audio_url = None
|
||||||
audio_status = None
|
audio_status = None
|
||||||
@@ -2168,6 +2183,18 @@ class AgentRunService:
|
|||||||
"content": chunk
|
"content": chunk
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
# 转发深度思考事件(带模型标识)
|
||||||
|
if event_type == "reasoning" and event_data:
|
||||||
|
reasoning_chunk = event_data.get("content", "")
|
||||||
|
full_reasoning += reasoning_chunk
|
||||||
|
await event_queue.put(self._format_sse_event("model_reasoning", {
|
||||||
|
"model_index": idx,
|
||||||
|
"model_config_id": model_config_id,
|
||||||
|
"label": model_label,
|
||||||
|
"conversation_id": returned_conversation_id,
|
||||||
|
"content": event_data.get("content", "")
|
||||||
|
}))
|
||||||
|
|
||||||
# 从 end 事件中提取 features 输出字段
|
# 从 end 事件中提取 features 输出字段
|
||||||
if event_type == "end" and event_data:
|
if event_type == "end" and event_data:
|
||||||
audio_url = event_data.get("audio_url")
|
audio_url = event_data.get("audio_url")
|
||||||
@@ -2199,6 +2226,7 @@ class AgentRunService:
|
|||||||
"conversation_id": returned_conversation_id,
|
"conversation_id": returned_conversation_id,
|
||||||
"parameters_used": model_info["parameters"],
|
"parameters_used": model_info["parameters"],
|
||||||
"message": full_content,
|
"message": full_content,
|
||||||
|
"reasoning_content": full_reasoning or None,
|
||||||
"elapsed_time": elapsed,
|
"elapsed_time": elapsed,
|
||||||
"audio_url": audio_url,
|
"audio_url": audio_url,
|
||||||
"audio_status": audio_status,
|
"audio_status": audio_status,
|
||||||
@@ -2351,6 +2379,7 @@ class AgentRunService:
|
|||||||
"label": r["label"],
|
"label": r["label"],
|
||||||
"conversation_id": r.get("conversation_id"),
|
"conversation_id": r.get("conversation_id"),
|
||||||
"message": r.get("message"),
|
"message": r.get("message"),
|
||||||
|
"reasoning_content": r.get("reasoning_content"),
|
||||||
"elapsed_time": r.get("elapsed_time", 0),
|
"elapsed_time": r.get("elapsed_time", 0),
|
||||||
"audio_url": r.get("audio_url"),
|
"audio_url": r.get("audio_url"),
|
||||||
"audio_status": r.get("audio_status"),
|
"audio_status": r.get("audio_status"),
|
||||||
|
|||||||
@@ -415,6 +415,7 @@ class LLMRouter:
|
|||||||
api_key=api_key_config.api_key,
|
api_key=api_key_config.api_key,
|
||||||
base_url=api_key_config.api_base,
|
base_url=api_key_config.api_base,
|
||||||
is_omni=api_key_config.is_omni,
|
is_omni=api_key_config.is_omni,
|
||||||
|
support_thinking="thinking" in (api_key_config.capability or []),
|
||||||
temperature=0.3,
|
temperature=0.3,
|
||||||
max_tokens=500
|
max_tokens=500
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -393,6 +393,7 @@ class MasterAgentRouter:
|
|||||||
api_key=api_key_config.api_key,
|
api_key=api_key_config.api_key,
|
||||||
base_url=api_key_config.api_base,
|
base_url=api_key_config.api_base,
|
||||||
is_omni=api_key_config.is_omni,
|
is_omni=api_key_config.is_omni,
|
||||||
|
support_thinking="thinking" in (api_key_config.capability or []),
|
||||||
extra_params = extra_params
|
extra_params = extra_params
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -403,6 +404,17 @@ class MasterAgentRouter:
|
|||||||
response = await llm.ainvoke(prompt)
|
response = await llm.ainvoke(prompt)
|
||||||
ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)
|
ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)
|
||||||
|
|
||||||
|
# 提取 token 消耗
|
||||||
|
self._last_routing_tokens = 0
|
||||||
|
if hasattr(response, 'usage_metadata') and response.usage_metadata:
|
||||||
|
um = response.usage_metadata
|
||||||
|
self._last_routing_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0)
|
||||||
|
elif hasattr(response, 'response_metadata') and response.response_metadata:
|
||||||
|
token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {})
|
||||||
|
if isinstance(token_usage, dict):
|
||||||
|
self._last_routing_tokens = token_usage.get("total_tokens", 0)
|
||||||
|
logger.info(f"Master Agent 路由 token 消耗: {self._last_routing_tokens}")
|
||||||
|
|
||||||
# 提取响应内容
|
# 提取响应内容
|
||||||
if hasattr(response, 'content'):
|
if hasattr(response, 'content'):
|
||||||
return response.content
|
return response.content
|
||||||
|
|||||||
@@ -232,7 +232,8 @@ class MemoryPerceptualService:
|
|||||||
provider=model_config.provider,
|
provider=model_config.provider,
|
||||||
api_key=model_config.api_key,
|
api_key=model_config.api_key,
|
||||||
base_url=model_config.api_base,
|
base_url=model_config.api_base,
|
||||||
is_omni=model_config.is_omni
|
is_omni=model_config.is_omni,
|
||||||
|
support_thinking="thinking" in (model_config.capability or []),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return llm, model_config
|
return llm, model_config
|
||||||
|
|||||||
@@ -45,12 +45,20 @@ class ModelParameterMerger:
|
|||||||
"frequency_penalty": 0.0,
|
"frequency_penalty": 0.0,
|
||||||
"presence_penalty": 0.0,
|
"presence_penalty": 0.0,
|
||||||
"n": 1,
|
"n": 1,
|
||||||
"stop": None
|
"stop": None,
|
||||||
|
"deep_thinking": False,
|
||||||
|
"thinking_budget_tokens": None
|
||||||
}
|
}
|
||||||
|
|
||||||
# 合并参数:默认值 -> 模型配置 -> Agent 配置
|
# 合并参数:默认值 -> 模型配置 -> Agent 配置
|
||||||
merged = default_params.copy()
|
merged = default_params.copy()
|
||||||
|
|
||||||
|
# Pydantic 对象转为 dict
|
||||||
|
if model_config_params and hasattr(model_config_params, 'model_dump'):
|
||||||
|
model_config_params = model_config_params.model_dump()
|
||||||
|
if agent_config_params and hasattr(agent_config_params, 'model_dump'):
|
||||||
|
agent_config_params = agent_config_params.model_dump()
|
||||||
|
|
||||||
# 应用模型配置参数
|
# 应用模型配置参数
|
||||||
if model_config_params:
|
if model_config_params:
|
||||||
for key in default_params:
|
for key in default_params:
|
||||||
|
|||||||
@@ -85,15 +85,16 @@ class ModelConfigService:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def validate_model_config(
|
async def validate_model_config(
|
||||||
db: Session,
|
db: Session,
|
||||||
*,
|
*,
|
||||||
model_name: str,
|
model_name: str,
|
||||||
provider: str,
|
provider: str,
|
||||||
api_key: str,
|
api_key: str,
|
||||||
api_base: Optional[str] = None,
|
api_base: Optional[str] = None,
|
||||||
model_type: str = "llm",
|
model_type: str = "llm",
|
||||||
test_message: str = "Hello",
|
test_message: str = "Hello",
|
||||||
is_omni: bool = False
|
is_omni: bool = False,
|
||||||
|
capability: Optional[list] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""验证模型配置是否有效
|
"""验证模型配置是否有效
|
||||||
|
|
||||||
@@ -124,6 +125,7 @@ class ModelConfigService:
|
|||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=api_base,
|
base_url=api_base,
|
||||||
is_omni=is_omni,
|
is_omni=is_omni,
|
||||||
|
support_thinking="thinking" in (capability or []),
|
||||||
temperature=0.7,
|
temperature=0.7,
|
||||||
max_tokens=100
|
max_tokens=100
|
||||||
)
|
)
|
||||||
@@ -320,7 +322,8 @@ class ModelConfigService:
|
|||||||
api_base=api_key_data.api_base,
|
api_base=api_key_data.api_base,
|
||||||
model_type=model_data.type,
|
model_type=model_data.type,
|
||||||
test_message="Hello",
|
test_message="Hello",
|
||||||
is_omni=model_data.is_omni
|
is_omni=model_data.is_omni,
|
||||||
|
capability=model_data.capability
|
||||||
)
|
)
|
||||||
if not validation_result["valid"]:
|
if not validation_result["valid"]:
|
||||||
raise BusinessException(
|
raise BusinessException(
|
||||||
@@ -590,7 +593,8 @@ class ModelApiKeyService:
|
|||||||
api_base=data.api_base,
|
api_base=data.api_base,
|
||||||
model_type=model_config.type,
|
model_type=model_config.type,
|
||||||
test_message="Hello",
|
test_message="Hello",
|
||||||
is_omni=data.is_omni
|
is_omni=data.is_omni,
|
||||||
|
capability=model_config.capability
|
||||||
)
|
)
|
||||||
if not validation_result["valid"]:
|
if not validation_result["valid"]:
|
||||||
# 记录验证失败的模型,但不抛出异常
|
# 记录验证失败的模型,但不抛出异常
|
||||||
@@ -675,7 +679,8 @@ class ModelApiKeyService:
|
|||||||
api_base=api_key_data.api_base,
|
api_base=api_key_data.api_base,
|
||||||
model_type=model_config.type,
|
model_type=model_config.type,
|
||||||
test_message="Hello",
|
test_message="Hello",
|
||||||
is_omni=api_key_data.is_omni
|
is_omni=api_key_data.is_omni,
|
||||||
|
capability=model_config.capability
|
||||||
)
|
)
|
||||||
if not validation_result["valid"]:
|
if not validation_result["valid"]:
|
||||||
raise BusinessException(
|
raise BusinessException(
|
||||||
@@ -707,7 +712,8 @@ class ModelApiKeyService:
|
|||||||
api_base=api_key_data.api_base or existing_api_key.api_base,
|
api_base=api_key_data.api_base or existing_api_key.api_base,
|
||||||
model_type=model_config.type,
|
model_type=model_config.type,
|
||||||
test_message="Hello",
|
test_message="Hello",
|
||||||
is_omni=model_config.is_omni
|
is_omni=model_config.is_omni,
|
||||||
|
capability=model_config.capability
|
||||||
)
|
)
|
||||||
if not validation_result["valid"]:
|
if not validation_result["valid"]:
|
||||||
raise BusinessException(
|
raise BusinessException(
|
||||||
|
|||||||
@@ -287,6 +287,11 @@ class MultiAgentOrchestrator:
|
|||||||
sub_conversation_id = None
|
sub_conversation_id = None
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
|
|
||||||
|
# 累加 Master Agent 路由决策消耗的 token
|
||||||
|
total_tokens += task_analysis.get("routing_tokens", 0)
|
||||||
|
# 累加 Master Agent 整合消耗的 token
|
||||||
|
total_tokens += getattr(self, '_last_merge_tokens', 0)
|
||||||
|
|
||||||
if isinstance(results, dict):
|
if isinstance(results, dict):
|
||||||
sub_conversation_id = results.get("conversation_id") or results.get("result", {}).get("conversation_id")
|
sub_conversation_id = results.get("conversation_id") or results.get("result", {}).get("conversation_id")
|
||||||
# 提取 token 信息
|
# 提取 token 信息
|
||||||
@@ -358,12 +363,16 @@ class MultiAgentOrchestrator:
|
|||||||
variables=variables
|
variables=variables
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 获取路由决策消耗的 token
|
||||||
|
routing_tokens = getattr(self.router, '_last_routing_tokens', 0)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Master Agent 分析完成",
|
"Master Agent 分析完成",
|
||||||
extra={
|
extra={
|
||||||
"selected_agent": routing_decision.get("selected_agent_id"),
|
"selected_agent": routing_decision.get("selected_agent_id"),
|
||||||
"confidence": routing_decision.get("confidence"),
|
"confidence": routing_decision.get("confidence"),
|
||||||
"strategy": routing_decision.get("strategy")
|
"strategy": routing_decision.get("strategy"),
|
||||||
|
"routing_tokens": routing_tokens
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -372,7 +381,8 @@ class MultiAgentOrchestrator:
|
|||||||
"variables": variables or {},
|
"variables": variables or {},
|
||||||
"sub_agents": self.config.sub_agents,
|
"sub_agents": self.config.sub_agents,
|
||||||
"initial_context": variables or {},
|
"initial_context": variables or {},
|
||||||
"routing_decision": routing_decision
|
"routing_decision": routing_decision,
|
||||||
|
"routing_tokens": routing_tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _execute_sequential(
|
async def _execute_sequential(
|
||||||
@@ -1032,6 +1042,11 @@ class MultiAgentOrchestrator:
|
|||||||
|
|
||||||
# 5. 流式执行子 Agent
|
# 5. 流式执行子 Agent
|
||||||
sub_conversation_id = None
|
sub_conversation_id = None
|
||||||
|
# Master Agent 路由决策消耗的 token,通过 sub_usage 事件发送给上层
|
||||||
|
routing_tokens = task_analysis.get("routing_tokens", 0)
|
||||||
|
if routing_tokens > 0:
|
||||||
|
yield self._format_sse_event("sub_usage", {"total_tokens": routing_tokens})
|
||||||
|
|
||||||
async for event in self._execute_sub_agent_stream(
|
async for event in self._execute_sub_agent_stream(
|
||||||
agent_data["config"],
|
agent_data["config"],
|
||||||
message,
|
message,
|
||||||
@@ -1054,6 +1069,7 @@ class MultiAgentOrchestrator:
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# 直接透传所有事件(包括 sub_usage),累加统一由上层处理
|
||||||
yield event
|
yield event
|
||||||
|
|
||||||
# 6. 如果有会话 ID,发送一个包含它的事件
|
# 6. 如果有会话 ID,发送一个包含它的事件
|
||||||
@@ -2600,6 +2616,7 @@ class MultiAgentOrchestrator:
|
|||||||
api_key=api_key_config.api_key,
|
api_key=api_key_config.api_key,
|
||||||
base_url=api_key_config.api_base,
|
base_url=api_key_config.api_base,
|
||||||
is_omni=api_key_config.is_omni,
|
is_omni=api_key_config.is_omni,
|
||||||
|
support_thinking="thinking" in (api_key_config.capability or []),
|
||||||
temperature=0.7, # 整合任务使用中等温度
|
temperature=0.7, # 整合任务使用中等温度
|
||||||
max_tokens=2000
|
max_tokens=2000
|
||||||
)
|
)
|
||||||
@@ -2612,6 +2629,17 @@ class MultiAgentOrchestrator:
|
|||||||
|
|
||||||
ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)
|
ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)
|
||||||
|
|
||||||
|
# 提取整合消耗的 token
|
||||||
|
merge_tokens = 0
|
||||||
|
if hasattr(response, 'usage_metadata') and response.usage_metadata:
|
||||||
|
um = response.usage_metadata
|
||||||
|
merge_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0)
|
||||||
|
elif hasattr(response, 'response_metadata') and response.response_metadata:
|
||||||
|
token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {})
|
||||||
|
if isinstance(token_usage, dict):
|
||||||
|
merge_tokens = token_usage.get("total_tokens", 0)
|
||||||
|
self._last_merge_tokens = merge_tokens
|
||||||
|
|
||||||
# 提取响应内容
|
# 提取响应内容
|
||||||
if hasattr(response, 'content'):
|
if hasattr(response, 'content'):
|
||||||
merged_response = response.content
|
merged_response = response.content
|
||||||
@@ -2621,7 +2649,8 @@ class MultiAgentOrchestrator:
|
|||||||
logger.info(
|
logger.info(
|
||||||
"Master Agent 整合完成",
|
"Master Agent 整合完成",
|
||||||
extra={
|
extra={
|
||||||
"merged_length": len(merged_response)
|
"merged_length": len(merged_response),
|
||||||
|
"merge_tokens": merge_tokens
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -2766,6 +2795,7 @@ class MultiAgentOrchestrator:
|
|||||||
api_key=api_key_config.api_key,
|
api_key=api_key_config.api_key,
|
||||||
base_url=api_key_config.api_base,
|
base_url=api_key_config.api_base,
|
||||||
is_omni=api_key_config.is_omni,
|
is_omni=api_key_config.is_omni,
|
||||||
|
support_thinking="thinking" in (api_key_config.capability or []),
|
||||||
temperature=0.7,
|
temperature=0.7,
|
||||||
max_tokens=2000,
|
max_tokens=2000,
|
||||||
extra_params={"streaming": True} # 启用流式输出
|
extra_params={"streaming": True} # 启用流式输出
|
||||||
|
|||||||
@@ -185,7 +185,8 @@ class PromptOptimizerService:
|
|||||||
provider=api_config.provider,
|
provider=api_config.provider,
|
||||||
api_key=api_config.api_key,
|
api_key=api_config.api_key,
|
||||||
base_url=api_config.api_base,
|
base_url=api_config.api_base,
|
||||||
is_omni=api_config.is_omni
|
is_omni=api_config.is_omni,
|
||||||
|
support_thinking="thinking" in (api_config.capability or []),
|
||||||
), type=ModelType(model_config.type))
|
), type=ModelType(model_config.type))
|
||||||
try:
|
try:
|
||||||
prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt')
|
prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt')
|
||||||
|
|||||||
@@ -248,7 +248,9 @@ class SharedChatService:
|
|||||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
|
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||||
|
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||||
|
capability=api_key_obj.capability or [],
|
||||||
)
|
)
|
||||||
|
|
||||||
# 加载历史消息
|
# 加载历史消息
|
||||||
@@ -450,7 +452,10 @@ class SharedChatService:
|
|||||||
max_tokens=model_parameters.get("max_tokens", 2000),
|
max_tokens=model_parameters.get("max_tokens", 2000),
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
streaming=True
|
streaming=True,
|
||||||
|
deep_thinking=model_parameters.get("deep_thinking", False),
|
||||||
|
thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
|
||||||
|
capability=api_key_obj.capability or [],
|
||||||
)
|
)
|
||||||
|
|
||||||
# 加载历史消息
|
# 加载历史消息
|
||||||
@@ -479,6 +484,8 @@ class SharedChatService:
|
|||||||
):
|
):
|
||||||
if isinstance(chunk, int):
|
if isinstance(chunk, int):
|
||||||
total_tokens = chunk
|
total_tokens = chunk
|
||||||
|
elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
|
||||||
|
yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
|
||||||
else:
|
else:
|
||||||
full_content += chunk
|
full_content += chunk
|
||||||
# 发送消息块事件
|
# 发送消息块事件
|
||||||
|
|||||||
Reference in New Issue
Block a user