From e298b38de95552dfe8175176ff345083b0600521 Mon Sep 17 00:00:00 2001 From: miao <1468212639@qq.com> Date: Tue, 7 Apr 2026 10:35:14 +0800 Subject: [PATCH] feat(tools): add OpenClaw remote agent tool integration - Detect x-openclaw flag in OpenAPI schema and init dedicated config - Implement multimodal input/output (image download, compress, base64) - Add OpenClaw connection test and status validation in tool service - Fix auth_config token check to support both api_key and bearer_token - Inject runtime context (user_id, conversation_id, files) in chat services --- api/app/core/tools/custom/base.py | 349 +++++++++++++++++++++++++- api/app/services/app_chat_service.py | 24 +- api/app/services/draft_run_service.py | 26 +- api/app/services/tool_service.py | 74 ++++++ 4 files changed, 467 insertions(+), 6 deletions(-) diff --git a/api/app/core/tools/custom/base.py b/api/app/core/tools/custom/base.py index 3dfe4c93..3f3daad7 100644 --- a/api/app/core/tools/custom/base.py +++ b/api/app/core/tools/custom/base.py @@ -30,9 +30,60 @@ class CustomTool(BaseTool): self.auth_config = config.get("auth_config", {}) self.base_url = config.get("base_url", "") self.timeout = config.get("timeout", 30) - - # 解析schema - self._parsed_operations = self._parse_openapi_schema() + + #===========OpenClaw特殊判断(取到OpenClaw特殊配置)========== + schema = self.schema_content + if isinstance(schema, str): + try: + schema = json.loads(schema) + self.schema_content = schema + except json.JSONDecodeError: + schema = {} + + info = schema.get("info", {}) if isinstance(schema, dict) else {} + self._is_openclaw = info.get("x-openclaw", False) + + if self._is_openclaw: + # 从扩展字段读取 OpenClaw 配置 + self._openclaw_agent_id = info.get("x-openclaw-agent-id", "main") + self._openclaw_model = info.get("x-openclaw-default-model", "openclaw") + self._openclaw_session_strategy = info.get( + "x-openclaw-session-strategy", "by_user") + self._openclaw_timeout = info.get("x-openclaw-timeout", 60) + self._openclaw_input_mode = info.get("x-openclaw-input-mode", "text") + self._openclaw_output_mode = info.get("x-openclaw-output-mode", "text") + + # 从 servers 读取 base_url + servers = schema.get("servers", []) + if servers: + self.base_url = servers[0].get("url", "") + + # 从 auth_config 读取 token(兼容 api_key 和 bearer_token 两种认证方式) + self._openclaw_token = ( + self.auth_config.get("api_key") # api_key 认证方式 + or self.auth_config.get("token") # bearer_token 认证方式 + or "" + ) + + # 覆盖 timeout + self.timeout = self._openclaw_timeout + + # 运行时上下文(后续注入) + self._user_id = "anonymous" + self._conversation_id = None + self._uploaded_files = [] # 新增:用户上传的文件 + + # 跳过 Schema 解析 + self._parsed_operations = {} + + logger.info( + f"检测到 OpenClaw 工具: agent_id={self._openclaw_agent_id}, " + f"base_url={self.base_url}, " + f"input_mode={self._openclaw_input_mode}, " + f"output_mode={self._openclaw_output_mode}") + else: + # 解析schema + self._parsed_operations = self._parse_openapi_schema() @property def name(self) -> str: @@ -58,6 +109,31 @@ class CustomTool(BaseTool): @property def parameters(self) -> List[ToolParameter]: """工具参数定义""" + # ========== OpenClaw 特判 根据输入模式解析是否需要image_url ========== + if self._is_openclaw: + params = [ + ToolParameter( + name="message", + type=ParameterType.STRING, + description="发送给 OpenClaw Agent 的文本请求内容", + required=True + ) + ] + # 多模态输入模式下,增加 image_url 参数 + if self._openclaw_input_mode == "multimodal": + params.append(ToolParameter( + name="image_url", + type=ParameterType.STRING, + description=( + "可选,附带的图片URL或base64 data URI" + "(如 data:image/png;base64,...)。" + "传入后 Agent 可以理解图片内容。" + ), + required=False + )) + return params + # ========== 特判结束 ========== + params = [] # 添加操作选择参数 @@ -90,6 +166,10 @@ class CustomTool(BaseTool): async def execute(self, **kwargs) -> ToolResult: """执行自定义工具""" + # ========== OpenClaw 特判 ========== + if self._is_openclaw: + return await self._execute_openclaw(**kwargs) + # ========== 特判结束 ========== start_time = time.time() try: @@ -130,6 +210,269 @@ class CustomTool(BaseTool): execution_time=execution_time ) + #=============openclaw执行函数开始=============== + async def _execute_openclaw(self, **kwargs) -> ToolResult: + """OpenClaw 专属执行逻辑(支持多模态输入)""" + start_time = time.time() + try: + message = kwargs.get("message", "") + # 从用户实际上传的文件中提取图片 URL + image_url = None + if self._uploaded_files: + for f in self._uploaded_files: + if f.get("type") == "image": + source = f.get("source", {}) + if source.get("type") == "base64": + media_type = source.get("media_type", "image/jpeg") + data = source.get("data", "") + image_url = f"data:{media_type};base64,{data}" + elif f.get("image"): + # DashScope 格式:{"type": "image", "image": "url"} + image_url = f.get("image") + elif f.get("url"): + # 其他格式:{"type": "image", "url": "https://..."} + image_url = f.get("url") + break # 只取第一张图片 + + # 如果 image_url 是服务器中转 URL,直接下载图片转 base64 + # 避免 OSS 签名 URL 在重定向解析过程中被破坏 + if image_url and not image_url.startswith("data:"): + try: + import base64 + from io import BytesIO + from PIL import Image + + MAX_RAW_SIZE = 4 * 1024 * 1024 # 超过 4MB 则压缩 + + async with aiohttp.ClientSession() as _session: + async with _session.get(image_url, allow_redirects=True, timeout=aiohttp.ClientTimeout(total=30)) as _resp: + if _resp.status == 200: + content_type = _resp.headers.get("Content-Type", "image/jpeg") + if content_type.startswith("image/"): + img_bytes = await _resp.read() + original_size = len(img_bytes) + logger.info(f"OpenClaw 下载图片: size={original_size} bytes, type={content_type}") + + if original_size > MAX_RAW_SIZE: + img = Image.open(BytesIO(img_bytes)) + if img.mode in ("RGBA", "P", "LA"): + img = img.convert("RGB") + max_side = 2048 + if max(img.size) > max_side: + img.thumbnail((max_side, max_side), Image.LANCZOS) + buf = BytesIO() + img.save(buf, format="JPEG", quality=75, optimize=True) + img_bytes = buf.getvalue() + content_type = "image/jpeg" + logger.info(f"OpenClaw 图片已压缩: {original_size} -> {len(img_bytes)} bytes") + + b64_data = base64.b64encode(img_bytes).decode("utf-8") + image_url = f"data:{content_type};base64,{b64_data}" + logger.info(f"OpenClaw 图片已转为 base64, size={len(img_bytes)} bytes") + else: + logger.warning(f"OpenClaw 图片 URL 返回非图片类型: {content_type}") + else: + logger.warning(f"OpenClaw 下载图片失败: HTTP {_resp.status}") + except Exception as e: + logger.warning(f"OpenClaw 下载图片失败,使用原始 URL: {e}") + + + if not message: + return ToolResult.error_result( + error="message 参数不能为空", + error_code="OPENCLAW_INVALID_INPUT", + execution_time=time.time() - start_time) + + url = f"{self.base_url.rstrip('/')}/v1/responses" + #请求头 + headers = { + "Authorization": f"Bearer {self._openclaw_token}", + "Content-Type": "application/json", + "x-openclaw-agent-id": self._openclaw_agent_id + } + + # session 路由 + if (self._openclaw_session_strategy == "by_conversation" + and self._conversation_id): + user_field = f"conv-{self._conversation_id}" + else: + user_field = f"user-{self._user_id}" + + # 根据 input_mode 和是否有图片构造 input + input_field = self._build_openclaw_input(message, image_url) + #请求体 + body = { + "model": self._openclaw_model, + "user": user_field, + "input": input_field, + "stream": False + } + + logger.info(f"OpenClaw 请求体: {json.dumps(body, ensure_ascii=False)[:1000]}") + + timeout_config = aiohttp.ClientTimeout(total=self.timeout) + #请求 + async with aiohttp.ClientSession(timeout=timeout_config) as session: + async with session.post(url, json=body, headers=headers) as resp: + execution_time = time.time() - start_time + + if resp.status >= 400: + error_text = await resp.text() + _img_preview2 = (image_url[:100] + "...") if image_url and len(image_url) > 100 else image_url + logger.error( + f"OpenClaw 调用失败: HTTP {resp.status}, " + f"url={url}, agent_id={self._openclaw_agent_id}, " + f"has_image={bool(image_url)}, image_url={_img_preview2}, " + f"input_type={'multimodal' if isinstance(input_field, list) else 'text'}, " + f"error_response={error_text[:1000]}" + ) + return ToolResult.error_result( + error=f"OpenClaw HTTP {resp.status}: {error_text[:500]}", + error_code="OPENCLAW_HTTP_ERROR", + execution_time=execution_time) + + data = await resp.json() + + # 根据 output_mode 解析响应 + result = self._extract_openclaw_response( + data, self._openclaw_output_mode) + display_text = self._format_openclaw_result(result) + + logger.info( + "OpenClaw 调用成功", + extra={ + "tool_id": self.tool_id, + "agent_id": self._openclaw_agent_id, + "has_images": len(result["images"]) > 0, + "execution_time": execution_time + }) + return ToolResult.success_result( + data=display_text, execution_time=execution_time) + + except aiohttp.ClientError as e: + return ToolResult.error_result( + error=f"OpenClaw 网络连接失败: {str(e)}", + error_code="OPENCLAW_NETWORK_ERROR", + execution_time=time.time() - start_time) + except Exception as e: + return ToolResult.error_result( + error=f"OpenClaw 调用失败: {str(e)}", + error_code="OPENCLAW_EXECUTION_ERROR", + execution_time=time.time() - start_time) + + def _build_openclaw_input(self, message: str, image_url: str = None): + """根据 input_mode 和是否有图片构造 OpenClaw input 字段 + + 纯文本模式或无图片 → 返回字符串 + 多模态模式且有图片 → 返回结构化 item 数组 + """ + if not image_url or self._openclaw_input_mode != "multimodal": + return message + + # 构造多模态 content 数组 + content_parts = [ + {"type": "input_text", "text": message} + ] + + if image_url.startswith("data:"): + # base64 data URI: data:image/png;base64,iVBORw0KGgo... + try: + header, data = image_url.split(",", 1) + media_type = header.split(":")[1].split(";")[0] + content_parts.append({ + "type": "input_image", + "source": { + "type": "base64", + "media_type": media_type, + "data": data + } + }) + except (ValueError, IndexError): + logger.warning("无法解析 base64 data URI,回退为纯文本输入") + return message + else: + # URL 引用 + content_parts.append({ + "type": "input_image", + "source": { + "type": "url", + "url": image_url + } + }) + + return [{ + "type": "message", + "role": "user", + "content": content_parts + }] + + @staticmethod + def _extract_openclaw_response(response_data: Dict[str, Any], + output_mode: str = "text") -> Dict[str, Any]: + """从 OpenClaw 响应中提取文本和图片 + + 响应格式: + {"output": [{"type": "message", "content": [ + {"type": "output_text", "text": "..."}, + {"type": "output_image", "image_url": "..."} + ]}]} + + 返回: + {"text": "文本内容", "images": [{"url": "...", "media_type": "image/png"}]} + """ + output = response_data.get("output", []) + texts = [] + images = [] + + for item in output: + if item.get("type") == "message": + for content in item.get("content", []): + content_type = content.get("type") + + if content_type == "output_text": + text = content.get("text", "") + if text: + texts.append(text) + + elif content_type == "output_image" and output_mode == "multimodal": + image_url = content.get("image_url", "") + if image_url: + images.append({ + "url": image_url, + "media_type": content.get("media_type", "image/png") + }) + + text_result = "\n".join(texts) if texts else "" + + # text 模式下只返回文本(向后兼容) + if output_mode == "text": + return {"text": text_result or str(response_data), "images": []} + + return {"text": text_result, "images": images} + + @staticmethod + def _format_openclaw_result(result: Dict[str, Any]) -> str: + """将解析结果格式化为返回给 LLM 的字符串 + + 纯文本 → 直接返回 + 有图片 → 将图片以 Markdown 格式嵌入文本 + """ + text = result.get("text", "") + images = result.get("images", []) + + if not images: + return text or "(OpenClaw 返回了空内容)" + + parts = [] + if text: + parts.append(text) + for i, img in enumerate(images): + parts.append(f"![OpenClaw 生成的图片 {i+1}]({img['url']})") + + return "\n\n".join(parts) + + + #=============openclaw执行函数结束================ def _parse_openapi_schema(self) -> Dict[str, Any]: """解析OpenAPI schema""" operations = {} diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index fb4955b3..34037b12 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -165,7 +165,18 @@ class AppChatService: multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件") - + #============为 OpenClaw 工具注入会话session====== + # 为 OpenClaw 工具注入运行时上下文 + for t in tools: + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, '_is_openclaw'): + if t.tool_instance._is_openclaw: + t.tool_instance._user_id = user_id or "anonymous" + t.tool_instance._conversation_id = ( + str(conversation_id) if conversation_id else None) + # 注入用户上传的文件 + if processed_files: + t.tool_instance._uploaded_files = processed_files + #============为 OpenClaw 工具注入会话session====== # 调用 Agent(支持多模态) result = await agent.chat( message=message, @@ -413,6 +424,17 @@ class AppChatService: processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件") + #============为 OpenClaw 工具注入运行时上下文====== + for t in tools: + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, '_is_openclaw'): + if t.tool_instance._is_openclaw: + t.tool_instance._user_id = user_id or "anonymous" + t.tool_instance._conversation_id = ( + str(conversation_id) if conversation_id else None) + if processed_files: + t.tool_instance._uploaded_files = processed_files + #============为 OpenClaw 工具注入运行时上下文结束====== + # 流式调用 Agent(支持多模态),同时并行启动 TTS full_content = "" full_reasoning = "" diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 978dfdab..62d7ea71 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -640,7 +640,18 @@ class AgentRunService: multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件,provider={provider}") - + #================= 为 OpenClaw 工具注入运行时上下文========== + for t in tools: + logger.info(f"检查工具: {type(t).__name__}, has_tool_instance={hasattr(t, 'tool_instance')}, is_openclaw={getattr(getattr(t, 'tool_instance', None), '_is_openclaw', 'N/A')}") + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, '_is_openclaw'): + if t.tool_instance._is_openclaw: + t.tool_instance._user_id = user_id or "anonymous" + t.tool_instance._conversation_id = ( + str(conversation_id) if conversation_id else None) + if processed_files: + t.tool_instance._uploaded_files = processed_files + logger.info(f"已注入 _uploaded_files, 数量: {len(processed_files)}") + #================= 为 OpenClaw 工具注入运行时上下文结束========== # 7. 知识库检索 context = None @@ -890,7 +901,18 @@ class AgentRunService: multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件,provider={provider}") - + #============为 OpenClaw 工具注入会话session====== + # 为 OpenClaw 工具注入运行时上下文 + for t in tools: + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, '_is_openclaw'): + if t.tool_instance._is_openclaw: + t.tool_instance._user_id = user_id or "anonymous" + t.tool_instance._conversation_id = ( + str(conversation_id) if conversation_id else None) + # 注入用户上传的文件 + if processed_files: + t.tool_instance._uploaded_files = processed_files + #============为 OpenClaw 工具注入会话session====== # 7. 知识库检索 context = None diff --git a/api/app/services/tool_service.py b/api/app/services/tool_service.py index 089f0ec5..0f88a65e 100644 --- a/api/app/services/tool_service.py +++ b/api/app/services/tool_service.py @@ -330,6 +330,20 @@ class ToolService: if config.tool_type == ToolType.MCP.value: return await self._test_mcp_connection(config) elif config.tool_type == ToolType.CUSTOM.value: + # ========== 测试工具连接 OpenClaw 特判 ========== + custom_config = self.custom_repo.find_by_tool_id(self.db, config.id) + if custom_config and custom_config.schema_content: + schema = custom_config.schema_content + if isinstance(schema, str): + try: + schema = json.loads(schema) + except json.JSONDecodeError: + schema = {} + #请求头中包含OpenClaw字段 + if isinstance(schema, dict) and schema.get("info", {}).get("x-openclaw"): + return await self._test_openclaw_connection(custom_config, schema) + # ========== OpenClaw 特判结束 ========== + #正常自定义工具逻辑 return await self._test_custom_connection(config) elif config.tool_type == ToolType.BUILTIN.value: return await self._test_builtin_connection(config) @@ -339,6 +353,45 @@ class ToolService: except Exception as e: return {"success": False, "message": f"测试失败: {str(e)}"} + #=============测试openclaw连接 特判=============== + async def _test_openclaw_connection( + self, custom_config: CustomToolConfig, schema: dict + ) -> Dict[str, Any]: + """测试 OpenClaw 连接""" + try: + info = schema.get("info", {}) + servers = schema.get("servers", []) + base_url = servers[0]["url"] if servers else "" + token = (custom_config.auth_config or {}).get("token", "") + agent_id = info.get("x-openclaw-agent-id", "main") + model = info.get("x-openclaw-default-model", "openclaw") + + url = f"{base_url.rstrip('/')}/v1/responses" + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "x-openclaw-agent-id": agent_id + } + body = { + "model": model, + "user": "connection-test", + "input": "hi", + "stream": False + } + + timeout_config = aiohttp.ClientTimeout(total=30) + async with aiohttp.ClientSession(timeout=timeout_config) as session: + async with session.post(url, json=body, headers=headers) as resp: + if resp.status < 400: + return {"success": True, "message": "OpenClaw 连接成功"} + error_text = await resp.text() + return { + "success": False, + "message": f"OpenClaw HTTP {resp.status}: {error_text[:200]}" + } + except Exception as e: + return {"success": False, "message": f"OpenClaw 连接失败: {str(e)}"} + #=============测试openclaw连接结束=========== def ensure_builtin_tools_initialized(self, tenant_id: uuid.UUID): """确保内置工具已初始化""" existing = self.tool_repo.exists_builtin_for_tenant(self.db, tenant_id) @@ -1139,6 +1192,27 @@ class ToolService: custom_config = self.db.query(CustomToolConfig).filter( CustomToolConfig.id == tool_config.id ).first() + # ========== 更新工具 OpenClaw 特判 ========== + if custom_config and custom_config.schema_content: + schema = custom_config.schema_content + if isinstance(schema, str): + try: + schema = json.loads(schema) + except json.JSONDecodeError: + schema = {} + info = schema.get("info", {}) if isinstance(schema, dict) else {} + if info.get("x-openclaw"): + servers = schema.get("servers", []) + has_url = bool(servers and servers[0].get("url")) + has_agent_id = bool(info.get("x-openclaw-agent-id")) + has_token = bool(custom_config.auth_config + and custom_config.auth_config.get("api_key")) + if has_url and has_agent_id and has_token: + tool_config.status = ToolStatus.AVAILABLE.value + else: + tool_config.status = ToolStatus.UNCONFIGURED.value + return + # ========== OpenClaw 特判结束 ========== if custom_config and tool_config.name and (custom_config.schema_content or custom_config.schema_url): tool_config.status = ToolStatus.AVAILABLE.value