Merge pull request #843 from SuanmoSuanyangTechnology/feature/openclaw_lm

Feature/openclaw lm
This commit is contained in:
Ke Sun
2026-04-10 18:54:09 +08:00
committed by GitHub
10 changed files with 514 additions and 16 deletions

View File

@@ -0,0 +1,300 @@
"""OpenClaw 远程 Agent 内置工具"""
import time
import base64
from io import BytesIO
from typing import List, Dict, Any, Optional
import aiohttp
from app.core.tools.builtin.base import BuiltinTool
from app.schemas.tool_schema import ToolParameter, ToolResult, ParameterType
from app.core.logging_config import get_business_logger
logger = get_business_logger()
class OpenClawTool(BuiltinTool):
"""OpenClaw 远程 Agent 工具 — 支持文本和图片多模态输入"""
def __init__(self, tool_id: str, config: Dict[str, Any]):
    """Initialise the tool from its stored configuration.

    Args:
        tool_id: Identifier forwarded to the base-class constructor.
        config: Configuration mapping forwarded to the base-class constructor.
    """
    super().__init__(tool_id, config)
    user_settings = self.parameters_config

    # Values entered by the user in the configuration form.
    self._server_url = user_settings.get("server_url", "")
    self._api_key = user_settings.get("api_key", "")
    self._agent_id = user_settings.get("agent_id", "main")

    # Internal defaults that are not read from the configuration.
    self._model, self._session_strategy, self._timeout = "openclaw", "by_user", 120

    # Per-request context; overwritten later through set_runtime_context().
    self._user_id, self._conversation_id, self._uploaded_files = "anonymous", None, []
@property
def name(self) -> str:
    """Return the fixed name of this tool, ``openclaw_tool``."""
    tool_name = "openclaw_tool"
    return tool_name
@property
def description(self) -> str:
    """Return the capability summary of the tool (the text itself is Chinese)."""
    fragments = [
        "OpenClaw 远程 Agent将任务委托给远程 OpenClaw Agent。",
        "具备 3D 模型生成与打印控制、设备管理、文件处理、浏览器自动化、",
        "Shell 命令执行、网络搜索等能力。支持文本和图片多模态交互。",
    ]
    return "".join(fragments)
def get_required_config_parameters(self) -> List[str]:
    """Return the names of the configuration keys this tool requires."""
    required_keys = ("server_url", "api_key")
    return list(required_keys)
@property
def parameters(self) -> List[ToolParameter]:
    """Return the call parameters of the tool.

    Three string parameters are declared, in this order: the required
    ``operation`` (restricted to four values), the required ``message``
    and the optional ``image_url``.
    """
    operation_param = ToolParameter(
        name="operation",
        type=ParameterType.STRING,
        description="任务类型",
        required=True,
        enum=["print_task", "device_query", "image_understand", "general"],
    )
    message_param = ToolParameter(
        name="message",
        type=ParameterType.STRING,
        description="发送给 OpenClaw Agent 的文本请求内容",
        required=True,
    )
    image_param = ToolParameter(
        name="image_url",
        type=ParameterType.STRING,
        description="可选,附带的图片 URL 或 base64 data URIOpenClaw 支持图片输入)",
        required=False,
    )
    return [operation_param, message_param, image_param]
# ---------- 运行时上下文注入 ----------
def set_runtime_context(
    self,
    user_id: str = "anonymous",
    conversation_id: Optional[str] = None,
    uploaded_files: Optional[list] = None
):
    """Store per-request context on the tool instance.

    Args:
        user_id: Stored as ``_user_id``.
        conversation_id: Stored as ``_conversation_id``.
        uploaded_files: Stored as ``_uploaded_files``; any falsy value
            (``None`` or an empty list) is replaced by a new empty list.
    """
    files = uploaded_files if uploaded_files else []
    self._user_id, self._conversation_id = user_id, conversation_id
    self._uploaded_files = files
# ---------- 连接测试 ----------
async def test_connection(self) -> Dict[str, Any]:
    """Check that the configured OpenClaw gateway answers a minimal request.

    Returns:
        A dict with ``success`` (bool) and ``message`` (str). Missing
        configuration, an HTTP status of 400 or above, and any exception
        raised while sending the request are all reported as
        ``success=False`` instead of being raised.
    """
    # Refuse early when a mandatory setting is missing.
    if not self._server_url:
        return {"success": False, "message": "未配置 server_url"}
    if not self._api_key:
        return {"success": False, "message": "未配置 api_key"}

    endpoint = f"{self._server_url.rstrip('/')}/v1/responses"
    request_headers = {
        "Authorization": f"Bearer {self._api_key}",
        "Content-Type": "application/json",
        "x-openclaw-agent-id": self._agent_id,
    }
    probe = {
        "model": self._model,
        "user": "connection-test",
        "input": "hi",
        "stream": False,
    }

    try:
        probe_timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=probe_timeout) as http:
            async with http.post(endpoint, json=probe, headers=request_headers) as reply:
                if reply.status >= 400:
                    # Only the first 200 characters of the error body are reported.
                    detail = await reply.text()
                    return {
                        "success": False,
                        "message": f"OpenClaw HTTP {reply.status}: {detail[:200]}",
                    }
                return {"success": True, "message": "OpenClaw 连接成功"}
    except Exception as exc:
        return {"success": False, "message": f"OpenClaw 连接失败: {str(exc)}"}
# ---------- 执行 ----------
async def execute(self, **kwargs) -> ToolResult:
    """Send one request to the OpenClaw ``/v1/responses`` endpoint.

    Reads ``message`` (required) and ``image_url`` (optional) from
    ``kwargs``. An image found in the uploaded files takes precedence over
    the ``image_url`` argument.

    Returns:
        A success ``ToolResult`` carrying the extracted response text, or
        an error ``ToolResult`` with one of the codes
        ``OPENCLAW_INVALID_INPUT``, ``OPENCLAW_HTTP_ERROR``,
        ``OPENCLAW_NETWORK_ERROR`` or ``OPENCLAW_EXECUTION_ERROR``.
        Exceptions are converted to error results, never raised.
    """
    started = time.time()

    def elapsed() -> float:
        # Seconds spent since execute() was entered.
        return time.time() - started

    try:
        message = kwargs.get("message", "")
        if not message:
            return ToolResult.error_result(
                error="message 参数不能为空",
                error_code="OPENCLAW_INVALID_INPUT",
                execution_time=elapsed()
            )

        # Uploaded files win; the image_url argument is only a fallback.
        image_url = self._extract_image_from_uploads() or kwargs.get("image_url")
        # NOTE(review): the available source lost its indentation; this assumes
        # the download/encode step applies to whichever URL was selected
        # (upload-derived or argument) — confirm against the original file.
        if image_url and not image_url.startswith("data:"):
            image_url = await self._download_and_encode_image(image_url)

        endpoint = f"{self._server_url.rstrip('/')}/v1/responses"
        request_headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
            "x-openclaw-agent-id": self._agent_id
        }

        # The "user" field is keyed by conversation or by user.
        if self._session_strategy == "by_conversation" and self._conversation_id:
            user_field = f"conv-{self._conversation_id}"
        else:
            user_field = f"user-{self._user_id}"

        payload = {
            "model": self._model,
            "user": user_field,
            "input": self._build_input(message, image_url),
            "stream": False
        }
        request_timeout = aiohttp.ClientTimeout(total=self._timeout)

        # Log a shallow copy whose input is shortened so base64 data stays out of the log.
        loggable = dict(payload)
        logged_input = loggable.get("input")
        if isinstance(logged_input, list):
            loggable["input"] = "[multimodal input, truncated]"
        elif isinstance(logged_input, str) and len(logged_input) > 500:
            loggable["input"] = logged_input[:500] + "..."
        logger.info(
            f"OpenClaw 请求: url={endpoint}, agent_id={self._agent_id}, "
            f"has_image={bool(image_url)}, body={loggable}"
        )

        async with aiohttp.ClientSession(timeout=request_timeout) as http:
            async with http.post(endpoint, json=payload, headers=request_headers) as reply:
                # Measured once the response has started, before the body is read.
                execution_time = elapsed()

                if reply.status >= 400:
                    detail = await reply.text()
                    return ToolResult.error_result(
                        error=f"OpenClaw HTTP {reply.status}: {detail[:500]}",
                        error_code="OPENCLAW_HTTP_ERROR",
                        execution_time=execution_time
                    )

                response_json = await reply.json()
                answer = self._format_result(self._extract_response(response_json))
                return ToolResult.success_result(
                    data=answer,
                    execution_time=execution_time
                )

    except aiohttp.ClientError as exc:
        return ToolResult.error_result(
            error=f"OpenClaw 网络连接失败: {str(exc)}",
            error_code="OPENCLAW_NETWORK_ERROR",
            execution_time=elapsed()
        )
    except Exception as exc:
        return ToolResult.error_result(
            error=f"OpenClaw 调用失败: {str(exc)}",
            error_code="OPENCLAW_EXECUTION_ERROR",
            execution_time=elapsed()
        )
# ---------- 私有方法 ----------
def _extract_image_from_uploads(self) -> Optional[str]:
"""从用户上传文件中提取图片 URL"""
for f in self._uploaded_files:
f_type = f.get("type", "")
if f_type == "image":
source = f.get("source", {})
if source.get("type") == "base64":
media_type = source.get("media_type", "image/jpeg")
data = source.get("data", "")
return f"data:{media_type};base64,{data}"
elif f.get("image"):
return f.get("image")
elif f.get("url"):
return f.get("url")
elif f_type == "image_url":
return f.get("image_url", {}).get("url", "")
return None
async def _download_and_encode_image(self, image_url: str) -> str:
    """Fetch an image and return it as a base64 ``data:`` URI.

    Bodies larger than 4 MiB are re-encoded as JPEG (quality 75), after
    being shrunk to fit within 2048x2048 when the longest side exceeds
    2048 pixels.

    Args:
        image_url: Address of the image to download.

    Returns:
        The ``data:`` URI on success. The unchanged ``image_url`` is
        returned when the status is not 200, when the Content-Type does
        not start with ``image/``, or when any exception occurs (the
        exception is logged as a warning).
    """
    try:
        from PIL import Image

        size_limit = 4 * 1024 * 1024
        fetch_timeout = aiohttp.ClientTimeout(total=30)

        async with aiohttp.ClientSession() as http:
            async with http.get(
                image_url, allow_redirects=True, timeout=fetch_timeout
            ) as reply:
                mime = reply.headers.get("Content-Type", "image/jpeg")
                if reply.status != 200 or not mime.startswith("image/"):
                    return image_url

                raw = await reply.read()
                if len(raw) > size_limit:
                    picture = Image.open(BytesIO(raw))
                    # Modes converted to RGB before the JPEG save below.
                    if picture.mode in ("RGBA", "P", "LA"):
                        picture = picture.convert("RGB")
                    if max(picture.size) > 2048:
                        picture.thumbnail((2048, 2048), Image.LANCZOS)
                    compressed = BytesIO()
                    picture.save(compressed, format="JPEG", quality=75, optimize=True)
                    raw, mime = compressed.getvalue(), "image/jpeg"

                encoded = base64.b64encode(raw).decode("utf-8")
                return f"data:{mime};base64,{encoded}"
    except Exception as exc:
        logger.warning(f"OpenClaw 下载图片失败,使用原始 URL: {exc}")
        return image_url
def _build_input(self, message: str, image_url: Optional[str] = None):
"""构造请求 input 字段:有图片则构造多模态结构,否则纯文本"""
if not image_url:
return message
content_parts = [{"type": "input_text", "text": message}]
if image_url.startswith("data:"):
try:
header, data = image_url.split(",", 1)
media_type = header.split(":")[1].split(";")[0]
content_parts.append({
"type": "input_image",
"source": {"type": "base64", "media_type": media_type, "data": data}
})
except (ValueError, IndexError):
return message
else:
content_parts.append({
"type": "input_image",
"source": {"type": "url", "url": image_url}
})
return [{"type": "message", "role": "user", "content": content_parts}]
def _extract_response(self, response_data: Dict[str, Any]) -> str:
"""从 OpenClaw 响应中提取文本内容
OpenClaw /v1/responses 只返回 output_text 类型的内容。
图片信息(如有)由 OpenClaw Skill 以 Markdown 链接形式嵌入文本中返回。
"""
output = response_data.get("output", [])
texts = []
for item in output:
if item.get("type") == "message":
for content in item.get("content", []):
if content.get("type") == "output_text" and content.get("text"):
texts.append(content["text"])
return "\n".join(texts) if texts else str(response_data)
@staticmethod
def _format_result(text: str) -> str:
"""格式化结果为 LLM 可读字符串"""
return text or "OpenClaw 返回了空内容)"

View File

@@ -11,6 +11,11 @@ class OperationTool(BaseTool):
self.base_tool = base_tool
self.operation = operation
super().__init__(base_tool.tool_id, base_tool.config)
def set_runtime_context(self, **kwargs):
    """Forward the runtime context to the wrapped tool when it accepts one.

    The keyword arguments are passed on unchanged. Nothing happens when
    ``self.base_tool`` has no ``set_runtime_context`` attribute.
    """
    wrapped = self.base_tool
    if not hasattr(wrapped, 'set_runtime_context'):
        return
    wrapped.set_runtime_context(**kwargs)
@property
def name(self) -> str:
@@ -32,6 +37,8 @@ class OperationTool(BaseTool):
return self._get_datetime_params()
elif self.base_tool.name == 'json_tool':
return self._get_json_params()
elif self.base_tool.name == 'openclaw_tool':
return self._get_openclaw_params()
else:
# 默认返回除operation外的所有参数
return [p for p in self.base_tool.parameters if p.name != "operation"]
@@ -232,6 +239,64 @@ class OperationTool(BaseTool):
else:
return base_params
def _get_openclaw_params(self) -> List[ToolParameter]:
    """Return the openclaw_tool parameters for ``self.operation``.

    Every operation exposes a required ``message`` string. All operations
    except ``device_query`` also expose an optional ``image_url`` string.
    Only the descriptions differ between operations; any operation other
    than the three named ones gets the general descriptions.
    """
    # operation -> (message description, image_url description or None)
    descriptions = {
        "print_task": (
            "发送给 OpenClaw 的打印任务描述,将用户的原始消息原封不动地传递给 OpenClaw禁止改写、补充或润色用户的原文",
            "可选附带的设计图片或参考图OpenClaw 可据此生成 3D 模型",
        ),
        "device_query": (
            "发送给 OpenClaw 的设备查询指令",
            None,
        ),
        "image_understand": (
            "发送给 OpenClaw 的图片理解任务,应描述需要对图片做什么(如描述内容、提取文字、分析信息)",
            "要分析的图片 URL 或 base64 data URI",
        ),
    }
    # "general" and anything unrecognised.
    fallback = (
        "发送给 OpenClaw Agent 的任务描述,应包含完整的任务需求",
        "可选,附带的图片 URL 或 base64 data URI",
    )
    message_text, image_text = descriptions.get(self.operation, fallback)

    params = [
        ToolParameter(
            name="message",
            type=ParameterType.STRING,
            description=message_text,
            required=True,
        )
    ]
    if image_text is not None:
        params.append(
            ToolParameter(
                name="image_url",
                type=ParameterType.STRING,
                description=image_text,
                required=False,
            )
        )
    return params
async def execute(self, **kwargs) -> ToolResult:
"""执行特定操作"""
# 添加operation参数

View File

@@ -0,0 +1,15 @@
{
"name": "openclaw_tool",
"description": "调用OpenClaw Agent远程服务",
"tool_class": "OpenClawTool",
"category": "agent",
"requires_config": true,
"version": "1.0.0",
"enabled": true,
"parameters": {
"server_url": "",
"api_key": "",
"agent_id": "main"
},
"tags": ["agent", "openclaw", "multimodal", "3d-printing", "builtin"]
}

View File

@@ -30,5 +30,18 @@
"parameters": {
"api_key": {"type": "string", "description": "百度搜索API密钥", "sensitive": true, "required": true}
}
},
"openclaw": {
"name": "OpenClaw远程Agent",
"description": "OpenClaw Agent远程服务",
"tool_class": "OpenClawTool",
"category": "agent",
"requires_config": true,
"version": "1.0.0",
"enabled": true,
"parameters": {
"server_url": {"type": "string", "description": "OpenClaw Gateway 地址", "required": true},
"api_key": {"type": "string", "description": "OpenClaw API Key", "sensitive": true, "required": true}
}
}
}

View File

@@ -30,7 +30,7 @@ class CustomTool(BaseTool):
self.auth_config = config.get("auth_config", {})
self.base_url = config.get("base_url", "")
self.timeout = config.get("timeout", 30)
# 解析schema
self._parsed_operations = self._parse_openapi_schema()

View File

@@ -131,7 +131,7 @@ class LangchainAdapter:
def _tool_supports_operations(tool: BaseTool) -> bool:
"""检查工具是否支持多操作"""
# 内置工具中支持操作的工具
builtin_operation_tools = ['datetime_tool', 'json_tool']
builtin_operation_tools = ['datetime_tool', 'json_tool', 'openclaw_tool']
# 检查内置工具
if tool.tool_type.value == "builtin" and tool.name in builtin_operation_tools:

View File

@@ -161,6 +161,17 @@ class BuiltinToolRepository:
BuiltinToolConfig.id == tool_id
).first()
@staticmethod
def get_existing_tool_classes(db: Session, tenant_id: uuid.UUID) -> set:
    """Return the set of builtin ``tool_class`` values a tenant already has.

    Args:
        db: Session used to run the query.
        tenant_id: Tenant whose builtin tools are looked up.

    Returns:
        The distinct ``BuiltinToolConfig.tool_class`` values of rows joined
        to a ``ToolConfig`` of that tenant whose type is builtin.
    """
    joined = db.query(BuiltinToolConfig.tool_class).join(
        ToolConfig, BuiltinToolConfig.id == ToolConfig.id
    )
    tenant_builtin = joined.filter(
        ToolConfig.tenant_id == tenant_id,
        ToolConfig.tool_type == ToolType.BUILTIN.value
    )
    # Each result row holds a single column: the tool_class.
    return {record[0] for record in tenant_builtin.all()}
class CustomToolRepository:
"""自定义工具仓储类"""

View File

@@ -165,7 +165,14 @@ class AppChatService:
multimodal_service = MultimodalService(self.db, model_info)
processed_files = await multimodal_service.process_files(files)
logger.info(f"处理了 {len(processed_files)} 个文件")
# 为需要运行时上下文的工具注入上下文
for t in tools:
if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'):
t.tool_instance.set_runtime_context(
user_id=user_id or "anonymous",
conversation_id=str(conversation_id) if conversation_id else None,
uploaded_files=processed_files or []
)
# 调用 Agent支持多模态
result = await agent.chat(
message=message,
@@ -413,6 +420,15 @@ class AppChatService:
processed_files = await multimodal_service.process_files(files)
logger.info(f"处理了 {len(processed_files)} 个文件")
# 为需要运行时上下文的工具注入上下文
for t in tools:
if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'):
t.tool_instance.set_runtime_context(
user_id=user_id or "anonymous",
conversation_id=str(conversation_id) if conversation_id else None,
uploaded_files=processed_files or []
)
# 流式调用 Agent支持多模态同时并行启动 TTS
full_content = ""
full_reasoning = ""

View File

@@ -640,7 +640,14 @@ class AgentRunService:
multimodal_service = MultimodalService(self.db, model_info)
processed_files = await multimodal_service.process_files(files)
logger.info(f"处理了 {len(processed_files)} 个文件provider={provider}")
# 为需要运行时上下文的工具注入上下文
for t in tools:
if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'):
t.tool_instance.set_runtime_context(
user_id=user_id or "anonymous",
conversation_id=str(conversation_id) if conversation_id else None,
uploaded_files=processed_files or []
)
# 7. 知识库检索
context = None
@@ -890,7 +897,14 @@ class AgentRunService:
multimodal_service = MultimodalService(self.db, model_info)
processed_files = await multimodal_service.process_files(files)
logger.info(f"处理了 {len(processed_files)} 个文件provider={provider}")
# 为需要运行时上下文的工具注入上下文
for t in tools:
if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'):
t.tool_instance.set_runtime_context(
user_id=user_id or "anonymous",
conversation_id=str(conversation_id) if conversation_id else None,
uploaded_files=processed_files or []
)
# 7. 知识库检索
context = None

View File

@@ -34,7 +34,8 @@ BUILTIN_TOOLS = {
"JsonTool": "app.core.tools.builtin.json_tool",
"BaiduSearchTool": "app.core.tools.builtin.baidu_search_tool",
"MinerUTool": "app.core.tools.builtin.mineru_tool",
"TextInTool": "app.core.tools.builtin.textin_tool"
"TextInTool": "app.core.tools.builtin.textin_tool",
"OpenClawTool": "app.core.tools.builtin.openclaw_tool",
}
@@ -340,18 +341,18 @@ class ToolService:
return {"success": False, "message": f"测试失败: {str(e)}"}
def ensure_builtin_tools_initialized(self, tenant_id: uuid.UUID):
"""确保内置工具已初始化"""
existing = self.tool_repo.exists_builtin_for_tenant(self.db, tenant_id)
if existing:
"""确保内置工具已初始化(支持增量补充新工具)"""
builtin_config = self._load_builtin_config()
if not builtin_config:
return
# 从配置文件加载内置工具定义
builtin_config = self._load_builtin_config()
existing_classes = self.builtin_repo.get_existing_tool_classes(self.db, tenant_id)
added = False
for tool_key, tool_info in builtin_config.items():
if tool_info['tool_class'] in existing_classes:
continue
try:
# 创建工具配置
initial_status = self._determine_initial_status(tool_info)
tool_config = ToolConfig(
name=tool_info['name'],
@@ -367,7 +368,6 @@ class ToolService:
self.db.add(tool_config)
self.db.flush()
# 创建内置工具配置
builtin_config_obj = BuiltinToolConfig(
id=tool_config.id,
tool_class=tool_info['tool_class'],
@@ -375,12 +375,14 @@ class ToolService:
requires_config=tool_info.get('requires_config', False)
)
self.db.add(builtin_config_obj)
added = True
except Exception as e:
logger.error(f"初始化内置工具失败: {tool_key}, {e}")
self.db.commit()
logger.info(f"租户 {tenant_id} 内置工具初始化完成")
if added:
self.db.commit()
logger.info(f"租户 {tenant_id} 内置工具增量初始化完成")
async def get_tool_methods(self, tool_id: str, tenant_id: uuid.UUID) -> Optional[List[Dict[str, Any]]]:
"""获取工具的所有方法
@@ -458,6 +460,9 @@ class ToolService:
# 对于json_tool根据操作类型返回相关参数
elif hasattr(tool_instance, 'name') and tool_instance.name == 'json_tool':
return self._get_json_tool_params(operation)
# 对于openclaw_tool根据操作类型返回不同描述的参数
elif hasattr(tool_instance, 'name') and tool_instance.name == 'openclaw_tool':
return self._get_openclaw_tool_params(operation)
# 其他工具的默认处理返回除operation外的所有参数
return [{
@@ -710,6 +715,65 @@ class ToolService:
return base_params
@staticmethod
def _get_openclaw_tool_params(operation: str) -> List[Dict[str, Any]]:
"""获取 openclaw_tool 特定操作的参数"""
if operation == "print_task":
return [
{
"name": "message",
"type": "string",
"description": "发送给 OpenClaw 的打印任务描述,将用户的原始消息原封不动地传递给 OpenClaw禁止改写、补充或润色用户的原文",
"required": True
},
{
"name": "image_url",
"type": "string",
"description": "可选附带的设计图片或参考图OpenClaw 可据此生成 3D 模型",
"required": False
}
]
elif operation == "device_query":
return [
{
"name": "message",
"type": "string",
"description": "发送给 OpenClaw 的设备查询指令",
"required": True
}
]
elif operation == "image_understand":
return [
{
"name": "message",
"type": "string",
"description": "发送给 OpenClaw 的图片理解任务,应描述需要对图片做什么(如描述内容、提取文字、分析信息)",
"required": True
},
{
"name": "image_url",
"type": "string",
"description": "要分析的图片 URL 或 base64 data URI",
"required": False
}
]
else:
# general 及其他
return [
{
"name": "message",
"type": "string",
"description": "发送给 OpenClaw Agent 的任务描述,应包含完整的任务需求",
"required": True
},
{
"name": "image_url",
"type": "string",
"description": "可选,附带的图片 URL 或 base64 data URI",
"required": False
}
]
async def _get_custom_tool_methods(self, config: ToolConfig) -> List[Dict[str, Any]]:
"""获取自定义工具的方法"""
custom_config = self.custom_repo.find_by_tool_id(self.db, config.id)