feat(apikey system): tool system development

2025-12-20 15:24:28 +08:00
parent 3fbd4f206e
commit c26af11f76
39 changed files with 9338 additions and 4 deletions
--- a/api/app/core/tools/builtin/init.py
+++ b/api/app/core/tools/builtin/init.py
@@ -0,0 +1,17 @@
+"""内置工具模块"""
+
+from .base import BuiltinTool
+from .datetime_tool import DateTimeTool
+from .json_tool import JsonTool
+from .baidu_search_tool import BaiduSearchTool
+from .mineru_tool import MinerUTool
+from .textin_tool import TextInTool
+
+# Public names re-exported by the builtin tools package.
+__all__ = [
+    "BuiltinTool",
+    "DateTimeTool",
+    "JsonTool",
+    "BaiduSearchTool",
+    "MinerUTool",
+    "TextInTool",
+]
--- a/api/app/core/tools/builtin/baidu_search_tool.py
+++ b/api/app/core/tools/builtin/baidu_search_tool.py
@@ -0,0 +1,334 @@
+"""百度搜索工具 - 搜索引擎服务"""
+import time
+from typing import List, Dict, Any
+import aiohttp
+
+from app.core.tools.base import ToolParameter, ToolResult, ParameterType
+from .base import BuiltinTool
+
+
+class BaiduSearchTool(BuiltinTool):
+    """Baidu search tool backed by the Qianfan AI search endpoint.
+
+    Supports the ``web``, ``news``, ``image`` and ``video`` search types. Every
+    search type posts the same payload shape to a single endpoint; only the
+    resource type, the ``top_k`` cap and the optional time filter differ, so
+    the per-type methods delegate to ``_run_search``.
+    """
+
+    _API_URL = "https://qianfan.baidubce.com/v2/ai_search/chat/completions"
+    _REQUEST_TIMEOUT_SECONDS = 30
+    _DEFAULT_PAGE_SIZE = 10
+    # Maximum number of characters of an error response body put into the
+    # exception message.
+    _ERROR_BODY_LIMIT = 500
+    # time_filter value -> lower bound used in the ``page_time`` range filter.
+    # "all" is intentionally absent: it means "no time restriction".
+    _TIME_RANGES = {
+        "day": "now-1d/d",
+        "week": "now-1w/d",
+        "month": "now-1M/d",
+        "year": "now-1y/d",
+    }
+
+    @property
+    def name(self) -> str:
+        """Unique tool name."""
+        return "baidu_search_tool"
+
+    @property
+    def description(self) -> str:
+        """Human-readable tool description."""
+        return "百度搜索 - 搜索引擎服务：网页搜索、新闻搜索、图片搜索、实时结果"
+
+    def get_required_config_parameters(self) -> List[str]:
+        """Return the configuration keys that must be set: the API key."""
+        return ["api_key"]
+
+    @property
+    def parameters(self) -> List[ToolParameter]:
+        """Declare the call parameters accepted by ``execute``."""
+        return [
+            ToolParameter(
+                name="query",
+                type=ParameterType.STRING,
+                description="搜索关键词",
+                required=True
+            ),
+            ToolParameter(
+                name="search_type",
+                type=ParameterType.STRING,
+                description="搜索类型",
+                required=False,
+                default="web",
+                enum=["web", "news", "image", "video"]
+            ),
+            ToolParameter(
+                name="page_size",
+                type=ParameterType.INTEGER,
+                description="每页结果数",
+                required=False,
+                default=10,
+                minimum=1,
+                maximum=50
+            ),
+            ToolParameter(
+                name="page_num",
+                type=ParameterType.INTEGER,
+                description="页码（从1开始）",
+                required=False,
+                default=1,
+                minimum=1,
+                maximum=10
+            ),
+            ToolParameter(
+                name="safe_search",
+                type=ParameterType.BOOLEAN,
+                description="是否启用安全搜索",
+                required=False,
+                default=True
+            ),
+            ToolParameter(
+                name="region",
+                type=ParameterType.STRING,
+                description="搜索地区",
+                required=False,
+                default="cn",
+                enum=["cn", "hk", "tw", "us", "jp", "kr"]
+            ),
+            ToolParameter(
+                name="time_filter",
+                type=ParameterType.STRING,
+                description="时间过滤",
+                required=False,
+                enum=["all", "day", "week", "month", "year"]
+            )
+        ]
+
+    async def execute(self, **kwargs) -> ToolResult:
+        """Run a search and wrap the outcome in a ``ToolResult``.
+
+        Args:
+            **kwargs: Call parameters as declared by ``parameters``.
+
+        Returns:
+            A success result carrying the search dictionary, or an error
+            result with code ``BAIDU_SEARCH_ERROR``. This method does not
+            raise for ordinary failures; every ``Exception`` is converted
+            into an error result.
+        """
+        start_time = time.time()
+
+        try:
+            query = kwargs.get("query")
+            if not query:
+                raise ValueError("query 参数是必需的")
+
+            # ``or`` also covers callers that pass an explicit None.
+            search_type = kwargs.get("search_type") or "web"
+            page_size = kwargs.get("page_size", self._DEFAULT_PAGE_SIZE)
+            page_num = kwargs.get("page_num", 1)
+            safe_search = kwargs.get("safe_search", True)
+            region = kwargs.get("region", "cn")
+            time_filter = kwargs.get("time_filter")
+
+            # Dispatch to the search-type specific method.
+            if search_type == "web":
+                result = await self._web_search(query, page_size, page_num, safe_search, region, time_filter)
+            elif search_type == "news":
+                result = await self._news_search(query, page_size, page_num, region, time_filter)
+            elif search_type == "image":
+                result = await self._image_search(query, page_size, page_num, safe_search)
+            elif search_type == "video":
+                result = await self._video_search(query, page_size, page_num, safe_search)
+            else:
+                raise ValueError(f"不支持的搜索类型: {search_type}")
+
+            return ToolResult.success_result(
+                data=result,
+                execution_time=time.time() - start_time
+            )
+
+        except Exception as e:
+            # Tool boundary: report the failure instead of propagating it.
+            return ToolResult.error_result(
+                error=str(e),
+                error_code="BAIDU_SEARCH_ERROR",
+                execution_time=time.time() - start_time
+            )
+
+    async def _web_search(self, query: str, page_size: int, page_num: int,
+                         safe_search: bool, region: str, time_filter: str = None) -> Dict[str, Any]:
+        """Run a web search (at most 50 results).
+
+        ``safe_search`` and ``region`` are accepted for interface
+        compatibility but are not sent to the API; ``page_num`` is only
+        echoed back in the result.
+        """
+        return await self._run_search(
+            query=query,
+            resource_type="web",
+            search_type="web",
+            max_top_k=50,
+            page_size=page_size,
+            page_num=page_num,
+            time_filter=time_filter,
+        )
+
+    async def _news_search(self, query: str, page_size: int, page_num: int,
+                          region: str, time_filter: str = None) -> Dict[str, Any]:
+        """Run a news search (at most 50 results).
+
+        ``region`` is accepted but not sent to the API; ``page_num`` is only
+        echoed back in the result.
+        """
+        # NOTE(review): the request resource type is "new", not "news" - kept
+        # as-is because the accepted values cannot be confirmed from this
+        # file; verify against the Qianfan AI search API reference.
+        return await self._run_search(
+            query=query,
+            resource_type="new",
+            # Echo the search type the caller asked for ("news").
+            search_type="news",
+            max_top_k=50,
+            page_size=page_size,
+            page_num=page_num,
+            time_filter=time_filter,
+        )
+
+    async def _image_search(self, query: str, page_size: int, page_num: int,
+                           safe_search: bool) -> Dict[str, Any]:
+        """Run an image search (at most 30 results).
+
+        ``safe_search`` is accepted but not sent to the API; ``page_num`` is
+        only echoed back in the result.
+        """
+        return await self._run_search(
+            query=query,
+            resource_type="image",
+            search_type="image",
+            max_top_k=30,
+            page_size=page_size,
+            page_num=page_num,
+        )
+
+    async def _video_search(self, query: str, page_size: int, page_num: int,
+                           safe_search: bool) -> Dict[str, Any]:
+        """Run a video search (at most 10 results).
+
+        ``safe_search`` is accepted but not sent to the API; ``page_num`` is
+        only echoed back in the result.
+        """
+        return await self._run_search(
+            query=query,
+            resource_type="video",
+            search_type="video",
+            max_top_k=10,
+            page_size=page_size,
+            page_num=page_num,
+        )
+
+    def _clamp_top_k(self, page_size: Any, max_top_k: int) -> int:
+        """Turn the requested page size into a ``top_k`` within [1, max_top_k].
+
+        A missing or non-numeric page size falls back to the default page
+        size before clamping.
+        """
+        try:
+            requested = int(page_size)
+        except (TypeError, ValueError):
+            requested = self._DEFAULT_PAGE_SIZE
+        return max(1, min(requested, max_top_k))
+
+    async def _run_search(self, *, query: str, resource_type: str, search_type: str,
+                          max_top_k: int, page_size: int, page_num: int,
+                          time_filter: str = None) -> Dict[str, Any]:
+        """Build the payload, call the API and shape the result dictionary.
+
+        Args:
+            query: Search keywords, sent as a single user message.
+            resource_type: Value of ``resource_type_filter[0].type``.
+            search_type: Search type echoed back in the result.
+            max_top_k: Upper bound applied to ``page_size``.
+            page_size: Requested number of results.
+            page_num: Page number; echoed back only.
+            time_filter: Optional key of ``_TIME_RANGES``; other values
+                (including "all") add no filter.
+
+        Returns:
+            Dictionary with the ranked ``results``, the API's ``answer`` text
+            and the raw ``references`` list.
+        """
+        payload = {
+            "messages": [{"role": "user", "content": query}],
+            "edition": "standard",
+            "search_source": "baidu_search_v2",
+            "resource_type_filter": [
+                {"type": resource_type, "top_k": self._clamp_top_k(page_size, max_top_k)}
+            ],
+            "enable_full_content": True
+        }
+
+        lower_bound = self._TIME_RANGES.get(time_filter) if time_filter else None
+        if lower_bound:
+            payload["search_filter"] = {"range": {"page_time": {"gte": lower_bound, "lt": "now/d"}}}
+            payload["search_recency_filter"] = time_filter
+
+        response = await self._call_baidu_ai_search_api(payload)
+
+        # The key may be absent or null; treat both as "no references".
+        references = response.get("references") or []
+        search_results = [
+            {
+                "title": item.get("title", ""),
+                "url": item.get("url", ""),
+                "snippet": item.get("content", ""),
+                "display_url": item.get("url", ""),
+                "rank": rank
+            }
+            for rank, item in enumerate(references, start=1)
+        ]
+
+        return {
+            "search_type": search_type,
+            "query": query,
+            "total_results": len(search_results),
+            "page_num": page_num,
+            "page_size": page_size,
+            "results": search_results,
+            "answer": response.get("result", ""),
+            "references": references
+        }
+
+    async def _call_baidu_ai_search_api(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """POST ``payload`` to the AI search endpoint and return the JSON body.
+
+        Raises:
+            ValueError: If no API key is configured.
+            Exception: If the response status is not 200; the message holds
+                the status code and, when readable, the start of the body.
+        """
+        api_key = self.get_config_parameter("api_key")
+
+        if not api_key:
+            raise ValueError("百度搜索API密钥未配置")
+
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': f'Bearer {api_key}'
+        }
+
+        timeout = aiohttp.ClientTimeout(total=self._REQUEST_TIMEOUT_SECONDS)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.post(self._API_URL, headers=headers, json=payload) as response:
+                if response.status == 200:
+                    return await response.json()
+
+                # Keep the start of the error body: it usually carries the
+                # provider's error code and message.
+                try:
+                    body = (await response.text(errors="replace"))[:self._ERROR_BODY_LIMIT]
+                except aiohttp.ClientError:
+                    body = ""
+
+                message = f"HTTP错误: {response.status}"
+                if body:
+                    message = f"{message} - {body}"
+                raise Exception(message)
+
+    async def test_connection(self) -> Dict[str, Any]:
+        """Check that the configured API key works by issuing a one-result search.
+
+        Returns:
+            ``{"success": True, ...}`` with a masked key on success, otherwise
+            ``{"success": False, "error": ...}``. Never raises for ordinary
+            failures.
+        """
+        try:
+            api_key = self.get_config_parameter("api_key")
+
+            if not api_key:
+                return {
+                    "success": False,
+                    "error": "API密钥未配置"
+                }
+
+            # Smallest possible request that still exercises authentication.
+            test_payload = {
+                "messages": [{"role": "user", "content": "test"}],
+                "edition": "standard",
+                "search_source": "baidu_search_v2",
+                "resource_type_filter": [{"type": "web", "top_k": 1}]
+            }
+
+            try:
+                await self._call_baidu_ai_search_api(test_payload)
+            except Exception as e:
+                return {
+                    "success": False,
+                    "error": f"API连接失败: {str(e)}"
+                }
+
+            # Only the first 8 characters of a long key are revealed.
+            masked_key = api_key[:8] + "***" if len(api_key) > 8 else "***"
+            return {
+                "success": True,
+                "message": "连接测试成功",
+                "api_key_masked": masked_key
+            }
+
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e)
+            }
--- a/api/app/core/tools/builtin/base.py
+++ b/api/app/core/tools/builtin/base.py
@@ -0,0 +1,118 @@
+"""内置工具基类"""
+from abc import ABC, abstractmethod
+from typing import Dict, Any, List
+
+from app.models.tool_model import ToolType
+from app.core.tools.base import BaseTool, ToolResult, ToolParameter
+
+
+class BuiltinTool(BaseTool, ABC):
+    """Abstract base class shared by the built-in tools.
+
+    Adds configuration handling on top of ``BaseTool``: the ``"parameters"``
+    entry of the tool config is kept in ``parameters_config`` and checked
+    against the keys a subclass declares as required.
+    """
+
+    def __init__(self, tool_id: str, config: Dict[str, Any]):
+        """Initialise the tool.
+
+        Args:
+            tool_id: Tool identifier.
+            config: Tool configuration; its ``"parameters"`` entry (an empty
+                dict when absent) becomes ``parameters_config``.
+        """
+        super().__init__(tool_id, config)
+        self.parameters_config = config.get("parameters", {})
+
+    @property
+    def tool_type(self) -> ToolType:
+        """Tool category; always ``ToolType.BUILTIN``."""
+        return ToolType.BUILTIN
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Tool name - must be implemented by subclasses."""
+        ...
+
+    @property
+    @abstractmethod
+    def description(self) -> str:
+        """Tool description - must be implemented by subclasses."""
+        ...
+
+    @property
+    @abstractmethod
+    def parameters(self) -> List[ToolParameter]:
+        """Tool parameter definitions - must be implemented by subclasses."""
+        ...
+
+    @abstractmethod
+    async def execute(self, **kwargs) -> ToolResult:
+        """Execute the tool - must be implemented by subclasses.
+
+        Args:
+            **kwargs: Tool call parameters.
+
+        Returns:
+            The execution result.
+        """
+        ...
+
+    @property
+    def is_configured(self) -> bool:
+        """Whether every required configuration parameter has a truthy value."""
+        return all(
+            self.parameters_config.get(key)
+            for key in self.get_required_config_parameters()
+        )
+
+    def get_required_config_parameters(self) -> List[str]:
+        """Return the names of the required configuration parameters.
+
+        The base implementation requires nothing; subclasses override it.
+
+        Returns:
+            List of required configuration parameter names.
+        """
+        return []
+
+    def get_config_parameter(self, name: str, default: Any = None) -> Any:
+        """Look up a configuration parameter.
+
+        Args:
+            name: Parameter name.
+            default: Value returned when the parameter is not present.
+
+        Returns:
+            The configured value, or ``default``.
+        """
+        return self.parameters_config.get(name, default)
+
+    def validate_configuration(self) -> tuple[bool, str]:
+        """Validate the tool configuration.
+
+        Returns:
+            ``(True, "")`` when configured, otherwise ``(False, message)``
+            where the message lists the missing required parameters.
+        """
+        if self.is_configured:
+            return True, ""
+
+        absent = [
+            key
+            for key in self.get_required_config_parameters()
+            if not self.parameters_config.get(key)
+        ]
+        return False, f"缺少必需的配置参数: {', '.join(absent)}"
+
+    async def safe_execute(self, **kwargs) -> ToolResult:
+        """Execute the tool after checking its configuration.
+
+        Args:
+            **kwargs: Tool call parameters.
+
+        Returns:
+            A ``CONFIGURATION_ERROR`` result when the configuration is
+            invalid, otherwise whatever the parent ``safe_execute`` returns.
+        """
+        config_ok, reason = self.validate_configuration()
+        if config_ok:
+            # Configuration is fine: defer to the parent implementation.
+            return await super().safe_execute(**kwargs)
+
+        return ToolResult.error_result(
+            error=f"工具配置无效: {reason}",
+            error_code="CONFIGURATION_ERROR",
+            execution_time=0.0
+        )
--- a/api/app/core/tools/builtin/datetime_tool.py
+++ b/api/app/core/tools/builtin/datetime_tool.py
@@ -0,0 +1,307 @@
+"""时间工具 - 日期时间处理"""
+import time
+from datetime import datetime, timezone, timedelta
+from typing import List
+import pytz
+
+from app.core.tools.base import ToolParameter, ToolResult, ParameterType
+from .base import BuiltinTool
+
+
+class DateTimeTool(BuiltinTool):
+    """Date/time utility tool.
+
+    Operations: ``now``, ``format``, ``convert_timezone``,
+    ``timestamp_to_datetime``, ``datetime_to_timestamp`` and ``calculate``.
+    All timezones are resolved to pytz timezone objects so that naive
+    datetimes can be attached to a zone with ``localize``.
+    """
+
+    # Unit suffix of a calculation expression -> ``timedelta`` keyword.
+    _DELTA_UNITS = {"d": "days", "h": "hours", "m": "minutes", "s": "seconds"}
+
+    @property
+    def name(self) -> str:
+        """Unique tool name."""
+        return "datetime_tool"
+
+    @property
+    def description(self) -> str:
+        """Human-readable tool description."""
+        return "时间工具 - 日期时间处理：提供时间格式转化、时区转换、时间戳转换、时间计算"
+
+    @property
+    def parameters(self) -> List[ToolParameter]:
+        """Declare the call parameters accepted by ``execute``."""
+        return [
+            ToolParameter(
+                name="operation",
+                type=ParameterType.STRING,
+                description="操作类型",
+                required=True,
+                enum=["format", "convert_timezone", "timestamp_to_datetime", "datetime_to_timestamp", "calculate", "now"]
+            ),
+            ToolParameter(
+                name="input_value",
+                type=ParameterType.STRING,
+                description="输入值（时间字符串或时间戳）",
+                required=False
+            ),
+            ToolParameter(
+                name="input_format",
+                type=ParameterType.STRING,
+                description="输入时间格式（如：%Y-%m-%d %H:%M:%S）",
+                required=False,
+                default="%Y-%m-%d %H:%M:%S"
+            ),
+            ToolParameter(
+                name="output_format",
+                type=ParameterType.STRING,
+                description="输出时间格式（如：%Y-%m-%d %H:%M:%S）",
+                required=False,
+                default="%Y-%m-%d %H:%M:%S"
+            ),
+            ToolParameter(
+                name="from_timezone",
+                type=ParameterType.STRING,
+                description="源时区（如：UTC, Asia/Shanghai）",
+                required=False,
+                default="UTC"
+            ),
+            ToolParameter(
+                name="to_timezone",
+                type=ParameterType.STRING,
+                description="目标时区（如：UTC, Asia/Shanghai）",
+                required=False,
+                default="UTC"
+            ),
+            ToolParameter(
+                name="calculation",
+                type=ParameterType.STRING,
+                description="时间计算表达式（如：+1d, -2h, +30m）",
+                required=False
+            )
+        ]
+
+    async def execute(self, **kwargs) -> ToolResult:
+        """Run the requested operation and wrap the outcome in a ``ToolResult``.
+
+        Args:
+            **kwargs: Call parameters as declared by ``parameters``.
+
+        Returns:
+            A success result carrying the operation's dictionary, or an error
+            result with code ``DATETIME_ERROR``. Every ``Exception`` is
+            converted into an error result.
+        """
+        start_time = time.time()
+
+        try:
+            operation = kwargs.get("operation")
+
+            if operation == "now":
+                result = self._get_current_time(kwargs)
+            elif operation == "format":
+                result = self._format_datetime(kwargs)
+            elif operation == "convert_timezone":
+                result = self._convert_timezone(kwargs)
+            elif operation == "timestamp_to_datetime":
+                result = self._timestamp_to_datetime(kwargs)
+            elif operation == "datetime_to_timestamp":
+                result = self._datetime_to_timestamp(kwargs)
+            elif operation == "calculate":
+                result = self._calculate_datetime(kwargs)
+            else:
+                raise ValueError(f"不支持的操作类型: {operation}")
+
+            return ToolResult.success_result(
+                data=result,
+                execution_time=time.time() - start_time
+            )
+
+        except Exception as e:
+            # Tool boundary: report the failure instead of propagating it.
+            return ToolResult.error_result(
+                error=str(e),
+                error_code="DATETIME_ERROR",
+                execution_time=time.time() - start_time
+            )
+
+    @staticmethod
+    def _resolve_timezone(timezone_str: str):
+        """Return the pytz timezone named ``timezone_str``.
+
+        Always returns a pytz object (``pytz.UTC`` for "UTC") rather than
+        ``datetime.timezone.utc``: the latter has no ``localize`` method, so
+        mixing the two made the default-UTC code paths fail.
+        """
+        if timezone_str == "UTC":
+            return pytz.UTC
+        return pytz.timezone(timezone_str)
+
+    @staticmethod
+    def _require_input_value(kwargs) -> str:
+        """Return ``input_value`` or raise ``ValueError`` when it is missing/empty."""
+        input_value = kwargs.get("input_value")
+        if not input_value:
+            raise ValueError("input_value 参数是必需的")
+        return input_value
+
+    def _get_current_time(self, kwargs) -> dict:
+        """Return the current time in ``to_timezone`` (default UTC)."""
+        timezone_str = kwargs.get("to_timezone", "UTC")
+        output_format = kwargs.get("output_format", "%Y-%m-%d %H:%M:%S")
+
+        now = datetime.now(self._resolve_timezone(timezone_str))
+
+        return {
+            "datetime": now.strftime(output_format),
+            "timestamp": int(now.timestamp()),
+            "timezone": timezone_str,
+            "iso_format": now.isoformat()
+        }
+
+    def _format_datetime(self, kwargs) -> dict:
+        """Re-format ``input_value`` from ``input_format`` to ``output_format``."""
+        input_value = self._require_input_value(kwargs)
+        input_format = kwargs.get("input_format", "%Y-%m-%d %H:%M:%S")
+        output_format = kwargs.get("output_format", "%Y-%m-%d %H:%M:%S")
+
+        dt = datetime.strptime(input_value, input_format)
+
+        return {
+            "original": input_value,
+            "formatted": dt.strftime(output_format),
+            # NOTE: when the parsed value is naive, ``timestamp()`` interprets
+            # it in the host's local timezone.
+            "timestamp": int(dt.timestamp()),
+            "iso_format": dt.isoformat()
+        }
+
+    def _convert_timezone(self, kwargs) -> dict:
+        """Convert ``input_value`` from ``from_timezone`` to ``to_timezone``."""
+        input_value = self._require_input_value(kwargs)
+        input_format = kwargs.get("input_format", "%Y-%m-%d %H:%M:%S")
+        output_format = kwargs.get("output_format", "%Y-%m-%d %H:%M:%S")
+        from_timezone = kwargs.get("from_timezone", "UTC")
+        to_timezone = kwargs.get("to_timezone", "UTC")
+
+        dt = datetime.strptime(input_value, input_format)
+
+        from_tz = self._resolve_timezone(from_timezone)
+        to_tz = self._resolve_timezone(to_timezone)
+
+        # A naive value is taken to be in the source zone; a value that
+        # already carries an offset (e.g. parsed with %z) is used as-is.
+        if dt.tzinfo is None:
+            dt = from_tz.localize(dt)
+
+        converted_dt = dt.astimezone(to_tz)
+
+        return {
+            "original": input_value,
+            "original_timezone": from_timezone,
+            "converted": converted_dt.strftime(output_format),
+            "converted_timezone": to_timezone,
+            "timestamp": int(converted_dt.timestamp())
+        }
+
+    def _timestamp_to_datetime(self, kwargs) -> dict:
+        """Convert a Unix timestamp (seconds) to a datetime in ``to_timezone``."""
+        input_value = kwargs.get("input_value")
+        output_format = kwargs.get("output_format", "%Y-%m-%d %H:%M:%S")
+        timezone_str = kwargs.get("to_timezone", "UTC")
+
+        # Test for None/"" explicitly so a numeric 0 (the epoch) is accepted.
+        if input_value is None or input_value == "":
+            raise ValueError("input_value 参数是必需的")
+
+        timestamp = float(input_value)
+
+        dt = datetime.fromtimestamp(timestamp, self._resolve_timezone(timezone_str))
+
+        return {
+            "timestamp": timestamp,
+            "datetime": dt.strftime(output_format),
+            "timezone": timezone_str,
+            "iso_format": dt.isoformat()
+        }
+
+    def _datetime_to_timestamp(self, kwargs) -> dict:
+        """Convert ``input_value`` (interpreted in ``from_timezone``) to a Unix timestamp."""
+        input_value = self._require_input_value(kwargs)
+        input_format = kwargs.get("input_format", "%Y-%m-%d %H:%M:%S")
+        timezone_str = kwargs.get("from_timezone", "UTC")
+
+        dt = datetime.strptime(input_value, input_format)
+
+        tz = self._resolve_timezone(timezone_str)
+
+        # Attach the source zone to a naive value.
+        if dt.tzinfo is None:
+            dt = tz.localize(dt)
+
+        return {
+            "datetime": input_value,
+            "timezone": timezone_str,
+            "timestamp": int(dt.timestamp()),
+            "iso_format": dt.isoformat()
+        }
+
+    def _calculate_datetime(self, kwargs) -> dict:
+        """Add the offset described by ``calculation`` to ``input_value``."""
+        input_value = self._require_input_value(kwargs)
+        input_format = kwargs.get("input_format", "%Y-%m-%d %H:%M:%S")
+        output_format = kwargs.get("output_format", "%Y-%m-%d %H:%M:%S")
+        calculation = kwargs.get("calculation")
+        timezone_str = kwargs.get("from_timezone", "UTC")
+
+        if not calculation:
+            raise ValueError("calculation 参数是必需的")
+
+        dt = datetime.strptime(input_value, input_format)
+        tz = self._resolve_timezone(timezone_str)
+        delta = self._parse_time_delta(calculation)
+
+        if dt.tzinfo is None:
+            # pytz keeps the pre-arithmetic UTC offset on the sum; normalize()
+            # corrects it when the result falls on the other side of a DST
+            # transition.
+            calculated_dt = tz.normalize(tz.localize(dt) + delta)
+        else:
+            # The value already carries its own offset (e.g. parsed with %z).
+            calculated_dt = dt + delta
+
+        return {
+            "original": input_value,
+            "calculation": calculation,
+            "result": calculated_dt.strftime(output_format),
+            "timezone": timezone_str,
+            "timestamp": int(calculated_dt.timestamp())
+        }
+
+    def _parse_time_delta(self, calculation: str) -> timedelta:
+        """Parse a calculation expression into a ``timedelta``.
+
+        The expression is one or more ``<sign><integer><unit>`` terms with
+        units ``d`` (days), ``h`` (hours), ``m`` (minutes) and ``s``
+        (seconds), optionally separated by whitespace or commas, e.g.
+        ``+1d``, ``-2h`` or ``+1d -30m``. Matching is case-insensitive.
+
+        Raises:
+            ValueError: If no term is found, or if any part of the expression
+                is not a valid term. Rejecting leftovers prevents silent
+                misreads such as ``+1.5h`` being taken as 5 hours.
+        """
+        import re
+
+        term = re.compile(r'([+-]?\d+)([dhms])')
+        expression = calculation.lower()
+        matches = term.findall(expression)
+
+        # Whatever remains after removing the terms and separators is text
+        # the parser did not understand.
+        leftover = re.sub(r'[\s,]', '', term.sub('', expression))
+
+        if not matches or leftover:
+            raise ValueError(f"无效的时间计算表达式: {calculation}")
+
+        total_delta = timedelta()
+        for value_str, unit in matches:
+            total_delta += timedelta(**{self._DELTA_UNITS[unit]: int(value_str)})
+
+        return total_delta
--- a/api/app/core/tools/builtin/json_tool.py
+++ b/api/app/core/tools/builtin/json_tool.py
@@ -0,0 +1,430 @@
+"""JSON转换工具 - 数据格式转换"""
+import json
+import time
+from typing import List, Any, Dict
+import yaml
+import xml.etree.ElementTree as ET
+from xml.dom import minidom
+
+from app.core.tools.base import ToolParameter, ToolResult, ParameterType
+from .base import BuiltinTool
+
+
+class JsonTool(BuiltinTool):
+    """Built-in tool for formatting, minifying, validating and converting JSON.
+
+    The ``operation`` argument of :meth:`execute` selects one of ``format``,
+    ``minify``, ``validate``, ``convert``, ``to_yaml``, ``from_yaml``,
+    ``to_xml``, ``from_xml``, ``merge`` and ``extract``.
+    """
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this tool."""
+        return "json_tool"
+
+    @property
+    def description(self) -> str:
+        """Return the human-readable description of this tool."""
+        return "JSON转换工具 - 数据格式转换：JSON格式化、JSON压缩、JSON验证、格式转换"
+
+    @property
+    def parameters(self) -> List[ToolParameter]:
+        """Return the parameter declarations accepted by :meth:`execute`."""
+        return [
+            ToolParameter(
+                name="operation",
+                type=ParameterType.STRING,
+                description="操作类型",
+                required=True,
+                enum=["format", "minify", "validate", "convert", "to_yaml", "from_yaml", "to_xml", "from_xml", "merge", "extract"]
+            ),
+            ToolParameter(
+                name="input_data",
+                type=ParameterType.STRING,
+                description="输入数据（JSON字符串、YAML字符串或XML字符串）",
+                required=True
+            ),
+            ToolParameter(
+                name="indent",
+                type=ParameterType.INTEGER,
+                description="JSON格式化缩进空格数",
+                required=False,
+                default=2,
+                minimum=0,
+                maximum=8
+            ),
+            ToolParameter(
+                name="ensure_ascii",
+                type=ParameterType.BOOLEAN,
+                description="是否确保ASCII编码",
+                required=False,
+                default=False
+            ),
+            ToolParameter(
+                name="sort_keys",
+                type=ParameterType.BOOLEAN,
+                description="是否对键进行排序",
+                required=False,
+                default=False
+            ),
+            ToolParameter(
+                name="merge_data",
+                type=ParameterType.STRING,
+                description="要合并的JSON数据（用于merge操作）",
+                required=False
+            ),
+            ToolParameter(
+                name="json_path",
+                type=ParameterType.STRING,
+                description="JSON路径表达式（用于extract操作，如：$.user.name）",
+                required=False
+            )
+        ]
+
+    async def execute(self, **kwargs) -> ToolResult:
+        """Run the requested operation and wrap the outcome in a ``ToolResult``.
+
+        Reads ``operation`` and ``input_data`` from ``kwargs`` and forwards the
+        remaining options to the matching helper. Any exception raised while
+        processing is converted into an error result with code ``JSON_ERROR``;
+        nothing is re-raised.
+        """
+        start_time = time.time()
+
+        try:
+            operation = kwargs.get("operation")
+            input_data = kwargs.get("input_data")
+
+            if not input_data:
+                raise ValueError("input_data 参数是必需的")
+
+            if operation == "format":
+                result = self._format_json(input_data, kwargs)
+            elif operation == "minify":
+                result = self._minify_json(input_data, kwargs.get("ensure_ascii", False))
+            elif operation == "validate":
+                result = self._validate_json(input_data)
+            elif operation == "convert":
+                result = self._convert_json(input_data)
+            elif operation == "to_yaml":
+                result = self._json_to_yaml(input_data)
+            elif operation == "from_yaml":
+                result = self._yaml_to_json(input_data, kwargs)
+            elif operation == "to_xml":
+                result = self._json_to_xml(input_data)
+            elif operation == "from_xml":
+                result = self._xml_to_json(input_data, kwargs)
+            elif operation == "merge":
+                result = self._merge_json(input_data, kwargs)
+            elif operation == "extract":
+                result = self._extract_json_path(input_data, kwargs)
+            else:
+                raise ValueError(f"不支持的操作类型: {operation}")
+
+            execution_time = time.time() - start_time
+            return ToolResult.success_result(
+                data=result,
+                execution_time=execution_time
+            )
+
+        except Exception as e:
+            # Tool boundary: report every failure as a result object.
+            execution_time = time.time() - start_time
+            return ToolResult.error_result(
+                error=str(e),
+                error_code="JSON_ERROR",
+                execution_time=execution_time
+            )
+
+    def _format_json(self, input_data: str, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Pretty-print a JSON string.
+
+        Honors the ``indent`` (default 2), ``ensure_ascii`` (default False) and
+        ``sort_keys`` (default False) entries of ``kwargs``.
+
+        Raises:
+            json.JSONDecodeError: If ``input_data`` is not valid JSON.
+        """
+        indent = kwargs.get("indent", 2)
+        ensure_ascii = kwargs.get("ensure_ascii", False)
+        sort_keys = kwargs.get("sort_keys", False)
+
+        data = json.loads(input_data)
+
+        formatted = json.dumps(
+            data,
+            indent=indent,
+            ensure_ascii=ensure_ascii,
+            sort_keys=sort_keys,
+            separators=(',', ': ')
+        )
+
+        return {
+            "original_size": len(input_data),
+            "formatted_size": len(formatted),
+            "formatted_json": formatted,
+            "is_valid": True,
+            "settings": {
+                "indent": indent,
+                "ensure_ascii": ensure_ascii,
+                "sort_keys": sort_keys
+            }
+        }
+
+    def _minify_json(self, input_data: str, ensure_ascii: bool = False) -> Dict[str, Any]:
+        """Re-serialize a JSON string without insignificant whitespace.
+
+        Args:
+            input_data: The JSON text to minify.
+            ensure_ascii: When True, non-ASCII characters are written as
+                ``\\uXXXX`` escapes. The default of False keeps them as-is so
+                that minifying non-ASCII text does not make it longer.
+
+        Returns:
+            A dict with the minified text, both sizes (in characters) and the
+            size reduction as a percentage of the original size.
+
+        Raises:
+            json.JSONDecodeError: If ``input_data`` is not valid JSON.
+        """
+        data = json.loads(input_data)
+        minified = json.dumps(data, separators=(',', ':'), ensure_ascii=ensure_ascii)
+
+        return {
+            "original_size": len(input_data),
+            "minified_size": len(minified),
+            "compression_ratio": round((1 - len(minified) / len(input_data)) * 100, 2),
+            "minified_json": minified,
+            "is_valid": True
+        }
+
+    def _validate_json(self, input_data: str) -> Dict[str, Any]:
+        """Check whether a string is valid JSON.
+
+        Returns a dict with ``is_valid``; on success it also carries a
+        structure summary, on failure the decoder message and its line and
+        column. Decoding errors are reported in the result, not raised.
+        """
+        try:
+            data = json.loads(input_data)
+        except json.JSONDecodeError as e:
+            return {
+                "is_valid": False,
+                "error": str(e),
+                "error_line": getattr(e, 'lineno', None),
+                "error_column": getattr(e, 'colno', None),
+                "size": len(input_data)
+            }
+
+        return {
+            "is_valid": True,
+            "error": None,
+            "size": len(input_data),
+            "structure": self._analyze_json_structure(data)
+        }
+
+    def _convert_json(self, input_data: str) -> Dict[str, Any]:
+        """Re-serialize a JSON string with non-ASCII characters left unescaped.
+
+        Raises:
+            json.JSONDecodeError: If ``input_data`` is not valid JSON.
+        """
+        data = json.loads(input_data)
+        converted = json.dumps(data, ensure_ascii=False)
+
+        return {
+            "converted_json": converted,
+            "is_valid": True
+        }
+
+    def _json_to_yaml(self, input_data: str) -> Dict[str, Any]:
+        """Convert a JSON string to block-style YAML.
+
+        Raises:
+            json.JSONDecodeError: If ``input_data`` is not valid JSON.
+        """
+        data = json.loads(input_data)
+        yaml_output = yaml.dump(data, default_flow_style=False, allow_unicode=True, indent=2)
+
+        return {
+            "original_format": "json",
+            "target_format": "yaml",
+            "original_size": len(input_data),
+            "converted_size": len(yaml_output),
+            "converted_data": yaml_output
+        }
+
+    def _yaml_to_json(self, input_data: str, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Convert a YAML string to JSON.
+
+        Honors the ``indent`` (default 2) and ``ensure_ascii`` (default False)
+        entries of ``kwargs``. The YAML is read with ``yaml.safe_load``.
+        """
+        indent = kwargs.get("indent", 2)
+        ensure_ascii = kwargs.get("ensure_ascii", False)
+
+        data = yaml.safe_load(input_data)
+        json_output = json.dumps(data, indent=indent, ensure_ascii=ensure_ascii)
+
+        return {
+            "original_format": "yaml",
+            "target_format": "json",
+            "original_size": len(input_data),
+            "converted_size": len(json_output),
+            "converted_data": json_output
+        }
+
+    def _json_to_xml(self, input_data: str) -> Dict[str, Any]:
+        """Convert a JSON string to pretty-printed XML.
+
+        The document is always wrapped in a ``<root>`` element. Object keys
+        become child element names, array items become ``item_<index>``
+        elements, and scalar values become element text via ``str()``.
+
+        Raises:
+            json.JSONDecodeError: If ``input_data`` is not valid JSON.
+        """
+        data = json.loads(input_data)
+
+        def to_element(value, tag):
+            """Recursively build the element named *tag* for *value*."""
+            element = ET.Element(tag)
+            if isinstance(value, dict):
+                # Every key keeps its own element. Unwrapping single-key
+                # objects here would discard the enclosing element, e.g. the
+                # <user> in {"user": {"name": "x"}}.
+                for key, child_value in value.items():
+                    element.append(to_element(child_value, key))
+            elif isinstance(value, list):
+                for index, item in enumerate(value):
+                    element.append(to_element(item, f"item_{index}"))
+            else:
+                element.text = str(value)
+            return element
+
+        xml_string = ET.tostring(to_element(data, "root"), encoding='unicode')
+
+        # Round-trip through minidom only to get indented output.
+        dom = minidom.parseString(xml_string)
+        formatted_xml = dom.toprettyxml(indent="  ")
+
+        # toprettyxml can emit whitespace-only lines; drop them.
+        formatted_xml = '\n'.join([line for line in formatted_xml.split('\n') if line.strip()])
+
+        return {
+            "original_format": "json",
+            "target_format": "xml",
+            "original_size": len(input_data),
+            "converted_size": len(formatted_xml),
+            "converted_data": formatted_xml
+        }
+
+    def _xml_to_json(self, input_data: str, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Convert an XML string to JSON.
+
+        The result is a single-key object named after the root element. An
+        element with neither attributes nor children becomes its stripped
+        text. Any other element becomes an object holding its attributes, its
+        text under ``"text"`` (when non-blank) and its children by tag name;
+        repeated tags are collected into a list.
+
+        Honors the ``indent`` (default 2) and ``ensure_ascii`` (default False)
+        entries of ``kwargs``.
+
+        Raises:
+            xml.etree.ElementTree.ParseError: If ``input_data`` is not
+                well-formed XML.
+        """
+        indent = kwargs.get("indent", 2)
+        ensure_ascii = kwargs.get("ensure_ascii", False)
+
+        def element_to_value(element):
+            """Recursively convert *element* to a JSON-compatible value."""
+            text = element.text.strip() if element.text else ""
+
+            # Only a plain leaf collapses to a string; a leaf that carries
+            # attributes keeps them instead of silently losing them.
+            if text and len(element) == 0 and not element.attrib:
+                return text
+
+            result = dict(element.attrib)
+            if text:
+                result['text'] = text
+
+            for child in element:
+                child_value = element_to_value(child)
+                if child.tag in result:
+                    # Second occurrence of a name: promote the entry to a list.
+                    if not isinstance(result[child.tag], list):
+                        result[child.tag] = [result[child.tag]]
+                    result[child.tag].append(child_value)
+                else:
+                    result[child.tag] = child_value
+
+            return result
+
+        # NOTE(security): the standard-library XML parser is not hardened
+        # against maliciously constructed documents (e.g. entity expansion).
+        # If input_data can come from untrusted sources, review this call.
+        root = ET.fromstring(input_data)
+        data = {root.tag: element_to_value(root)}
+        json_output = json.dumps(data, indent=indent, ensure_ascii=ensure_ascii)
+
+        return {
+            "original_format": "xml",
+            "target_format": "json",
+            "original_size": len(input_data),
+            "converted_size": len(json_output),
+            "converted_data": json_output
+        }
+
+    def _merge_json(self, input_data: str, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Merge the JSON in ``kwargs["merge_data"]`` into ``input_data``.
+
+        Two objects are merged recursively, with values from ``merge_data``
+        winning on conflicts; two arrays are concatenated.
+
+        Raises:
+            ValueError: If ``merge_data`` is missing, or the two documents are
+                not both objects or both arrays.
+            json.JSONDecodeError: If either document is not valid JSON.
+        """
+        merge_data = kwargs.get("merge_data")
+        if not merge_data:
+            raise ValueError("merge_data 参数是必需的")
+
+        data1 = json.loads(input_data)
+        data2 = json.loads(merge_data)
+
+        def deep_merge(base, override):
+            """Return a new dict with *override* merged into *base*."""
+            result = base.copy()
+            for key, value in override.items():
+                if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+                    result[key] = deep_merge(result[key], value)
+                else:
+                    result[key] = value
+            return result
+
+        if isinstance(data1, dict) and isinstance(data2, dict):
+            merged = deep_merge(data1, data2)
+        elif isinstance(data1, list) and isinstance(data2, list):
+            merged = data1 + data2
+        else:
+            raise ValueError("无法合并不同类型的数据")
+
+        merged_json = json.dumps(merged, indent=2, ensure_ascii=False)
+
+        return {
+            "operation": "merge",
+            "original_size": len(input_data),
+            "merge_size": len(merge_data),
+            "result_size": len(merged_json),
+            "merged_data": merged_json
+        }
+
+    def _extract_json_path(self, input_data: str, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Extract the value at a dotted path from a JSON document.
+
+        Only plain dotted paths are supported, with or without a leading
+        ``$.`` (``$.user.name``, ``items.0.id``); a bare ``$`` selects the
+        whole document. Each path segment is used as a key on objects and as a
+        zero-based index on arrays.
+
+        Returns:
+            A dict with ``found`` set to True and the value serialized as
+            JSON, or ``found`` set to False and an ``error`` message when the
+            path does not resolve.
+
+        Raises:
+            ValueError: If ``json_path`` is missing.
+            json.JSONDecodeError: If ``input_data`` is not valid JSON.
+        """
+        json_path = kwargs.get("json_path")
+        if not json_path:
+            raise ValueError("json_path 参数是必需的")
+
+        data = json.loads(input_data)
+
+        if json_path == '$':
+            path_parts = []
+        elif json_path.startswith('$.'):
+            path_parts = json_path[2:].split('.')
+        else:
+            path_parts = json_path.split('.')
+
+        try:
+            result = data
+            for part in path_parts:
+                if isinstance(result, dict):
+                    # JSON object keys are always strings, so a segment such
+                    # as "0" must be looked up as text here, not as an int.
+                    result = result[part]
+                elif isinstance(result, list):
+                    if not (part.isascii() and part.isdigit()):
+                        raise TypeError(f"array index must be a non-negative integer, got {part!r}")
+                    result = result[int(part)]
+                else:
+                    # Scalars (including strings) have nothing to descend into.
+                    raise TypeError(f"cannot descend into {type(result).__name__} with {part!r}")
+
+            extracted_json = json.dumps(result, indent=2, ensure_ascii=False)
+
+            return {
+                "operation": "extract",
+                "json_path": json_path,
+                "found": True,
+                "extracted_data": extracted_json,
+                "data_type": type(result).__name__
+            }
+
+        except (KeyError, IndexError, TypeError) as e:
+            return {
+                "operation": "extract",
+                "json_path": json_path,
+                "found": False,
+                "error": str(e),
+                "extracted_data": None
+            }
+
+    def _analyze_json_structure(self, data: Any, depth: int = 0) -> Dict[str, Any]:
+        """Summarize the shape of a decoded JSON value.
+
+        Objects are described recursively per key. Arrays report their length
+        and the sorted names of the Python types of their items, without
+        descending into the items. Any other value reports its type name and
+        its string form, truncated to 100 characters.
+        """
+        if isinstance(data, dict):
+            return {
+                "type": "object",
+                "keys": len(data),
+                "depth": depth,
+                "children": {k: self._analyze_json_structure(v, depth + 1) for k, v in data.items()}
+            }
+
+        if isinstance(data, list):
+            return {
+                "type": "array",
+                "length": len(data),
+                "depth": depth,
+                # Sorted so the summary is stable; set order varies between runs.
+                "item_types": sorted({type(item).__name__ for item in data})
+            }
+
+        text = str(data)
+        return {
+            "type": type(data).__name__,
+            "depth": depth,
+            "value": text[:100] + "..." if len(text) > 100 else text
+        }
--- a/api/app/core/tools/builtin/mineru_tool.py
+++ b/api/app/core/tools/builtin/mineru_tool.py
@@ -0,0 +1,327 @@
+"""MinerU PDF解析工具"""
+import time
+from typing import List, Dict, Any
+import aiohttp
+
+from app.core.tools.base import ToolParameter, ToolResult, ParameterType
+from .base import BuiltinTool
+
+
+class MinerUTool(BuiltinTool):
+    """Built-in tool that sends PDF documents to a MinerU HTTP API.
+
+    The ``operation`` argument of :meth:`execute` selects one of
+    ``parse_pdf``, ``extract_text``, ``extract_tables``, ``extract_images``
+    and ``analyze_layout``. The document is supplied either as Base64 content
+    (``file_content``) or as a URL (``file_url``).
+    """
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this tool."""
+        return "mineru_tool"
+
+    @property
+    def description(self) -> str:
+        """Return the human-readable description of this tool."""
+        return "MinerU - PDF解析工具：PDF解析、表格提取、图片识别、文本提取"
+
+    def get_required_config_parameters(self) -> List[str]:
+        """Return the names of the configuration values this tool requires."""
+        return ["api_key", "api_url"]
+
+    @property
+    def parameters(self) -> List[ToolParameter]:
+        """Return the parameter declarations accepted by :meth:`execute`."""
+        return [
+            ToolParameter(
+                name="operation",
+                type=ParameterType.STRING,
+                description="操作类型",
+                required=True,
+                enum=["parse_pdf", "extract_text", "extract_tables", "extract_images", "analyze_layout"]
+            ),
+            ToolParameter(
+                name="file_content",
+                type=ParameterType.STRING,
+                description="PDF文件内容（Base64编码）",
+                required=False
+            ),
+            ToolParameter(
+                name="file_url",
+                type=ParameterType.STRING,
+                description="PDF文件URL",
+                required=False
+            ),
+            ToolParameter(
+                name="parse_mode",
+                type=ParameterType.STRING,
+                description="解析模式",
+                required=False,
+                default="auto",
+                enum=["auto", "text_only", "table_priority", "image_priority", "layout_analysis"]
+            ),
+            ToolParameter(
+                name="extract_images",
+                type=ParameterType.BOOLEAN,
+                description="是否提取图片",
+                required=False,
+                default=True
+            ),
+            ToolParameter(
+                name="extract_tables",
+                type=ParameterType.BOOLEAN,
+                description="是否提取表格",
+                required=False,
+                default=True
+            ),
+            ToolParameter(
+                name="page_range",
+                type=ParameterType.STRING,
+                description="页面范围（如：1-5, 1,3,5）",
+                required=False
+            ),
+            ToolParameter(
+                name="output_format",
+                type=ParameterType.STRING,
+                description="输出格式",
+                required=False,
+                default="json",
+                enum=["json", "markdown", "html", "text"]
+            )
+        ]
+
+    async def execute(self, **kwargs) -> ToolResult:
+        """Run the requested operation and wrap the outcome in a ``ToolResult``.
+
+        Requires ``file_content`` or ``file_url`` in ``kwargs``. Any exception
+        raised while processing is converted into an error result with code
+        ``MINERU_ERROR``; nothing is re-raised.
+        """
+        start_time = time.time()
+
+        try:
+            operation = kwargs.get("operation")
+            file_content = kwargs.get("file_content")
+            file_url = kwargs.get("file_url")
+
+            if not file_content and not file_url:
+                raise ValueError("必须提供 file_content 或 file_url 参数")
+
+            handlers = {
+                "parse_pdf": self._parse_pdf,
+                "extract_text": self._extract_text,
+                "extract_tables": self._extract_tables,
+                "extract_images": self._extract_images,
+                "analyze_layout": self._analyze_layout,
+            }
+            # Only strings are looked up so that an unhashable value still
+            # ends in the "unsupported operation" error below.
+            handler = handlers.get(operation) if isinstance(operation, str) else None
+            if handler is None:
+                raise ValueError(f"不支持的操作类型: {operation}")
+
+            result = await handler(kwargs)
+
+            execution_time = time.time() - start_time
+            return ToolResult.success_result(
+                data=result,
+                execution_time=execution_time
+            )
+
+        except Exception as e:
+            # Tool boundary: report every failure as a result object.
+            execution_time = time.time() - start_time
+            return ToolResult.error_result(
+                error=str(e),
+                error_code="MINERU_ERROR",
+                execution_time=execution_time
+            )
+
+    @staticmethod
+    def _add_document_fields(request_data: Dict[str, Any], kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Add the optional page range and the PDF source to ``request_data``.
+
+        ``file_content`` takes precedence over ``file_url`` when both are
+        given. The dict is modified in place and returned.
+        """
+        page_range = kwargs.get("page_range")
+        if page_range:
+            request_data["page_range"] = page_range
+
+        if kwargs.get("file_content"):
+            request_data["file_content"] = kwargs["file_content"]
+        elif kwargs.get("file_url"):
+            request_data["file_url"] = kwargs["file_url"]
+
+        return request_data
+
+    async def _parse_pdf(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Request a full parse from the ``parse`` endpoint.
+
+        Forwards ``parse_mode``, ``extract_images``, ``extract_tables`` and
+        ``output_format`` from ``kwargs``, using the same defaults as the
+        declared parameters. Fields missing from the response fall back to
+        empty values.
+        """
+        parse_mode = kwargs.get("parse_mode", "auto")
+
+        request_data = self._add_document_fields(
+            {
+                "parse_mode": parse_mode,
+                "extract_images": kwargs.get("extract_images", True),
+                "extract_tables": kwargs.get("extract_tables", True),
+                "output_format": kwargs.get("output_format", "json")
+            },
+            kwargs
+        )
+
+        result = await self._call_mineru_api("parse", request_data)
+
+        return {
+            "operation": "parse_pdf",
+            "parse_mode": parse_mode,
+            "total_pages": result.get("total_pages", 0),
+            "processed_pages": result.get("processed_pages", 0),
+            "text_content": result.get("text_content", ""),
+            "tables": result.get("tables", []),
+            "images": result.get("images", []),
+            "layout_info": result.get("layout_info", {}),
+            "metadata": result.get("metadata", {}),
+            "processing_time": result.get("processing_time", 0)
+        }
+
+    async def _extract_text(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Request text extraction from the ``extract_text`` endpoint.
+
+        ``output_format`` defaults to ``"text"`` for this operation. Word and
+        character counts are computed locally from the returned text.
+        """
+        request_data = self._add_document_fields(
+            {
+                "operation": "extract_text",
+                "output_format": kwargs.get("output_format", "text")
+            },
+            kwargs
+        )
+
+        result = await self._call_mineru_api("extract_text", request_data)
+
+        # A response may carry an explicit null; treat it like a missing
+        # field so the counts below do not fail on None.
+        text_content = result.get("text_content") or ""
+
+        return {
+            "operation": "extract_text",
+            "total_pages": result.get("total_pages", 0),
+            "text_content": text_content,
+            "word_count": len(text_content.split()),
+            "character_count": len(text_content),
+            "pages_text": result.get("pages_text", [])
+        }
+
+    async def _extract_tables(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Request table extraction from the ``extract_tables`` endpoint."""
+        request_data = self._add_document_fields(
+            {
+                "operation": "extract_tables",
+                "output_format": kwargs.get("output_format", "json")
+            },
+            kwargs
+        )
+
+        result = await self._call_mineru_api("extract_tables", request_data)
+
+        return {
+            "operation": "extract_tables",
+            "total_tables": result.get("total_tables", 0),
+            "tables": result.get("tables", []),
+            "table_locations": result.get("table_locations", [])
+        }
+
+    async def _extract_images(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Request image extraction from the ``extract_images`` endpoint."""
+        request_data = self._add_document_fields(
+            {"operation": "extract_images"},
+            kwargs
+        )
+
+        result = await self._call_mineru_api("extract_images", request_data)
+
+        return {
+            "operation": "extract_images",
+            "total_images": result.get("total_images", 0),
+            "images": result.get("images", []),
+            "image_locations": result.get("image_locations", [])
+        }
+
+    async def _analyze_layout(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Request layout analysis from the ``analyze_layout`` endpoint."""
+        request_data = self._add_document_fields(
+            {"operation": "analyze_layout"},
+            kwargs
+        )
+
+        result = await self._call_mineru_api("analyze_layout", request_data)
+
+        return {
+            "operation": "analyze_layout",
+            "layout_info": result.get("layout_info", {}),
+            "page_layouts": result.get("page_layouts", []),
+            "text_blocks": result.get("text_blocks", []),
+            "image_blocks": result.get("image_blocks", []),
+            "table_blocks": result.get("table_blocks", [])
+        }
+
+    async def _call_mineru_api(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
+        """POST ``data`` as JSON to ``<api_url>/<endpoint>`` and return the payload.
+
+        Reads ``api_key``, ``api_url`` and ``timeout`` (default 60, used as the
+        total request timeout) from the tool configuration and sends the key
+        as a Bearer token.
+
+        Returns:
+            The ``data`` member of the JSON response, or the whole response
+            when it has no ``data`` member.
+
+        Raises:
+            ValueError: If ``api_key`` or ``api_url`` is not configured.
+            Exception: If the HTTP status is not 200, or the response reports
+                ``success`` as false.
+        """
+        api_key = self.get_config_parameter("api_key")
+        api_url = self.get_config_parameter("api_url")
+        timeout_seconds = self.get_config_parameter("timeout", 60)
+
+        if not api_key or not api_url:
+            raise ValueError("MinerU API配置未完成")
+
+        # Avoid a double slash when api_url ends with "/".
+        url = f"{api_url.rstrip('/')}/{endpoint}"
+
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+
+        timeout = aiohttp.ClientTimeout(total=timeout_seconds)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.post(url, json=data, headers=headers) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    raise Exception(f"HTTP错误 {response.status}: {error_text}")
+
+                result = await response.json()
+                # A response without a "success" member counts as successful.
+                if not result.get("success", True):
+                    raise Exception(f"MinerU API错误: {result.get('message', '未知错误')}")
+
+                return result.get("data", result)
+
+    def test_connection(self) -> Dict[str, Any]:
+        """Check that the API configuration is present.
+
+        No network request is made; this only verifies that ``api_key`` and
+        ``api_url`` are configured. On success the result echoes the URL and
+        a masked form of the key (first 8 characters, or fully masked when
+        the key is 8 characters or shorter).
+        """
+        try:
+            api_key = self.get_config_parameter("api_key")
+            api_url = self.get_config_parameter("api_url")
+
+            if not api_key or not api_url:
+                return {
+                    "success": False,
+                    "error": "API配置未完成"
+                }
+
+            return {
+                "success": True,
+                "message": "连接配置有效",
+                "api_url": api_url,
+                "api_key_masked": api_key[:8] + "***" if len(api_key) > 8 else "***"
+            }
+
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e)
+            }
--- a/api/app/core/tools/builtin/textin_tool.py
+++ b/api/app/core/tools/builtin/textin_tool.py
@@ -0,0 +1,401 @@
+"""TextIn OCR文字识别工具"""
+import time
+from typing import List, Dict, Any
+import aiohttp
+
+from app.core.tools.base import ToolParameter, ToolResult, ParameterType
+from .base import BuiltinTool
+
+
+class TextInTool(BuiltinTool):
+    """TextIn OCR tool.
+
+    Supports general, high-accuracy, handwriting, formula, table and document
+    recognition. Each mode posts a JSON request to its own endpoint below the
+    configured ``api_url`` and reshapes the response for the caller.
+    """
+
+    # Fallbacks for the caller-tunable options that are forwarded to the API.
+    _OPTION_DEFAULTS = {
+        "language": "auto",
+        "return_location": False,
+        "return_confidence": True,
+        "merge_lines": True,
+    }
+
+    # Total request timeout used when the configuration does not provide one.
+    _DEFAULT_TIMEOUT_SECONDS = 30
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this tool."""
+        return "textin_tool"
+
+    @property
+    def description(self) -> str:
+        """Return the human-readable description of this tool."""
+        return "TextIn - OCR文字识别：通用OCR、手写识别、多语言支持、高精度识别"
+
+    def get_required_config_parameters(self) -> List[str]:
+        """Return the configuration keys this tool cannot work without."""
+        return ["app_id", "secret_key", "api_url"]
+
+    @property
+    def parameters(self) -> List[ToolParameter]:
+        """Return the parameters accepted by ``execute``."""
+        return [
+            ToolParameter(
+                name="image_content",
+                type=ParameterType.STRING,
+                description="图片内容（Base64编码）",
+                required=False,
+            ),
+            ToolParameter(
+                name="image_url",
+                type=ParameterType.STRING,
+                description="图片URL",
+                required=False,
+            ),
+            ToolParameter(
+                name="language",
+                type=ParameterType.STRING,
+                description="识别语言",
+                required=False,
+                default="auto",
+                enum=["auto", "zh-cn", "zh-tw", "en", "ja", "ko", "fr", "de", "es", "ru"],
+            ),
+            ToolParameter(
+                name="recognition_mode",
+                type=ParameterType.STRING,
+                description="识别模式",
+                required=False,
+                default="general",
+                enum=["general", "accurate", "handwriting", "formula", "table", "document"],
+            ),
+            ToolParameter(
+                name="return_location",
+                type=ParameterType.BOOLEAN,
+                description="是否返回文字位置信息",
+                required=False,
+                default=False,
+            ),
+            ToolParameter(
+                name="return_confidence",
+                type=ParameterType.BOOLEAN,
+                description="是否返回置信度",
+                required=False,
+                default=True,
+            ),
+            ToolParameter(
+                name="merge_lines",
+                type=ParameterType.BOOLEAN,
+                description="是否合并行",
+                required=False,
+                default=True,
+            ),
+            ToolParameter(
+                name="output_format",
+                type=ParameterType.STRING,
+                description="输出格式",
+                required=False,
+                default="text",
+                enum=["text", "json", "structured"],
+            ),
+        ]
+
+    async def execute(self, **kwargs) -> ToolResult:
+        """Run OCR on the supplied image in the requested recognition mode.
+
+        Args:
+            **kwargs: Values for the entries of ``parameters``. At least one
+                of ``image_content`` and ``image_url`` must be non-empty.
+
+        Returns:
+            A success result carrying the formatted recognition data, or an
+            error result with code ``TEXTIN_ERROR`` if anything fails.
+        """
+        start_time = time.time()
+        handlers = {
+            "general": self._general_ocr,
+            "accurate": self._accurate_ocr,
+            "handwriting": self._handwriting_ocr,
+            "formula": self._formula_ocr,
+            "table": self._table_ocr,
+            "document": self._document_ocr,
+        }
+
+        try:
+            if not kwargs.get("image_content") and not kwargs.get("image_url"):
+                raise ValueError("必须提供 image_content 或 image_url 参数")
+
+            recognition_mode = kwargs.get("recognition_mode", "general")
+            # Guard the lookup so an unhashable value is reported as unsupported.
+            handler = (
+                handlers.get(recognition_mode)
+                if isinstance(recognition_mode, str)
+                else None
+            )
+            if handler is None:
+                raise ValueError(f"不支持的识别模式: {recognition_mode}")
+
+            result = await handler(kwargs)
+            return ToolResult.success_result(
+                data=result,
+                execution_time=time.time() - start_time,
+            )
+        except Exception as e:
+            # Tool boundary: failures are reported in the result, not raised.
+            return ToolResult.error_result(
+                error=str(e),
+                error_code="TEXTIN_ERROR",
+                execution_time=time.time() - start_time,
+            )
+
+    def _build_request_data(
+        self,
+        kwargs: Dict[str, Any],
+        option_names: tuple,
+        **fixed_flags: Any,
+    ) -> Dict[str, Any]:
+        """Assemble the JSON payload for one OCR request.
+
+        Args:
+            kwargs: Arguments originally passed to ``execute``.
+            option_names: Options copied from ``kwargs``, falling back to
+                ``_OPTION_DEFAULTS`` for the ones the caller left out.
+            **fixed_flags: Mode-specific flags that are always sent as given.
+
+        Returns:
+            The payload including the image source: ``image`` when
+            ``image_content`` is set, otherwise ``image_url`` when that is set.
+        """
+        request_data = {
+            option: kwargs.get(option, self._OPTION_DEFAULTS[option])
+            for option in option_names
+        }
+        request_data.update(fixed_flags)
+
+        # Inline content takes precedence over a URL when both are supplied.
+        if kwargs.get("image_content"):
+            request_data["image"] = kwargs["image_content"]
+        elif kwargs.get("image_url"):
+            request_data["image_url"] = kwargs["image_url"]
+        return request_data
+
+    async def _general_ocr(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Run general-purpose OCR and format the recognized lines."""
+        request_data = self._build_request_data(
+            kwargs,
+            ("language", "return_location", "return_confidence", "merge_lines"),
+        )
+        result = await self._call_textin_api("general_ocr", request_data)
+        return self._format_ocr_result(result, kwargs.get("output_format", "text"))
+
+    async def _accurate_ocr(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Run high-accuracy OCR and format the recognized lines."""
+        request_data = self._build_request_data(
+            kwargs,
+            ("language", "return_location", "return_confidence", "merge_lines"),
+        )
+        result = await self._call_textin_api("accurate_ocr", request_data)
+        return self._format_ocr_result(result, kwargs.get("output_format", "text"))
+
+    async def _handwriting_ocr(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Run handwriting recognition and format the recognized lines."""
+        request_data = self._build_request_data(
+            kwargs,
+            ("language", "return_location", "return_confidence"),
+        )
+        result = await self._call_textin_api("handwriting_ocr", request_data)
+        return self._format_ocr_result(result, kwargs.get("output_format", "text"))
+
+    async def _formula_ocr(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Run formula recognition, always requesting LaTeX output."""
+        request_data = self._build_request_data(
+            kwargs,
+            ("return_location", "return_confidence"),
+            output_latex=True,
+        )
+        result = await self._call_textin_api("formula_ocr", request_data)
+        return self._format_formula_result(result, kwargs.get("output_format", "text"))
+
+    async def _table_ocr(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Run table recognition, always requesting Excel output."""
+        request_data = self._build_request_data(
+            kwargs,
+            ("language", "return_location", "return_confidence"),
+            output_excel=True,
+        )
+        result = await self._call_textin_api("table_ocr", request_data)
+        return self._format_table_result(result, kwargs.get("output_format", "text"))
+
+    async def _document_ocr(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Run document recognition, always requesting layout analysis."""
+        request_data = self._build_request_data(
+            kwargs,
+            ("language", "return_location", "return_confidence"),
+            layout_analysis=True,
+        )
+        result = await self._call_textin_api("document_ocr", request_data)
+        return self._format_document_result(result, kwargs.get("output_format", "text"))
+
+    def _format_ocr_result(self, result: Dict[str, Any], output_format: str) -> Dict[str, Any] | None:
+        """Shape a line-based OCR response according to ``output_format``.
+
+        Args:
+            result: API payload; its ``lines``, ``confidence`` and
+                ``processing_time`` entries are read, each with a fallback.
+            output_format: ``"text"``, ``"json"`` or ``"structured"``.
+
+        Returns:
+            The formatted dict, or ``None`` for any other ``output_format``.
+        """
+        lines = result.get("lines", [])
+        summary = {
+            "total_confidence": result.get("confidence", 0),
+            "processing_time": result.get("processing_time", 0),
+        }
+
+        if output_format == "json":
+            return {"recognition_mode": "ocr", "lines": lines, **summary}
+
+        if output_format not in ("text", "structured"):
+            # Nothing is produced for a format outside the supported set.
+            return None
+
+        text_content = "\n".join(line.get("text", "") for line in lines)
+        if output_format == "text":
+            return {
+                "recognition_mode": "ocr",
+                "text_content": text_content,
+                "line_count": len(lines),
+                **summary,
+            }
+
+        return {
+            "recognition_mode": "ocr",
+            "text_content": text_content,
+            "structured_data": {
+                "lines": lines,
+                "paragraphs": self._group_lines_to_paragraphs(lines),
+                "statistics": {
+                    "line_count": len(lines),
+                    "word_count": sum(len(line.get("text", "").split()) for line in lines),
+                    "character_count": sum(len(line.get("text", "")) for line in lines),
+                },
+            },
+            **summary,
+        }
+
+    def _format_formula_result(self, result: Dict[str, Any], output_format: str) -> Dict[str, Any]:
+        """Shape a formula-recognition response.
+
+        Args:
+            result: API payload; its ``formulas`` entry defaults to an empty list.
+            output_format: Not used by this formatter.
+
+        Returns:
+            The formulas, their count and their LaTeX joined by newlines.
+        """
+        formulas = result.get("formulas", [])
+        return {
+            "recognition_mode": "formula",
+            "formula_count": len(formulas),
+            "formulas": formulas,
+            "latex_content": "\n".join(formula.get("latex", "") for formula in formulas),
+            "total_confidence": result.get("confidence", 0),
+            "processing_time": result.get("processing_time", 0),
+        }
+
+    def _format_table_result(self, result: Dict[str, Any], output_format: str) -> Dict[str, Any]:
+        """Shape a table-recognition response.
+
+        Args:
+            result: API payload; its ``tables`` entry defaults to an empty list.
+            output_format: Not used by this formatter.
+
+        Returns:
+            The tables, their count and the payload's ``excel_data`` entry.
+        """
+        tables = result.get("tables", [])
+        return {
+            "recognition_mode": "table",
+            "table_count": len(tables),
+            "tables": tables,
+            "excel_data": result.get("excel_data"),
+            "total_confidence": result.get("confidence", 0),
+            "processing_time": result.get("processing_time", 0),
+        }
+
+    def _format_document_result(self, result: Dict[str, Any], output_format: str) -> Dict[str, Any]:
+        """Shape a document-recognition response.
+
+        Args:
+            result: API payload holding layout, block and full-text entries.
+            output_format: Not used by this formatter.
+
+        Returns:
+            The layout info, text/image/table blocks and full text, each
+            replaced by an empty default when the payload lacks it.
+        """
+        return {
+            "recognition_mode": "document",
+            "layout_info": result.get("layout_info", {}),
+            "text_blocks": result.get("text_blocks", []),
+            "image_blocks": result.get("image_blocks", []),
+            "table_blocks": result.get("table_blocks", []),
+            "full_text": result.get("full_text", ""),
+            "total_confidence": result.get("confidence", 0),
+            "processing_time": result.get("processing_time", 0),
+        }
+
+    def _group_lines_to_paragraphs(self, lines: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Group consecutive non-blank lines into paragraphs.
+
+        A line whose text is empty or whitespace-only ends the current
+        paragraph and is itself dropped.
+
+        Args:
+            lines: OCR line dicts; only their ``text`` entry is inspected.
+
+        Returns:
+            One dict per paragraph with the space-joined ``text`` and the
+            member ``lines``.
+        """
+        def make_paragraph(members: List[Dict[str, Any]]) -> Dict[str, Any]:
+            return {
+                "text": " ".join(member.get("text", "") for member in members),
+                "lines": members,
+            }
+
+        paragraphs = []
+        current = []
+        for line in lines:
+            if line.get("text", "").strip():
+                current.append(line)
+            elif current:
+                paragraphs.append(make_paragraph(current))
+                current = []
+
+        if current:
+            paragraphs.append(make_paragraph(current))
+        return paragraphs
+
+    async def _call_textin_api(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
+        """POST ``data`` as JSON to a TextIn endpoint and return its payload.
+
+        The total request timeout is read from the optional ``timeout``
+        configuration value and defaults to 30 seconds.
+
+        Args:
+            endpoint: Path segment appended to the configured ``api_url``.
+            data: JSON-serializable request body.
+
+        Returns:
+            The ``data`` entry of the response body, or the whole body when
+            that entry is absent.
+
+        Raises:
+            ValueError: If ``app_id``, ``secret_key`` or ``api_url`` is unset.
+            Exception: If the HTTP status or the response ``code`` is not 200.
+        """
+        app_id = self.get_config_parameter("app_id")
+        secret_key = self.get_config_parameter("secret_key")
+        api_url = self.get_config_parameter("api_url")
+        if not (app_id and secret_key and api_url):
+            raise ValueError("TextIn API配置未完成")
+
+        # A missing or falsy configured timeout falls back to the default.
+        timeout_seconds = (
+            self.get_config_parameter("timeout", self._DEFAULT_TIMEOUT_SECONDS)
+            or self._DEFAULT_TIMEOUT_SECONDS
+        )
+        url = f"{api_url.rstrip('/')}/{endpoint}"
+        headers = {
+            "X-App-Id": app_id,
+            "X-Secret-Key": secret_key,
+            "Content-Type": "application/json",
+        }
+
+        timeout = aiohttp.ClientTimeout(total=timeout_seconds)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.post(url, json=data, headers=headers) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    raise Exception(f"HTTP错误 {response.status}: {error_text}")
+
+                result = await response.json()
+                if result.get("code") != 200:
+                    raise Exception(f"TextIn API错误: {result.get('message', '未知错误')}")
+                return result.get("data", result)
+
+    def test_connection(self) -> Dict[str, Any]:
+        """Check that the TextIn credentials are configured (no request is sent).
+
+        Returns:
+            A dict with "success" False and an "error" text when a required
+            value is missing or reading the configuration raised; otherwise
+            "success" True plus the URL, the app id and a masked secret key.
+        """
+        try:
+            app_id = self.get_config_parameter("app_id")
+            secret_key = self.get_config_parameter("secret_key")
+            api_url = self.get_config_parameter("api_url")
+            if not (app_id and secret_key and api_url):
+                return {"success": False, "error": "API配置未完成"}
+
+            # Reveal at most the first 8 characters, and nothing of short keys.
+            masked_secret = "***" if len(secret_key) <= 8 else secret_key[:8] + "***"
+            return {
+                "success": True,
+                "message": "连接配置有效",
+                "api_url": api_url,
+                "app_id": app_id,
+                "secret_key_masked": masked_secret,
+            }
+        except Exception as e:
+            return {"success": False, "error": str(e)}