Merge pull request #779 from SuanmoSuanyangTechnology/develop

Develop
2026-04-02 16:01:08 +08:00
parent 9bce14be4e 377f79773d
commit a52e2137b7
54 changed files with 1578 additions and 585 deletions
--- a/.github/workflows/sync-to-gitee.yml
+++ b/.github/workflows/sync-to-gitee.yml
@@ -0,0 +1,36 @@
+name: Sync to Gitee
+
+on:
+  push:
+    branches:
+      - main     # Production
+      - develop  # Integration
+      - 'release/*' # Release preparation
+      - 'hotfix/*'  # Urgent fixes
+    tags:
+      - '*'      # All version tags (v1.0.0, etc.)
+
+jobs:
+  sync:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Source Code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Sync to Gitee
+        run: |
+          GITEE_URL="https://${{ secrets.GITEE_USERNAME }}:${{ secrets.GITEE_TOKEN }}@gitee.com/hangzhou-hongxiong-intelligent_1/MemoryBear.git"
+          git remote add gitee "$GITEE_URL"
+
+          # 遍历并推送所有分支
+          for branch in $(git branch -r | grep -v HEAD | sed 's/origin\///'); do
+            echo "Syncing branch: $branch"
+            git push -f gitee "origin/$branch:refs/heads/$branch"
+          done
+
+          # 推送所有标签
+          echo "Syncing tags..."
+          git push gitee --tags --force
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ examples/
 .kiro
 .vscode
 .idea
+.claude

 # Temporary outputs
 .DS_Store
--- a/README.md
+++ b/README.md
@@ -2,6 +2,10 @@

 # MemoryBear empowers AI with human-like memory capabilities

+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
+[![Python](https://img.shields.io/badge/Python-3.12+-green?logo=python&logoColor=white)](https://www.python.org/)
+[![Gitee Sync](https://img.shields.io/github/actions/workflow/status/SuanmoSuanyangTechnology/MemoryBear/sync-to-gitee.yml?label=Gitee%20Sync&logo=gitee&logoColor=white)](https://github.com/SuanmoSuanyangTechnology/MemoryBear/actions/workflows/sync-to-gitee.yml)
+
 [中文](./README_CN.md) | English

 ### [Installation Guide](#memorybear-installation-guide)
--- a/README_CN.md
+++ b/README_CN.md
@@ -2,6 +2,10 @@

 # MemoryBear 让AI拥有如同人类一样的记忆

+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
+[![Python](https://img.shields.io/badge/Python-3.12+-green?logo=python&logoColor=white)](https://www.python.org/)
+[![Gitee Sync](https://img.shields.io/github/actions/workflow/status/SuanmoSuanyangTechnology/MemoryBear/sync-to-gitee.yml?label=Gitee%20Sync&logo=gitee&logoColor=white)](https://github.com/SuanmoSuanyangTechnology/MemoryBear/actions/workflows/sync-to-gitee.yml)
+
 中文 | [English](./README.md)

 ### [安装教程](#memorybear安装教程)
--- a/api/app/controllers/home_page_controller.py
+++ b/api/app/controllers/home_page_controller.py
@@ -3,9 +3,10 @@ from sqlalchemy.orm import Session

 from app.core.config import settings
 from app.core.response_utils import success
-from app.db import get_db
+from app.db import get_db, SessionLocal
 from app.dependencies import get_current_user
 from app.models.user_model import User
+from app.repositories.home_page_repository import HomePageRepository
 from app.schemas.response_schema import ApiResponse
 from app.services.home_page_service import HomePageService

@@ -31,9 +32,32 @@ def get_workspace_list(

@router.get("/version", response_model=ApiResponse)
 def get_system_version():
-    """获取系统版本号+说明"""
-    current_version = settings.SYSTEM_VERSION
-    version_info = HomePageService.load_version_introduction(current_version)
+    """获取系统版本号 + 说明"""
+    current_version = None
+    version_info = None
+    
+    # 1️⃣ 优先从数据库获取最新已发布的版本
+    try:
+        db = SessionLocal()
+        try:
+            current_version, version_info = HomePageRepository.get_latest_version_introduction(db)
+        finally:
+            db.close()
+    except Exception as e:
+        pass
+    
+    # 2️⃣ 降级：使用环境变量中的版本号
+    if not current_version:
+        current_version = settings.SYSTEM_VERSION
+        version_info = HomePageService.load_version_introduction(current_version)
+    
+    # 3️⃣ 如果数据库和 JSON 都没有，返回基本信息
+    if not version_info:
+        version_info = {
+            "introduction": {"codeName": "", "releaseDate": "", "upgradePosition": "", "coreUpgrades": []},
+            "introduction_en": {"codeName": "", "releaseDate": "", "upgradePosition": "", "coreUpgrades": []}
+        }
+    
    return success(
        data={
            "version": current_version,
--- a/api/app/controllers/knowledge_controller.py
+++ b/api/app/controllers/knowledge_controller.py
@@ -352,6 +352,7 @@ async def delete_knowledge(
        # 2. Soft-delete knowledge base
        api_logger.debug(f"Perform a soft delete: {db_knowledge.name} (ID: {knowledge_id})")
        db_knowledge.status = 2
+        db_knowledge.updated_at = datetime.datetime.now()
        db.commit()
        api_logger.info(f"The knowledge base has been successfully deleted: {db_knowledge.name} (ID: {knowledge_id})")
        return success(msg="The knowledge base has been successfully deleted")
--- a/api/app/controllers/memory_dashboard_controller.py
+++ b/api/app/controllers/memory_dashboard_controller.py
@@ -591,7 +591,7 @@ async def dashboard_data(
                "total_api_call": None
            }
            
-            # 1. 获取记忆总量（total_memory）
+            # 1. 获取记忆总量（total_memory）—— neo4j 独有逻辑：查询 neo4j 存储节点
            try:
                total_memory_data = await memory_dashboard_service.get_workspace_total_memory_count(
                    db=db,
@@ -600,49 +600,33 @@ async def dashboard_data(
                    end_user_id=end_user_id
                )
                neo4j_data["total_memory"] = total_memory_data.get("total_memory_count", 0)
-                # total_app: 统计当前空间下的所有app数量
-                # 包含自有app + 被分享给本工作空间的app
-                from app.services import app_service as _app_svc
-                _, total_app = _app_svc.AppService(db).list_apps(
-                    workspace_id=workspace_id, include_shared=True, pagesize=1
-                )
-                neo4j_data["total_app"] = total_app
-                api_logger.info(f"成功获取记忆总量: {neo4j_data['total_memory']}, 应用数量: {neo4j_data['total_app']}")
+                api_logger.info(f"成功获取记忆总量: {neo4j_data['total_memory']}")
            except Exception as e:
                api_logger.warning(f"获取记忆总量失败: {str(e)}")
            
-            # 2. 获取知识库类型统计（total_knowledge）
-            try:
-                from app.services.memory_agent_service import MemoryAgentService 
-                memory_agent_service = MemoryAgentService()
-                knowledge_stats = await memory_agent_service.get_knowledge_type_stats(
-                    end_user_id=end_user_id,
-                    only_active=True,
-                    current_workspace_id=workspace_id,
-                    db=db
-                )
-                neo4j_data["total_knowledge"] = knowledge_stats.get("total", 0)
-                api_logger.info(f"成功获取知识库类型统计total: {neo4j_data['total_knowledge']}")
-            except Exception as e:
-                api_logger.warning(f"获取知识库类型统计失败: {str(e)}")
+            # 2. 获取共享统计数据（total_app、total_knowledge、total_api_call）
+            common_stats = memory_dashboard_service.get_dashboard_common_stats(db, workspace_id)
+            neo4j_data.update(common_stats)
+            api_logger.info(f"成功获取共享统计: app={common_stats['total_app']}, knowledge={common_stats['total_knowledge']}, api_call={common_stats['total_api_call']}")
            
-            # 3. 获取API调用统计（total_api_call）
+            # 计算昨日对比
            try:
-                # 使用 AppStatisticsService 获取真实的API调用统计
-                app_stats_service = AppStatisticsService(db)
-                api_stats = app_stats_service.get_workspace_api_statistics(
+                changes = memory_dashboard_service.get_dashboard_yesterday_changes(
+                    db=db,
                    workspace_id=workspace_id,
-                    start_date=start_date,
-                    end_date=end_date
+                    storage_type=storage_type,
+                    today_data=neo4j_data
                )
-                # 计算总调用次数
-                total_api_calls = sum(item.get("total_calls", 0) for item in api_stats)
-                neo4j_data["total_api_call"] = total_api_calls
-                api_logger.info(f"成功获取API调用统计: {neo4j_data['total_api_call']}")
+                neo4j_data.update(changes)
            except Exception as e:
-                api_logger.error(f"获取API调用统计失败: {str(e)}")
-                neo4j_data["total_api_call"] = 0
-            
+                api_logger.warning(f"计算neo4j昨日对比失败: {str(e)}")
+                neo4j_data.update({
+                    "total_memory_change": None,
+                    "total_app_change": None,
+                    "total_knowledge_change": None,
+                    "total_api_call_change": None,
+                })
+
            result["neo4j_data"] = neo4j_data
            api_logger.info("成功获取neo4j_data")
        
@@ -655,44 +639,37 @@ async def dashboard_data(
                "total_api_call": None
            }
            
-            # 获取RAG相关数据
+            # 1. 获取记忆总量（total_memory）—— rag 独有逻辑：查询 document 表的 chunk_num
            try:
-                # total_memory: 只统计用户知识库（permission_id='Memory'）的chunk数
                total_chunk = memory_dashboard_service.get_rag_user_kb_total_chunk(db, current_user)
                rag_data["total_memory"] = total_chunk
-                
-                # total_app: 统计当前空间下的所有app数量
-                # 包含自有app + 被分享给本工作空间的app
-                from app.services import app_service as _app_svc
-                _, total_app = _app_svc.AppService(db).list_apps(
-                    workspace_id=workspace_id, include_shared=True, pagesize=1
-                )
-                rag_data["total_app"] = total_app
-                
-                # total_knowledge: 使用 total_kb（总知识库数）
-                total_kb = memory_dashboard_service.get_rag_total_kb(db, current_user)
-                rag_data["total_knowledge"] = total_kb
-                
-                # total_api_call: 使用 AppStatisticsService 获取真实的API调用统计
-                try:
-                    app_stats_service = AppStatisticsService(db)
-                    api_stats = app_stats_service.get_workspace_api_statistics(
-                        workspace_id=workspace_id,
-                        start_date=start_date,
-                        end_date=end_date
-                    )
-                    # 计算总调用次数
-                    total_api_calls = sum(item.get("total_calls", 0) for item in api_stats)
-                    rag_data["total_api_call"] = total_api_calls
-                    api_logger.info(f"成功获取RAG模式API调用统计: {rag_data['total_api_call']}")
-                except Exception as e:
-                    api_logger.warning(f"获取RAG模式API调用统计失败，使用默认值: {str(e)}")
-                    rag_data["total_api_call"] = 0
-                
-                api_logger.info(f"成功获取RAG相关数据: memory={total_chunk}, app={total_app}, knowledge={total_kb}, api_calls={rag_data['total_api_call']}")
+                api_logger.info(f"成功获取RAG记忆总量: {total_chunk}")
            except Exception as e:
-                api_logger.warning(f"获取RAG相关数据失败: {str(e)}")
+                api_logger.warning(f"获取RAG记忆总量失败: {str(e)}")
            
+            # 2. 获取共享统计数据（total_app、total_knowledge、total_api_call）
+            common_stats = memory_dashboard_service.get_dashboard_common_stats(db, workspace_id)
+            rag_data.update(common_stats)
+            api_logger.info(f"成功获取共享统计: app={common_stats['total_app']}, knowledge={common_stats['total_knowledge']}, api_call={common_stats['total_api_call']}")
+            
+            # 计算昨日对比
+            try:
+                changes = memory_dashboard_service.get_dashboard_yesterday_changes(
+                    db=db,
+                    workspace_id=workspace_id,
+                    storage_type=storage_type,
+                    today_data=rag_data
+                )
+                rag_data.update(changes)
+            except Exception as e:
+                api_logger.warning(f"计算RAG昨日对比失败: {str(e)}")
+                rag_data.update({
+                    "total_memory_change": None,
+                    "total_app_change": None,
+                    "total_knowledge_change": None,
+                    "total_api_call_change": None,
+                })
+
            result["rag_data"] = rag_data
            api_logger.info("成功获取rag_data")
        
--- a/api/app/controllers/memory_storage_controller.py
+++ b/api/app/controllers/memory_storage_controller.py
@@ -26,7 +26,7 @@ from app.services.memory_storage_service import (
    analytics_hot_memory_tags,
    analytics_recent_activity_stats,
    kb_type_distribution,
-    search_all,
+    search_all_batch,
    search_chunk,
    search_detials,
    search_dialogue,
@@ -409,7 +409,10 @@ async def search_all_num(
 ) -> dict:
    api_logger.info(f"Search all requested for end_user_id: {end_user_id}")
    try:
-        result = await search_all(end_user_id)
+        if not end_user_id:
+            return success(data={"total": 0}, msg="查询成功")
+        batch_result = await search_all_batch([end_user_id])
+        result = {"total": batch_result.get(end_user_id, 0)}
        return success(data=result, msg="查询成功")
    except Exception as e:
        api_logger.error(f"Search all failed: {str(e)}")
--- a/api/app/controllers/ontology_controller.py
+++ b/api/app/controllers/ontology_controller.py
@@ -163,6 +163,7 @@ def _get_ontology_service(
            api_key=api_key_config.api_key,
            base_url=api_key_config.api_base,
            is_omni=api_key_config.is_omni,
+            support_thinking="thinking" in (api_key_config.capability or []),
            max_retries=3,
            timeout=60.0
        )
--- a/api/app/controllers/public_share_controller.py
+++ b/api/app/controllers/public_share_controller.py
@@ -453,6 +453,9 @@ async def chat(
        # 流式返回
        agent_config = agent_config_4_app_release(release)

+        if not (agent_config.model_parameters.get("deep_thinking", False) and payload.thinking):
+            agent_config.model_parameters["deep_thinking"] = False
+
        if payload.stream:
            async def event_generator():
                async for event in app_chat_service.agnet_chat_stream(
@@ -634,7 +637,8 @@ async def config_query(
            "app_type": release.app.type,
            "variables": release.config.get("variables"),
            "memory": release.config.get("memory", {}).get("enabled"),
-            "features": release.config.get("features")
+            "features": release.config.get("features"),
+            "model_parameters": release.config.get("model_parameters")
        }
    elif release.app.type == AppType.MULTI_AGENT:
        content = {
--- a/api/app/controllers/service/init.py
+++ b/api/app/controllers/service/init.py
@@ -4,7 +4,7 @@
 认证方式: API Key
 """
 from fastapi import APIRouter
-from . import app_api_controller, rag_api_knowledge_controller, rag_api_document_controller, rag_api_file_controller, rag_api_chunk_controller, memory_api_controller
+from . import app_api_controller, rag_api_knowledge_controller, rag_api_document_controller, rag_api_file_controller, rag_api_chunk_controller, memory_api_controller, end_user_api_controller

 # 创建 V1 API 路由器
 service_router = APIRouter()
@@ -16,5 +16,6 @@ service_router.include_router(rag_api_document_controller.router)
 service_router.include_router(rag_api_file_controller.router)
 service_router.include_router(rag_api_chunk_controller.router)
 service_router.include_router(memory_api_controller.router)
+service_router.include_router(end_user_api_controller.router)

 __all__ = ["service_router"]
--- a/api/app/controllers/service/app_api_controller.py
+++ b/api/app/controllers/service/app_api_controller.py
@@ -144,6 +144,11 @@ async def chat(
        # print(app.current_release.default_model_config_id)
        agent_config = agent_config_4_app_release(app.current_release)
        # print(agent_config.default_model_config_id)
+
+        # thinking 开关：仅当 agent 配置了 deep_thinking 且请求 thinking=True 时才启用
+        if not (agent_config.model_parameters.get("deep_thinking", False) and payload.thinking):
+            agent_config.model_parameters["deep_thinking"] = False
+
        # 流式返回
        if payload.stream:
            async def event_generator():
--- a/api/app/controllers/service/end_user_api_controller.py
+++ b/api/app/controllers/service/end_user_api_controller.py
@@ -0,0 +1,92 @@
+"""End User 服务接口 - 基于 API Key 认证"""
+
+import uuid
+
+from fastapi import APIRouter, Body, Depends, Request
+from sqlalchemy.orm import Session
+
+from app.core.api_key_auth import require_api_key
+from app.core.error_codes import BizCode
+from app.core.exceptions import BusinessException
+from app.core.logging_config import get_business_logger
+from app.core.response_utils import success
+from app.db import get_db
+from app.repositories.end_user_repository import EndUserRepository
+from app.schemas.api_key_schema import ApiKeyAuth
+from app.schemas.memory_api_schema import CreateEndUserRequest, CreateEndUserResponse
+from app.services.memory_config_service import MemoryConfigService
+
+router = APIRouter(prefix="/end_user", tags=["V1 - End User API"])
+logger = get_business_logger()
+
+
+@router.post("/create")
+@require_api_key(scopes=["memory"])
+async def create_end_user(
+    request: Request,
+    api_key_auth: ApiKeyAuth = None,
+    db: Session = Depends(get_db),
+    message: str = Body(..., description="Request body"),
+):
+    """
+    Create or retrieve an end user for the workspace.
+
+    Creates a new end user and connects it to a memory configuration.
+    If an end user with the same other_id already exists in the workspace,
+    returns the existing one.
+
+    Optionally accepts a memory_config_id to connect the end user to a specific
+    memory configuration. If not provided, falls back to the workspace default config.
+    """
+    body = await request.json()
+    payload = CreateEndUserRequest(**body)
+    workspace_id = api_key_auth.workspace_id
+
+    logger.info("Create end user request - other_id: %s, workspace_id: %s", payload.other_id, workspace_id)
+
+    # Resolve memory_config_id: explicit > workspace default
+    memory_config_id = None
+    config_service = MemoryConfigService(db)
+
+    if payload.memory_config_id:
+        try:
+            memory_config_id = uuid.UUID(payload.memory_config_id)
+        except ValueError:
+            raise BusinessException(
+                f"Invalid memory_config_id format: {payload.memory_config_id}",
+                BizCode.INVALID_PARAMETER
+            )
+        config = config_service.get_config_with_fallback(memory_config_id, workspace_id)
+        if not config:
+            raise BusinessException(
+                f"Memory config not found: {payload.memory_config_id}",
+                BizCode.MEMORY_CONFIG_NOT_FOUND
+            )
+        memory_config_id = config.config_id
+    else:
+        default_config = config_service.get_workspace_default_config(workspace_id)
+        if default_config:
+            memory_config_id = default_config.config_id
+            logger.info(f"Using workspace default memory config: {memory_config_id}")
+        else:
+            logger.warning(f"No default memory config found for workspace: {workspace_id}")
+
+    end_user_repo = EndUserRepository(db)
+    end_user = end_user_repo.get_or_create_end_user_with_config(
+        app_id=api_key_auth.resource_id,
+        workspace_id=workspace_id,
+        other_id=payload.other_id,
+        memory_config_id=memory_config_id,
+    )
+
+    logger.info(f"End user ready: {end_user.id}")
+
+    result = {
+        "id": str(end_user.id),
+        "other_id": end_user.other_id or "",
+        "other_name": end_user.other_name or "",
+        "workspace_id": str(end_user.workspace_id),
+        "memory_config_id": str(end_user.memory_config_id) if end_user.memory_config_id else None,
+    }
+
+    return success(data=CreateEndUserResponse(**result).model_dump(), msg="End user created successfully")
--- a/api/app/core/agent/langchain_agent.py
+++ b/api/app/core/agent/langchain_agent.py
@@ -37,7 +37,10 @@ class LangChainAgent:
            tools: Optional[Sequence[BaseTool]] = None,
            streaming: bool = False,
            max_iterations: Optional[int] = None,  # 最大迭代次数（None 表示自动计算）
-            max_tool_consecutive_calls: int = 3  # 单个工具最大连续调用次数
+            max_tool_consecutive_calls: int = 3,  # 单个工具最大连续调用次数
+            deep_thinking: bool = False,  # 是否启用深度思考模式
+            thinking_budget_tokens: Optional[int] = None,  # 深度思考 token 预算
+            capability: Optional[List[str]] = None  # 模型能力列表，用于校验是否支持深度思考
    ):
        """初始化 LangChain Agent

@@ -60,6 +63,7 @@ class LangChainAgent:
        self.streaming = streaming
        self.is_omni = is_omni
        self.max_tool_consecutive_calls = max_tool_consecutive_calls
+        self.deep_thinking = deep_thinking and ("thinking" in (capability or []))

        # 工具调用计数器：记录每个工具的连续调用次数
        self.tool_call_counter: Dict[str, int] = {}
@@ -82,6 +86,13 @@ class LangChainAgent:
            f"auto_calculated={max_iterations is None}"
        )

+        # 根据 capability 校验是否真正支持深度思考
+        actual_deep_thinking = self.deep_thinking
+        if deep_thinking and not actual_deep_thinking:
+            logger.warning(
+                f"模型 {model_name} 不支持深度思考（capability 中无 'thinking'），已自动关闭 deep_thinking"
+            )
+
        # 创建 RedBearLLM（支持多提供商）
        model_config = RedBearModelConfig(
            model_name=model_name,
@@ -89,10 +100,13 @@ class LangChainAgent:
            api_key=api_key,
            base_url=api_base,
            is_omni=is_omni,
+            deep_thinking=actual_deep_thinking,
+            thinking_budget_tokens=thinking_budget_tokens if actual_deep_thinking else None,
+            support_thinking="thinking" in (capability or []),
            extra_params={
                "temperature": temperature,
                "max_tokens": max_tokens,
-                "streaming": streaming  # 使用参数控制流式
+                "streaming": streaming
            }
        )

@@ -249,6 +263,33 @@ class LangChainAgent:

        return messages

+    @staticmethod
+    def _extract_tokens_from_message(msg) -> int:
+        """从 AIMessage 或类似对象中提取 total_tokens，兼容多种 provider 格式
+
+        支持的格式：
+        - response_metadata.token_usage.total_tokens (OpenAI/ChatOpenAI)
+        - response_metadata.usage.total_tokens (部分 provider)
+        - usage_metadata.total_tokens (LangChain 新版)
+        """
+        total = 0
+        # 1. response_metadata
+        response_meta = getattr(msg, "response_metadata", None)
+        if response_meta and isinstance(response_meta, dict):
+            # 尝试 token_usage 路径
+            token_usage = response_meta.get("token_usage") or response_meta.get("usage", {})
+            if isinstance(token_usage, dict):
+                total = token_usage.get("total_tokens", 0)
+        # 2. usage_metadata（LangChain 新版 AIMessage 属性）
+        if not total:
+            usage_meta = getattr(msg, "usage_metadata", None)
+            if usage_meta:
+                if isinstance(usage_meta, dict):
+                    total = usage_meta.get("total_tokens", 0)
+                else:
+                    total = getattr(usage_meta, "total_tokens", 0)
+        return total or 0
+
    def _build_multimodal_content(self, text: str, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        构建多模态消息内容
@@ -283,6 +324,17 @@ class LangChainAgent:

        return content_parts

+    @staticmethod
+    def _extract_reasoning_content(msg) -> str:
+        """从 AIMessage 中提取深度思考内容（reasoning_content）
+
+        所有 provider 统一通过 additional_kwargs.reasoning_content 传递：
+        - DeepSeek-R1 / QwQ: 原生字段
+        - Volcano (Doubao-thinking): 由 VolcanoChatOpenAI 从 delta.reasoning_content 注入
+        """
+        additional = getattr(msg, "additional_kwargs", None) or {}
+        return additional.get("reasoning_content") or additional.get("reasoning", "")
+
    async def chat(
            self,
            message: str,
@@ -348,6 +400,7 @@ class LangChainAgent:

            logger.debug(f"输出消息数量: {len(output_messages)}")
            total_tokens = 0
+            reasoning_content = ""
            for msg in reversed(output_messages):
                if isinstance(msg, AIMessage):
                    logger.debug(f"找到 AI 消息，content 类型: {type(msg.content)}")
@@ -382,8 +435,8 @@ class LangChainAgent:
                    else:
                        content = str(msg.content)
                        logger.debug(f"转换为字符串: {content[:100]}...")
-                    response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None
-                    total_tokens = response_meta.get("token_usage", {}).get("total_tokens", 0) if response_meta else 0
+                    total_tokens = self._extract_tokens_from_message(msg)
+                    reasoning_content = self._extract_reasoning_content(msg) if self.deep_thinking else ""
                    break

            logger.info(f"最终提取的内容长度: {len(content)}")
@@ -399,6 +452,8 @@ class LangChainAgent:
                    "total_tokens": total_tokens
                }
            }
+            if reasoning_content:
+                response["reasoning_content"] = reasoning_content

            logger.debug(
                "Agent 调用完成",
@@ -420,7 +475,7 @@ class LangChainAgent:
            history: Optional[List[Dict[str, str]]] = None,
            context: Optional[str] = None,
            files: Optional[List[Dict[str, Any]]] = None
-    ) -> AsyncGenerator[str, None]:
+    ) -> AsyncGenerator[str | int, None]:
        """执行流式对话

        Args:
@@ -431,6 +486,8 @@ class LangChainAgent:

        Yields:
            str: 消息内容块
+            int: token 统计
+            Dict: 深度思考内容 {"type": "reasoning", "content": "..."}
        """
        logger.info("=" * 80)
        logger.info(" chat_stream 方法开始执行")
@@ -451,6 +508,7 @@ class LangChainAgent:
            # 统一使用 agent 的 astream_events 实现流式输出
            logger.debug("使用 Agent astream_events 实现流式输出")
            full_content = ''
+            full_reasoning = ''
            try:
                last_event = {}
                async for event in self.agent.astream_events(
@@ -467,6 +525,13 @@ class LangChainAgent:
                        # LLM 流式输出
                        chunk = event.get("data", {}).get("chunk")
                        if chunk and hasattr(chunk, "content"):
+                            # 提取深度思考内容（仅在启用深度思考时）
+                            if self.deep_thinking:
+                                reasoning_chunk = self._extract_reasoning_content(chunk)
+                                if reasoning_chunk:
+                                    full_reasoning += reasoning_chunk
+                                    yield {"type": "reasoning", "content": reasoning_chunk}
+
                            # 处理多模态响应：content 可能是字符串或列表
                            chunk_content = chunk.content
                            if isinstance(chunk_content, str) and chunk_content:
@@ -497,6 +562,13 @@ class LangChainAgent:
                        chunk = event.get("data", {}).get("chunk")
                        if chunk:
                            if hasattr(chunk, "content"):
+                                # 提取深度思考内容（仅在启用深度思考时）
+                                if self.deep_thinking:
+                                    reasoning_chunk = self._extract_reasoning_content(chunk)
+                                    if reasoning_chunk:
+                                        full_reasoning += reasoning_chunk
+                                        yield {"type": "reasoning", "content": reasoning_chunk}
+
                                chunk_content = chunk.content
                                if isinstance(chunk_content, str) and chunk_content:
                                    full_content += chunk_content
@@ -535,12 +607,9 @@ class LangChainAgent:
                output_messages = last_event.get("data", {}).get("output", {}).get("messages", [])
                for msg in reversed(output_messages):
                    if isinstance(msg, AIMessage):
-                        response_meta = msg.response_metadata if hasattr(msg, 'response_metadata') else None
-                        total_tokens = response_meta.get("token_usage", {}).get(
-                            "total_tokens",
-                            0
-                        ) if response_meta else 0
-                        yield total_tokens
+                        stream_total_tokens = self._extract_tokens_from_message(msg)
+                        logger.info(f"流式 token 统计: total_tokens={stream_total_tokens}")
+                        yield stream_total_tokens
                        break

            except Exception as e:
--- a/api/app/core/memory/src/search.py
+++ b/api/app/core/memory/src/search.py
@@ -758,8 +758,7 @@ async def run_hybrid_search(
                    model_name=embedder_config_dict["model_name"],
                    provider=embedder_config_dict["provider"],
                    api_key=embedder_config_dict["api_key"],
-                    base_url=embedder_config_dict["base_url"],
-                    type="llm"
+                    base_url=embedder_config_dict["base_url"]
                )
                config_load_time = time.time() - config_load_start
                logger.info(f"[PERF] Config loading took {config_load_time:.4f}s")
--- a/api/app/core/models/base.py
+++ b/api/app/core/models/base.py
@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
 from app.core.error_codes import BizCode
 from app.core.exceptions import BusinessException
 from app.models.models_model import ModelProvider, ModelType
+from app.core.models.volcano_chat import VolcanoChatOpenAI

 T = TypeVar("T")

@@ -25,6 +26,9 @@ class RedBearModelConfig(BaseModel):
    api_key: str
    base_url: Optional[str] = None
    is_omni: bool = False  # 是否为 Omni 模型
+    deep_thinking: bool = False  # 是否启用深度思考模式
+    thinking_budget_tokens: Optional[int] = None  # 深度思考 token 预算
+    support_thinking: bool = False  # 模型是否支持 enable_thinking 参数（capability 含 thinking）
    # 请求超时时间（秒）- 默认120秒以支持复杂的LLM调用，可通过环境变量 LLM_TIMEOUT 配置
    timeout: float = Field(default_factory=lambda: float(os.getenv("LLM_TIMEOUT", "120.0")))
    # 最大重试次数 - 默认2次以避免过长等待，可通过环境变量 LLM_MAX_RETRIES 配置
@@ -44,7 +48,7 @@ class RedBearModelFactory:
        # 打印供应商信息用于调试
        from app.core.logging_config import get_business_logger
        logger = get_business_logger()
-        logger.debug(f"获取模型参数 - Provider: {provider}, Model: {config.model_name}, is_omni: {config.is_omni}")
+        logger.debug(f"获取模型参数 - Provider: {provider}, Model: {config.model_name}, is_omni: {config.is_omni}, deep_thinking: {config.deep_thinking}")

        # dashscope 的 omni 模型使用 OpenAI 兼容模式
        if provider == ModelProvider.DASHSCOPE and config.is_omni:
@@ -58,7 +62,7 @@ class RedBearModelFactory:
                write=60.0,
                pool=10.0,
            )
-            return {
+            params: Dict[str, Any] = {
                "model": config.model_name,
                "base_url": config.base_url,
                "api_key": config.api_key,
@@ -66,6 +70,21 @@ class RedBearModelFactory:
                "max_retries": config.max_retries,
                **config.extra_params
            }
+            # 流式模式下启用 stream_usage 以获取 token 统计
+            is_streaming = bool(config.extra_params.get("streaming"))
+            if is_streaming:
+                params["stream_usage"] = True
+            # 只有支持 thinking 的模型才传 enable_thinking
+            if config.support_thinking:
+                model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
+                if is_streaming:
+                    model_kwargs["enable_thinking"] = config.deep_thinking
+                    if config.deep_thinking and config.thinking_budget_tokens:
+                        model_kwargs["thinking_budget"] = config.thinking_budget_tokens
+                else:
+                    model_kwargs["enable_thinking"] = False
+                params["model_kwargs"] = model_kwargs
+            return params

        if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.OLLAMA, ModelProvider.VOLCANO]:
            # 使用 httpx.Timeout 对象来设置详细的超时配置
@@ -78,7 +97,7 @@ class RedBearModelFactory:
                write=60.0,  # 写入超时：60秒
                pool=10.0,  # 连接池超时：10秒
            )
-            return {
+            params: Dict[str, Any] = {
                "model": config.model_name,
                "base_url": config.base_url,
                "api_key": config.api_key,
@@ -86,16 +105,47 @@ class RedBearModelFactory:
                "max_retries": config.max_retries,
                **config.extra_params
            }
+            # 流式模式下启用 stream_usage 以获取 token 统计
+            if config.extra_params.get("streaming"):
+                params["stream_usage"] = True
+            # 深度思考模式
+            is_streaming = bool(config.extra_params.get("streaming"))
+            if is_streaming:
+                if provider == ModelProvider.VOLCANO:
+                    # 火山引擎深度思考仅流式调用支持，非流式时不传 thinking 参数
+                    thinking_config: Dict[str, Any] = {
+                        "type": "enabled" if config.deep_thinking else "disabled"
+                    }
+                    if config.deep_thinking and config.thinking_budget_tokens:
+                        thinking_config["budget_tokens"] = config.thinking_budget_tokens
+                    params["extra_body"] = {"thinking": thinking_config}
+                else:
+                    # 始终显式传递 enable_thinking，不支持该参数的模型（如 DeepSeek-R1）会直接忽略
+                    model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
+                    model_kwargs["enable_thinking"] = config.deep_thinking
+                    if config.deep_thinking and config.thinking_budget_tokens:
+                        model_kwargs["thinking_budget"] = config.thinking_budget_tokens
+                    params["model_kwargs"] = model_kwargs
+            return params
        elif provider == ModelProvider.DASHSCOPE:
-            # DashScope (通义千问) 使用自己的参数格式
-            # 注意: DashScopeEmbeddings 不支持 timeout 和 base_url 参数
-            # 只支持: model, dashscope_api_key, max_retries, client
-            return {
+            params = {
                "model": config.model_name,
                "dashscope_api_key": config.api_key,
                "max_retries": config.max_retries,
                **config.extra_params
            }
+            # 只有支持 thinking 的模型才传 enable_thinking
+            if config.support_thinking:
+                is_streaming = bool(config.extra_params.get("streaming"))
+                model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {})
+                if is_streaming:
+                    model_kwargs["enable_thinking"] = config.deep_thinking
+                    if config.deep_thinking and config.thinking_budget_tokens:
+                        model_kwargs["thinking_budget"] = config.thinking_budget_tokens
+                else:
+                    model_kwargs["enable_thinking"] = False
+                params["model_kwargs"] = model_kwargs
+            return params
        elif provider == ModelProvider.BEDROCK:
            # Bedrock 使用 AWS 凭证
            # api_key 格式: "access_key_id:secret_access_key" 或只是 access_key_id
@@ -134,6 +184,13 @@ class RedBearModelFactory:
            elif "region_name" not in params:
                params["region_name"] = "us-east-1"  # 默认区域

+            # 深度思考模式：Claude 3.7 Sonnet 等支持思考的模型
+            # 通过 additional_model_request_fields 传递 thinking 块，关闭时不传（Bedrock 无 disabled 选项）
+            if config.deep_thinking:
+                budget = config.thinking_budget_tokens or 10000
+                params["additional_model_request_fields"] = {
+                    "thinking": {"type": "enabled", "budget_tokens": budget}
+                }
            return params
        else:
            raise BusinessException(f"不支持的提供商: {provider}", code=BizCode.PROVIDER_NOT_SUPPORTED)
@@ -160,7 +217,9 @@ def get_provider_llm_class(config: RedBearModelConfig, type: ModelType = ModelTy
    # dashscope 的 omni 模型使用 OpenAI 兼容模式
    if provider == ModelProvider.DASHSCOPE and config.is_omni:
        return ChatOpenAI
-    if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.VOLCANO]:
+    if provider == ModelProvider.VOLCANO:
+        return VolcanoChatOpenAI
+    if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK]:
        if type == ModelType.LLM:
            return OpenAI
        elif type == ModelType.CHAT:
--- a/api/app/core/models/scripts/bedrock_models.yaml
+++ b/api/app/core/models/scripts/bedrock_models.yaml
@@ -11,6 +11,7 @@ models:
  tags:
  - 大语言模型
  logo: bedrock
+
 - name: amazon nova
  type: llm
  provider: bedrock
@@ -27,6 +28,7 @@ models:
  - stream-tool-call
  - vision
  logo: bedrock
+
 - name: anthropic claude
  type: llm
  provider: bedrock
@@ -35,6 +37,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -44,6 +47,7 @@ models:
  - stream-tool-call
  - document
  logo: bedrock
+
 - name: cohere
  type: llm
  provider: bedrock
@@ -58,6 +62,7 @@ models:
  - tool-call
  - stream-tool-call
  logo: bedrock
+
 - name: deepseek
  type: llm
  provider: bedrock
@@ -66,6 +71,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -74,6 +80,7 @@ models:
  - tool-call
  - stream-tool-call
  logo: bedrock
+
 - name: meta
  type: llm
  provider: bedrock
@@ -87,6 +94,7 @@ models:
  - agent-thought
  - tool-call
  logo: bedrock
+
 - name: mistral
  type: llm
  provider: bedrock
@@ -100,6 +108,7 @@ models:
  - agent-thought
  - tool-call
  logo: bedrock
+
 - name: openai
  type: llm
  provider: bedrock
@@ -114,6 +123,7 @@ models:
  - tool-call
  - stream-tool-call
  logo: bedrock
+
 - name: qwen
  type: llm
  provider: bedrock
@@ -128,6 +138,7 @@ models:
  - tool-call
  - stream-tool-call
  logo: bedrock
+
 - name: amazon.rerank-v1:0
  type: rerank
  provider: bedrock
@@ -139,6 +150,7 @@ models:
  tags:
  - 重排序模型
  logo: bedrock
+
 - name: cohere.rerank-v3-5:0
  type: rerank
  provider: bedrock
@@ -150,6 +162,7 @@ models:
  tags:
  - 重排序模型
  logo: bedrock
+
 - name: amazon.nova-2-multimodal-embeddings-v1:0
  type: embedding
  provider: bedrock
@@ -163,6 +176,7 @@ models:
  - 文本嵌入模型
  - vision
  logo: bedrock
+
 - name: amazon.titan-embed-text-v1
  type: embedding
  provider: bedrock
@@ -174,6 +188,7 @@ models:
  tags:
  - 文本嵌入模型
  logo: bedrock
+
 - name: amazon.titan-embed-text-v2:0
  type: embedding
  provider: bedrock
@@ -185,6 +200,7 @@ models:
  tags:
  - 文本嵌入模型
  logo: bedrock
+
 - name: cohere.embed-english-v3
  type: embedding
  provider: bedrock
@@ -196,6 +212,7 @@ models:
  tags:
  - 文本嵌入模型
  logo: bedrock
+
 - name: cohere.embed-multilingual-v3
  type: embedding
  provider: bedrock
--- a/api/app/core/models/scripts/dashscope_models.yaml
+++ b/api/app/core/models/scripts/dashscope_models.yaml
@@ -6,36 +6,42 @@ models:
  description: DeepSeek-R1-Distill-Qwen-14B大语言模型，支持智能体思考，32000上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - agent-thought
  logo: dashscope
+
 - name: deepseek-r1-distill-qwen-32b
  type: llm
  provider: dashscope
  description: DeepSeek-R1-Distill-Qwen-32B大语言模型，支持智能体思考，32000上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - agent-thought
  logo: dashscope
+
 - name: deepseek-r1
  type: llm
  provider: dashscope
  description: DeepSeek-R1大语言模型，支持智能体思考，131072超大上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - agent-thought
  logo: dashscope
+
 - name: deepseek-v3.1
  type: llm
  provider: dashscope
@@ -48,6 +54,7 @@ models:
  - 大语言模型
  - agent-thought
  logo: dashscope
+
 - name: deepseek-v3.2-exp
  type: llm
  provider: dashscope
@@ -60,6 +67,7 @@ models:
  - 大语言模型
  - agent-thought
  logo: dashscope
+
 - name: deepseek-v3.2
  type: llm
  provider: dashscope
@@ -72,6 +80,7 @@ models:
  - 大语言模型
  - agent-thought
  logo: dashscope
+
 - name: deepseek-v3
  type: llm
  provider: dashscope
@@ -84,6 +93,7 @@ models:
  - 大语言模型
  - agent-thought
  logo: dashscope
+
 - name: farui-plus
  type: llm
  provider: dashscope
@@ -98,6 +108,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: glm-4.7
  type: llm
  provider: dashscope
@@ -112,6 +123,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qvq-max-latest
  type: llm
  provider: dashscope
@@ -119,7 +131,8 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
+  - vision
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -127,6 +140,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qvq-max
  type: llm
  provider: dashscope
@@ -134,7 +148,8 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
+  - vision
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -142,6 +157,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-coder-turbo-0919
  type: llm
  provider: dashscope
@@ -155,13 +171,15 @@ models:
  - 代码模型
  - agent-thought
  logo: dashscope
+
 - name: qwen-max-latest
  type: llm
  provider: dashscope
  description: qwen-max-latest大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式，支持联网搜索
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -169,6 +187,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-max-longcontext
  type: llm
  provider: dashscope
@@ -183,13 +202,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-max
  type: llm
  provider: dashscope
  description: qwen-max大语言模型，支持多工具调用、智能体思考、流式工具调用，32768上下文窗口，对话模式，支持联网搜索
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -197,6 +218,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-mt-plus
  type: llm
  provider: dashscope
@@ -210,6 +232,7 @@ models:
  - 翻译模型
  - agent-thought
  logo: dashscope
+
 - name: qwen-mt-turbo
  type: llm
  provider: dashscope
@@ -223,6 +246,7 @@ models:
  - 翻译模型
  - agent-thought
  logo: dashscope
+
 - name: qwen-plus-0112
  type: llm
  provider: dashscope
@@ -237,6 +261,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-plus-0125
  type: llm
  provider: dashscope
@@ -251,6 +276,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-plus-0723
  type: llm
  provider: dashscope
@@ -265,6 +291,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-plus-0806
  type: llm
  provider: dashscope
@@ -279,6 +306,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-plus-0919
  type: llm
  provider: dashscope
@@ -293,6 +321,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-plus-1125
  type: llm
  provider: dashscope
@@ -307,6 +336,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-plus-1127
  type: llm
  provider: dashscope
@@ -321,6 +351,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-plus-1220
  type: llm
  provider: dashscope
@@ -335,6 +366,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen-vl-max
  type: chat
  provider: dashscope
@@ -342,8 +374,8 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
  is_omni: false
  tags:
  - 大语言模型
@@ -352,6 +384,7 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwen-vl-plus-0809
  type: chat
  provider: dashscope
@@ -359,8 +392,8 @@ models:
  is_deprecated: true
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
  is_omni: false
  tags:
  - 大语言模型
@@ -369,6 +402,7 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwen-vl-plus-2025-01-02
  type: chat
  provider: dashscope
@@ -376,8 +410,8 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
  is_omni: false
  tags:
  - 大语言模型
@@ -386,6 +420,7 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwen-vl-plus-2025-01-25
  type: chat
  provider: dashscope
@@ -393,8 +428,8 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
  is_omni: false
  tags:
  - 大语言模型
@@ -403,6 +438,7 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwen-vl-plus-latest
  type: chat
  provider: dashscope
@@ -410,8 +446,8 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
  is_omni: false
  tags:
  - 大语言模型
@@ -420,6 +456,7 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwen-vl-plus
  type: chat
  provider: dashscope
@@ -427,8 +464,8 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
  is_omni: false
  tags:
  - 大语言模型
@@ -437,6 +474,7 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwen2.5-0.5b-instruct
  type: llm
  provider: dashscope
@@ -451,13 +489,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-14b
  type: llm
  provider: dashscope
  description: qwen3-14b大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -465,13 +505,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-235b-a22b-instruct-2507
  type: llm
  provider: dashscope
  description: qwen3-235b-a22b-instruct-2507大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -479,13 +521,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-235b-a22b-thinking-2507
  type: llm
  provider: dashscope
  description: qwen3-235b-a22b-thinking-2507大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -493,13 +537,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-235b-a22b
  type: llm
  provider: dashscope
  description: qwen3-235b-a22b大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -507,13 +553,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-30b-a3b-instruct-2507
  type: llm
  provider: dashscope
  description: qwen3-30b-a3b-instruct-2507大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -521,13 +569,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-30b-a3b
  type: llm
  provider: dashscope
  description: qwen3-30b-a3b大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -535,13 +585,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-32b
  type: llm
  provider: dashscope
  description: qwen3-32b大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -549,13 +601,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-4b
  type: llm
  provider: dashscope
  description: qwen3-4b大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -563,13 +617,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-8b
  type: llm
  provider: dashscope
  description: qwen3-8b大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -577,65 +633,75 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-coder-30b-a3b-instruct
  type: llm
  provider: dashscope
  description: qwen3-coder-30b-a3b-instruct大语言模型，支持智能体思考，262144上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - 代码模型
  - agent-thought
  logo: dashscope
+
 - name: qwen3-coder-480b-a35b-instruct
  type: llm
  provider: dashscope
  description: qwen3-coder-480b-a35b-instruct大语言模型，支持智能体思考，262144上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - 代码模型
  - agent-thought
  logo: dashscope
+
 - name: qwen3-coder-plus-2025-09-23
  type: llm
  provider: dashscope
  description: qwen3-coder-plus-2025-09-23大语言模型，支持智能体思考，1000000上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - 代码模型
  - agent-thought
  logo: dashscope
+
 - name: qwen3-coder-plus
  type: llm
  provider: dashscope
  description: qwen3-coder-plus大语言模型，支持智能体思考，1000000上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - 代码模型
  - agent-thought
  logo: dashscope
+
 - name: qwen3-max-2025-09-23
  type: llm
  provider: dashscope
  description: qwen3-max-2025-09-23大语言模型，支持多工具调用、智能体思考、流式工具调用，262144上下文窗口，对话模式，支持联网搜索
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -644,13 +710,15 @@ models:
  - stream-tool-call
  - 联网搜索
  logo: dashscope
+
 - name: qwen3-max-2026-01-23
  type: llm
  provider: dashscope
  description: qwen3-max-2026-01-23大语言模型，支持多工具调用、智能体思考、流式工具调用，262144上下文窗口，对话模式，支持联网搜索
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -659,13 +727,15 @@ models:
  - stream-tool-call
  - 联网搜索
  logo: dashscope
+
 - name: qwen3-max-preview
  type: llm
  provider: dashscope
  description: qwen3-max-preview大语言模型，支持多工具调用、智能体思考、流式工具调用，262144上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -673,13 +743,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-max
  type: llm
  provider: dashscope
  description: qwen3-max大语言模型，支持多工具调用、智能体思考、流式工具调用，262144上下文窗口，对话模式，支持联网搜索
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -688,13 +760,15 @@ models:
  - stream-tool-call
  - 联网搜索
  logo: dashscope
+
 - name: qwen3-next-80b-a3b-instruct
  type: llm
  provider: dashscope
  description: qwen3-next-80b-a3b-instruct大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -702,13 +776,15 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-next-80b-a3b-thinking
  type: llm
  provider: dashscope
  description: qwen3-next-80b-a3b-thinking大语言模型，支持多工具调用、智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -716,6 +792,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwen3-omni-flash-2025-12-01
  type: llm
  provider: dashscope
@@ -723,9 +800,10 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
-    - audio
+  - vision
+  - video
+  - audio
+  - thinking
  is_omni: true
  tags:
  - 大语言模型
@@ -735,6 +813,7 @@ models:
  - video
  - audio
  logo: dashscope
+
 - name: qwen3-vl-235b-a22b-instruct
  type: chat
  provider: dashscope
@@ -742,8 +821,9 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -754,6 +834,7 @@ models:
  - vision
  - video
  logo: dashscope
+
 - name: qwen3-vl-235b-a22b-thinking
  type: chat
  provider: dashscope
@@ -761,8 +842,9 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -773,6 +855,7 @@ models:
  - vision
  - video
  logo: dashscope
+
 - name: qwen3-vl-30b-a3b-instruct
  type: chat
  provider: dashscope
@@ -780,8 +863,9 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -792,6 +876,7 @@ models:
  - vision
  - video
  logo: dashscope
+
 - name: qwen3-vl-30b-a3b-thinking
  type: chat
  provider: dashscope
@@ -799,8 +884,9 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -811,6 +897,7 @@ models:
  - vision
  - video
  logo: dashscope
+
 - name: qwen3-vl-flash
  type: chat
  provider: dashscope
@@ -818,8 +905,9 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -830,6 +918,7 @@ models:
  - vision
  - video
  logo: dashscope
+
 - name: qwen3-vl-plus-2025-09-23
  type: chat
  provider: dashscope
@@ -837,8 +926,9 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -847,6 +937,7 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwen3-vl-plus
  type: chat
  provider: dashscope
@@ -854,8 +945,9 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
-    - video
+  - vision
+  - video
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -864,45 +956,52 @@ models:
  - agent-thought
  - video
  logo: dashscope
+
 - name: qwq-32b
  type: llm
  provider: dashscope
  description: qwq-32b大语言模型，支持智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwq-plus-0305
  type: llm
  provider: dashscope
  description: qwq-plus-0305大语言模型，支持智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: qwq-plus
  type: llm
  provider: dashscope
  description: qwq-plus大语言模型，支持智能体思考、流式工具调用，131072上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+  - thinking
  is_omni: false
  tags:
  - 大语言模型
  - agent-thought
  - stream-tool-call
  logo: dashscope
+
 - name: gte-rerank-v2
  type: rerank
  provider: dashscope
@@ -914,6 +1013,7 @@ models:
  tags:
  - 重排序模型
  logo: dashscope
+
 - name: gte-rerank
  type: rerank
  provider: dashscope
@@ -925,6 +1025,7 @@ models:
  tags:
  - 重排序模型
  logo: dashscope
+
 - name: multimodal-embedding-v1
  type: embedding
  provider: dashscope
@@ -932,13 +1033,14 @@ models:
  is_deprecated: false
  is_official: true
  capability:
-    - vision
+  - vision
  is_omni: false
  tags:
  - 嵌入模型
  - 多模态模型
  - vision
  logo: dashscope
+
 - name: text-embedding-v1
  type: embedding
  provider: dashscope
@@ -951,6 +1053,7 @@ models:
  - 嵌入模型
  - 文本嵌入
  logo: dashscope
+
 - name: text-embedding-v2
  type: embedding
  provider: dashscope
@@ -963,6 +1066,7 @@ models:
  - 嵌入模型
  - 文本嵌入
  logo: dashscope
+
 - name: text-embedding-v3
  type: embedding
  provider: dashscope
@@ -975,6 +1079,7 @@ models:
  - 嵌入模型
  - 文本嵌入
  logo: dashscope
+
 - name: text-embedding-v4
  type: embedding
  provider: dashscope
@@ -986,4 +1091,4 @@ models:
  tags:
  - 嵌入模型
  - 文本嵌入
-  logo: dashscope
+  logo: dashscope
--- a/api/app/core/models/scripts/openai_models.yaml
+++ b/api/app/core/models/scripts/openai_models.yaml
@@ -20,6 +20,7 @@ models:
  - audio
  - video
  logo: openai
+
 - name: gpt-3.5-turbo-0125
  type: llm
  provider: openai
@@ -34,6 +35,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: openai
+
 - name: gpt-3.5-turbo-1106
  type: llm
  provider: openai
@@ -48,6 +50,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: openai
+
 - name: gpt-3.5-turbo-16k
  type: llm
  provider: openai
@@ -62,6 +65,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: openai
+
 - name: gpt-3.5-turbo-instruct
  type: llm
  provider: openai
@@ -73,6 +77,7 @@ models:
  tags:
  - 大语言模型
  logo: openai
+
 - name: gpt-3.5-turbo
  type: llm
  provider: openai
@@ -87,6 +92,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: openai
+
 - name: gpt-4-0125-preview
  type: llm
  provider: openai
@@ -101,6 +107,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: openai
+
 - name: gpt-4-1106-preview
  type: llm
  provider: openai
@@ -115,6 +122,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: openai
+
 - name: gpt-4-turbo-2024-04-09
  type: llm
  provider: openai
@@ -131,6 +139,7 @@ models:
  - stream-tool-call
  - vision
  logo: openai
+
 - name: gpt-4-turbo-preview
  type: llm
  provider: openai
@@ -145,6 +154,7 @@ models:
  - agent-thought
  - stream-tool-call
  logo: openai
+
 - name: gpt-4-turbo
  type: llm
  provider: openai
@@ -161,6 +171,7 @@ models:
  - stream-tool-call
  - vision
  logo: openai
+
 - name: o1-preview
  type: llm
  provider: openai
@@ -173,6 +184,7 @@ models:
  - 大语言模型
  - agent-thought
  logo: openai
+
 - name: o1
  type: llm
  provider: openai
@@ -181,6 +193,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -190,6 +203,7 @@ models:
  - vision
  - structured-output
  logo: openai
+
 - name: o3-2025-04-16
  type: llm
  provider: openai
@@ -198,6 +212,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -207,13 +222,15 @@ models:
  - stream-tool-call
  - structured-output
  logo: openai
+
 - name: o3-mini-2025-01-31
  type: llm
  provider: openai
  description: o3-mini-2025-01-31大语言模型，支持智能体思考、工具调用、流式工具调用、结构化输出，200000上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -222,13 +239,15 @@ models:
  - stream-tool-call
  - structured-output
  logo: openai
+
 - name: o3-mini
  type: llm
  provider: openai
  description: o3-mini大语言模型，支持智能体思考、工具调用、流式工具调用、结构化输出，200000上下文窗口，对话模式
  is_deprecated: false
  is_official: true
-  capability: []
+  capability:
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -237,6 +256,7 @@ models:
  - stream-tool-call
  - structured-output
  logo: openai
+
 - name: o3-pro-2025-06-10
  type: llm
  provider: openai
@@ -245,6 +265,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -253,6 +274,7 @@ models:
  - vision
  - structured-output
  logo: openai
+
 - name: o3-pro
  type: llm
  provider: openai
@@ -261,6 +283,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -269,6 +292,7 @@ models:
  - vision
  - structured-output
  logo: openai
+
 - name: o3
  type: llm
  provider: openai
@@ -277,6 +301,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -286,6 +311,7 @@ models:
  - stream-tool-call
  - structured-output
  logo: openai
+
 - name: o4-mini-2025-04-16
  type: llm
  provider: openai
@@ -294,6 +320,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -303,6 +330,7 @@ models:
  - stream-tool-call
  - structured-output
  logo: openai
+
 - name: o4-mini
  type: llm
  provider: openai
@@ -311,6 +339,7 @@ models:
  is_official: true
  capability:
    - vision
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -320,6 +349,7 @@ models:
  - stream-tool-call
  - structured-output
  logo: openai
+
 - name: text-embedding-3-large
  type: embedding
  provider: openai
@@ -331,6 +361,7 @@ models:
  tags:
  - 文本向量模型
  logo: openai
+
 - name: text-embedding-3-small
  type: embedding
  provider: openai
@@ -342,6 +373,7 @@ models:
  tags:
  - 文本向量模型
  logo: openai
+
 - name: text-embedding-ada-002
  type: embedding
  provider: openai
--- a/api/app/core/models/scripts/volcano_models.yaml
+++ b/api/app/core/models/scripts/volcano_models.yaml
@@ -10,6 +10,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -24,6 +25,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -38,6 +40,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -52,6 +55,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -82,6 +86,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -96,6 +101,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -110,6 +116,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -124,6 +131,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
@@ -139,6 +147,7 @@ models:
  capability:
    - vision
    - video
+    - thinking
  is_omni: false
  tags:
  - 大语言模型
--- a/api/app/core/models/volcano_chat.py
+++ b/api/app/core/models/volcano_chat.py
@@ -0,0 +1,38 @@
+"""
+火山引擎 ChatOpenAI 扩展
+
+ChatOpenAI 在解析流式 SSE 时只取 delta.content，会丢弃 delta.reasoning_content。
+此类仅重写 _convert_chunk_to_generation_chunk，将 reasoning_content 补入 additional_kwargs。
+"""
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from langchain_core.outputs import ChatGenerationChunk
+from langchain_openai import ChatOpenAI
+
+
+class VolcanoChatOpenAI(ChatOpenAI):
+    """火山引擎 Chat 模型，支持深度思考内容（reasoning_content）的流式透传。"""
+
+    def _convert_chunk_to_generation_chunk(
+        self,
+        chunk: dict,
+        default_chunk_class: type,
+        base_generation_info: Optional[dict],
+    ) -> Optional[ChatGenerationChunk]:
+        gen_chunk = super()._convert_chunk_to_generation_chunk(
+            chunk, default_chunk_class, base_generation_info
+        )
+        if gen_chunk is None:
+            return None
+
+        # 从原始 chunk 中提取 reasoning_content
+        choices = chunk.get("choices") or chunk.get("chunk", {}).get("choices", [])
+        if choices:
+            delta = choices[0].get("delta") or {}
+            reasoning: Any = delta.get("reasoning_content")
+            if reasoning:
+                gen_chunk.message.additional_kwargs["reasoning_content"] = reasoning
+
+        return gen_chunk
--- a/api/app/core/tools/mcp/client.py
+++ b/api/app/core/tools/mcp/client.py
@@ -99,7 +99,7 @@ class SimpleMCPClient:
            # 建立 SSE 连接
            response = await self._session.get(self.server_url)
            
-            if response.status not in (200, 202):
+            if not (200 <= response.status < 300):
                error_text = await response.text()
                raise MCPConnectionError(f"SSE 连接失败 {response.status}: {error_text}")
            
@@ -190,9 +190,7 @@ class SimpleMCPClient:
        
        try:
            async with self._session.post(self._endpoint_url, json=request) as response:
-                # MCP SSE 协议：POST 请求返回 200 或 202 均为正常
-                # 202 Accepted 表示请求已接受，结果通过 SSE 流异步返回
-                if response.status not in (200, 202):
+                if not (200 <= response.status < 300):
                    error_text = await response.text()
                    raise MCPConnectionError(f"请求失败 {response.status}: {error_text}")
            
@@ -207,7 +205,7 @@ class SimpleMCPClient:
            raise MCPConnectionError("endpoint URL 未初始化")
        
        async with self._session.post(self._endpoint_url, json=notification) as response:
-            if response.status not in (200, 202):
+            if not (200 <= response.status < 300):
                logger.warning(f"通知发送失败: {response.status}")
    
    async def _initialize_modelscope_session(self):
@@ -225,7 +223,7 @@ class SimpleMCPClient:
        
        try:
            async with self._session.post(self.server_url, json=init_request) as response:
-                if response.status != 200:
+                if not (200 <= response.status < 300):
                    error_text = await response.text()
                    raise MCPConnectionError(f"初始化失败 {response.status}: {error_text}")
                
--- a/api/app/core/workflow/nodes/llm/node.py
+++ b/api/app/core/workflow/nodes/llm/node.py
@@ -135,7 +135,8 @@ class LLMNode(BaseNode):
                api_key=model_info.api_key,
                base_url=model_info.api_base,
                extra_params=extra_params,
-                is_omni=model_info.is_omni
+                is_omni=model_info.is_omni,
+                support_thinking="thinking" in (model_info.capability or []),
            ),
            type=model_info.model_type
        )
--- a/api/app/core/workflow/nodes/parameter_extractor/node.py
+++ b/api/app/core/workflow/nodes/parameter_extractor/node.py
@@ -109,6 +109,7 @@ class ParameterExtractorNode(BaseNode):
            api_key = api_config.api_key
            api_base = api_config.api_base
            is_omni = api_config.is_omni
+            capability = api_config.capability
            model_type = config.type

        llm = RedBearLLM(
@@ -117,7 +118,8 @@ class ParameterExtractorNode(BaseNode):
                provider=provider,
                api_key=api_key,
                base_url=api_base,
-                is_omni=is_omni
+                is_omni=is_omni,
+                support_thinking="thinking" in (capability or []),
            ),
            type=ModelType(model_type)
        )
--- a/api/app/core/workflow/nodes/question_classifier/node.py
+++ b/api/app/core/workflow/nodes/question_classifier/node.py
@@ -62,6 +62,7 @@ class QuestionClassifierNode(BaseNode):
            api_key = api_config.api_key
            base_url = api_config.api_base
            is_omni = api_config.is_omni
+            capability = api_config.capability
            model_type = config.type

        return RedBearLLM(
@@ -70,7 +71,8 @@ class QuestionClassifierNode(BaseNode):
                provider=provider,
                api_key=api_key,
                base_url=base_url,
-                is_omni=is_omni
+                is_omni=is_omni,
+                support_thinking="thinking" in (capability or []),
            ),
            type=ModelType(model_type)
        )
--- a/api/app/models/models_model.py
+++ b/api/app/models/models_model.py
@@ -81,7 +81,7 @@ class ModelConfig(BaseModel):
    
    # 模型配置参数
    capability = Column(ARRAY(String), default=list, nullable=False, server_default=text("'{}'::varchar[]"),
-                        comment="模型能力列表（如['vision', 'audio', 'video']）")
+                        comment="模型能力列表（如['vision', 'audio', 'video', 'thinking']）")
    is_omni = Column(Boolean, default=False, nullable=False, server_default="false", comment="是否为Omni模型（使用特殊API调用）")
    config = Column(JSON, comment="模型配置参数")
    # - temperature : 控制生成文本的随机性。值越高，输出越随机、越有创造性；值越低，输出越确定、越保守。
--- a/api/app/repositories/home_page_repository.py
+++ b/api/app/repositories/home_page_repository.py
@@ -1,6 +1,6 @@
-from datetime import datetime, timedelta
+from datetime import datetime, time
 from sqlalchemy.orm import Session
-from sqlalchemy import func
+from sqlalchemy import func, Table, MetaData
 from uuid import UUID
 from typing import Dict, Optional, Any

@@ -192,10 +192,63 @@ class HomePageRepository:
        
        return workspaces, app_count_dict, user_count_dict
    
+    @staticmethod
+    def get_latest_version_introduction(db: Session) -> tuple[Optional[str], Optional[Dict[str, Any]]]:
+        """
+        从数据库获取最新已发布的版本说明
+        使用反射方式读取表结构，不依赖 premium 模型类
+        
+        Args:
+            db: 数据库会话
+            
+        Returns:
+            (版本号，版本说明字典) 的元组
+            如果数据库中没有已发布的版本，返回 (None, None)
+        """
+        try:
+            metadata = MetaData()
+            
+            version_notes = Table('version_notes', metadata, autoload_with=db.bind)
+            
+            # 获取最新已发布的版本（按发布时间倒序，日期相同时按版本号倒序）
+            query = db.query(version_notes).filter(
+                version_notes.c.is_published == True
+            ).order_by(
+                version_notes.c.release_date.desc(),
+                version_notes.c.version.desc()
+            )
+            
+            note = query.first()
+            
+            if not note:
+                return None, None
+            
+            version_info = {
+                "introduction": {
+                    "codeName": note.code_name or "",
+                    "releaseDate": int(datetime.combine(note.release_date, time()).timestamp() * 1000) if note.release_date else 0,
+                    "upgradePosition": note.upgrade_position or "",
+                    "coreUpgrades": note.core_upgrades or []
+                },
+                "introduction_en": {
+                    "codeName": note.code_name_en or note.code_name or "",
+                    "releaseDate": int(datetime.combine(note.release_date, time()).timestamp() * 1000) if note.release_date else 0,
+                    "upgradePosition": note.upgrade_position_en or note.upgrade_position or "",
+                    "coreUpgrades": note.core_upgrades_en or []
+                }
+            }
+            
+            return note.version, version_info
+            
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+            return None, None
+    
    @staticmethod
    def get_version_introduction(db: Session, version: str) -> Optional[Dict[str, Any]]:
        """
-        从数据库获取版本说明（优先读取已发布的版本）
+        从数据库获取指定版本说明（优先读取已发布的版本）
        使用反射方式读取表结构，不依赖 premium 模型类
        
        Args:
@@ -207,11 +260,8 @@ class HomePageRepository:
            如果数据库中没有该版本，返回 None
        """
        try:
-            from sqlalchemy import Table, MetaData
-            
            metadata = MetaData()
            version_notes = Table('version_notes', metadata, autoload_with=db.engine)
-            version_note_items = Table('version_note_items', metadata, autoload_with=db.engine)
            
            note = db.query(version_notes).filter(
                version_notes.c.version == version,
@@ -221,31 +271,18 @@ class HomePageRepository:
            if not note:
                return None
            
-            items = db.query(version_note_items).filter(
-                version_note_items.c.note_id == note.id
-            ).order_by(version_note_items.c.sort_order).all()
-            
-            core_upgrades = []
-            for item in items:
-                title = item.title
-                content = item.content
-                if content:
-                    core_upgrades.append(f"{title}<br>{content}")
-                else:
-                    core_upgrades.append(title)
-            
            return {
                "introduction": {
-                    "codeName": "",
-                    "releaseDate": note.release_date.isoformat() if note.release_date else "",
-                    "upgradePosition": "",
-                    "coreUpgrades": core_upgrades
+                    "codeName": note.code_name or "",
+                    "releaseDate": int(datetime.combine(note.release_date, time()).timestamp() * 1000) if note.release_date else 0,
+                    "upgradePosition": note.upgrade_position or "",
+                    "coreUpgrades": note.core_upgrades or []
                },
                "introduction_en": {
-                    "codeName": "",
-                    "releaseDate": note.release_date.isoformat() if note.release_date else "",
-                    "upgradePosition": "",
-                    "coreUpgrades": core_upgrades
+                    "codeName": note.code_name_en or note.code_name or "",
+                    "releaseDate": int(datetime.combine(note.release_date, time()).timestamp() * 1000) if note.release_date else 0,
+                    "upgradePosition": note.upgrade_position_en or note.upgrade_position or "",
+                    "coreUpgrades": note.core_upgrades_en or []
                }
            }
        except Exception:
--- a/api/app/schemas/app_schema.py
+++ b/api/app/schemas/app_schema.py
@@ -241,6 +241,8 @@ class ModelParameters(BaseModel):
    presence_penalty: float = Field(default=0.0, ge=-2.0, le=2.0, description="存在惩罚")
    n: int = Field(default=1, ge=1, le=10, description="生成的回复数量")
    stop: Optional[List[str]] = Field(default=None, description="停止序列")
+    deep_thinking: bool = Field(default=False, description="是否启用深度思考模式（需模型支持，如 DeepSeek-R1、QwQ 等）")
+    thinking_budget_tokens: Optional[int] = Field(default=None, ge=1024, le=131072, description="深度思考 token 预算（仅部分模型支持）")


 class VariableDefinition(BaseModel):
@@ -612,6 +614,7 @@ class AppChatRequest(BaseModel):
    user_id: Optional[str] = Field(default=None, description="用户ID（用于会话管理）")
    variables: Optional[Dict[str, Any]] = Field(default=None, description="自定义变量参数值")
    stream: bool = Field(default=False, description="是否流式返回")
+    thinking: bool = Field(default=False, description="是否启用深度思考（需Agent配置支持）")
    files: List[FileInput] = Field(default_factory=list, description="附件列表（支持多文件）")


--- a/api/app/schemas/conversation_schema.py
+++ b/api/app/schemas/conversation_schema.py
@@ -31,6 +31,7 @@ class ChatRequest(BaseModel):
    stream: bool = Field(default=False, description="是否流式返回")
    web_search: bool = Field(default=False, description="是否启用网络搜索")
    memory: bool = Field(default=True, description="是否启用记忆功能")
+    thinking: bool = Field(default=False, description="是否启用深度思考（需Agent配置支持）")
    files: Optional[List[FileInput]] = Field(default=None, description="附件列表（支持多文件）")


--- a/api/app/schemas/memory_api_schema.py
+++ b/api/app/schemas/memory_api_schema.py
@@ -138,21 +138,13 @@ class CreateEndUserRequest(BaseModel):
    """Request schema for creating an end user.
    
    Attributes:
-        workspace_id: Workspace ID (required)
        other_id: External user identifier (required)
        other_name: Display name for the end user
+        memory_config_id: Optional memory config ID. If not provided, uses workspace default.
    """
-    workspace_id: str = Field(..., description="Workspace ID (required)")
    other_id: str = Field(..., description="External user identifier (required)")
    other_name: Optional[str] = Field("", description="Display name")
-
-    @field_validator("workspace_id")
-    @classmethod
-    def validate_workspace_id(cls, v: str) -> str:
-        """Validate that workspace_id is not empty."""
-        if not v or not v.strip():
-            raise ValueError("workspace_id is required and cannot be empty")
-        return v.strip()
+    memory_config_id: Optional[str] = Field(None, description="Memory config ID. Falls back to workspace default if not provided.")

    @field_validator("other_id")
    @classmethod
@@ -171,11 +163,13 @@ class CreateEndUserResponse(BaseModel):
        other_id: External user identifier
        other_name: Display name
        workspace_id: Workspace the user belongs to
+        memory_config_id: Connected memory config ID
    """
    id: str = Field(..., description="End user UUID")
    other_id: str = Field(..., description="External user identifier")
    other_name: str = Field("", description="Display name")
    workspace_id: str = Field(..., description="Workspace ID")
+    memory_config_id: Optional[str] = Field(None, description="Connected memory config ID")


 class MemoryConfigItem(BaseModel):
--- a/api/app/services/app_chat_service.py
+++ b/api/app/services/app_chat_service.py
@@ -117,7 +117,9 @@ class AppChatService:
            max_tokens=model_parameters.get("max_tokens", 2000),
            system_prompt=system_prompt,
            tools=tools,
-
+            deep_thinking=model_parameters.get("deep_thinking", False),
+            thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
+            capability=api_key_obj.capability or [],
        )

        model_info = ModelInfo(
@@ -205,7 +207,8 @@ class AppChatService:
            "model": api_key_obj.model_name,
            "usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
            "audio_url": None,
-            "citations": filtered_citations
+            "citations": filtered_citations,
+            "reasoning_content": result.get("reasoning_content")
        }
        if files:
            for f in files:
@@ -258,6 +261,7 @@ class AppChatService:
            "conversation_id": conversation_id,
            "message_id": str(message_id),
            "message": result["content"],
+            "reasoning_content": result.get("reasoning_content"),
            "usage": result.get("usage", {
                "prompt_tokens": 0,
                "completion_tokens": 0,
@@ -354,7 +358,10 @@ class AppChatService:
                max_tokens=model_parameters.get("max_tokens", 2000),
                system_prompt=system_prompt,
                tools=tools,
-                streaming=True
+                streaming=True,
+                deep_thinking=model_parameters.get("deep_thinking", False),
+                thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
+                capability=api_key_obj.capability or [],
            )

            model_info = ModelInfo(
@@ -403,6 +410,7 @@ class AppChatService:

            # 流式调用 Agent（支持多模态），同时并行启动 TTS
            full_content = ""
+            full_reasoning = ""
            total_tokens = 0

            text_queue: asyncio.Queue = asyncio.Queue()
@@ -426,6 +434,9 @@ class AppChatService:
            ):
                if isinstance(chunk, int):
                    total_tokens = chunk
+                elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
+                    full_reasoning += chunk['content']
+                    yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
                else:
                    full_content += chunk
                    yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n"
@@ -472,7 +483,8 @@ class AppChatService:
                "model": api_key_obj.model_name,
                "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
                "audio_url": None,
-                "citations": filtered_citations
+                "citations": filtered_citations,
+                "reasoning_content": full_reasoning or None
            }

            if files:
@@ -652,13 +664,13 @@ class AppChatService:
                    storage_type=storage_type,
                    user_rag_memory_id=user_rag_memory_id
            ):
-                if "sub_usage" in event:
+                # 拦截 sub_usage 事件，累加 token
+                if "event: sub_usage" in event:
                    if "data:" in event:
                        try:
                            data_line = event.split("data: ", 1)[1].strip()
                            data = json.loads(data_line)
-                            if "total_tokens" in data:
-                                total_tokens += data["total_tokens"]
+                            total_tokens += data.get("total_tokens", 0)
                        except:
                            pass
                else:
--- a/api/app/services/app_service.py
+++ b/api/app/services/app_service.py
@@ -13,7 +13,7 @@ import uuid
 from typing import Annotated, Any, Dict, List, Optional, Tuple

 from fastapi import Depends
-from sqlalchemy import and_, delete, func, or_, select
+from sqlalchemy import and_, delete, func, or_, select, update as sa_update
 from sqlalchemy.orm import Session

 from app.core.error_codes import BizCode
@@ -757,6 +757,17 @@ class AppService:

        # 逻辑删除应用
        app.is_active = False
+        
+        # 更新 app_shares 表中该应用的所有共享记录为失效状态，并更新 updated_at 时间
+        stmt = sa_update(AppShare).where(
+            AppShare.source_app_id == app_id,
+            AppShare.is_active.is_(True)
+        ).values(
+            is_active=False,
+            updated_at=datetime.datetime.now()
+        )
+        self.db.execute(stmt)
+        
        self.db.commit()

        logger.info(
--- a/api/app/services/conversation_service.py
+++ b/api/app/services/conversation_service.py
@@ -534,6 +534,7 @@ class ConversationService:
        api_key = api_config.api_key
        api_base = api_config.api_base
        is_omni = api_config.is_omni
+        capability = api_config.capability
        model_type = config.type

        llm = RedBearLLM(
@@ -542,7 +543,8 @@ class ConversationService:
                provider=provider,
                api_key=api_key,
                base_url=api_base,
-                is_omni=is_omni
+                is_omni=is_omni,
+                support_thinking="thinking" in (capability or []),
            ),
            type=ModelType(model_type)
        )
--- a/api/app/services/draft_run_service.py
+++ b/api/app/services/draft_run_service.py
@@ -458,7 +458,7 @@ class AgentRunService:
        
        statement = opening["statement"]
        suggested_questions = opening["suggested_questions"]
-        
+
        # 如果有变量，进行替换（仅支持 {{var_name}} 格式）
        if variables:
            for var_name, var_value in variables.items():
@@ -595,6 +595,9 @@ class AgentRunService:
                max_tokens=effective_params.get("max_tokens", 2000),
                system_prompt=system_prompt,
                tools=tools,
+                deep_thinking=effective_params.get("deep_thinking", False),
+                thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
+                capability=api_key_config.get("capability", []),
            )

            # 5. 处理会话ID（创建或验证），新会话时写入开场白
@@ -689,7 +692,8 @@ class AgentRunService:
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                            "total_tokens": 0
-                        })
+                        }),
+                        "reasoning_content": result.get("reasoning_content")
                    },
                    files=files,
                    processed_files=processed_files,
@@ -701,6 +705,7 @@ class AgentRunService:

            response = {
                "message": result["content"],
+                "reasoning_content": result.get("reasoning_content"),
                "conversation_id": conversation_id,
                "usage": result.get("usage", {
                    "prompt_tokens": 0,
@@ -838,7 +843,10 @@ class AgentRunService:
                max_tokens=effective_params.get("max_tokens", 2000),
                system_prompt=system_prompt,
                tools=tools,
-                streaming=True
+                streaming=True,
+                deep_thinking=effective_params.get("deep_thinking", False),
+                thinking_budget_tokens=effective_params.get("thinking_budget_tokens"),
+                capability=api_key_config.get("capability", []),
            )

            # 5. 处理会话ID（创建或验证），新会话时写入开场白
@@ -898,6 +906,7 @@ class AgentRunService:

            # 9. 流式调用 Agent（支持多模态），同时并行启动 TTS
            full_content = ""
+            full_reasoning = ""
            total_tokens = 0

            # 启动流式 TTS（文本边输出边合成）
@@ -916,6 +925,9 @@ class AgentRunService:
            ):
                if isinstance(chunk, int):
                    total_tokens = chunk
+                elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
+                    full_reasoning += chunk["content"]
+                    yield self._format_sse_event("reasoning", {"content": chunk["content"]})
                else:
                    full_content += chunk
                    yield self._format_sse_event("message", {"content": chunk})
@@ -944,7 +956,8 @@ class AgentRunService:
                    app_id=agent_config.app_id,
                    user_id=user_id,
                    meta_data={
-                        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}
+                        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens},
+                        "reasoning_content": full_reasoning or None
                    },
                    files=files,
                    processed_files=processed_files,
@@ -1665,7 +1678,7 @@ class AgentRunService:
            """从 text_queue 取文本按句子切分后喂给 synthesizer"""
            import re
            buf = ""
-            sentence_end = re.compile(r'[\u3002\uff01\uff1f\.!?\n]')
+            sentence_end = re.compile(r'[\u3002\uff01\uff1f.!?\n]')
            while True:
                chunk = await text_queue.get()
                if chunk is None:
@@ -1894,6 +1907,7 @@ class AgentRunService:
                    "conversation_id": result['conversation_id'],
                    "parameters_used": model_info["parameters"],
                    "message": result.get("message"),
+                    "reasoning_content": result.get("reasoning_content"),
                    "usage": usage,
                    "elapsed_time": elapsed,
                    "tokens_per_second": (
@@ -2012,7 +2026,7 @@ class AgentRunService:
        # 需要从 ModelApiKey 获取实际的模型名称，或者在 ModelConfig 中添加 model 字段
        return None

-    def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> AgentConfig:
+    def _with_parameters(self, agent_config: AgentConfig, parameters: Dict[str, Any]) -> tuple[AgentConfig, Any]:
        """创建一个带有覆盖参数的 agent_config（浅拷贝，只修改 model_parameters）

        Args:
@@ -2110,6 +2124,7 @@ class AgentRunService:

                start_time = time.time()
                full_content = ""
+                full_reasoning = ""
                returned_conversation_id = model_conversation_id
                audio_url = None
                audio_status = None
@@ -2168,6 +2183,18 @@ class AgentRunService:
                                    "content": chunk
                                }))

+                            # 转发深度思考事件（带模型标识）
+                            if event_type == "reasoning" and event_data:
+                                reasoning_chunk = event_data.get("content", "")
+                                full_reasoning += reasoning_chunk
+                                await event_queue.put(self._format_sse_event("model_reasoning", {
+                                    "model_index": idx,
+                                    "model_config_id": model_config_id,
+                                    "label": model_label,
+                                    "conversation_id": returned_conversation_id,
+                                    "content": event_data.get("content", "")
+                                }))
+
                            # 从 end 事件中提取 features 输出字段
                            if event_type == "end" and event_data:
                                audio_url = event_data.get("audio_url")
@@ -2199,6 +2226,7 @@ class AgentRunService:
                    "conversation_id": returned_conversation_id,
                    "parameters_used": model_info["parameters"],
                    "message": full_content,
+                    "reasoning_content": full_reasoning or None,
                    "elapsed_time": elapsed,
                    "audio_url": audio_url,
                    "audio_status": audio_status,
@@ -2351,6 +2379,7 @@ class AgentRunService:
                "label": r["label"],
                "conversation_id": r.get("conversation_id"),
                "message": r.get("message"),
+                "reasoning_content": r.get("reasoning_content"),
                "elapsed_time": r.get("elapsed_time", 0),
                "audio_url": r.get("audio_url"),
                "audio_status": r.get("audio_status"),
--- a/api/app/services/llm_router.py
+++ b/api/app/services/llm_router.py
@@ -415,6 +415,7 @@ class LLMRouter:
                api_key=api_key_config.api_key,
                base_url=api_key_config.api_base,
                is_omni=api_key_config.is_omni,
+                support_thinking="thinking" in (api_key_config.capability or []),
                temperature=0.3,
                max_tokens=500
            )
--- a/api/app/services/master_agent_router.py
+++ b/api/app/services/master_agent_router.py
@@ -393,6 +393,7 @@ class MasterAgentRouter:
                api_key=api_key_config.api_key,
                base_url=api_key_config.api_base,
                is_omni=api_key_config.is_omni,
+                support_thinking="thinking" in (api_key_config.capability or []),
                extra_params = extra_params
            )

@@ -403,6 +404,17 @@ class MasterAgentRouter:
            response = await llm.ainvoke(prompt)
            ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)

+            # 提取 token 消耗
+            self._last_routing_tokens = 0
+            if hasattr(response, 'usage_metadata') and response.usage_metadata:
+                um = response.usage_metadata
+                self._last_routing_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0)
+            elif hasattr(response, 'response_metadata') and response.response_metadata:
+                token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {})
+                if isinstance(token_usage, dict):
+                    self._last_routing_tokens = token_usage.get("total_tokens", 0)
+            logger.info(f"Master Agent 路由 token 消耗: {self._last_routing_tokens}")
+
            # 提取响应内容
            if hasattr(response, 'content'):
                return response.content
--- a/api/app/services/memory_dashboard_service.py
+++ b/api/app/services/memory_dashboard_service.py
@@ -353,15 +353,13 @@ async def get_workspace_total_memory_count(
                "details": []
            }
        
-        # 2. 对每个 host_id 调用 search_all 获取 total
+        # 2. 使用 search_all_batch 批量查询所有宿主的记忆数量
        from app.services import memory_storage_service
        
-        total_count = 0
-        details = []
-        
        # 如果提供了 end_user_id，只查询该用户
        if end_user_id:
-            search_result = await memory_storage_service.search_all(end_user_id=end_user_id)
+            batch_result = await memory_storage_service.search_all_batch([end_user_id])
+            count = batch_result.get(end_user_id, 0)
            # 查询用户名称
            from app.repositories.end_user_repository import EndUserRepository
            repo = EndUserRepository(db)
@@ -369,42 +367,31 @@ async def get_workspace_total_memory_count(
            user_name = end_user.other_name if end_user else None
            
            return {
-                "total_memory_count": search_result.get("total", 0),
+                "total_memory_count": count,
                "host_count": 1,
                "details": [{
                    "end_user_id": end_user_id, 
-                    "count": search_result.get("total", 0),
+                    "count": count,
                    "name": user_name
                }]
            }
        
-        for host in hosts:
-            try:
-                end_user_id_str = str(host.id)
-                
-                search_result = await memory_storage_service.search_all(
-                    end_user_id=end_user_id_str
-                )
-                
-                host_total = search_result.get("total", 0)
-                total_count += host_total
-                
-                details.append({
-                    "end_user_id": end_user_id_str,
-                    "count": host_total,
-                    "name": host.other_name  # 使用 other_name 字段
-                })
-                
-                business_logger.debug(f"EndUser {end_user_id_str} ({host.other_name}) 记忆数: {host_total}")
-                
-            except Exception as e:
-                business_logger.warning(f"获取 end_user {host.id} 记忆数失败: {str(e)}")
-                # 失败的 host 记为 0
-                details.append({
-                    "end_user_id": str(host.id),
-                    "count": 0,
-                    "name": host.other_name  # 使用 other_name 字段
-                })
+        # 批量查询所有宿主记忆数量（一次 Neo4j 查询）
+        end_user_ids = [str(host.id) for host in hosts]
+        batch_result = await memory_storage_service.search_all_batch(end_user_ids)
+        
+        # 构建 host name 映射
+        host_name_map = {str(host.id): host.other_name for host in hosts}
+        
+        total_count = sum(batch_result.values())
+        details = [
+            {
+                "end_user_id": uid,
+                "count": batch_result.get(uid, 0),
+                "name": host_name_map.get(uid)
+            }
+            for uid in end_user_ids
+        ]
        
        result = {
            "total_memory_count": total_count,
@@ -519,6 +506,206 @@ def get_rag_user_kb_total_chunk(
        business_logger.error(f"获取用户知识库总chunk数失败: workspace_id={workspace_id} - {str(e)}")
        raise

+def get_dashboard_yesterday_changes(
+    db: Session,
+    workspace_id: uuid.UUID,
+    storage_type: str,
+    today_data: dict
+) -> dict:
+    """
+    计算各指标相比昨天的变化量。
+    
+    Args:
+        db: 数据库会话
+        workspace_id: 工作空间ID
+        storage_type: 存储类型 'neo4j' | 'rag'
+        today_data: 当前数据，包含 total_memory, total_app, total_knowledge, total_api_call
+    
+    Returns:
+        {
+            "total_memory_change": int | None,
+            "total_app_change": int | None,
+            "total_knowledge_change": int | None,
+            "total_api_call_change": int | None
+        }
+    """
+    from datetime import datetime, timedelta
+    from sqlalchemy import func
+    from app.models.api_key_model import ApiKey, ApiKeyLog
+    from app.models.knowledge_model import Knowledge
+    from app.models.app_model import App
+    from app.models.appshare_model import AppShare
+
+    business_logger.info(f"计算昨日对比: workspace_id={workspace_id}, storage_type={storage_type}")
+
+    now_local = datetime.now()
+    today_start = now_local.replace(hour=0, minute=0, second=0, microsecond=0)
+    yesterday_start = today_start - timedelta(days=1)
+
+    changes = {
+        "total_memory_change": None,
+        "total_app_change": None,
+        "total_knowledge_change": None,
+        "total_api_call_change": None,
+    }
+
+    # --- total_api_call_change ---
+    try:
+        # 获取该workspace下所有api_key的id
+        api_key_ids = [
+            row[0] for row in db.query(ApiKey.id).filter(
+                ApiKey.workspace_id == workspace_id
+            ).all()
+        ]
+        if api_key_ids:
+            # 今日累计
+            today_api_count = db.query(func.count(ApiKeyLog.id)).filter(
+                ApiKeyLog.api_key_id.in_(api_key_ids),
+                ApiKeyLog.created_at >= today_start,
+                ApiKeyLog.created_at < now_local
+            ).scalar() or 0
+            # 昨日全天
+            yesterday_api_count = db.query(func.count(ApiKeyLog.id)).filter(
+                ApiKeyLog.api_key_id.in_(api_key_ids),
+                ApiKeyLog.created_at >= yesterday_start,
+                ApiKeyLog.created_at < today_start
+            ).scalar() or 0
+            changes["total_api_call_change"] = today_api_count - yesterday_api_count
+        else:
+            # 没有api_key，如果今日也是0则无对比意义
+            changes["total_api_call_change"] = None
+    except Exception as e:
+        business_logger.warning(f"计算API调用昨日对比失败: {str(e)}")
+
+    # --- total_knowledge_change ---
+    try:
+        # 今天有效总量：当前status=1的知识库总数，排除用户知识库(permission_id='Memory')
+        today_knowledge = db.query(func.count(Knowledge.id)).filter(
+            Knowledge.workspace_id == workspace_id,
+            Knowledge.status == 1,
+            Knowledge.permission_id != "Memory"
+        ).scalar() or 0
+        # 昨日有效总量：昨天之前创建的、当前仍有效的知识库，排除用户知识库
+        yesterday_knowledge = db.query(func.count(Knowledge.id)).filter(
+            Knowledge.workspace_id == workspace_id,
+            Knowledge.status == 1,
+            Knowledge.permission_id != "Memory",
+            Knowledge.created_at < today_start
+        ).scalar() or 0
+        # 今日软删：今天被软删的知识库(status=2 且 updated_at >= today_start)，排除用户知识库
+        today_deleted_knowledge = db.query(func.count(Knowledge.id)).filter(
+            Knowledge.workspace_id == workspace_id,
+            Knowledge.status == 2,
+            Knowledge.permission_id != "Memory",
+            Knowledge.updated_at >= today_start
+        ).scalar() or 0
+
+        if yesterday_knowledge == 0 and today_knowledge == 0 and today_deleted_knowledge == 0:
+            changes["total_knowledge_change"] = None
+        else:
+            # change = 今天有效总量 - 今日软删 - 昨日有效总量
+            changes["total_knowledge_change"] = today_knowledge - today_deleted_knowledge - yesterday_knowledge
+    except Exception as e:
+        business_logger.warning(f"计算知识库昨日对比失败: {str(e)}")
+
+    # --- total_app_change ---
+    try:
+        # === 自有app ===
+        # 今天有效总量
+        today_own_apps = db.query(func.count(App.id)).filter(
+            App.workspace_id == workspace_id,
+            App.is_active == True
+        ).scalar() or 0
+        # 昨日有效总量
+        yesterday_own_apps = db.query(func.count(App.id)).filter(
+            App.workspace_id == workspace_id,
+            App.is_active == True,
+            App.created_at < today_start
+        ).scalar() or 0
+        # 今日软删
+        today_deleted_own_apps = db.query(func.count(App.id)).filter(
+            App.workspace_id == workspace_id,
+            App.is_active == False,
+            App.updated_at >= today_start
+        ).scalar() or 0
+
+        # === 被分享app ===
+        # 今天有效总量
+        today_shared_apps = db.query(func.count(AppShare.id)).filter(
+            AppShare.target_workspace_id == workspace_id,
+            AppShare.is_active == True
+        ).scalar() or 0
+        # 昨日有效总量
+        yesterday_shared_apps = db.query(func.count(AppShare.id)).filter(
+            AppShare.target_workspace_id == workspace_id,
+            AppShare.is_active == True,
+            AppShare.created_at < today_start
+        ).scalar() or 0
+        # 今日软删
+        today_deleted_shared_apps = db.query(func.count(AppShare.id)).filter(
+            AppShare.target_workspace_id == workspace_id,
+            AppShare.is_active == False,
+            AppShare.updated_at >= today_start
+        ).scalar() or 0
+
+        today_total_app = today_own_apps + today_shared_apps
+        yesterday_total_app = yesterday_own_apps + yesterday_shared_apps
+        total_deleted = today_deleted_own_apps + today_deleted_shared_apps
+
+        if yesterday_total_app == 0 and today_total_app == 0 and total_deleted == 0:
+            changes["total_app_change"] = None
+        else:
+            # change = 今天有效总量 - 今日软删 - 昨日有效总量
+            changes["total_app_change"] = today_total_app - total_deleted - yesterday_total_app
+    except Exception as e:
+        business_logger.warning(f"计算应用数量昨日对比失败: {str(e)}")
+
+    # --- total_memory_change ---
+    try:
+        today_memory = today_data.get("total_memory")
+        if today_memory is None:
+            changes["total_memory_change"] = None
+        elif storage_type == "neo4j":
+            # 从 memory_increments 取最近一条 created_at < today_start 的记录
+            last_record = db.query(MemoryIncrement).filter(
+                MemoryIncrement.workspace_id == workspace_id,
+                MemoryIncrement.created_at < today_start
+            ).order_by(desc(MemoryIncrement.created_at)).first()
+            if last_record is None:
+                changes["total_memory_change"] = None
+            else:
+                changes["total_memory_change"] = today_memory - last_record.total_num
+        elif storage_type == "rag":
+            # RAG: 查 documents 表中 created_at < today_start 的 chunk_num 之和
+            from app.models.document_model import Document
+            from app.models.end_user_model import EndUser as _EndUser
+            from app.models.app_model import App as _App
+
+            end_user_ids = [
+                str(eid) for (eid,) in db.query(_EndUser.id)
+                .join(_App, _EndUser.app_id == _App.id)
+                .filter(_App.workspace_id == workspace_id)
+                .all()
+            ]
+            if not end_user_ids:
+                changes["total_memory_change"] = None
+            else:
+                file_names = [f"{uid}.txt" for uid in end_user_ids]
+                yesterday_chunk = db.query(func.sum(Document.chunk_num)).filter(
+                    Document.file_name.in_(file_names),
+                    Document.created_at < today_start
+                ).scalar()
+                if yesterday_chunk is None:
+                    changes["total_memory_change"] = None
+                else:
+                    changes["total_memory_change"] = today_memory - int(yesterday_chunk)
+    except Exception as e:
+        business_logger.warning(f"计算记忆总量昨日对比失败: {str(e)}")
+
+    business_logger.info(f"昨日对比计算完成: {changes}")
+    return changes
+
+
 def get_current_user_total_chunk(
    end_user_id: str,
    db: Session,
@@ -881,4 +1068,66 @@ async def generate_rag_profile(
        "tags_count": len(tags),
        "personas_count": len(personas),
        "insight_generated": bool(insight_sections.get("memory_insight")),
-    }
+    }
+
+
+def get_dashboard_common_stats(db: Session, workspace_id) -> dict:
+    """
+    获取 dashboard 中 neo4j/rag 分支共享的统计数据：
+    total_app、total_knowledge、total_api_call
+
+    Returns:
+        dict: {"total_app": int, "total_knowledge": int, "total_api_call": int}
+    """
+    result = {"total_app": 0, "total_knowledge": 0, "total_api_call": 0}
+
+    # total_app: 统计当前空间下的所有app数量（包含自有 + 被分享给本工作空间的app）
+    try:
+        from app.services import app_service as _app_svc
+        _, total_app = _app_svc.AppService(db).list_apps(
+            workspace_id=workspace_id, include_shared=True, pagesize=1
+        )
+        result["total_app"] = total_app
+    except Exception as e:
+        business_logger.warning(f"获取应用数量失败: {e}")
+
+    # total_knowledge: 统计顶层知识库（parent_id = workspace_id）
+    try:
+        from sqlalchemy import func as _func
+        from app.models.knowledge_model import Knowledge as _Knowledge
+        total_knowledge = db.query(_func.count(_Knowledge.id)).filter(
+            _Knowledge.workspace_id == workspace_id,
+            _Knowledge.status == 1,
+            _Knowledge.parent_id == _Knowledge.workspace_id
+        ).scalar() or 0
+        result["total_knowledge"] = total_knowledge
+    except Exception as e:
+        business_logger.warning(f"获取知识库数量失败: {e}")
+
+    # total_api_call: 仅统计当天 api_key_log 调用次数
+    try:
+        from datetime import datetime
+        from sqlalchemy import func as _api_func
+        from app.models.api_key_model import ApiKey as _ApiKey, ApiKeyLog as _ApiKeyLog
+
+        _now = datetime.now()
+        _today_start = _now.replace(hour=0, minute=0, second=0, microsecond=0)
+
+        _api_key_ids = [
+            row[0] for row in db.query(_ApiKey.id).filter(
+                _ApiKey.workspace_id == workspace_id
+            ).all()
+        ]
+        if _api_key_ids:
+            total_api_calls = db.query(_api_func.count(_ApiKeyLog.id)).filter(
+                _ApiKeyLog.api_key_id.in_(_api_key_ids),
+                _ApiKeyLog.created_at >= _today_start,
+                _ApiKeyLog.created_at < _now
+            ).scalar() or 0
+        else:
+            total_api_calls = 0
+        result["total_api_call"] = total_api_calls
+    except Exception as e:
+        business_logger.warning(f"获取API调用统计失败: {e}")
+
+    return result
--- a/api/app/services/memory_perceptual_service.py
+++ b/api/app/services/memory_perceptual_service.py
@@ -232,7 +232,8 @@ class MemoryPerceptualService:
                    provider=model_config.provider,
                    api_key=model_config.api_key,
                    base_url=model_config.api_base,
-                    is_omni=model_config.is_omni
+                    is_omni=model_config.is_omni,
+                    support_thinking="thinking" in (model_config.capability or []),
                )
            )
        return llm, model_config
--- a/api/app/services/memory_storage_service.py
+++ b/api/app/services/memory_storage_service.py
@@ -613,37 +613,6 @@ async def search_entity(end_user_id: Optional[str] = None) -> Dict[str, Any]:
    return data


-async def search_all(end_user_id: Optional[str] = None) -> Dict[str, Any]:
-    result = await _neo4j_connector.execute_query(
-        MemoryConfigRepository.SEARCH_FOR_ALL,
-        end_user_id=end_user_id,
-    )
-
-    # 检查结果是否为空或长度不足
-    if not result or len(result) < 4:
-        data = {
-            "total": 0,
-            "counts": {
-                "dialogue": 0,
-                "chunk": 0,
-                "statement": 0,
-                "entity": 0,
-            },
-        }
-        return data
-
-    data = {
-        "total": result[-1]["Count"],
-        "counts": {
-            "dialogue": result[0]["Count"],
-            "chunk": result[1]["Count"],
-            "statement": result[2]["Count"],
-            "entity": result[3]["Count"],
-        },
-    }
-    return data
-
-
 async def kb_type_distribution(end_user_id: Optional[str] = None) -> Dict[str, Any]:
    """统一知识库类型分布接口。

--- a/api/app/services/model_parameter_merger.py
+++ b/api/app/services/model_parameter_merger.py
@@ -45,12 +45,20 @@ class ModelParameterMerger:
            "frequency_penalty": 0.0,
            "presence_penalty": 0.0,
            "n": 1,
-            "stop": None
+            "stop": None,
+            "deep_thinking": False,
+            "thinking_budget_tokens": None
        }
        
        # 合并参数：默认值 -> 模型配置 -> Agent 配置
        merged = default_params.copy()
        
+        # Pydantic 对象转为 dict
+        if model_config_params and hasattr(model_config_params, 'model_dump'):
+            model_config_params = model_config_params.model_dump()
+        if agent_config_params and hasattr(agent_config_params, 'model_dump'):
+            agent_config_params = agent_config_params.model_dump()
+        
        # 应用模型配置参数
        if model_config_params:
            for key in default_params:
--- a/api/app/services/model_service.py
+++ b/api/app/services/model_service.py
@@ -85,15 +85,16 @@ class ModelConfigService:

    @staticmethod
    async def validate_model_config(
-            db: Session,
-            *,
-            model_name: str,
-            provider: str,
-            api_key: str,
-            api_base: Optional[str] = None,
-            model_type: str = "llm",
-            test_message: str = "Hello",
-            is_omni: bool = False
+        db: Session,
+        *,
+        model_name: str,
+        provider: str,
+        api_key: str,
+        api_base: Optional[str] = None,
+        model_type: str = "llm",
+        test_message: str = "Hello",
+        is_omni: bool = False,
+        capability: Optional[list] = None
    ) -> Dict[str, Any]:
        """验证模型配置是否有效

@@ -124,6 +125,7 @@ class ModelConfigService:
                api_key=api_key,
                base_url=api_base,
                is_omni=is_omni,
+                support_thinking="thinking" in (capability or []),
                temperature=0.7,
                max_tokens=100
            )
@@ -320,7 +322,8 @@ class ModelConfigService:
                    api_base=api_key_data.api_base,
                    model_type=model_data.type,
                    test_message="Hello",
-                    is_omni=model_data.is_omni
+                    is_omni=model_data.is_omni,
+                    capability=model_data.capability
                )
                if not validation_result["valid"]:
                    raise BusinessException(
@@ -590,7 +593,8 @@ class ModelApiKeyService:
                api_base=data.api_base,
                model_type=model_config.type,
                test_message="Hello",
-                is_omni=data.is_omni
+                is_omni=data.is_omni,
+                capability=model_config.capability
            )
            if not validation_result["valid"]:
                # 记录验证失败的模型，但不抛出异常
@@ -675,7 +679,8 @@ class ModelApiKeyService:
                    api_base=api_key_data.api_base,
                    model_type=model_config.type,
                    test_message="Hello",
-                    is_omni=api_key_data.is_omni
+                    is_omni=api_key_data.is_omni,
+                    capability=model_config.capability
                )
                if not validation_result["valid"]:
                    raise BusinessException(
@@ -707,7 +712,8 @@ class ModelApiKeyService:
                api_base=api_key_data.api_base or existing_api_key.api_base,
                model_type=model_config.type,
                test_message="Hello",
-                is_omni=model_config.is_omni
+                is_omni=model_config.is_omni,
+                capability=model_config.capability
            )
            if not validation_result["valid"]:
                raise BusinessException(
--- a/api/app/services/multi_agent_orchestrator.py
+++ b/api/app/services/multi_agent_orchestrator.py
@@ -287,6 +287,11 @@ class MultiAgentOrchestrator:
            sub_conversation_id = None
            total_tokens = 0
            
+            # 累加 Master Agent 路由决策消耗的 token
+            total_tokens += task_analysis.get("routing_tokens", 0)
+            # 累加 Master Agent 整合消耗的 token
+            total_tokens += getattr(self, '_last_merge_tokens', 0)
+
            if isinstance(results, dict):
                sub_conversation_id = results.get("conversation_id") or results.get("result", {}).get("conversation_id")
                # 提取 token 信息
@@ -358,12 +363,16 @@ class MultiAgentOrchestrator:
            variables=variables
        )

+        # 获取路由决策消耗的 token
+        routing_tokens = getattr(self.router, '_last_routing_tokens', 0)
+
        logger.info(
            "Master Agent 分析完成",
            extra={
                "selected_agent": routing_decision.get("selected_agent_id"),
                "confidence": routing_decision.get("confidence"),
-                "strategy": routing_decision.get("strategy")
+                "strategy": routing_decision.get("strategy"),
+                "routing_tokens": routing_tokens
            }
        )

@@ -372,7 +381,8 @@ class MultiAgentOrchestrator:
            "variables": variables or {},
            "sub_agents": self.config.sub_agents,
            "initial_context": variables or {},
-            "routing_decision": routing_decision
+            "routing_decision": routing_decision,
+            "routing_tokens": routing_tokens
        }

    async def _execute_sequential(
@@ -1032,6 +1042,11 @@ class MultiAgentOrchestrator:

        # 5. 流式执行子 Agent
        sub_conversation_id = None
+        # Master Agent 路由决策消耗的 token，通过 sub_usage 事件发送给上层
+        routing_tokens = task_analysis.get("routing_tokens", 0)
+        if routing_tokens > 0:
+            yield self._format_sse_event("sub_usage", {"total_tokens": routing_tokens})
+
        async for event in self._execute_sub_agent_stream(
            agent_data["config"],
            message,
@@ -1054,6 +1069,7 @@ class MultiAgentOrchestrator:
                except:
                    pass

+            # 直接透传所有事件（包括 sub_usage），累加统一由上层处理
            yield event

        # 6. 如果有会话 ID，发送一个包含它的事件
@@ -2600,6 +2616,7 @@ class MultiAgentOrchestrator:
                api_key=api_key_config.api_key,
                base_url=api_key_config.api_base,
                is_omni=api_key_config.is_omni,
+                support_thinking="thinking" in (api_key_config.capability or []),
                temperature=0.7,  # 整合任务使用中等温度
                max_tokens=2000
            )
@@ -2612,6 +2629,17 @@ class MultiAgentOrchestrator:

            ModelApiKeyService.record_api_key_usage(self.db, api_key_config.id)

+            # 提取整合消耗的 token
+            merge_tokens = 0
+            if hasattr(response, 'usage_metadata') and response.usage_metadata:
+                um = response.usage_metadata
+                merge_tokens = um.get("total_tokens", 0) if isinstance(um, dict) else getattr(um, "total_tokens", 0)
+            elif hasattr(response, 'response_metadata') and response.response_metadata:
+                token_usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage", {})
+                if isinstance(token_usage, dict):
+                    merge_tokens = token_usage.get("total_tokens", 0)
+            self._last_merge_tokens = merge_tokens
+
            # 提取响应内容
            if hasattr(response, 'content'):
                merged_response = response.content
@@ -2621,7 +2649,8 @@ class MultiAgentOrchestrator:
            logger.info(
                "Master Agent 整合完成",
                extra={
-                    "merged_length": len(merged_response)
+                    "merged_length": len(merged_response),
+                    "merge_tokens": merge_tokens
                }
            )

@@ -2766,6 +2795,7 @@ class MultiAgentOrchestrator:
                api_key=api_key_config.api_key,
                base_url=api_key_config.api_base,
                is_omni=api_key_config.is_omni,
+                support_thinking="thinking" in (api_key_config.capability or []),
                temperature=0.7,
                max_tokens=2000,
                extra_params={"streaming": True}  # 启用流式输出
--- a/api/app/services/prompt_optimizer_service.py
+++ b/api/app/services/prompt_optimizer_service.py
@@ -185,7 +185,8 @@ class PromptOptimizerService:
            provider=api_config.provider,
            api_key=api_config.api_key,
            base_url=api_config.api_base,
-            is_omni=api_config.is_omni
+            is_omni=api_config.is_omni,
+            support_thinking="thinking" in (api_config.capability or []),
        ), type=ModelType(model_config.type))
        try:
            prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt')
--- a/api/app/services/shared_chat_service.py
+++ b/api/app/services/shared_chat_service.py
@@ -248,7 +248,9 @@ class SharedChatService:
            max_tokens=model_parameters.get("max_tokens", 2000),
            system_prompt=system_prompt,
            tools=tools,
-
+            deep_thinking=model_parameters.get("deep_thinking", False),
+            thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
+            capability=api_key_obj.capability or [],
        )

        # 加载历史消息
@@ -450,7 +452,10 @@ class SharedChatService:
                max_tokens=model_parameters.get("max_tokens", 2000),
                system_prompt=system_prompt,
                tools=tools,
-                streaming=True
+                streaming=True,
+                deep_thinking=model_parameters.get("deep_thinking", False),
+                thinking_budget_tokens=model_parameters.get("thinking_budget_tokens"),
+                capability=api_key_obj.capability or [],
            )

            # 加载历史消息
@@ -479,6 +484,8 @@ class SharedChatService:
            ):
                if isinstance(chunk, int):
                    total_tokens = chunk
+                elif isinstance(chunk, dict) and chunk.get("type") == "reasoning":
+                    yield f"event: reasoning\ndata: {json.dumps({'content': chunk['content']}, ensure_ascii=False)}\n\n"
                else:
                    full_content += chunk
                    # 发送消息块事件
--- a/api/app/tasks.py
+++ b/api/app/tasks.py
@@ -61,9 +61,9 @@ def _get_or_create_redis_pool() -> redis.ConnectionPool | None:
                db=settings.REDIS_DB_CELERY_BACKEND,
                password=settings.REDIS_PASSWORD,
                decode_responses=True,
-                max_connections=10,
+                max_connections=100,
                socket_connect_timeout=5,
-                socket_timeout=5,
+                socket_timeout=10,
                retry_on_timeout=True,
                health_check_interval=30,
            )
@@ -1100,7 +1100,7 @@ def read_message_task(self, end_user_id: str, message: str, history: List[Dict[s
        }


-@celery_app.task(name="app.core.memory.agent.write_message", bind=True)
+@celery_app.task(name="app.core.memory.agent.write_message", bind=True, acks_late=False)
 def write_message_task(
        self,
        end_user_id: str,
@@ -1176,6 +1176,7 @@ def write_message_task(

    redis_client = get_sync_redis_client()
    lock = None
+    loop = None
    if redis_client is not None:
        lock = RedisFairLock(
            key=f"memory_write:{end_user_id}",
@@ -1196,6 +1197,7 @@ def write_message_task(
            }

    try:
+        task_start_time = int(time.time())
        loop = set_asyncio_event_loop()

        result = loop.run_until_complete(_run())
@@ -1205,7 +1207,7 @@ def write_message_task(
                    f"- elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}")

        try:
-            _r = get_sync_redis_client()
+            _r = redis_client
            if _r is not None:
                from datetime import timezone as _tz
                _now_utc = datetime.now(_tz.utc).isoformat()
@@ -1219,6 +1221,7 @@ def write_message_task(
        return {
            "status": "SUCCESS",
            "result": result,
+            "start_at": task_start_time,
            "end_user_id": end_user_id,
            "config_id": config_id,
            "elapsed_time": elapsed_time,
@@ -1252,7 +1255,8 @@ def write_message_task(
                logger.warning(f"[CELERY WRITE] 释放锁失败: {e}")
        # Gracefully shutdown the event loop to prevent
        # 'RuntimeError: Event loop is closed' from httpx.AsyncClient.__del__
-        _shutdown_loop_gracefully(loop)
+        if loop:
+            _shutdown_loop_gracefully(loop)


 # unused task
@@ -1320,7 +1324,7 @@ def write_total_memory_task(workspace_id: str) -> Dict[str, Any]:
        from app.models.app_model import App
        from app.models.end_user_model import EndUser
        from app.repositories.memory_increment_repository import write_memory_increment
-        from app.services.memory_storage_service import search_all
+        from app.services.memory_storage_service import search_all_batch

        with get_db_context() as db:
            try:
@@ -1354,27 +1358,15 @@ def write_total_memory_task(workspace_id: str) -> Dict[str, Any]:
                    EndUser.workspace_id == workspace_id
                ).distinct().all()

-                # 3. 遍历所有end_user，查询每个宿主的记忆总量并累加
-                total_num = 0
-                end_user_details = []
+                # 3. 批量查询所有宿主的记忆总量
+                end_user_id_list = [str(eid) for (eid,) in end_users]
+                batch_result = await search_all_batch(end_user_id_list)

-                for (end_user_id,) in end_users:
-                    try:
-                        # 调用 search_all 接口查询该宿主的总量
-                        result = await search_all(str(end_user_id))
-                        user_total = result.get("total", 0)
-                        total_num += user_total
-                        end_user_details.append({
-                            "end_user_id": str(end_user_id),
-                            "total": user_total
-                        })
-                    except Exception as e:
-                        # 记录单个用户查询失败，但继续处理其他用户
-                        end_user_details.append({
-                            "end_user_id": str(end_user_id),
-                            "total": 0,
-                            "error": str(e)
-                        })
+                total_num = sum(batch_result.values())
+                end_user_details = [
+                    {"end_user_id": uid, "total": batch_result.get(uid, 0)}
+                    for uid in end_user_id_list
+                ]

                # 4. 写入数据库
                memory_increment = write_memory_increment(
@@ -1437,7 +1429,7 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]:
        from app.models.end_user_model import EndUser
        from app.models.workspace_model import Workspace
        from app.repositories.memory_increment_repository import write_memory_increment
-        from app.services.memory_storage_service import search_all
+        from app.services.memory_storage_service import search_all_batch

        with get_db_context() as db:
            try:
@@ -1495,28 +1487,15 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]:
                            EndUser.workspace_id == workspace_id
                        ).distinct().all()

-                        # 3. 遍历所有end_user，查询每个宿主的记忆总量并累加
-                        total_num = 0
-                        end_user_details = []
+                        # 3. 批量查询所有宿主的记忆总量
+                        end_user_id_list = [str(eid) for (eid,) in end_users]
+                        batch_result = await search_all_batch(end_user_id_list)

-                        for (end_user_id,) in end_users:
-                            try:
-                                # 调用 search_all 接口查询该宿主的总量
-                                result = await search_all(str(end_user_id))
-                                user_total = result.get("total", 0)
-                                total_num += user_total
-                                end_user_details.append({
-                                    "end_user_id": str(end_user_id),
-                                    "total": user_total
-                                })
-                            except Exception as e:
-                                # 记录单个用户查询失败，但继续处理其他用户
-                                logger.warning(f"查询用户 {end_user_id} 记忆失败: {str(e)}")
-                                end_user_details.append({
-                                    "end_user_id": str(end_user_id),
-                                    "total": 0,
-                                    "error": str(e)
-                                })
+                        total_num = sum(batch_result.values())
+                        end_user_details = [
+                            {"end_user_id": uid, "total": batch_result.get(uid, 0)}
+                            for uid in end_user_id_list
+                        ]

                        # 4. 写入数据库
                        memory_increment = write_memory_increment(
--- a/api/app/utils/performance_timer.py
+++ b/api/app/utils/performance_timer.py
@@ -6,13 +6,13 @@
 """

 import time
-from contextlib import contextmanager
+from contextlib import asynccontextmanager, contextmanager
 from app.core.logging_config import get_api_logger

 # 获取API专用日志器
 api_logger = get_api_logger()

-
+# 同步的上下文管理器，使用@contextmanager修饰
@contextmanager
 def timer(label: str, user_count: int = 0):
    """上下文管理器：用于测量代码块执行时间
@@ -35,3 +35,27 @@ def timer(label: str, user_count: int = 0):
        elapsed = (time.perf_counter() - start) * 1000  # 转换为毫秒
        extra_info = f", 用户数: {user_count}" if user_count > 0 else ""
        api_logger.info(f"[性能统计] {label}: {elapsed:.2f}ms{extra_info}")
+
+# 异步的上下文管理器，使用@asynccontextmanager装饰
+@asynccontextmanager
+async def async_timer(label: str, user_count: int = 0):
+    """异步上下文管理器：用于测量包含 await 的异步代码块执行时间
+
+    Args:
+        label: 统计标签，用于标识被测量的代码块
+        user_count: 用户数，可选参数，用于记录处理的用户数量
+
+    Usage:
+        async with async_timer("获取用户列表"):
+            users = await get_users()
+
+        async with async_timer("批量处理", user_count=len(user_ids)):
+            await process_users(user_ids)
+    """
+    start = time.perf_counter()
+    try:
+        yield
+    finally:
+        elapsed = (time.perf_counter() - start) * 1000  # 转换为毫秒
+        extra_info = f", 用户数: {user_count}" if user_count > 0 else ""
+        api_logger.info(f"[性能统计] {label}: {elapsed:.2f}ms{extra_info}")
--- a/api/app/utils/redis_lock.py
+++ b/api/app/utils/redis_lock.py
@@ -1,14 +1,21 @@
-import redis
-import uuid
-import time
+import logging
 import threading
+import time
+import uuid
+
+import redis
+from redis.exceptions import (
+    ConnectionError,
+    TimeoutError,
+    RedisError,
+)

 UNLOCK_SCRIPT = """
 if redis.call("get", KEYS[1]) == ARGV[1] then
    return redis.call("del", KEYS[1])
-else
-    return 0
 end
+
+return 0
 """

 RENEW_SCRIPT = """
@@ -19,38 +26,44 @@ else
 end
 """

-CLEANUP_DEAD_HEAD_SCRIPT = """
+ACQUIRE_SCRIPT = """
 local queue_key = KEYS[1]
 local lock_key = KEYS[2]

-local first = redis.call("lindex", queue_key, 0)
-if not first then
-    return 0
+local client_id = ARGV[1]
+local expire = tonumber(ARGV[2])
+local time_out = tonumber(ARGV[3])
+
+local now = tonumber(redis.call("time")[1])
+
+if redis.call("zscore", queue_key, client_id) == false then
+    redis.call("zadd", queue_key, now, client_id)
 end

-if redis.call("exists", lock_key) == 1 then
-    return 0
+local expired = redis.call("zrangebyscore", queue_key, 0, now - time_out)
+
+for _, v in ipairs(expired) do
+    redis.call("zrem", queue_key, v)
 end

-redis.call("lpop", queue_key)
-return 1
-"""
+local first = redis.call("zrange", queue_key, 0, 0)[1]
+if first == client_id then

-SAFE_RELEASE_QUEUE_SCRIPT = """
-local queue_key = KEYS[1]
-local value = ARGV[1]
+    if redis.call("set", lock_key, client_id, "NX", "EX", expire) then
+        redis.call("zrem", queue_key, client_id)
+        return 1
+    end

-local first = redis.call("lindex", queue_key, 0)
-if first == value then
-    redis.call("lpop", queue_key)
-    return 1
+    if redis.call("get", lock_key) == client_id then
+        redis.call("expire", lock_key, expire)
+        return 1
+    end
 end
 return 0
 """


 def _ensure_str(val):
-    """统一将 Redis 返回值转为 str，兼容 decode_responses=True/False"""
    if val is None:
        return None
    if isinstance(val, bytes):
@@ -59,18 +72,27 @@ def _ensure_str(val):


 class RedisFairLock:
+    # ZOMBIE CLEAN BUFFER
+    CLEANUP_BUFFER = 30
+    # Redis 操作失败时的最大重试次数
+    MAX_RETRIES = 3
+    # 重试间隔基数（秒），实际间隔 = base * 2^attempt（指数退避）
+    RETRY_BACKOFF_BASE = 0.1
+
+    _logger = logging.getLogger(__name__)
+
    def __init__(
            self,
            key: str,
            redis_client: redis.StrictRedis,
            expire: int = 30,
-            retry_interval: float = 0.05,
+            retry_interval: float = 1,
            timeout: float = 600,
            auto_renewal: bool = True
    ):
        self.key = key
-        self.queue_key = f"{key}:queue"
-        self.value = str(uuid.uuid4())
+        self.queue_key = f"{key}:zset"
+        self.value = f"{uuid.uuid4().hex}:{int(time.time())}"
        self.expire = expire
        self.retry_interval = retry_interval
        self.timeout = timeout
@@ -80,28 +102,56 @@ class RedisFairLock:
        self._renew_thread = None
        self._stop_renew = threading.Event()

+    def _exec_with_retry(self, func, *args, raise_on_fail=True, **kwargs):
+        """
+        带指数退避重试的 Redis 操作执行器。
+
+        对 ConnectionError / TimeoutError 自动重试，其他异常直接抛出。
+        """
+        last_err = None
+        for attempt in range(self.MAX_RETRIES):
+            try:
+                return func(*args, **kwargs)
+            except (ConnectionError, TimeoutError) as e:
+                last_err = e
+                wait = self.RETRY_BACKOFF_BASE * (2 ** attempt)
+                self._logger.warning(
+                    f"[RedisFairLock] Redis error on attempt {attempt + 1}/{self.MAX_RETRIES} "
+                    f"for key={self.key}: {e}, retrying in {wait:.2f}s"
+                )
+                time.sleep(wait)
+            except RedisError:
+                raise
+        if raise_on_fail:
+            raise last_err
+        return None
+
    def acquire(self):
        start = time.time()

-        self.redis.rpush(self.queue_key, self.value)
-
        while True:
-            first = _ensure_str(self.redis.lindex(self.queue_key, 0))
+            ok = self._exec_with_retry(
+                self.redis.eval,
+                ACQUIRE_SCRIPT,
+                2,
+                self.queue_key,
+                self.key,
+                self.value,
+                str(self.expire),
+                str(self.timeout + self.CLEANUP_BUFFER),
+            )

-            if first == self.value:
-                ok = self.redis.set(self.key, self.value, nx=True, ex=self.expire)
-                if ok:
-                    self._locked = True
-
-                    if self.auto_renewal:
-                        self._start_renewal()
-                    return True
-
-            if first:
-                self.redis.eval(CLEANUP_DEAD_HEAD_SCRIPT, 2, self.queue_key, self.key)
+            if ok == 1:
+                self._locked = True
+                if self.auto_renewal:
+                    self._start_renewal()
+                return True

            if time.time() - start > self.timeout:
-                self.redis.lrem(self.queue_key, 0, self.value)
+                self._exec_with_retry(
+                    self.redis.zrem, self.queue_key, self.value,
+                    raise_on_fail=False,
+                )
                return False

            time.sleep(self.retry_interval)
@@ -112,13 +162,17 @@ class RedisFairLock:
            if self._stop_renew.is_set():
                break

-            self.redis.eval(
+            success = self._exec_with_retry(
+                self.redis.eval,
                RENEW_SCRIPT,
                1,
                self.key,
                self.value,
-                str(self.expire)
+                str(self.expire),
+                raise_on_fail=False,
            )
+            if not success:
+                break

    def _start_renewal(self):
        self._stop_renew = threading.Event()
@@ -137,9 +191,10 @@ class RedisFairLock:
        if self.auto_renewal:
            self._stop_renewal()

-        self.redis.eval(UNLOCK_SCRIPT, 1, self.key, self.value)
-
-        self.redis.eval(SAFE_RELEASE_QUEUE_SCRIPT, 1, self.queue_key, self.value)
+        self._exec_with_retry(
+            self.redis.eval, UNLOCK_SCRIPT, 1, self.key, self.value,
+            raise_on_fail=False,
+        )

        self._locked = False

@@ -151,4 +206,3 @@ class RedisFairLock:

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()
-
--- a/web/src/api/knowledgeBase.ts
+++ b/web/src/api/knowledgeBase.ts
@@ -68,7 +68,7 @@ export const getModelTypeList = async () => {
    return response as any[];
 };
 // 获取模型列表
-export const getModelList = async (types: string[], pageInfo: PageRequest) => {
+export const getModelList = async (pageInfo: PageRequest, types?: string[]) => {
  const response = await request.get(`${apiPrefix}/models`, { ...pageInfo, type: types?.join(','), is_active: true });
    return response as any;
 };
--- a/web/src/views/KnowledgeBase/components/CreateModal.tsx
+++ b/web/src/views/KnowledgeBase/components/CreateModal.tsx
@@ -162,7 +162,7 @@ const CreateModal = forwardRef<CreateModalRef, CreateModalRefProps>(({
    // If model data hasn't been fetched yet, fetch it once
    if (!models) {
      try {
-        models = await getModelList({ page: 1, pagesize: 100 });
+        models = await getModelList({ page: 1, pagesize: 100 }, ['llm', 'embedding', 'rerank', 'chat']);
      } catch (error) {
        console.error('Failed to fetch models:', error);
        models = { items: [] };
--- a/web/src/views/KnowledgeBase/index.tsx
+++ b/web/src/views/KnowledgeBase/index.tsx
@@ -207,7 +207,7 @@ const KnowledgeBaseManagement: FC = () => {
  };
  const fetchModelList = async () => { 
    try {
-      const response = await getModelList(['llm', 'embedding', 'rerank', 'chat'], { page: 1, pagesize: 100 });
+      const response = await getModelList({ page: 1, pagesize: 100 }, ['llm', 'embedding', 'rerank', 'chat']);
      // 缓存模型列表，建立 id -> name 的映射
      if (response?.items && Array.isArray(response.items)) {
        const cache: Record<string, string> = {};
--- a/web/src/views/Workflow/components/Editor/Jinja2Editor.tsx
+++ b/web/src/views/Workflow/components/Editor/Jinja2Editor.tsx
@@ -0,0 +1,186 @@
+/*
+ * @Author: ZhaoYing 
+ * @Date: 2026-04-02 15:15:36 
+ * @Last Modified by:   ZhaoYing 
+ * @Last Modified time: 2026-04-02 15:15:36 
+ */
+import { type FC, useEffect, useMemo } from 'react';
+import { LexicalComposer } from '@lexical/react/LexicalComposer';
+import { RichTextPlugin } from '@lexical/react/LexicalRichTextPlugin';
+import { ContentEditable } from '@lexical/react/LexicalContentEditable';
+import { HistoryPlugin } from '@lexical/react/LexicalHistoryPlugin';
+import { LexicalErrorBoundary } from '@lexical/react/LexicalErrorBoundary';
+
+import AutocompletePlugin, { type Suggestion } from './plugin/AutocompletePlugin';
+import CharacterCountPlugin from './plugin/CharacterCountPlugin';
+import InitialValuePlugin from './plugin/InitialValuePlugin';
+import CommandPlugin from './plugin/CommandPlugin';
+import Jinja2HighlightPlugin from './plugin/Jinja2HighlightPlugin';
+import LineNumberPlugin from './plugin/LineNumberPlugin';
+import BlurPlugin from './plugin/BlurPlugin';
+
+const jinja2Theme = {
+  paragraph: 'editor-paragraph',
+  code: 'jinja2-expression',
+  text: {
+    bold: 'editor-text-bold',
+    italic: 'editor-text-italic',
+    code: 'jinja2-inline',
+  },
+};
+
+const initialConfig = {
+  namespace: 'AutocompleteEditor',
+  theme: jinja2Theme,
+  nodes: [],
+  onError: (error: Error) => console.error(error),
+};
+
+const STYLE_ID = 'code-editor-styles';
+const JINJA2_STYLES = `
+  .jinja2-expression {
+    background-color: #f6f8fa !important;
+    border: 1px solid #d1d9e0 !important;
+    border-radius: 3px !important;
+    padding: 2px 4px !important;
+    font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace !important;
+    font-size: 13px !important;
+    color: #0969da !important;
+  }
+  .jinja2-inline {
+    background-color: #f6f8fa !important;
+    padding: 1px 3px !important;
+    border-radius: 2px !important;
+    font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace !important;
+    font-size: 13px !important;
+    color: #0969da !important;
+  }
+  .editor-paragraph { margin: 0; }
+  .editor-with-line-numbers { display: flex; }
+  .line-numbers {
+    font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
+    font-size: 12px;
+    line-height: 16px;
+    padding: 4px 8px;
+    text-align: right;
+    user-select: none;
+    display: flex;
+    flex-direction: column;
+  }
+  .line-numbers > div { min-height: 20px; display: flex; align-items: flex-start; }
+  .editor-content-wrapper { flex: 1; }
+  .editor-content-with-numbers {
+    white-space: pre-wrap;
+    font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
+  }
+  .editor-content-with-numbers p { margin: 0; min-height: 20px; }
+`;
+
+export interface Jinja2EditorProps {
+  placeholder?: string;
+  value?: string;
+  onChange?: (value: string) => void;
+  options?: Suggestion[];
+  variant?: 'outlined' | 'borderless';
+  height?: number;
+  size?: 'default' | 'small';
+  className?: string;
+}
+
+const Jinja2Editor: FC<Jinja2EditorProps> = ({
+  placeholder = '请输入内容...',
+  value = '',
+  onChange,
+  options = [],
+  variant = 'borderless',
+  size = 'default',
+  height,
+  className,
+}) => {
+  useEffect(() => {
+    if (!document.getElementById(STYLE_ID)) {
+      const style = document.createElement('style');
+      style.id = STYLE_ID;
+      style.textContent = JINJA2_STYLES;
+      document.head.appendChild(style);
+    }
+  }, []);
+
+  const minheight = useMemo(
+    () => `${height ?? (size === 'small' ? 60 : 120)}px`,
+    [height, size],
+  );
+
+  const fontSize = size === 'small' ? '12px' : '14px';
+
+  const lineHeight = useMemo(
+    () => `${height ? height - 10 : size === 'small' ? 16 : 20}px`,
+    [height, size],
+  );
+
+  const placeHolderMinheight = `${height ? 16 : size === 'small' ? 16 : 30}px`;
+
+  return (
+    <LexicalComposer initialConfig={initialConfig}>
+      <div style={{ position: 'relative' }} className={className}>
+        <RichTextPlugin
+          contentEditable={
+            <div
+              className="editor-with-line-numbers"
+              style={{
+                border: variant === 'borderless' ? 'none' : '1px solid #DFE4ED',
+                borderRadius: '6px',
+                minHeight: minheight,
+              }}
+            >
+              <div className="line-numbers">
+                <div>1</div>
+              </div>
+              <div className="editor-content-wrapper">
+                <ContentEditable
+                  className="editor-content-with-numbers"
+                  style={{
+                    minHeight: minheight,
+                    padding: variant === 'borderless' ? '0' : '4px 0',
+                    outline: 'none',
+                    resize: 'none',
+                    fontSize,
+                    lineHeight,
+                    border: 'none',
+                  }}
+                />
+              </div>
+            </div>
+          }
+          placeholder={
+            <div
+              style={{
+                minHeight: placeHolderMinheight,
+                position: 'absolute',
+                top: '4px',
+                left: '16px',
+                color: '#A8A9AA',
+                fontSize,
+                lineHeight: placeHolderMinheight,
+                pointerEvents: 'none',
+              }}
+            >
+              {placeholder}
+            </div>
+          }
+          ErrorBoundary={LexicalErrorBoundary}
+        />
+        <HistoryPlugin />
+        <CommandPlugin />
+        <Jinja2HighlightPlugin />
+        <LineNumberPlugin />
+        <AutocompletePlugin options={options} enableJinja2 />
+        <CharacterCountPlugin setCount={() => {}} onChange={onChange} />
+        <InitialValuePlugin value={value} options={options} enableLineNumbers />
+        <BlurPlugin enableJinja2 />
+      </div>
+    </LexicalComposer>
+  );
+};
+
+export default Jinja2Editor;
--- a/web/src/views/Workflow/components/Editor/index.tsx
+++ b/web/src/views/Workflow/components/Editor/index.tsx
@@ -4,26 +4,20 @@
 * @Last Modified by: ZhaoYing
 * @Last Modified time: 2026-03-25 10:58:47
 */
-import { type FC, useState, useEffect, useMemo } from 'react';
+import { type FC, useState, useMemo } from 'react';
 import { LexicalComposer } from '@lexical/react/LexicalComposer';
 import { RichTextPlugin } from '@lexical/react/LexicalRichTextPlugin';
 import { ContentEditable } from '@lexical/react/LexicalContentEditable';
 import { HistoryPlugin } from '@lexical/react/LexicalHistoryPlugin';
-// import { AutoFocusPlugin } from '@lexical/react/LexicalAutoFocusPlugin';
 import { LexicalErrorBoundary } from '@lexical/react/LexicalErrorBoundary';
-// import { HeadingNode, QuoteNode } from '@lexical/rich-text';
-// import { ListItemNode, ListNode } from '@lexical/list';
-// import { LinkNode } from '@lexical/link';
-// import { CodeNode } from '@lexical/code';

 import AutocompletePlugin, { type Suggestion } from './plugin/AutocompletePlugin'
 import CharacterCountPlugin from './plugin/CharacterCountPlugin'
 import InitialValuePlugin from './plugin/InitialValuePlugin';
 import CommandPlugin from './plugin/CommandPlugin';
-import Jinja2HighlightPlugin from './plugin/Jinja2HighlightPlugin';
-import LineNumberPlugin from './plugin/LineNumberPlugin';
 import BlurPlugin from './plugin/BlurPlugin';
 import { VariableNode } from './nodes/VariableNode'
+import Jinja2Editor from './Jinja2Editor';

 // Props interface for Lexical Editor component
 export interface LexicalEditorProps {
@@ -50,16 +44,6 @@ const theme = {
  },
 };

-// Theme with Jinja2 syntax highlighting
-const jinja2Theme = {
-  ...theme,
-  code: 'jinja2-expression',
-  text: {
-    ...theme.text,
-    code: 'jinja2-inline',
-  },
-};
-
 // Main Lexical Editor component
 const Editor: FC<LexicalEditorProps> =({
  placeholder = "请输入内容...",
@@ -74,97 +58,27 @@ const Editor: FC<LexicalEditorProps> =({
  className
 }) => {
  const [_count, setCount] = useState(0);
-  const [enableJinja2, setEnableJinja2] = useState(false)
-  const [enableLineNumbers, setEnableLineNumbers] = useState(false)

-  // Setup Jinja2 mode and inject styles when language changes
-  useEffect(() => {
-    const needsLineNumbers = language === 'jinja2';
-    setEnableJinja2(language === 'jinja2');
-    setEnableLineNumbers(needsLineNumbers);
-
-    if (needsLineNumbers) {
-      const styleId = 'code-editor-styles';
-      let existingStyle = document.getElementById(styleId);
-
-      if (!existingStyle) {
-        const style = document.createElement('style');
-        style.id = styleId;
-        style.textContent = `
-          .jinja2-expression {
-            background-color: #f6f8fa !important;
-            border: 1px solid #d1d9e0 !important;
-            border-radius: 3px !important;
-            padding: 2px 4px !important;
-            font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace !important;
-            font-size: 13px !important;
-            color: #0969da !important;
-          }
-          .jinja2-inline {
-            background-color: #f6f8fa !important;
-            padding: 1px 3px !important;
-            border-radius: 2px !important;
-            font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace !important;
-            font-size: 13px !important;
-            color: #0969da !important;
-          }
-          .editor-paragraph {
-            margin: 0;
-          }
-          .editor-paragraph:has-text('{') .editor-text,
-          .editor-paragraph:has-text('[') .editor-text {
-            font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace !important;
-          }
-          .editor-with-line-numbers {
-            display: flex;
-          }
-          .line-numbers {
-            font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
-            font-size: 12px;
-            line-height: 16px;
-            padding: 4px 8px;
-            text-align: right;
-            user-select: none;
-            display: flex;
-            flex-direction: column;
-          }
-          .line-numbers > div {
-            min-height: 20px;
-            display: flex;
-            align-items: flex-start;
-          }
-          .editor-content-wrapper {
-            flex: 1;
-          }
-          .editor-content-with-numbers {
-            white-space: pre-wrap;
-            font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
-          }
-          .editor-content-with-numbers p {
-            margin: 0;
-            min-height: 20px;
-          }
-        `;
-        document.head.appendChild(style);
-      }
-    }
-  }, [language])
+  if (language === 'jinja2') {
+    return (
+      <Jinja2Editor
+        placeholder={placeholder}
+        value={value}
+        onChange={onChange}
+        options={options}
+        variant={variant}
+        size={size}
+        height={height}
+        className={className}
+      />
+    );
+  }

  // Lexical editor configuration
  const initialConfig = {
    namespace: 'AutocompleteEditor',
-    theme: enableJinja2 ? jinja2Theme : theme,
-    nodes: enableJinja2 ? [
-      // When Jinja2 is enabled, use plain text instead of VariableNode
-    ] : [
-      // HeadingNode,
-      // QuoteNode,
-      // ListItemNode,
-      // ListNode,
-      // LinkNode,
-      // CodeNode,
-      VariableNode,
-    ],
+    theme,
+    nodes: [VariableNode],
    onError: (error: Error) => {
      console.error(error);
    },
@@ -198,54 +112,26 @@ const Editor: FC<LexicalEditorProps> =({
      <div style={{ position: 'relative' }} className={className}>
        <RichTextPlugin
          contentEditable={
-            enableLineNumbers ? (
-            // Editor with line numbers for Jinja2 mode
-              <div className="editor-with-line-numbers" style={{
-                border: variant === 'borderless' ? 'none' : '1px solid #DFE4ED',
-                borderRadius: '6px',
+            <ContentEditable
+              style={{
                minHeight: minheight,
-              }}>
-                <div className="line-numbers">
-                  <div>1</div>
-                </div>
-                <div className="editor-content-wrapper">
-                  <ContentEditable
-                    className="editor-content-with-numbers"
-                    style={{
-                      minHeight: minheight,
-                      padding: variant === 'borderless' ? '0' : '4px 0',
-                      outline: 'none',
-                      resize: 'none',
-                      fontSize: fontSize,
-                      lineHeight: lineHeight,
-                      border: 'none',
-                    }}
-                  />
-                </div>
-              </div>
-            ) : (
-              // Standard editor without line numbers
-              <ContentEditable
-                style={{
-                  minHeight: minheight,
-                  padding: height ? '4px 6px' : variant === 'borderless' ? '0' : '6px 8px',
-                  border: variant === 'borderless' ? 'none' : '1px solid #EBEBEB',
-                  borderRadius: '8px',
-                  outline: 'none',
-                  resize: 'none',
-                  fontSize: fontSize,
-                  lineHeight: lineHeight,
-                }}
-              />
-            )
+                padding: height ? '4px 6px' : variant === 'borderless' ? '0' : '6px 8px',
+                border: variant === 'borderless' ? 'none' : '1px solid #EBEBEB',
+                borderRadius: '8px',
+                outline: 'none',
+                resize: 'none',
+                fontSize: fontSize,
+                lineHeight: lineHeight,
+              }}
+            />
          }
          placeholder={
            <div
              style={{
                minHeight: placeHolderMinheight,
                position: 'absolute',
-                top: enableLineNumbers ? '4px' : variant === 'borderless' ? '0' : '6px',
-                left: enableLineNumbers ? '16px' : (variant === 'borderless' ? '0' : '11px'),
+                top: variant === 'borderless' ? '0' : '6px',
+                left: variant === 'borderless' ? '0' : '11px',
                color: '#A8A9AA',
                fontSize: fontSize,
                lineHeight: placeHolderMinheight,
@@ -257,15 +143,12 @@ const Editor: FC<LexicalEditorProps> =({
          }
          ErrorBoundary={LexicalErrorBoundary}
        />
-        {/* Editor plugins */}
        <HistoryPlugin />
        <CommandPlugin />
-        {language === 'jinja2' && <Jinja2HighlightPlugin />}
-        {enableLineNumbers && <LineNumberPlugin />}
-        <AutocompletePlugin options={options} enableJinja2={enableJinja2} />
+        <AutocompletePlugin options={options} enableJinja2={false} />
        <CharacterCountPlugin setCount={(count) => { setCount(count) }} onChange={onChange} />
-        <InitialValuePlugin value={value} options={options} enableLineNumbers={enableLineNumbers} />
-        <BlurPlugin enableJinja2={enableJinja2} />
+        <InitialValuePlugin value={value} options={options} enableLineNumbers={false} />
+        <BlurPlugin enableJinja2={false} />
      </div>
    </LexicalComposer>
  );
--- a/web/src/views/Workflow/components/Editor/plugin/CharacterCountPlugin.tsx
+++ b/web/src/views/Workflow/components/Editor/plugin/CharacterCountPlugin.tsx
@@ -1,4 +1,4 @@
-import { useEffect } from 'react';
+import { useEffect, useRef } from 'react';
 import { $getRoot, $isParagraphNode } from 'lexical';
 import { useLexicalComposerContext } from '@lexical/react/LexicalComposerContext';

@@ -6,9 +6,15 @@ import { $isVariableNode } from '../nodes/VariableNode';

 const CharacterCountPlugin = ({ setCount, onChange }: { setCount: (count: number) => void; onChange?: (value: string) => void }) => {
  const [editor] = useLexicalComposerContext();
+  const isReadyRef = useRef(false);

  useEffect(() => {
-    return editor.registerUpdateListener(({ editorState }) => {
+    return editor.registerUpdateListener(({ editorState, tags }) => {
+      if (tags.has('programmatic')) {
+        isReadyRef.current = true;
+        return;
+      }
+      if (!isReadyRef.current) return;
      editorState.read(() => {
        const root = $getRoot();
        let serializedContent = '';
--- a/web/src/views/Workflow/hooks/useWorkflowGraph.ts
+++ b/web/src/views/Workflow/hooks/useWorkflowGraph.ts
@@ -491,32 +491,36 @@ export const useWorkflowGraph = ({
   * @param node - Clicked node
   */
  const nodeClick = ({ node }: { node: Node }) => {
-    // Ignore add-node type node clicks
-    const nodeData = node.getData()
-    if (nodeData?.type === 'add-node' || nodeData.type === 'break' || nodeData.type === 'cycle-start') {
-      setSelectedNode(null)
-      return;
-    }
+    blankClick()

-    const nodes = graphRef.current?.getNodes();
-
-    nodes?.forEach(vo => {
-      const data = vo.getData();
-      if (data.isSelected) {
-        vo.setData({
-          ...data,
-          isSelected: false,
-        });
+    setTimeout(() => {
+      // Ignore add-node type node clicks
+      const nodeData = node.getData()
+      if (nodeData?.type === 'add-node' || nodeData.type === 'break' || nodeData.type === 'cycle-start') {
+        setSelectedNode(null)
+        return;
      }
-    });
-    node.setData({
-      ...nodeData,
-      isSelected: true,
-    });
-    clearEdgeSelect()
-    if (nodeData.type !== 'notes') {
-      setSelectedNode(node);
-    }
+
+      const nodes = graphRef.current?.getNodes();
+
+      nodes?.forEach(vo => {
+        const data = vo.getData();
+        if (data.isSelected) {
+          vo.setData({
+            ...data,
+            isSelected: false,
+          });
+        }
+      });
+      node.setData({
+        ...nodeData,
+        isSelected: true,
+      });
+      clearEdgeSelect()
+      if (nodeData.type !== 'notes') {
+        setSelectedNode(node);
+      }
+    }, 0)
  };
  /**
   * Handle edge click event