diff --git a/.github/workflows/release-notify-wechat.yml b/.github/workflows/release-notify-wechat.yml index 7b3378b0..bc67518b 100644 --- a/.github/workflows/release-notify-wechat.yml +++ b/.github/workflows/release-notify-wechat.yml @@ -13,70 +13,104 @@ jobs: steps: # 防止 GitHub HEAD 未同步 - - name: Wait for ref sync - run: sleep 3 + - run: sleep 3 # 1️⃣ 获取分支 HEAD - name: Get HEAD id: head - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: ${{ github.repository }} - BASE_REF: ${{ github.event.pull_request.base.ref }} run: | HEAD_SHA=$(curl -s \ - -H "Authorization: Bearer $GH_TOKEN" \ - "https://api.github.com/repos/$REPO/git/ref/heads/$BASE_REF" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + https://api.github.com/repos/${{ github.repository }}/git/ref/heads/${{ github.event.pull_request.base.ref }} \ | jq -r '.object.sha') echo "head_sha=$HEAD_SHA" >> $GITHUB_OUTPUT # 2️⃣ 判断是否最终PR - name: Check Latest id: check - env: - MERGE_SHA: ${{ github.event.pull_request.merge_commit_sha }} - HEAD_SHA: ${{ steps.head.outputs.head_sha }} run: | - if [ "$MERGE_SHA" = "$HEAD_SHA" ]; then + if [ "${{ github.event.pull_request.merge_commit_sha }}" = "${{ steps.head.outputs.head_sha }}" ]; then echo "ok=true" >> $GITHUB_OUTPUT else echo "ok=false" >> $GITHUB_OUTPUT fi - # 3️⃣ 获取 commits - - name: Get Commits + # 3️⃣ 尝试从 PR body 提取 Sourcery 摘要 + - name: Extract Sourcery Summary if: steps.check.outputs.ok == 'true' - id: commits + id: sourcery env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMMITS_URL: ${{ github.event.pull_request.commits_url }} + PR_BODY: ${{ github.event.pull_request.body }} + run: | + python3 << 'PYEOF' + import os, re + + body = os.environ.get("PR_BODY", "") or "" + match = re.search( + r"## Summary by Sourcery\s*\n(.*?)(?=\n## |\Z)", + body, + re.DOTALL + ) + + if match: + summary = match.group(1).strip() + found = "true" + else: + summary = "" + found = "false" + + with open("sourcery_summary.txt", "w", encoding="utf-8") as f: + f.write(summary) + + with open(os.environ["GITHUB_OUTPUT"], "a") as gh: + gh.write(f"found={found}\n") + gh.write("summary< commits.txt - # 4️⃣ 阿里 AI 总结(通义千问) - - name: AI Summary (Qwen) - if: steps.check.outputs.ok == 'true' - id: ai + - name: AI Summary (Qwen Fallback) + if: steps.check.outputs.ok == 'true' && steps.sourcery.outputs.found == 'false' + id: qwen env: DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: | - COMMIT_MESSAGES=$(cat commits.txt) + python3 << 'PYEOF' + import json, os, urllib.request - jq -n --arg msgs "请用中文总结以下代码提交,输出3-5条,面向测试人员: - $COMMIT_MESSAGES" \ - '{"model": "qwen-plus", "input": {"prompt": $msgs}}' > ai_payload.json + with open("commits.txt", "r") as f: + commits = f.read().strip() - SUMMARY=$(curl -s https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation \ - -H "Authorization: Bearer $DASHSCOPE_API_KEY" \ - -H "Content-Type: application/json" \ - -d @ai_payload.json | jq -r '.output.text') + prompt = "请用中文总结以下代码提交,输出3-5条要点,面向测试人员。直接输出编号列表,不要输出标题或前言:\n" + commits + payload = {"model": "qwen-plus", "input": {"prompt": prompt}} + data = json.dumps(payload, ensure_ascii=False).encode("utf-8") - echo "summary<> $GITHUB_OUTPUT - echo "$SUMMARY" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT + req = urllib.request.Request( + "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation", + data=data, + headers={ + "Authorization": "Bearer " + os.environ["DASHSCOPE_API_KEY"], + "Content-Type": "application/json" + } + ) + resp = urllib.request.urlopen(req) + result = json.loads(resp.read().decode()) + summary = result.get("output", {}).get("text", "AI 摘要生成失败") + + with open(os.environ["GITHUB_OUTPUT"], "a") as gh: + gh.write("summary< 📦 **分支**: %s\n> 👤 **提交人**: %s\n> 📝 **标题**: %s\n\n### 🧠 AI变更摘要\n%s\n\n---\n🔗 [查看PR详情](%s)' \ - "$BRANCH" "$AUTHOR" "$PR_TITLE" "$AI_SUMMARY" "$PR_URL") + python3 << 'PYEOF' + import json, os, urllib.request - jq -n --arg content "$CONTENT" \ - '{"msgtype": "markdown", "markdown": {"content": $content}}' > wechat_payload.json + if os.environ.get("SOURCERY_FOUND") == "true": + label = "Summary by Sourcery" + summary = os.environ.get("SOURCERY_SUMMARY", "") + else: + label = "AI变更摘要" + summary = os.environ.get("QWEN_SUMMARY", "AI 摘要生成失败") - curl -s "$WECHAT_WEBHOOK" \ - -H 'Content-Type: application/json' \ - -d @wechat_payload.json + content = ( + "## 🚀 Release 发布通知\n" + "> 📦 **分支**: " + os.environ["BRANCH"] + "\n" + "> 👤 **提交人**: " + os.environ["AUTHOR"] + "\n" + "> 📝 **标题**: " + os.environ["PR_TITLE"] + "\n\n" + "### 🧠 " + label + "\n" + + summary + "\n\n" + "---\n" + "🔗 [查看PR详情](" + os.environ["PR_URL"] + ")" + ) + payload = {"msgtype": "markdown", "markdown": {"content": content}} + data = json.dumps(payload, ensure_ascii=False).encode("utf-8") + req = urllib.request.Request( + os.environ["WECHAT_WEBHOOK"], + data=data, + headers={"Content-Type": "application/json"} + ) + resp = urllib.request.urlopen(req) + print(resp.read().decode()) + PYEOF diff --git a/.gitignore b/.gitignore index 0ec6822c..a1896da7 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ time.log celerybeat-schedule.db search_results.json redbear-mem-metrics/ +redbear-mem-benchmark/ pitch-deck/ api/migrations/versions diff --git a/api/app/celery_app.py b/api/app/celery_app.py index 23fd82ed..0f8a197c 100644 --- a/api/app/celery_app.py +++ b/api/app/celery_app.py @@ -111,6 +111,9 @@ celery_app.conf.update( # Clustering tasks → memory_tasks queue (使用相同的 worker,避免 macOS fork 问题) 'app.tasks.run_incremental_clustering': {'queue': 'memory_tasks'}, + # Metadata extraction → memory_tasks queue + 'app.tasks.extract_user_metadata': {'queue': 'memory_tasks'}, + # Document tasks → document_tasks queue (prefork worker) 'app.core.rag.tasks.parse_document': {'queue': 'document_tasks'}, 'app.core.rag.tasks.build_graphrag_for_kb': {'queue': 'document_tasks'}, diff --git a/api/app/controllers/auth_controller.py b/api/app/controllers/auth_controller.py index fb9ebaa5..baae44a6 100644 --- a/api/app/controllers/auth_controller.py +++ b/api/app/controllers/auth_controller.py @@ -136,7 +136,7 @@ async def refresh_token( # 检查用户是否存在 user = auth_service.get_user_by_id(db, userId) if not user: - raise BusinessException(t("auth.user.not_found"), code=BizCode.USER_NOT_FOUND) + raise BusinessException(t("auth.user.not_found"), code=BizCode.USER_NO_ACCESS) # 检查 refresh token 黑名单 if settings.ENABLE_SINGLE_SESSION: diff --git a/api/app/controllers/prompt_optimizer_controller.py b/api/app/controllers/prompt_optimizer_controller.py index 80f14cd3..b9fc697c 100644 --- a/api/app/controllers/prompt_optimizer_controller.py +++ b/api/app/controllers/prompt_optimizer_controller.py @@ -124,10 +124,11 @@ async def get_prompt_opt( skill=data.skill ): # chunk 是 prompt 的增量内容 - yield f"event:message\ndata: {json.dumps(chunk)}\n\n" + yield f"event:message\ndata: {json.dumps(chunk, ensure_ascii=False)}\n\n" except Exception as e: yield f"event:error\ndata: {json.dumps( - {"error": str(e)} + {"error": str(e)}, + ensure_ascii=False )}\n\n" yield "event:end\ndata: {}\n\n" diff --git a/api/app/controllers/service/app_api_controller.py b/api/app/controllers/service/app_api_controller.py index 93caa200..a78fd842 100644 --- a/api/app/controllers/service/app_api_controller.py +++ b/api/app/controllers/service/app_api_controller.py @@ -14,6 +14,7 @@ from app.core.response_utils import success from app.db import get_db from app.models.app_model import App from app.models.app_model import AppType +from app.models.app_release_model import AppRelease from app.repositories import knowledge_repository from app.repositories.end_user_repository import EndUserRepository from app.schemas import AppChatRequest, conversation_schema @@ -61,18 +62,18 @@ async def list_apps(): # return success(data={"received": True}, msg="消息已接收") -def _checkAppConfig(app: App): - if app.type == AppType.AGENT: - if not app.current_release.config: +def _checkAppConfig(release: AppRelease): + if release.type == AppType.AGENT: + if not release.config: raise BusinessException("Agent 应用未配置模型", BizCode.AGENT_CONFIG_MISSING) - elif app.type == AppType.MULTI_AGENT: - if not app.current_release.config: + elif release.type == AppType.MULTI_AGENT: + if not release.config: raise BusinessException("Multi-Agent 应用未配置模型", BizCode.AGENT_CONFIG_MISSING) - elif app.type == AppType.WORKFLOW: - if not app.current_release.config: + elif release.type == AppType.WORKFLOW: + if not release.config: raise BusinessException("工作流应用未配置模型", BizCode.AGENT_CONFIG_MISSING) else: - raise BusinessException("不支持的应用类型", BizCode.AGENT_CONFIG_MISSING) + raise BusinessException("不支持的应用类型", BizCode.APP_TYPE_NOT_SUPPORTED) @router.post("/chat") @@ -86,10 +87,22 @@ async def chat( app_service: Annotated[AppService, Depends(get_app_service)] = None, message: str = Body(..., description="聊天消息内容"), ): + """ + Agent/Workflow 聊天接口 + + - 不传 version:使用当前生效版本(current_release,回滚后为回滚目标版本) + - 传 version=release_id:使用指定版本uuid的历史快照,例如 {"version": "{{release_id}}"} + """ body = await request.json() payload = AppChatRequest(**body) app = app_service.get_app(api_key_auth.resource_id, api_key_auth.workspace_id) + + # 版本切换:指定 release_id 时查找对应历史快照,否则使用当前激活版本 + if payload.version is not None: + active_release = app_service.get_release_by_id(app.id, payload.version) + else: + active_release = app.current_release other_id = payload.user_id workspace_id = api_key_auth.workspace_id end_user_repo = EndUserRepository(db) @@ -127,7 +140,7 @@ async def chat( storage_type = 'neo4j' app_type = app.type # check app config - _checkAppConfig(app) + _checkAppConfig(active_release) # 获取或创建会话(提前验证) conversation = conversation_service.create_or_get_conversation( @@ -142,7 +155,7 @@ async def chat( # print("="*50) # print(app.current_release.default_model_config_id) - agent_config = agent_config_4_app_release(app.current_release) + agent_config = agent_config_4_app_release(active_release) # print(agent_config.default_model_config_id) # thinking 开关:仅当 agent 配置了 deep_thinking 且请求 thinking=True 时才启用 @@ -194,7 +207,7 @@ async def chat( return success(data=conversation_schema.ChatResponse(**result).model_dump(mode="json")) elif app_type == AppType.MULTI_AGENT: # 多 Agent 流式返回 - config = multi_agent_config_4_app_release(app.current_release) + config = multi_agent_config_4_app_release(active_release) if payload.stream: async def event_generator(): async for event in app_chat_service.multi_agent_chat_stream( @@ -237,7 +250,7 @@ async def chat( return success(data=conversation_schema.ChatResponse(**result).model_dump(mode="json")) elif app_type == AppType.WORKFLOW: # 多 Agent 流式返回 - config = workflow_config_4_app_release(app.current_release) + config = workflow_config_4_app_release(active_release) if payload.stream: async def event_generator(): async for event in app_chat_service.workflow_chat_stream( @@ -253,7 +266,7 @@ async def chat( user_rag_memory_id=user_rag_memory_id, app_id=app.id, workspace_id=workspace_id, - release_id=app.current_release.id, + release_id=active_release.id, public=True ): event_type = event.get("event", "message") @@ -288,7 +301,7 @@ async def chat( files=payload.files, app_id=app.id, workspace_id=workspace_id, - release_id=app.current_release.id + release_id=active_release.id ) logger.debug( "工作流试运行返回结果", @@ -302,6 +315,4 @@ async def chat( msg="工作流任务执行成功" ) else: - from app.core.exceptions import BusinessException - from app.core.error_codes import BizCode raise BusinessException(f"不支持的应用类型: {app_type}", BizCode.APP_TYPE_NOT_SUPPORTED) diff --git a/api/app/core/error_codes.py b/api/app/core/error_codes.py index 41f58734..01b6115d 100644 --- a/api/app/core/error_codes.py +++ b/api/app/core/error_codes.py @@ -41,6 +41,7 @@ class BizCode(IntEnum): FILE_NOT_FOUND = 4006 APP_NOT_FOUND = 4007 RELEASE_NOT_FOUND = 4008 + USER_NO_ACCESS = 4009 # 冲突/状态(5xxx) DUPLICATE_NAME = 5001 @@ -118,6 +119,7 @@ HTTP_MAPPING = { BizCode.WORKSPACE_ACCESS_DENIED: 403, BizCode.NOT_FOUND: 400, BizCode.USER_NOT_FOUND: 200, + BizCode.USER_NO_ACCESS: 401, BizCode.WORKSPACE_NOT_FOUND: 400, BizCode.MODEL_NOT_FOUND: 400, BizCode.KNOWLEDGE_NOT_FOUND: 400, diff --git a/api/app/core/memory/agent/langgraph_graph/nodes/perceptual_retrieve_node.py b/api/app/core/memory/agent/langgraph_graph/nodes/perceptual_retrieve_node.py index f248afa5..1cf5e291 100644 --- a/api/app/core/memory/agent/langgraph_graph/nodes/perceptual_retrieve_node.py +++ b/api/app/core/memory/agent/langgraph_graph/nodes/perceptual_retrieve_node.py @@ -153,7 +153,7 @@ class PerceptualSearchService: return [] try: r = await search_perceptual( - connector=connector, q=escaped, + connector=connector, query=escaped, end_user_id=self.end_user_id, limit=limit * 5, # 多查一些以提高命中率 ) @@ -178,7 +178,7 @@ class PerceptualSearchService: if not escaped.strip(): return [] r = await search_perceptual( - connector=connector, q=escaped, + connector=connector, query=escaped, end_user_id=self.end_user_id, limit=limit, ) return r.get("perceptuals", []) diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index bae4643e..3b0ea1ee 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -14,6 +14,7 @@ from dotenv import load_dotenv from app.core.logging_config import get_agent_logger from app.core.memory.agent.utils.get_dialogs import get_chunked_dialogs +from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES from app.core.memory.storage_services.extraction_engine.extraction_orchestrator import ExtractionOrchestrator from app.core.memory.storage_services.extraction_engine.knowledge_extraction.memory_summary import \ memory_summary_generation @@ -191,15 +192,37 @@ async def write( if success: logger.info("Successfully saved all data to Neo4j") - # 使用 Celery 异步任务触发聚类(不阻塞主流程) if all_entity_nodes: + end_user_id = all_entity_nodes[0].end_user_id + + # Neo4j 写入完成后,用 PgSQL 权威 aliases 覆盖 Neo4j 用户实体 + try: + from app.repositories.end_user_info_repository import EndUserInfoRepository + if end_user_id: + with get_db_context() as db_session: + info = EndUserInfoRepository(db_session).get_by_end_user_id(uuid.UUID(end_user_id)) + pg_aliases = info.aliases if info and info.aliases else [] + if info is not None: + # 将 Python 侧占位名集合作为参数传入,避免 Cypher 硬编码 + placeholder_names = list(_USER_PLACEHOLDER_NAMES) + await neo4j_connector.execute_query( + """ + MATCH (e:ExtractedEntity) + WHERE e.end_user_id = $end_user_id AND toLower(e.name) IN $placeholder_names + SET e.aliases = $aliases + """, + end_user_id=end_user_id, aliases=pg_aliases, + placeholder_names=placeholder_names, + ) + logger.info(f"[AliasSync] Neo4j 用户实体 aliases 已用 PgSQL 权威源覆盖: {pg_aliases}") + except Exception as sync_err: + logger.warning(f"[AliasSync] PgSQL→Neo4j aliases 同步失败(不影响主流程): {sync_err}") + + # 使用 Celery 异步任务触发聚类(不阻塞主流程) try: from app.tasks import run_incremental_clustering - end_user_id = all_entity_nodes[0].end_user_id new_entity_ids = [e.id for e in all_entity_nodes] - - # 异步提交 Celery 任务 task = run_incremental_clustering.apply_async( kwargs={ "end_user_id": end_user_id, @@ -207,7 +230,6 @@ async def write( "llm_model_id": str(memory_config.llm_model_id) if memory_config.llm_model_id else None, "embedding_model_id": str(memory_config.embedding_model_id) if memory_config.embedding_model_id else None, }, - # 设置任务优先级(低优先级,不影响主业务) priority=3, ) logger.info( @@ -215,7 +237,6 @@ async def write( f"task_id={task.id}, end_user_id={end_user_id}, entity_count={len(new_entity_ids)}" ) except Exception as e: - # 聚类任务提交失败不影响主流程 logger.error(f"[Clustering] 提交聚类任务失败(不影响主流程): {e}", exc_info=True) break diff --git a/api/app/core/memory/models/__init__.py b/api/app/core/memory/models/__init__.py index 41d08908..eed8e8c4 100644 --- a/api/app/core/memory/models/__init__.py +++ b/api/app/core/memory/models/__init__.py @@ -58,6 +58,14 @@ from app.core.memory.models.triplet_models import ( TripletExtractionResponse, ) +# User metadata models +from app.core.memory.models.metadata_models import ( + UserMetadata, + UserMetadataBehavioralHints, + UserMetadataProfile, + MetadataExtractionResponse, +) + # Ontology scenario models (LLM extracted from scenarios) from app.core.memory.models.ontology_scenario_models import ( OntologyClass, @@ -124,6 +132,10 @@ __all__ = [ "Entity", "Triplet", "TripletExtractionResponse", + "UserMetadata", + "UserMetadataBehavioralHints", + "UserMetadataProfile", + "MetadataExtractionResponse", # Ontology models "OntologyClass", "OntologyExtractionResponse", diff --git a/api/app/core/memory/models/graph_models.py b/api/app/core/memory/models/graph_models.py index 1b8c9d52..6e34421c 100644 --- a/api/app/core/memory/models/graph_models.py +++ b/api/app/core/memory/models/graph_models.py @@ -364,12 +364,14 @@ class ChunkNode(Node): Attributes: dialog_id: ID of the parent dialog content: The text content of the chunk + speaker: Speaker identifier ('user' or 'assistant') chunk_embedding: Optional embedding vector for the chunk sequence_number: Order of this chunk within the dialog metadata: Additional chunk metadata as key-value pairs """ dialog_id: str = Field(..., description="ID of the parent dialog") content: str = Field(..., description="The text content of the chunk") + speaker: Optional[str] = Field(None, description="Speaker identifier: 'user' for user messages, 'assistant' for AI responses") chunk_embedding: Optional[List[float]] = Field(None, description="Chunk embedding vector") sequence_number: int = Field(..., description="Order of this chunk within the dialog") metadata: dict = Field(default_factory=dict, description="Additional chunk metadata") diff --git a/api/app/core/memory/models/metadata_models.py b/api/app/core/memory/models/metadata_models.py new file mode 100644 index 00000000..55c2359e --- /dev/null +++ b/api/app/core/memory/models/metadata_models.py @@ -0,0 +1,57 @@ +"""Models for user metadata extraction. + +Independent from triplet_models.py - these models are used by the +standalone metadata extraction pipeline (post-dedup async Celery task). +""" + +from typing import List + +from pydantic import BaseModel, ConfigDict, Field + + +class UserMetadataProfile(BaseModel): + """用户画像信息""" + + model_config = ConfigDict(extra="ignore") + role: str = Field(default="", description="用户职业或角色") + domain: str = Field(default="", description="用户所在领域") + expertise: List[str] = Field( + default_factory=list, description="用户擅长的技能或工具" + ) + interests: List[str] = Field( + default_factory=list, description="用户关注的话题或领域标签" + ) + + +class UserMetadataBehavioralHints(BaseModel): + """行为偏好""" + + model_config = ConfigDict(extra="ignore") + learning_stage: str = Field(default="", description="学习阶段") + preferred_depth: str = Field(default="", description="偏好深度") + tone_preference: str = Field(default="", description="语气偏好") + + +class UserMetadata(BaseModel): + """用户元数据顶层结构""" + + model_config = ConfigDict(extra="ignore") + profile: UserMetadataProfile = Field(default_factory=UserMetadataProfile) + behavioral_hints: UserMetadataBehavioralHints = Field( + default_factory=UserMetadataBehavioralHints + ) + knowledge_tags: List[str] = Field(default_factory=list, description="知识标签") + + +class MetadataExtractionResponse(BaseModel): + """元数据提取 LLM 响应结构""" + + model_config = ConfigDict(extra="ignore") + user_metadata: UserMetadata = Field(default_factory=UserMetadata) + aliases_to_add: List[str] = Field( + default_factory=list, + description="本次新发现的用户别名(用户自我介绍或他人对用户的称呼)", + ) + aliases_to_remove: List[str] = Field( + default_factory=list, description="用户明确否认的别名(如'我不叫XX了')" + ) diff --git a/api/app/core/memory/src/search.py b/api/app/core/memory/src/search.py index ef39a12e..4e2883d5 100644 --- a/api/app/core/memory/src/search.py +++ b/api/app/core/memory/src/search.py @@ -1,4 +1,3 @@ -import argparse import asyncio import json import math @@ -6,7 +5,6 @@ import os import time from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, List, Optional -from uuid import UUID if TYPE_CHECKING: from app.schemas.memory_config_schema import MemoryConfig @@ -23,7 +21,7 @@ from app.core.memory.utils.config.config_utils import ( ) from app.core.memory.utils.data.text_utils import extract_plain_query from app.core.memory.utils.data.time_utils import normalize_date_safe -from app.core.memory.utils.llm.llm_utils import get_reranker_client +# from app.core.memory.utils.llm.llm_utils import get_reranker_client from app.core.models.base import RedBearModelConfig from app.db import get_db_context from app.repositories.neo4j.graph_search import ( @@ -748,11 +746,10 @@ async def run_hybrid_search( if search_type in ["keyword", "hybrid"]: # Keyword-based search logger.info("[PERF] Starting keyword search...") - keyword_start = time.time() keyword_task = asyncio.create_task( search_graph( connector=connector, - q=query_text, + query=query_text, end_user_id=end_user_id, limit=limit, include=include @@ -762,7 +759,6 @@ async def run_hybrid_search( if search_type in ["embedding", "hybrid"]: # Embedding-based search logger.info("[PERF] Starting embedding search...") - embedding_start = time.time() # 从数据库读取嵌入器配置(按 ID)并构建 RedBearModelConfig config_load_start = time.time() @@ -904,10 +900,10 @@ async def run_hybrid_search( else: results["latency_metrics"] = latency_metrics - logger.info(f"[PERF] ===== SEARCH PERFORMANCE SUMMARY =====") + logger.info("[PERF] ===== SEARCH PERFORMANCE SUMMARY =====") logger.info(f"[PERF] Total search completed in {total_latency:.4f}s") logger.info(f"[PERF] Latency breakdown: {json.dumps(latency_metrics, indent=2)}") - logger.info(f"[PERF] =========================================") + logger.info("[PERF] =========================================") # Sanitize results: drop large/unused fields _remove_keys_recursive(results, ["name_embedding"]) # drop entity name embeddings from outputs diff --git a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py index 7e0976fe..715f190c 100644 --- a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py +++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py @@ -82,51 +82,38 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode): canonical.connect_strength = next(iter(pair)) # 别名合并(去重保序,使用标准化工具) + # 用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源,去重合并时不修改 try: canonical_name = (getattr(canonical, "name", "") or "").strip() - incoming_name = (getattr(ent, "name", "") or "").strip() - - # 收集所有需要合并的别名 - all_aliases = [] - - # 1. 添加canonical现有的别名 - existing = getattr(canonical, "aliases", []) or [] - all_aliases.extend(existing) - - # 2. 添加incoming实体的名称(如果不同于canonical的名称) - if incoming_name and incoming_name != canonical_name: - all_aliases.append(incoming_name) - - # 3. 添加incoming实体的所有别名 - incoming = getattr(ent, "aliases", []) or [] - all_aliases.extend(incoming) - - # 4. 标准化并去重(优先使用alias_utils工具函数) - try: - from app.core.memory.utils.alias_utils import normalize_aliases - canonical.aliases = normalize_aliases(canonical_name, all_aliases) - except Exception: - # 如果导入失败,使用增强的去重逻辑 - seen_normalized = set() - unique_aliases = [] + if canonical_name.lower() not in _USER_PLACEHOLDER_NAMES: + incoming_name = (getattr(ent, "name", "") or "").strip() - for alias in all_aliases: - if not alias: - continue - - alias_stripped = str(alias).strip() - if not alias_stripped or alias_stripped == canonical_name: - continue - - # 标准化:转小写用于去重判断 - alias_normalized = alias_stripped.lower() - - if alias_normalized not in seen_normalized: - seen_normalized.add(alias_normalized) - unique_aliases.append(alias_stripped) + # 收集所有需要合并的别名,过滤掉用户占位名避免污染非用户实体 + all_aliases = list(getattr(canonical, "aliases", []) or []) + if incoming_name and incoming_name != canonical_name and incoming_name.lower() not in _USER_PLACEHOLDER_NAMES: + all_aliases.append(incoming_name) + all_aliases.extend( + a for a in (getattr(ent, "aliases", []) or []) + if a and a.strip().lower() not in _USER_PLACEHOLDER_NAMES + ) - # 排序并赋值 - canonical.aliases = sorted(unique_aliases) + try: + from app.core.memory.utils.alias_utils import normalize_aliases + canonical.aliases = normalize_aliases(canonical_name, all_aliases) + except Exception: + seen_normalized = set() + unique_aliases = [] + for alias in all_aliases: + if not alias: + continue + alias_stripped = str(alias).strip() + if not alias_stripped or alias_stripped == canonical_name: + continue + alias_normalized = alias_stripped.lower() + if alias_normalized not in seen_normalized: + seen_normalized.add(alias_normalized) + unique_aliases.append(alias_stripped) + canonical.aliases = sorted(unique_aliases) except Exception: pass @@ -733,66 +720,37 @@ def fuzzy_match( def _merge_entities_with_aliases(canonical: ExtractedEntityNode, losing: ExtractedEntityNode): - """ 模糊匹配中的实体合并。 + """模糊匹配中的实体合并(别名部分)。 - 合并策略: - 1. 保留canonical的主名称不变 - 2. 将losing的主名称添加为alias(如果不同) - 3. 合并两个实体的所有aliases - 4. 自动去重(case-insensitive)并排序 - - Args: - canonical: 规范实体(保留) - losing: 被合并实体(删除) - - Note: - 使用alias_utils.normalize_aliases进行标准化去重 + 用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源,跳过合并。 """ - # 获取规范实体的名称 canonical_name = (getattr(canonical, "name", "") or "").strip() + if canonical_name.lower() in _USER_PLACEHOLDER_NAMES: + return + losing_name = (getattr(losing, "name", "") or "").strip() - # 收集所有需要合并的别名 - all_aliases = [] - - # 1. 添加canonical现有的别名 - current_aliases = getattr(canonical, "aliases", []) or [] - all_aliases.extend(current_aliases) - - # 2. 添加losing实体的名称(如果不同于canonical的名称) + all_aliases = list(getattr(canonical, "aliases", []) or []) if losing_name and losing_name != canonical_name: all_aliases.append(losing_name) + all_aliases.extend(getattr(losing, "aliases", []) or []) - # 3. 添加losing实体的所有别名 - losing_aliases = getattr(losing, "aliases", []) or [] - all_aliases.extend(losing_aliases) - - # 4. 标准化并去重(使用标准化后的字符串进行去重) try: from app.core.memory.utils.alias_utils import normalize_aliases canonical.aliases = normalize_aliases(canonical_name, all_aliases) except Exception: - # 如果导入失败,使用增强的去重逻辑 - # 使用标准化后的字符串作为key进行去重 seen_normalized = set() unique_aliases = [] - for alias in all_aliases: if not alias: continue - alias_stripped = str(alias).strip() if not alias_stripped or alias_stripped == canonical_name: continue - - # 标准化:转小写用于去重判断 alias_normalized = alias_stripped.lower() - if alias_normalized not in seen_normalized: seen_normalized.add(alias_normalized) unique_aliases.append(alias_stripped) - - # 排序并赋值 canonical.aliases = sorted(unique_aliases) # ========== 主循环:遍历所有实体对进行模糊匹配 ========== diff --git a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py index 3229674d..75fc87d2 100644 --- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py +++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py @@ -311,10 +311,53 @@ class ExtractionOrchestrator: dialog_data_list, ) - # 步骤 7: 同步用户别名到数据库表(仅正式模式) + # 步骤 7: 触发异步元数据和别名提取(仅正式模式) if not is_pilot_run: - logger.info("步骤 7: 同步用户别名到 end_user 和 end_user_info 表") - await self._update_end_user_other_name(entity_nodes, dialog_data_list) + try: + from app.core.memory.storage_services.extraction_engine.knowledge_extraction.metadata_extractor import ( + MetadataExtractor, + ) + + metadata_extractor = MetadataExtractor( + llm_client=self.llm_client, language=self.language + ) + user_statements = ( + metadata_extractor.collect_user_related_statements( + entity_nodes, statement_nodes, statement_entity_edges + ) + ) + if user_statements: + end_user_id = ( + dialog_data_list[0].end_user_id + if dialog_data_list + else None + ) + config_id = ( + dialog_data_list[0].config_id + if dialog_data_list + and hasattr(dialog_data_list[0], "config_id") + else None + ) + if end_user_id: + from app.tasks import extract_user_metadata_task + + extract_user_metadata_task.delay( + end_user_id=str(end_user_id), + statements=user_statements, + config_id=str(config_id) if config_id else None, + language=self.language, + ) + logger.info( + f"已触发异步元数据提取任务,共 {len(user_statements)} 条用户相关 statement" + ) + else: + logger.info("未找到用户相关 statement,跳过元数据提取") + except Exception as e: + logger.error( + f"触发元数据提取任务失败(不影响主流程): {e}", exc_info=True + ) + + # 别名同步已迁移到 Celery 元数据提取任务中,不再在此处执行 logger.info(f"知识提取流水线运行完成({mode_str})") return ( @@ -1107,6 +1150,7 @@ class ExtractionOrchestrator: end_user_id=dialog_data.end_user_id, run_id=dialog_data.run_id, # 使用 dialog_data 的 run_id content=chunk.content, + speaker=getattr(chunk, 'speaker', None), chunk_embedding=chunk.chunk_embedding, sequence_number=chunk_idx, # 添加必需的 sequence_number 字段 created_at=dialog_data.created_at, @@ -1342,23 +1386,23 @@ class ExtractionOrchestrator: async def _update_end_user_other_name( self, entity_nodes: List[ExtractedEntityNode], - dialog_data_list: List[DialogData] + dialog_data_list: List[DialogData], ) -> None: """ 将本轮提取的用户别名同步到 end_user 和 end_user_info 表。 - 注意:此方法在 Neo4j 写入之前调用,因此不能依赖 Neo4j 作为别名的权威数据源。 - 改为直接使用内存中去重后的 entity_nodes 的 aliases,与 PgSQL 已有的 aliases 合并。 + PgSQL end_user_info.aliases 是用户别名的唯一权威源。 + 此方法仅将本轮 LLM 从对话中新提取的别名增量追加到 PgSQL, + 不再从 Neo4j 二层去重合并历史别名,避免脏数据反向污染 PgSQL。 策略: - 1. 从内存中的 entity_nodes 提取本轮用户别名(current_aliases) - 2. 从去重后的 entity_nodes 中提取完整别名(含 Neo4j 二层去重合并的历史别名) - 3. 从 PgSQL end_user_info 读取已有的 aliases(db_aliases) - 4. 合并 db_aliases + deduped_aliases + current_aliases,去重保序 - 5. 写回 PgSQL + 1. 从本轮对话原始发言中提取用户别名(current_aliases) + 2. 从 PgSQL end_user_info 读取已有的 aliases(db_aliases) + 3. 合并 db_aliases + current_aliases,去重保序 + 4. 写回 PgSQL Args: - entity_nodes: 去重后的实体节点列表(内存中,含二层去重合并结果) + entity_nodes: 去重后的实体节点列表(内存中) dialog_data_list: 对话数据列表 """ try: @@ -1374,11 +1418,6 @@ class ExtractionOrchestrator: # 1. 提取本轮对话的用户别名(保持 LLM 提取的原始顺序,不排序) current_aliases = self._extract_current_aliases(entity_nodes, dialog_data_list) - # 1.5 从去重后的 entity_nodes 中提取完整别名 - # 二层去重会将 Neo4j 中已有的历史别名合并到 entity_nodes 中, - # 这里提取出来确保 PgSQL 与 Neo4j 的别名保持同步 - deduped_aliases = self._extract_deduped_entity_aliases(entity_nodes) - # 1.6 从 Neo4j 查询已有的 AI 助手别名,作为额外的排除源 # (防止 LLM 未提取出 AI 助手实体时,AI 别名泄漏到用户别名中) neo4j_assistant_aliases = await self._fetch_neo4j_assistant_aliases(end_user_id) @@ -1390,19 +1429,12 @@ class ExtractionOrchestrator: ] if len(current_aliases) < before_count: logger.info(f"通过 Neo4j AI 助手别名排除了 {before_count - len(current_aliases)} 个误归属别名") - # 同样过滤 deduped_aliases - deduped_aliases = [ - a for a in deduped_aliases - if a.strip().lower() not in neo4j_assistant_aliases - ] - if not current_aliases and not deduped_aliases: + if not current_aliases: logger.debug(f"本轮未提取到用户别名,跳过同步: end_user_id={end_user_id}") return logger.info(f"本轮对话提取的 aliases: {current_aliases}") - if deduped_aliases: - logger.info(f"去重后实体的完整 aliases(含历史): {deduped_aliases}") # 2. 同步到数据库 end_user_uuid = uuid.UUID(end_user_id) @@ -1413,21 +1445,15 @@ class ExtractionOrchestrator: logger.warning(f"未找到 end_user_id={end_user_id} 的用户记录") return - # 3. 从 PgSQL 读取已有 aliases 并与本轮合并 + # 3. 从 PgSQL 读取已有 aliases 并与本轮新增合并 info = EndUserInfoRepository(db).get_by_end_user_id(end_user_uuid) db_aliases = (info.aliases if info and info.aliases else []) # 过滤掉占位名称 db_aliases = [a for a in db_aliases if a.strip().lower() not in self.USER_PLACEHOLDER_NAMES] - # 合并:已有 + 去重后完整别名 + 本轮新增,去重保序 + # 合并:PgSQL 已有 + 本轮新增,去重保序(不再合并 Neo4j 历史别名) merged_aliases = list(db_aliases) seen_lower = {a.strip().lower() for a in merged_aliases} - # 先合并去重后实体的完整别名(含 Neo4j 历史别名) - for alias in deduped_aliases: - if alias.strip().lower() not in seen_lower: - merged_aliases.append(alias) - seen_lower.add(alias.strip().lower()) - # 再合并本轮新提取的别名 for alias in current_aliases: if alias.strip().lower() not in seen_lower: merged_aliases.append(alias) @@ -1461,16 +1487,13 @@ class ExtractionOrchestrator: info.aliases = merged_aliases logger.info(f"同步合并后 aliases 到 end_user_info: {merged_aliases}") else: - first_alias = current_aliases[0].strip() if current_aliases else ( - deduped_aliases[0].strip() if deduped_aliases else "" - ) + first_alias = current_aliases[0].strip() if current_aliases else "" # 确保 first_alias 不是占位名称 if first_alias and first_alias.lower() not in self.USER_PLACEHOLDER_NAMES: db.add(EndUserInfo( end_user_id=end_user_uuid, other_name=first_alias, aliases=merged_aliases, - meta_data={} )) logger.info(f"创建 end_user_info 记录,other_name={first_alias}, aliases={merged_aliases}") @@ -1478,9 +1501,6 @@ class ExtractionOrchestrator: except Exception as e: logger.error(f"更新 end_user other_name 失败: {e}", exc_info=True) - - - # 用户实体占位名称,不允许作为 other_name 或出现在 aliases 中 # 复用 deduped_and_disamb 模块级常量,避免重复维护 USER_PLACEHOLDER_NAMES = _USER_PLACEHOLDER_NAMES @@ -1587,7 +1607,6 @@ class ExtractionOrchestrator: if candidate and candidate.lower() in self.USER_PLACEHOLDER_NAMES: return None return candidate - return None async def _run_dedup_and_write_summary( diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/metadata_extractor.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/metadata_extractor.py new file mode 100644 index 00000000..19f1e533 --- /dev/null +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/metadata_extractor.py @@ -0,0 +1,175 @@ +""" +Metadata extractor module. + +Collects user-related statements from post-dedup graph data and +extracts user metadata via an independent LLM call. +""" + +import logging +from typing import List, Optional + +from app.core.memory.models.graph_models import ( + ExtractedEntityNode, + StatementEntityEdge, + StatementNode, +) + +logger = logging.getLogger(__name__) + +# Reuse the same user-entity detection logic from dedup module +_USER_NAMES = {"用户", "我", "user", "i"} +_CANONICAL_USER_TYPE = "用户" + + +def _is_user_entity(ent: ExtractedEntityNode) -> bool: + """判断实体是否为用户实体""" + name = (getattr(ent, "name", "") or "").strip().lower() + etype = (getattr(ent, "entity_type", "") or "").strip() + return name in _USER_NAMES or etype == _CANONICAL_USER_TYPE + + +class MetadataExtractor: + """Extracts user metadata from post-dedup graph data via independent LLM call.""" + + def __init__(self, llm_client, language: Optional[str] = None): + self.llm_client = llm_client + self.language = language + + @staticmethod + def detect_language(statements: List[str]) -> str: + """根据 statement 文本内容检测语言。 + 如果文本中包含中文字符则返回 "zh",否则返回 "en"。 + """ + import re + + combined = " ".join(statements) + if re.search(r"[\u4e00-\u9fff]", combined): + return "zh" + return "en" + + def collect_user_related_statements( + self, + entity_nodes: List[ExtractedEntityNode], + statement_nodes: List[StatementNode], + statement_entity_edges: List[StatementEntityEdge], + ) -> List[str]: + """ + 从去重后的数据中筛选与用户直接相关且由用户发言的 statement 文本。 + + 筛选逻辑: + 1. 用户实体 → StatementEntityEdge → statement(直接关联) + 2. 只保留 speaker="user" 的 statement(过滤 assistant 回复的噪声) + + Returns: + 用户发言的 statement 文本列表 + """ + # Find user entity IDs + user_entity_ids = set() + for ent in entity_nodes: + if _is_user_entity(ent): + user_entity_ids.add(ent.id) + + if not user_entity_ids: + logger.debug("未找到用户实体节点,跳过 statement 收集") + return [] + + # 用户实体 → StatementEntityEdge → statement + target_stmt_ids = set() + for edge in statement_entity_edges: + if edge.target in user_entity_ids: + target_stmt_ids.add(edge.source) + + # Collect: only speaker="user" statements, preserving order + result = [] + seen = set() + total_associated = 0 + skipped_non_user = 0 + for stmt_node in statement_nodes: + if stmt_node.id in target_stmt_ids and stmt_node.id not in seen: + total_associated += 1 + speaker = getattr(stmt_node, "speaker", None) or "unknown" + if speaker == "user": + text = (stmt_node.statement or "").strip() + if text: + result.append(text) + else: + skipped_non_user += 1 + seen.add(stmt_node.id) + + logger.info( + f"收集到 {len(result)} 条用户发言 statement " + f"(直接关联: {total_associated}, speaker=user: {len(result)}, " + f"跳过非user: {skipped_non_user})" + ) + if result: + for i, text in enumerate(result): + logger.info(f" [user statement {i + 1}] {text}") + if total_associated > 0 and len(result) == 0: + logger.warning( + f"有 {total_associated} 条直接关联 statement 但全部被 speaker 过滤," + f"可能本次写入不包含 user 消息" + ) + return result + + async def extract_metadata( + self, + statements: List[str], + existing_metadata: Optional[dict] = None, + existing_aliases: Optional[List[str]] = None, + ) -> Optional[tuple]: + """ + 对筛选后的 statement 列表调用 LLM 提取元数据和用户别名。 + + Args: + statements: 用户发言的 statement 文本列表 + existing_metadata: 数据库已有的元数据(可选) + existing_aliases: 数据库已有的用户别名列表(可选) + + Returns: + (UserMetadata, List[str], List[str]) tuple: (metadata, aliases_to_add, aliases_to_remove) on success, None on failure + """ + if not statements: + return None + + try: + from app.core.memory.utils.prompt.prompt_utils import prompt_env + + if self.language: + detected_language = self.language + logger.info(f"元数据提取使用显式指定语言: {detected_language}") + else: + detected_language = self.detect_language(statements) + logger.info(f"元数据提取语言自动检测结果: {detected_language}") + + template = prompt_env.get_template("extract_user_metadata.jinja2") + prompt = template.render( + statements=statements, + language=detected_language, + existing_metadata=existing_metadata, + existing_aliases=existing_aliases, + json_schema="", + ) + + from app.core.memory.models.metadata_models import ( + MetadataExtractionResponse, + ) + + response = await self.llm_client.response_structured( + messages=[{"role": "user", "content": prompt}], + response_model=MetadataExtractionResponse, + ) + + if response: + metadata = response.user_metadata if response.user_metadata else None + to_add = response.aliases_to_add if response.aliases_to_add else [] + to_remove = ( + response.aliases_to_remove if response.aliases_to_remove else [] + ) + return metadata, to_add, to_remove + + logger.warning("LLM 返回的响应为空") + return None + + except Exception as e: + logger.error(f"元数据提取 LLM 调用失败: {e}", exc_info=True) + return None diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/statement_extraction.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/statement_extraction.py index b06bd70f..d90a49ba 100644 --- a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/statement_extraction.py +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/statement_extraction.py @@ -1,6 +1,5 @@ import asyncio import logging -import os from datetime import datetime from typing import Any, Dict, List, Optional @@ -82,6 +81,7 @@ class StatementExtractor: logger.warning(f"Chunk {getattr(chunk, 'id', 'unknown')} has no speaker field or is empty") return None + async def _extract_statements(self, chunk, end_user_id: Optional[str] = None, dialogue_content: str = None) -> List[Statement]: """Process a single chunk and return extracted statements @@ -94,7 +94,8 @@ class StatementExtractor: List of ExtractedStatement objects extracted from the chunk """ chunk_content = chunk.content - + chunk_speaker = self._get_speaker_from_chunk(chunk) + if not chunk_content or len(chunk_content.strip()) < 5: logger.warning(f"Chunk {chunk.id} content too short or empty, skipping") return [] @@ -149,8 +150,6 @@ class StatementExtractor: relevence_info = RelevenceInfo[relevence_str] if relevence_str in RelevenceInfo.__members__ else RelevenceInfo.RELEVANT except (KeyError, ValueError): relevence_info = RelevenceInfo.RELEVANT - - chunk_speaker = self._get_speaker_from_chunk(chunk) chunk_statement = Statement( statement=extracted_stmt.statement, diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py index 7fb74b82..ea355ca1 100644 --- a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py @@ -1,4 +1,3 @@ -import os import asyncio from typing import List, Dict, Optional diff --git a/api/app/core/memory/storage_services/forgetting_engine/access_history_manager.py b/api/app/core/memory/storage_services/forgetting_engine/access_history_manager.py index a71c0957..e5254646 100644 --- a/api/app/core/memory/storage_services/forgetting_engine/access_history_manager.py +++ b/api/app/core/memory/storage_services/forgetting_engine/access_history_manager.py @@ -42,22 +42,21 @@ class AccessHistoryManager: - access_count: 访问次数 特性: - - 原子性更新:使用Neo4j事务确保所有字段同时更新或回滚 - - 并发安全:使用乐观锁机制防止并发冲突 + - 原子性更新:使用 APOC 原子操作确保并发安全 + - 批次内合并:同一批次中对同一节点的多次访问合并为一次更新 - 一致性保证:提供一致性检查和自动修复功能 - 智能修剪:自动修剪过长的访问历史 Attributes: connector: Neo4j连接器实例 actr_calculator: ACT-R激活值计算器实例 - max_retries: 并发冲突时的最大重试次数 """ def __init__( self, connector: Neo4jConnector, actr_calculator: ACTRCalculator, - max_retries: int = 3 + max_retries: int = 5 ): """ 初始化访问历史管理器 @@ -65,47 +64,35 @@ class AccessHistoryManager: Args: connector: Neo4j连接器实例 actr_calculator: ACT-R激活值计算器实例 - max_retries: 并发冲突时的最大重试次数(默认3次) + max_retries: 已废弃,保留参数兼容性(APOC 原子操作无需重试) """ self.connector = connector self.actr_calculator = actr_calculator - self.max_retries = max_retries - + async def record_access( self, node_id: str, node_label: str, end_user_id: Optional[str] = None, - current_time: Optional[datetime] = None + current_time: Optional[datetime] = None, + access_times: int = 1 ) -> Dict[str, Any]: """ 记录节点访问并原子性更新所有相关字段 - 这是核心方法,实现了: - 1. 首次访问:初始化access_history,计算初始激活值 - 2. 后续访问:追加访问历史,重新计算激活值 - 3. 历史修剪:当历史过长时自动修剪 - 4. 原子性:所有字段在单个事务中更新 - 5. 并发安全:使用乐观锁重试机制 - Args: node_id: 节点ID node_label: 节点标签(Statement, ExtractedEntity, MemorySummary) end_user_id: 组ID(可选,用于过滤) current_time: 当前时间(可选,默认使用系统时间) + access_times: 本次访问次数(默认1,批量合并时可能大于1) Returns: - Dict[str, Any]: 更新后的节点数据,包含: - - id: 节点ID - - activation_value: 更新后的激活值 - - access_history: 更新后的访问历史 - - last_access_time: 最后访问时间 - - access_count: 访问次数 - - importance_score: 重要性分数 + Dict[str, Any]: 更新后的节点数据 Raises: ValueError: 如果节点不存在或节点标签无效 - RuntimeError: 如果重试次数耗尽仍然失败 + RuntimeError: 如果更新失败 """ if current_time is None: current_time = datetime.now() @@ -119,55 +106,48 @@ class AccessHistoryManager: f"Invalid node_label: {node_label}. Must be one of {valid_labels}" ) - # 使用乐观锁重试机制处理并发冲突 - for attempt in range(self.max_retries): - try: - # 步骤1:读取当前节点状态 - node_data = await self._fetch_node(node_id, node_label, end_user_id) - - if not node_data: - raise ValueError( - f"Node not found: {node_label} with id={node_id}" - ) - - # 步骤2:计算新的访问历史和激活值 - update_data = await self._calculate_update( - node_data=node_data, - current_time=current_time, - current_time_iso=current_time_iso + try: + # 步骤1:读取当前节点状态 + node_data = await self._fetch_node(node_id, node_label, end_user_id) + + if not node_data: + raise ValueError( + f"Node not found: {node_label} with id={node_id}" ) - - # 步骤3:原子性更新节点(使用事务) - updated_node = await self._atomic_update( - node_id=node_id, - node_label=node_label, - update_data=update_data, - end_user_id=end_user_id - ) - - logger.info( - f"成功记录访问: {node_label}[{node_id}], " - f"activation={update_data['activation_value']:.4f}, " - f"access_count={update_data['access_count']}" - ) - - return updated_node - - except Exception as e: - if attempt < self.max_retries - 1: - logger.warning( - f"访问记录失败(尝试 {attempt + 1}/{self.max_retries}): {str(e)}" - ) - continue - else: - logger.error( - f"访问记录失败,重试次数耗尽: {node_label}[{node_id}], " - f"错误: {str(e)}" - ) - raise RuntimeError( - f"Failed to record access after {self.max_retries} attempts: {str(e)}" - ) - + + # 步骤2:计算新的访问历史和激活值 + update_data = await self._calculate_update( + node_data=node_data, + current_time=current_time, + current_time_iso=current_time_iso, + access_times=access_times + ) + + # 步骤3:使用 APOC 原子操作更新节点(无需重试) + updated_node = await self._atomic_update( + node_id=node_id, + node_label=node_label, + update_data=update_data, + end_user_id=end_user_id + ) + + logger.info( + f"成功记录访问: {node_label}[{node_id}], " + f"activation={update_data['activation_value']:.4f}, " + f"access_count={update_data['access_count']}" + f"{f', 合并访问次数={access_times}' if access_times > 1 else ''}" + ) + + return updated_node + + except Exception as e: + logger.error( + f"访问记录失败: {node_label}[{node_id}], 错误: {str(e)}" + ) + raise RuntimeError( + f"Failed to record access: {str(e)}" + ) from e + async def record_batch_access( self, node_ids: List[str], @@ -178,11 +158,10 @@ class AccessHistoryManager: """ 批量记录多个节点的访问 - 为提高性能,批量更新多个节点的访问历史。 - 每个节点独立更新,失败的节点不影响其他节点。 + 对同一个节点的多次访问会先在内存中合并,只发起一次更新。 Args: - node_ids: 节点ID列表 + node_ids: 节点ID列表(可包含重复ID) node_label: 节点标签(所有节点必须是同一类型) end_user_id: 组ID(可选) current_time: 当前时间(可选) @@ -196,25 +175,38 @@ class AccessHistoryManager: if current_time is None: current_time = datetime.now() - # PERFORMANCE FIX: Process all nodes in parallel instead of sequentially - tasks = [] + # 合并同一节点的访问次数,避免对同一节点并发写入 + access_count_map: Dict[str, int] = {} for node_id in node_ids: + access_count_map[node_id] = access_count_map.get(node_id, 0) + 1 + + merged_count = len(node_ids) - len(access_count_map) + if merged_count > 0: + logger.info( + f"批量访问合并: 原始={len(node_ids)}, " + f"去重后={len(access_count_map)}, 合并={merged_count}" + ) + + # 对去重后的节点并行发起更新 + tasks = [] + for node_id, access_times in access_count_map.items(): task = self.record_access( node_id=node_id, node_label=node_label, end_user_id=end_user_id, - current_time=current_time + current_time=current_time, + access_times=access_times ) - tasks.append(task) + tasks.append((node_id, task)) - # Execute all tasks in parallel - task_results = await asyncio.gather(*tasks, return_exceptions=True) + task_results = await asyncio.gather( + *[t for _, t in tasks], return_exceptions=True + ) - # Collect successful results and count failures results = [] failed_count = 0 - for node_id, result in zip(node_ids, task_results): + for (node_id, _), result in zip(tasks, task_results): if isinstance(result, Exception): failed_count += 1 logger.warning( @@ -225,12 +217,12 @@ class AccessHistoryManager: batch_duration = time.time() - batch_start logger.info( - f"[PERF] 批量访问记录完成: 成功 {len(results)}/{len(node_ids)}, " + f"[PERF] 批量访问记录完成: 成功 {len(results)}/{len(access_count_map)}, " f"失败 {failed_count}, 耗时 {batch_duration:.4f}s" ) return results - + async def check_consistency( self, node_id: str, @@ -239,22 +231,6 @@ class AccessHistoryManager: ) -> Tuple[ConsistencyCheckResult, Optional[str]]: """ 检查节点数据的一致性 - - 验证以下一致性规则: - 1. access_history[-1] == last_access_time - 2. len(access_history) == access_count - 3. 如果有访问历史,必须有激活值 - 4. 激活值必须在有效范围内 [offset, 1.0] - - Args: - node_id: 节点ID - node_label: 节点标签 - end_user_id: 组ID(可选) - - Returns: - Tuple[ConsistencyCheckResult, Optional[str]]: - - 一致性检查结果枚举 - - 错误描述(如果不一致) """ node_data = await self._fetch_node(node_id, node_label, end_user_id) @@ -266,7 +242,6 @@ class AccessHistoryManager: access_count = node_data.get('access_count', 0) activation_value = node_data.get('activation_value') - # 检查1:access_history[-1] == last_access_time if access_history and last_access_time: if access_history[-1] != last_access_time: return ( @@ -275,7 +250,6 @@ class AccessHistoryManager: f"last_access_time={last_access_time}" ) - # 检查2:len(access_history) == access_count if len(access_history) != access_count: return ( ConsistencyCheckResult.INCONSISTENT_HISTORY_COUNT, @@ -283,14 +257,12 @@ class AccessHistoryManager: f"access_count={access_count}" ) - # 检查3:有访问历史必须有激活值 if access_history and activation_value is None: return ( ConsistencyCheckResult.MISSING_ACTIVATION, "Node has access_history but activation_value is None" ) - # 检查4:激活值范围 if activation_value is not None: offset = self.actr_calculator.offset if not (offset <= activation_value <= 1.0): @@ -301,30 +273,14 @@ class AccessHistoryManager: ) return ConsistencyCheckResult.CONSISTENT, None - + async def check_batch_consistency( self, node_label: str, end_user_id: Optional[str] = None, limit: int = 1000 ) -> Dict[str, Any]: - """ - 批量检查多个节点的一致性 - - Args: - node_label: 节点标签 - end_user_id: 组ID(可选) - limit: 检查的最大节点数 - - Returns: - Dict[str, Any]: 一致性检查报告,包含: - - total_checked: 检查的节点总数 - - consistent_count: 一致的节点数 - - inconsistent_count: 不一致的节点数 - - inconsistencies: 不一致节点的详细信息列表 - - consistency_rate: 一致性率(0-1) - """ - # 查询所有相关节点 + """批量检查多个节点的一致性""" query = f""" MATCH (n:{node_label}) WHERE n.access_history IS NOT NULL @@ -343,7 +299,6 @@ class AccessHistoryManager: results = await self.connector.execute_query(query, **params) node_ids = [r['id'] for r in results] - # 检查每个节点 inconsistencies = [] consistent_count = 0 @@ -382,32 +337,15 @@ class AccessHistoryManager: ) return report - + async def repair_inconsistency( self, node_id: str, node_label: str, end_user_id: Optional[str] = None ) -> bool: - """ - 自动修复节点的数据不一致问题 - - 修复策略: - 1. 如果access_history[-1] != last_access_time:使用access_history[-1] - 2. 如果len(access_history) != access_count:使用len(access_history) - 3. 如果有历史但无激活值:重新计算激活值 - 4. 如果激活值超出范围:重新计算激活值 - - Args: - node_id: 节点ID - node_label: 节点标签 - end_user_id: 组ID(可选) - - Returns: - bool: 修复成功返回True,否则返回False - """ + """自动修复节点的数据不一致问题""" try: - # 检查一致性 result, message = await self.check_consistency( node_id=node_id, node_label=node_label, @@ -418,7 +356,6 @@ class AccessHistoryManager: logger.info(f"节点数据一致,无需修复: {node_label}[{node_id}]") return True - # 获取节点数据 node_data = await self._fetch_node(node_id, node_label, end_user_id) if not node_data: logger.error(f"节点不存在,无法修复: {node_label}[{node_id}]") @@ -427,17 +364,13 @@ class AccessHistoryManager: access_history = node_data.get('access_history') or [] importance_score = node_data.get('importance_score', 0.5) - # 准备修复数据 repair_data = {} - # 修复last_access_time if access_history: repair_data['last_access_time'] = access_history[-1] - # 修复access_count repair_data['access_count'] = len(access_history) - # 修复activation_value if access_history: current_time = datetime.now() last_access_dt = datetime.fromisoformat(access_history[-1]) @@ -453,7 +386,6 @@ class AccessHistoryManager: ) repair_data['activation_value'] = activation_value - # 执行修复 query = f""" MATCH (n:{node_label} {{id: $node_id}}) """ @@ -484,26 +416,16 @@ class AccessHistoryManager: f"修复节点失败: {node_label}[{node_id}], 错误: {str(e)}" ) return False - + # ==================== 私有辅助方法 ==================== - + async def _fetch_node( self, node_id: str, node_label: str, end_user_id: Optional[str] = None ) -> Optional[Dict[str, Any]]: - """ - 获取节点数据 - - Args: - node_id: 节点ID - node_label: 节点标签 - end_user_id: 组ID(可选) - - Returns: - Optional[Dict[str, Any]]: 节点数据,如果不存在返回None - """ + """获取节点数据""" query = f""" MATCH (n:{node_label} {{id: $node_id}}) """ @@ -527,12 +449,13 @@ class AccessHistoryManager: if results: return results[0] return None - + async def _calculate_update( self, node_data: Dict[str, Any], current_time: datetime, - current_time_iso: str + current_time_iso: str, + access_times: int = 1 ) -> Dict[str, Any]: """ 计算更新数据 @@ -541,45 +464,40 @@ class AccessHistoryManager: node_data: 当前节点数据 current_time: 当前时间(datetime对象) current_time_iso: 当前时间(ISO格式字符串) + access_times: 本次访问次数(合并后可能大于1) Returns: - Dict[str, Any]: 更新数据,包含所有需要更新的字段 + Dict[str, Any]: 更新数据 """ - access_history = node_data.get('access_history') or [] - # Handle None importance_score - default to 0.5 importance_score = node_data.get('importance_score') if importance_score is None: importance_score = 0.5 - # 追加新的访问时间 - new_access_history = access_history + [current_time_iso] + # 本次新增的时间戳 + new_timestamps = [current_time_iso] * access_times - # 修剪访问历史(如果过长) - access_history_dt = [ - datetime.fromisoformat(ts) for ts in new_access_history - ] + # 仅用本次新增的访问记录计算激活值 + new_history_dt = [current_time] * access_times trimmed_history_dt = self.actr_calculator.trim_access_history( - access_history=access_history_dt, + access_history=new_history_dt, current_time=current_time ) - trimmed_history = [ts.isoformat() for ts in trimmed_history_dt] - # 计算新的激活值 activation_value = self.actr_calculator.calculate_memory_activation( access_history=trimmed_history_dt, current_time=current_time, - last_access_time=current_time, # 最后访问时间就是当前时间 + last_access_time=current_time, importance_score=importance_score ) - # 返回所有需要更新的字段 return { 'activation_value': activation_value, - 'access_history': trimmed_history, + 'new_timestamps': new_timestamps, + 'access_count_delta': access_times, + 'access_count': len(trimmed_history_dt), 'last_access_time': current_time_iso, - 'access_count': len(trimmed_history) } - + async def _atomic_update( self, node_id: str, @@ -588,10 +506,10 @@ class AccessHistoryManager: end_user_id: Optional[str] = None ) -> Dict[str, Any]: """ - 原子性更新节点(使用乐观锁) + 原子性更新节点(使用 APOC 原子操作) - 使用Neo4j事务和版本号确保所有字段同时更新或回滚。 - 实现乐观锁机制防止并发冲突。 + 使用 apoc.atomic.add 和 apoc.atomic.insert 保证并发安全, + 无需 version 字段和乐观锁,数据库层面保证原子性。 Args: node_id: 节点ID @@ -603,126 +521,68 @@ class AccessHistoryManager: Dict[str, Any]: 更新后的节点数据 Raises: - RuntimeError: 如果更新失败或发生版本冲突 + RuntimeError: 如果更新失败 """ - # 定义事务函数 - async def update_transaction(tx, node_id, node_label, update_data, end_user_id): - # 步骤1:读取当前节点并获取版本号 - read_query = f""" - MATCH (n:{node_label} {{id: $node_id}}) - """ - if end_user_id: - read_query += " WHERE n.end_user_id = $end_user_id" - read_query += """ - RETURN n.id as id, - n.version as version, - n.activation_value as activation_value, - n.access_history as access_history, - n.last_access_time as last_access_time, - n.access_count as access_count, - n.importance_score as importance_score - """ + content_field_map = { + 'Statement': 'n.statement as statement', + 'MemorySummary': 'n.content as content', + 'ExtractedEntity': 'null as content_placeholder', + 'Community': 'n.summary as summary' + } + + if node_label not in content_field_map: + raise ValueError( + f"Unsupported node_label: {node_label}. " + f"Supported labels are: {list(content_field_map.keys())}" + ) + + content_field = content_field_map[node_label] + + where_clause = "" + if end_user_id: + where_clause = " AND n.end_user_id = $end_user_id" + + query = f""" + MATCH (n:{node_label} {{id: $node_id}}) + WHERE true{where_clause} + CALL apoc.atomic.add(n, 'access_count', $access_count_delta, 5) YIELD oldValue AS old_count + WITH n + CALL (n) {{ + UNWIND $new_timestamps AS ts + CALL apoc.atomic.insert(n, 'access_history', size(n.access_history), ts, 5) YIELD oldValue + RETURN count(*) AS inserted + }} + SET n.activation_value = $activation_value, + n.last_access_time = $last_access_time + RETURN n.id as id, + n.activation_value as activation_value, + n.access_history as access_history, + n.last_access_time as last_access_time, + n.access_count as access_count, + n.importance_score as importance_score, + {content_field} + """ + + params = { + 'node_id': node_id, + 'access_count_delta': update_data['access_count_delta'], + 'new_timestamps': update_data['new_timestamps'], + 'activation_value': update_data['activation_value'], + 'last_access_time': update_data['last_access_time'], + } + if end_user_id: + params['end_user_id'] = end_user_id + + try: + results = await self.connector.execute_query(query, **params) - read_params = {'node_id': node_id} - if end_user_id: - read_params['end_user_id'] = end_user_id - - read_result = await tx.run(read_query, **read_params) - current_node = await read_result.single() - - if not current_node: + if not results: raise RuntimeError(f"Node not found: {node_label}[{node_id}]") - # 获取当前版本号(如果不存在则为0) - current_version = current_node.get('version', 0) or 0 - new_version = current_version + 1 - - # 步骤2:使用乐观锁更新节点 - # 根据节点类型构建完整的查询语句 - content_field_map = { - 'Statement': 'n.statement as statement', - 'MemorySummary': 'n.content as content', - 'ExtractedEntity': 'null as content_placeholder' # 占位符,后续会被过滤 - } - - # 显式检查节点类型,不支持的类型抛出错误 - if node_label not in content_field_map: - raise ValueError( - f"Unsupported node_label: {node_label}. " - f"Supported labels are: {list(content_field_map.keys())}" - ) - - content_field = content_field_map[node_label] - - # 构建 WHERE 子句 - where_conditions = [] - if end_user_id: - where_conditions.append("n.end_user_id = $end_user_id") - - # 添加版本检查 - if current_version > 0: - where_conditions.append("n.version = $current_version") - else: - where_conditions.append("(n.version IS NULL OR n.version = 0)") - - where_clause = " AND ".join(where_conditions) if where_conditions else "true" - - # 构建完整的更新查询 - update_query = f""" - MATCH (n:{node_label} {{id: $node_id}}) - WHERE {where_clause} - SET n.activation_value = $activation_value, - n.access_history = $access_history, - n.last_access_time = $last_access_time, - n.access_count = $access_count, - n.version = $new_version - RETURN n.id as id, - n.activation_value as activation_value, - n.access_history as access_history, - n.last_access_time as last_access_time, - n.access_count as access_count, - n.importance_score as importance_score, - n.version as version, - {content_field} - """ - - update_params = { - 'node_id': node_id, - 'current_version': current_version, - 'new_version': new_version, - 'activation_value': update_data['activation_value'], - 'access_history': update_data['access_history'], - 'last_access_time': update_data['last_access_time'], - 'access_count': update_data['access_count'] - } - if end_user_id: - update_params['end_user_id'] = end_user_id - - update_result = await tx.run(update_query, **update_params) - updated_node = await update_result.single() - - if not updated_node: - raise RuntimeError( - f"Version conflict detected for {node_label}[{node_id}]. " - f"Expected version {current_version}, but node was modified by another transaction." - ) - - # 转换为字典并移除占位符字段 - result_dict = dict(updated_node) + result_dict = dict(results[0]) result_dict.pop('content_placeholder', None) return result_dict - - # 执行事务 - try: - result = await self.connector.execute_write_transaction( - update_transaction, - node_id=node_id, - node_label=node_label, - update_data=update_data, - end_user_id=end_user_id - ) - return result except Exception as e: logger.error( f"原子性更新失败: {node_label}[{node_id}], 错误: {str(e)}" diff --git a/api/app/core/memory/storage_services/search/keyword_search.py b/api/app/core/memory/storage_services/search/keyword_search.py index d2591945..2458cf30 100644 --- a/api/app/core/memory/storage_services/search/keyword_search.py +++ b/api/app/core/memory/storage_services/search/keyword_search.py @@ -5,7 +5,7 @@ 使用Neo4j的全文索引进行高效的文本匹配。 """ -from typing import List, Dict, Any, Optional +from typing import List, Optional from app.core.logging_config import get_memory_logger from app.repositories.neo4j.neo4j_connector import Neo4jConnector from app.core.memory.storage_services.search.search_strategy import SearchStrategy, SearchResult @@ -74,7 +74,7 @@ class KeywordSearchStrategy(SearchStrategy): # 调用底层的关键词搜索函数 results_dict = await search_graph( connector=self.connector, - q=query_text, + query=query_text, end_user_id=end_user_id, limit=limit, include=include_list diff --git a/api/app/core/memory/utils/data/text_utils.py b/api/app/core/memory/utils/data/text_utils.py index d0b10f97..eaed0940 100644 --- a/api/app/core/memory/utils/data/text_utils.py +++ b/api/app/core/memory/utils/data/text_utils.py @@ -22,7 +22,9 @@ def escape_lucene_query(query: str) -> str: s = s.replace("\r", " ").replace("\n", " ").strip() # Lucene reserved tokens/special characters - specials = ['&&', '||', '\\', '+', '-', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':'] + # NOTE: '/' is the regex delimiter in Lucene — must be escaped to prevent + # TokenMgrError when the query contains unmatched slashes. + specials = ['&&', '||', '\\', '+', '-', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '/'] # Replace longer tokens first to avoid partial double-escaping for token in sorted(specials, key=len, reverse=True): s = s.replace(token, f"\\{token}") diff --git a/api/app/core/memory/utils/prompt/prompts/extract_statement.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_statement.jinja2 index 3cdb5fd0..611bd6df 100644 --- a/api/app/core/memory/utils/prompt/prompts/extract_statement.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/extract_statement.jinja2 @@ -43,8 +43,9 @@ Each statement must be labeled as per the criteria mentioned below. 对话上下文和共指消解: - 将每个陈述句归属于说出它的参与者。 -- 如果参与者列表为说话者提供了名称(例如,"李雪(用户)"),请在提取的陈述句中使用具体名称("李雪"),而不是通用角色("用户")。 -- 将所有代词解析为对话上下文中的具体人物或实体。 +- **对于用户的发言:必须使用"用户"作为主语**,禁止将"用户"或"我"替换为用户的真实姓名或别名。例如,用户说"我叫张三"应提取为"用户叫张三",而不是"张三叫张三"。 +- 对于 AI 助手的发言:使用"助手"或"AI助手"作为主语。 +- 将所有代词解析为对话上下文中的具体人物或实体,但"我"必须解析为"用户"。 - 识别并将抽象引用解析为其具体名称(如果提到)。 - 将缩写和首字母缩略词扩展为其完整形式。 {% else %} @@ -68,8 +69,9 @@ Context Resolution Requirements: Conversational Context & Co-reference Resolution: - Attribute every statement to the participant who uttered it. -- If the participant list provides a name for a speaker (e.g., "李雪 (用户)"), use the specific name ("李雪") in the extracted statement, not the generic role ("用户"). -- Resolve all pronouns to the specific person or entity from the conversation's context. +- **For user's statements: always use "用户" (User) as the subject**. Do NOT replace "用户" or "I" with the user's real name or alias. For example, if the user says "I'm John", extract as "用户 is John", not "John is John". +- For AI assistant's statements: use "助手" or "AI助手" as the subject. +- Resolve all pronouns to the specific person or entity from the conversation's context, but "I"/"我" must always resolve to "用户". - Identify and resolve abstract references to their specific names if mentioned. - Expand abbreviations and acronyms to their full form. {% endif %} @@ -139,13 +141,13 @@ AI: "水彩画很有趣!水彩颜料通常由颜料与阿拉伯树胶等粘合 示例输出: { "statements": [ { - "statement": "Sarah Chen 最近一直在尝试水彩画。", + "statement": "用户最近一直在尝试水彩画。", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" }, { - "statement": "Sarah Chen 画了一些花朵。", + "statement": "用户画了一些花朵。", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" @@ -157,13 +159,13 @@ AI: "水彩画很有趣!水彩颜料通常由颜料与阿拉伯树胶等粘合 "relevance": "IRRELEVANT" }, { - "statement": "Sarah Chen 认为她的水彩画中的色彩组合可以改进。", + "statement": "用户认为她的水彩画中的色彩组合可以改进。", "statement_type": "OPINION", "temporal_type": "STATIC", "relevance": "RELEVANT" }, { - "statement": "Sarah Chen 真的很喜欢玫瑰和百合。", + "statement": "用户真的很喜欢玫瑰和百合。", "statement_type": "FACT", "temporal_type": "STATIC", "relevance": "RELEVANT" @@ -186,13 +188,13 @@ AI: "水彩画很有趣!水彩颜料通常由颜料和阿拉伯树胶等粘合 示例输出: { "statements": [ { - "statement": "张曼婷最近在尝试水彩画。", + "statement": "用户最近在尝试水彩画。", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" }, { - "statement": "张曼婷画了一些花朵。", + "statement": "用户画了一些花朵。", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" @@ -204,13 +206,13 @@ AI: "水彩画很有趣!水彩颜料通常由颜料和阿拉伯树胶等粘合 "relevance": "IRRELEVANT" }, { - "statement": "张曼婷觉得水彩画的色彩搭配还有提升的空间。", + "statement": "用户觉得水彩画的色彩搭配还有提升的空间。", "statement_type": "OPINION", "temporal_type": "STATIC", "relevance": "RELEVANT" }, { - "statement": "张曼婷很喜欢玫瑰和百合。", + "statement": "用户很喜欢玫瑰和百合。", "statement_type": "FACT", "temporal_type": "STATIC", "relevance": "RELEVANT" @@ -233,13 +235,13 @@ User: "I think the color combinations could use some improvement, but I really l Example Output: { "statements": [ { - "statement": "Sarah Chen has been trying watercolor painting recently.", + "statement": "用户 has been trying watercolor painting recently.", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" }, { - "statement": "Sarah Chen painted some flowers.", + "statement": "用户 painted some flowers.", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" @@ -251,13 +253,13 @@ Example Output: { "relevance": "IRRELEVANT" }, { - "statement": "Sarah Chen thinks the color combinations in her watercolor paintings could use some improvement.", + "statement": "用户 thinks the color combinations in her watercolor paintings could use some improvement.", "statement_type": "OPINION", "temporal_type": "STATIC", "relevance": "RELEVANT" }, { - "statement": "Sarah Chen really likes roses and lilies.", + "statement": "用户 really likes roses and lilies.", "statement_type": "FACT", "temporal_type": "STATIC", "relevance": "RELEVANT" @@ -280,13 +282,13 @@ AI: "水彩画很有趣!水彩颜料通常由颜料和阿拉伯树胶等粘合 Example Output: { "statements": [ { - "statement": "张曼婷最近在尝试水彩画。", + "statement": "用户最近在尝试水彩画。", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" }, { - "statement": "张曼婷画了一些花朵。", + "statement": "用户画了一些花朵。", "statement_type": "FACT", "temporal_type": "DYNAMIC", "relevance": "RELEVANT" @@ -298,13 +300,13 @@ Example Output: { "relevance": "IRRELEVANT" }, { - "statement": "张曼婷觉得水彩画的色彩搭配还有提升的空间。", + "statement": "用户觉得水彩画的色彩搭配还有提升的空间。", "statement_type": "OPINION", "temporal_type": "STATIC", "relevance": "RELEVANT" }, { - "statement": "张曼婷很喜欢玫瑰和百合。", + "statement": "用户很喜欢玫瑰和百合。", "statement_type": "FACT", "temporal_type": "STATIC", "relevance": "RELEVANT" diff --git a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 index 7ded48a4..1a79b482 100644 --- a/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/extract_triplet.jinja2 @@ -406,4 +406,12 @@ Output: - **⚠️ ALIASES ORDER: preserve temporal order of appearance** - **🚨 MANDATORY FIELD: EVERY entity MUST include "aliases" field, even if empty array []** +**Output JSON structure:** +```json +{ + "triplets": [...], + "entities": [...] +} +``` + {{ json_schema }} diff --git a/api/app/core/memory/utils/prompt/prompts/extract_user_metadata.jinja2 b/api/app/core/memory/utils/prompt/prompts/extract_user_metadata.jinja2 new file mode 100644 index 00000000..5d019b12 --- /dev/null +++ b/api/app/core/memory/utils/prompt/prompts/extract_user_metadata.jinja2 @@ -0,0 +1,135 @@ +===Task=== +Extract user metadata from the following conversation statements spoken by the user. + +{% if language == "zh" %} +**"三度原则"判断标准:** +- 复用度:该信息是否会被多个功能模块使用? +- 约束度:该信息是否会影响系统行为? +- 时效性:该信息是长期稳定的还是临时的?仅提取长期稳定信息。 + +**提取规则:** +- **只提取关于"用户本人"的画像信息**,忽略用户提到的第三方人物(如朋友、同事、家人)的信息 +- 仅提取文本中明确提到的信息,不要推测 +- 如果文本中没有可提取的用户画像信息,返回空的 user_metadata 对象 +- **输出语言必须与输入文本的语言一致**(输入中文则输出中文值,输入英文则输出英文值) + +{% if existing_metadata %} +**重要:合并已有元数据** +下方提供了数据库中已有的用户元数据。请结合用户最新发言,输出**合并后的完整元数据**: +- 如果用户明确否定了已有信息(如"我不再教高中物理了"),在输出中**移除**该信息 +- 如果用户提到了新信息,**添加**到对应字段中 +- 如果已有信息未被用户否定,**保留**在输出中 +- 标量字段(如 role、domain):如果用户提到了新值,用新值替换;否则保留已有值 +- 最终输出应该是完整的、合并后的元数据,不是增量 +{% endif %} + +**字段说明:** +- profile.role:用户的职业或角色,如 教师、医生、后端工程师 +- profile.domain:用户所在领域,如 教育、医疗、软件开发 +- profile.expertise:用户擅长的技能或工具(通用,不限于编程),如 Python、心理咨询、高中物理 +- profile.interests:用户主动表达兴趣的话题或领域标签 +- behavioral_hints.learning_stage:学习阶段(初学者/中级/高级) +- behavioral_hints.preferred_depth:偏好深度(概览/技术细节/深入探讨) +- behavioral_hints.tone_preference:语气偏好(轻松随意/专业简洁/学术严谨) +- knowledge_tags:用户涉及的知识领域标签 + +**用户别名变更(增量模式):** +- **aliases_to_add**:本次新发现的用户别名,包括: + * 用户主动自我介绍:如"我叫张三"、"我的名字是XX"、"我的网名是XX" + * 他人对用户的称呼:如"同事叫我陈哥"、"大家叫我小张"、"领导叫我老陈" + * 只提取原文中逐字出现的名字,严禁推测或创造 + * 禁止提取:用户给 AI 取的名字、第三方人物自身的名字、"用户"/"我" 等占位词 + * 如果没有新别名,返回空数组 `[]` +- **aliases_to_remove**:用户明确否认的别名,包括: + * 用户说"我不叫XX了"、"别叫我XX"、"我改名了,不叫XX" → 将 XX 放入此数组 + * **严格限制**:只将用户原文中**逐字提到**的被否认名字放入,不要推断关联的其他别名 + * 例如:用户说"我不叫陈小刀了" → 只移除"陈小刀",不要移除"陈哥"、"老陈"等未被提及的别名 + * 如果没有要移除的别名,返回空数组 `[]` +{% if existing_aliases %} +- 已有别名:{{ existing_aliases | tojson }}(仅供参考,不需要在输出中重复) +{% endif %} +{% else %} +**"Three-Degree Principle" criteria:** +- Reusability: Will this information be used by multiple functional modules? +- Constraint: Will this information affect system behavior? +- Timeliness: Is this information long-term stable or temporary? Only extract long-term stable information. + +**Extraction rules:** +- **Only extract profile information about the user themselves**, ignore information about third parties (friends, colleagues, family) mentioned by the user +- Only extract information explicitly mentioned in the text, do not speculate +- If no user profile information can be extracted, return an empty user_metadata object +- **Output language must match the input text language** + +{% if existing_metadata %} +**Important: Merge with existing metadata** +Existing user metadata from the database is provided below. Combine with the user's latest statements to output the **complete merged metadata**: +- If the user explicitly negates existing info (e.g. "I no longer teach high school physics"), **remove** it from output +- If the user mentions new info, **add** it to the corresponding field +- If existing info is not negated by the user, **keep** it in the output +- Scalar fields (e.g. role, domain): replace with new value if user mentions one; otherwise keep existing +- The final output should be the complete, merged metadata — not an incremental update +{% endif %} + +**Field descriptions:** +- profile.role: User's occupation or role, e.g. teacher, doctor, software engineer +- profile.domain: User's domain, e.g. education, healthcare, software development +- profile.expertise: User's skills or tools (general, not limited to programming) +- profile.interests: Topics or domain tags the user actively expressed interest in +- behavioral_hints.learning_stage: Learning stage (beginner/intermediate/advanced) +- behavioral_hints.preferred_depth: Preferred depth (overview/detailed/deep dive) +- behavioral_hints.tone_preference: Tone preference (casual/professional/academic) +- knowledge_tags: Knowledge domain tags related to the user + +**User alias changes (incremental mode):** +- **aliases_to_add**: Newly discovered user aliases from this conversation, including: + * User self-introductions: e.g. "I'm John", "My name is XX", "My username is XX" + * How others address the user: e.g. "My colleagues call me Johnny", "People call me Mike" + * Only extract names that appear VERBATIM in the text — never infer or fabricate + * Do NOT extract: names the user gives to the AI, third-party people's own names, placeholder words like "User"/"I" + * If no new aliases, return empty array `[]` +- **aliases_to_remove**: Aliases the user explicitly denies, including: + * User says "Don't call me XX anymore", "I'm not called XX", "I changed my name from XX" → put XX in this array + * **Strict rule**: Only include the exact name the user **verbatim mentions** as denied. Do NOT infer or remove related aliases + * Example: User says "I'm not called John anymore" → only remove "John", do NOT remove "Johnny", "J" or other related aliases not mentioned + * If no aliases to remove, return empty array `[]` +{% if existing_aliases %} +- Existing aliases: {{ existing_aliases | tojson }} (for reference only, do not repeat in output) +{% endif %} +{% endif %} + +===User Statements=== +{% for stmt in statements %} +- {{ stmt }} +{% endfor %} + +{% if existing_metadata %} +===Existing User Metadata=== +```json +{{ existing_metadata | tojson }} +``` +{% endif %} + +===Output Format=== +Return a JSON object with the following structure: +```json +{ + "user_metadata": { + "profile": { + "role": "", + "domain": "", + "expertise": [], + "interests": [] + }, + "behavioral_hints": { + "learning_stage": "", + "preferred_depth": "", + "tone_preference": "" + }, + "knowledge_tags": [] + }, + "aliases_to_add": [], + "aliases_to_remove": [] +} +``` + +{{ json_schema }} diff --git a/api/app/core/models/base.py b/api/app/core/models/base.py index eff6292f..89a7dcee 100644 --- a/api/app/core/models/base.py +++ b/api/app/core/models/base.py @@ -112,22 +112,23 @@ class RedBearModelFactory: params["stream_usage"] = True # 深度思考模式 is_streaming = bool(config.extra_params.get("streaming")) - if is_streaming and not config.is_omni: - if provider == ModelProvider.VOLCANO: - # 火山引擎深度思考仅流式调用支持,非流式时不传 thinking 参数 - thinking_config: Dict[str, Any] = { - "type": "enabled" if config.deep_thinking else "disabled" - } - if config.deep_thinking and config.thinking_budget_tokens: - thinking_config["budget_tokens"] = config.thinking_budget_tokens - params["extra_body"] = {"thinking": thinking_config} - else: - # 始终显式传递 enable_thinking,不支持该参数的模型(如 DeepSeek-R1)会直接忽略 - model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {}) - model_kwargs["enable_thinking"] = config.deep_thinking - if config.deep_thinking and config.thinking_budget_tokens: - model_kwargs["thinking_budget"] = config.thinking_budget_tokens - params["model_kwargs"] = model_kwargs + if config.support_thinking: + if is_streaming and not config.is_omni: + if provider == ModelProvider.VOLCANO: + # 火山引擎深度思考仅流式调用支持,非流式时不传 thinking 参数 + thinking_config: Dict[str, Any] = { + "type": "enabled" if config.deep_thinking else "disabled" + } + if config.deep_thinking and config.thinking_budget_tokens: + thinking_config["budget_tokens"] = config.thinking_budget_tokens + params["extra_body"] = {"thinking": thinking_config} + else: + # 始终显式传递 enable_thinking,不支持该参数的模型(如 DeepSeek-R1)会直接忽略 + model_kwargs: Dict[str, Any] = config.extra_params.get("model_kwargs", {}) + model_kwargs["enable_thinking"] = config.deep_thinking + if config.deep_thinking and config.thinking_budget_tokens: + model_kwargs["thinking_budget"] = config.thinking_budget_tokens + params["model_kwargs"] = model_kwargs return params elif provider == ModelProvider.DASHSCOPE: params = { @@ -206,10 +207,15 @@ class RedBearModelFactory: if provider in [ModelProvider.XINFERENCE, ModelProvider.GPUSTACK]: return { "model": config.model_name, - # "base_url": config.base_url, "jina_api_key": config.api_key, **config.extra_params } + elif provider == ModelProvider.DASHSCOPE: + return { + "model": config.model_name, + "dashscope_api_key": config.api_key, + **config.extra_params + } else: raise BusinessException(f"不支持的提供商: {provider}", code=BizCode.PROVIDER_NOT_SUPPORTED) @@ -265,6 +271,9 @@ def get_provider_rerank_class(provider: str): if provider in [ModelProvider.XINFERENCE, ModelProvider.GPUSTACK]: from langchain_community.document_compressors import JinaRerank return JinaRerank + elif provider == ModelProvider.DASHSCOPE: + from langchain_community.document_compressors.dashscope_rerank import DashScopeRerank + return DashScopeRerank # elif provider == ModelProvider.OLLAMA: # from langchain_ollama import OllamaEmbeddings # return OllamaEmbeddings diff --git a/api/app/core/models/embedding.py b/api/app/core/models/embedding.py index fb75696a..991e4498 100644 --- a/api/app/core/models/embedding.py +++ b/api/app/core/models/embedding.py @@ -36,9 +36,7 @@ class RedBearEmbeddings(Embeddings): "base_url": config.base_url, "api_key": config.api_key, "timeout": httpx.Timeout(timeout=config.timeout, connect=60.0), - "max_retries": config.max_retries, - "check_embedding_ctx_length": False, - "encoding_format": "float" + "max_retries": config.max_retries } elif provider == ModelProvider.DASHSCOPE: params = { diff --git a/api/app/core/models/rerank.py b/api/app/core/models/rerank.py index c4b91e25..45b6fc88 100644 --- a/api/app/core/models/rerank.py +++ b/api/app/core/models/rerank.py @@ -76,5 +76,9 @@ class RedBearRerank(BaseDocumentCompressor): from langchain_community.document_compressors import JinaRerank model_instance: JinaRerank = self._model return model_instance.rerank(documents=documents, query=query, top_n=top_n) + elif provider == ModelProvider.DASHSCOPE: + from langchain_community.document_compressors.dashscope_rerank import DashScopeRerank + model_instance: DashScopeRerank = self._model + return model_instance.rerank(documents=documents, query=query, top_n=top_n) else: raise ValueError(f"不支持的模型提供商: {provider}") diff --git a/api/app/core/rag/app/naive.py b/api/app/core/rag/app/naive.py index 72272347..312216dd 100644 --- a/api/app/core/rag/app/naive.py +++ b/api/app/core/rag/app/naive.py @@ -672,10 +672,15 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, excel_parser = ExcelParser() if parser_config.get("html4excel") and parser_config.get("html4excel").lower() == "true": sections = [(_, "") for _ in excel_parser.html(binary, 12) if _] - parser_config["chunk_token_num"] = 0 else: sections = [(_, "") for _ in excel_parser(binary) if _] - parser_config["chunk_token_num"] = 12800 + callback(0.8, "Finish parsing.") + # Excel 每行直接作为一个 chunk,不经过 naive_merge 避免被 delimiter 拆分 + chunks = [s for s, _ in sections] + res.extend(tokenize_chunks(chunks, doc, is_english, None)) + res.extend(embed_res) + res.extend(url_res) + return res elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py index d66a21a8..c3999be9 100644 --- a/api/app/core/rag/deepdoc/parser/excel_parser.py +++ b/api/app/core/rag/deepdoc/parser/excel_parser.py @@ -232,14 +232,14 @@ class RAGExcelParser: t = str(ti[i].value) if i < len(ti) else "" t += (":" if t else "") + str(c.value) fields.append(t) - line = "; ".join(fields) + line = "\n".join(fields) if sheetname.lower().find("sheet") < 0: - line += " ——" + sheetname + line += "\n——" + sheetname res.append(line) else: # 只有表头的情况 if header_fields: - line = "; ".join(header_fields) + line = "\n".join(header_fields) if sheetname.lower().find("sheet") < 0: line += " ——" + sheetname res.append(line) diff --git a/api/app/core/rag/llm/embedding_model.py b/api/app/core/rag/llm/embedding_model.py index 22e35a15..59210054 100644 --- a/api/app/core/rag/llm/embedding_model.py +++ b/api/app/core/rag/llm/embedding_model.py @@ -50,7 +50,9 @@ class OpenAIEmbed(Base): def encode(self, texts: list): # OpenAI requires batch size <=16 batch_size = 16 - texts = [truncate(t, 8191) for t in texts] + # Use 8000 instead of 8191 to leave safety margin for tokenizer differences + # between cl100k_base (used by truncate) and the actual embedding model + texts = [truncate(t, 8000) for t in texts] ress = [] total_tokens = 0 for i in range(0, len(texts), batch_size): @@ -63,7 +65,7 @@ class OpenAIEmbed(Base): return np.array(ress), total_tokens def encode_queries(self, text): - res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True}) + res = self.client.embeddings.create(input=[truncate(text, 8000)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True}) return np.array(res.data[0].embedding), self.total_token_count(res) @@ -79,6 +81,7 @@ class LocalAIEmbed(Base): def encode(self, texts: list): batch_size = 16 + texts = [truncate(t, 8000) for t in texts] ress = [] for i in range(0, len(texts), batch_size): res = self.client.embeddings.create(input=texts[i : i + batch_size], model=self.model_name) @@ -173,6 +176,7 @@ class XinferenceEmbed(Base): def encode(self, texts: list): batch_size = 16 + texts = [truncate(t, 8000) for t in texts] ress = [] total_tokens = 0 for i in range(0, len(texts), batch_size): @@ -188,7 +192,7 @@ class XinferenceEmbed(Base): def encode_queries(self, text): res = None try: - res = self.client.embeddings.create(input=[text], model=self.model_name) + res = self.client.embeddings.create(input=[truncate(text, 8000)], model=self.model_name) return np.array(res.data[0].embedding), self.total_token_count(res) except Exception as _e: log_exception(_e, res) diff --git a/api/app/core/tools/builtin/openclaw_tool.py b/api/app/core/tools/builtin/openclaw_tool.py new file mode 100644 index 00000000..2ff3a626 --- /dev/null +++ b/api/app/core/tools/builtin/openclaw_tool.py @@ -0,0 +1,300 @@ +"""OpenClaw 远程 Agent 内置工具""" +import time +import base64 +from io import BytesIO +from typing import List, Dict, Any, Optional +import aiohttp + +from app.core.tools.builtin.base import BuiltinTool +from app.schemas.tool_schema import ToolParameter, ToolResult, ParameterType +from app.core.logging_config import get_business_logger + +logger = get_business_logger() + + +class OpenClawTool(BuiltinTool): + """OpenClaw 远程 Agent 工具 — 支持文本和图片多模态输入""" + + def __init__(self, tool_id: str, config: Dict[str, Any]): + super().__init__(tool_id, config) + params = self.parameters_config + + # 用户配置项(前端表单填写) + self._server_url = params.get("server_url", "") + self._api_key = params.get("api_key", "") + self._agent_id = params.get("agent_id", "main") + + # 内部默认值 + self._model = "openclaw" + self._session_strategy = "by_user" + self._timeout = 120 + + # 运行时上下文(通过 set_runtime_context 注入) + self._user_id = "anonymous" + self._conversation_id = None + self._uploaded_files = [] + + @property + def name(self) -> str: + return "openclaw_tool" + + @property + def description(self) -> str: + return ( + "OpenClaw 远程 Agent:将任务委托给远程 OpenClaw Agent。" + "具备 3D 模型生成与打印控制、设备管理、文件处理、浏览器自动化、" + "Shell 命令执行、网络搜索等能力。支持文本和图片多模态交互。" + ) + + def get_required_config_parameters(self) -> List[str]: + return ["server_url", "api_key"] + + @property + def parameters(self) -> List[ToolParameter]: + return [ + ToolParameter( + name="operation", + type=ParameterType.STRING, + description="任务类型", + required=True, + enum= ["print_task", "device_query", "image_understand", "general"] + ), + ToolParameter( + name="message", + type=ParameterType.STRING, + description="发送给 OpenClaw Agent 的文本请求内容", + required=True + ), + ToolParameter( + name="image_url", + type=ParameterType.STRING, + description="可选,附带的图片 URL 或 base64 data URI(OpenClaw 支持图片输入)", + required=False + ) + ] + + # ---------- 运行时上下文注入 ---------- + def set_runtime_context( + self, + user_id: str = "anonymous", + conversation_id: Optional[str] = None, + uploaded_files: Optional[list] = None + ): + """注入运行时上下文(由 chat service 调用)""" + self._user_id = user_id + self._conversation_id = conversation_id + self._uploaded_files = uploaded_files or [] + + # ---------- 连接测试 ---------- + async def test_connection(self) -> Dict[str, Any]: + """测试 OpenClaw Gateway 连接""" + if not self._server_url: + return {"success": False, "message": "未配置 server_url"} + if not self._api_key: + return {"success": False, "message": "未配置 api_key"} + + url = f"{self._server_url.rstrip('/')}/v1/responses" + headers = { + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + "x-openclaw-agent-id": self._agent_id + } + body = { + "model": self._model, + "user": "connection-test", + "input": "hi", + "stream": False + } + try: + timeout_cfg = aiohttp.ClientTimeout(total=30) + async with aiohttp.ClientSession(timeout=timeout_cfg) as session: + async with session.post(url, json=body, headers=headers) as resp: + if resp.status < 400: + return {"success": True, "message": "OpenClaw 连接成功"} + error_text = await resp.text() + return { + "success": False, + "message": f"OpenClaw HTTP {resp.status}: {error_text[:200]}" + } + except Exception as e: + return {"success": False, "message": f"OpenClaw 连接失败: {str(e)}"} + + # ---------- 执行 ---------- + async def execute(self, **kwargs) -> ToolResult: + """执行 OpenClaw 调用""" + start_time = time.time() + try: + message = kwargs.get("message", "") + if not message: + return ToolResult.error_result( + error="message 参数不能为空", + error_code="OPENCLAW_INVALID_INPUT", + execution_time=time.time() - start_time + ) + + # 提取图片:优先从用户上传文件中获取,LLM 传的 image_url 作为兜底 + image_url = self._extract_image_from_uploads() + if not image_url: + image_url = kwargs.get("image_url") + if image_url and not image_url.startswith("data:"): + image_url = await self._download_and_encode_image(image_url) + + # 构建请求 + url = f"{self._server_url.rstrip('/')}/v1/responses" + headers = { + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + "x-openclaw-agent-id": self._agent_id + } + user_field = ( + f"conv-{self._conversation_id}" + if self._session_strategy == "by_conversation" and self._conversation_id + else f"user-{self._user_id}" + ) + input_field = self._build_input(message, image_url) + body = { + "model": self._model, + "user": user_field, + "input": input_field, + "stream": False + } + + timeout_cfg = aiohttp.ClientTimeout(total=self._timeout) + # 打印请求日志(截断 base64 避免日志过大) + log_body = {**body} + if isinstance(log_body.get("input"), list): + log_body["input"] = "[multimodal input, truncated]" + elif isinstance(log_body.get("input"), str) and len(log_body["input"]) > 500: + log_body["input"] = log_body["input"][:500] + "..." + logger.info( + f"OpenClaw 请求: url={url}, agent_id={self._agent_id}, " + f"has_image={bool(image_url)}, body={log_body}" + ) + async with aiohttp.ClientSession(timeout=timeout_cfg) as session: + async with session.post(url, json=body, headers=headers) as resp: + execution_time = time.time() - start_time + if resp.status >= 400: + error_text = await resp.text() + return ToolResult.error_result( + error=f"OpenClaw HTTP {resp.status}: {error_text[:500]}", + error_code="OPENCLAW_HTTP_ERROR", + execution_time=execution_time + ) + data = await resp.json() + text = self._extract_response(data) + display_text = self._format_result(text) + return ToolResult.success_result( + data=display_text, + execution_time=execution_time + ) + + except aiohttp.ClientError as e: + return ToolResult.error_result( + error=f"OpenClaw 网络连接失败: {str(e)}", + error_code="OPENCLAW_NETWORK_ERROR", + execution_time=time.time() - start_time + ) + except Exception as e: + return ToolResult.error_result( + error=f"OpenClaw 调用失败: {str(e)}", + error_code="OPENCLAW_EXECUTION_ERROR", + execution_time=time.time() - start_time + ) + + # ---------- 私有方法 ---------- + def _extract_image_from_uploads(self) -> Optional[str]: + """从用户上传文件中提取图片 URL""" + for f in self._uploaded_files: + f_type = f.get("type", "") + if f_type == "image": + source = f.get("source", {}) + if source.get("type") == "base64": + media_type = source.get("media_type", "image/jpeg") + data = source.get("data", "") + return f"data:{media_type};base64,{data}" + elif f.get("image"): + return f.get("image") + elif f.get("url"): + return f.get("url") + elif f_type == "image_url": + return f.get("image_url", {}).get("url", "") + return None + + async def _download_and_encode_image(self, image_url: str) -> str: + """下载图片并转为 base64 data URI""" + try: + from PIL import Image + MAX_RAW_SIZE = 4 * 1024 * 1024 + + async with aiohttp.ClientSession() as session: + async with session.get( + image_url, allow_redirects=True, + timeout=aiohttp.ClientTimeout(total=30) + ) as resp: + if resp.status != 200: + return image_url + content_type = resp.headers.get("Content-Type", "image/jpeg") + if not content_type.startswith("image/"): + return image_url + img_bytes = await resp.read() + + if len(img_bytes) > MAX_RAW_SIZE: + img = Image.open(BytesIO(img_bytes)) + if img.mode in ("RGBA", "P", "LA"): + img = img.convert("RGB") + if max(img.size) > 2048: + img.thumbnail((2048, 2048), Image.LANCZOS) + buf = BytesIO() + img.save(buf, format="JPEG", quality=75, optimize=True) + img_bytes = buf.getvalue() + content_type = "image/jpeg" + + b64 = base64.b64encode(img_bytes).decode("utf-8") + return f"data:{content_type};base64,{b64}" + except Exception as e: + logger.warning(f"OpenClaw 下载图片失败,使用原始 URL: {e}") + return image_url + + def _build_input(self, message: str, image_url: Optional[str] = None): + """构造请求 input 字段:有图片则构造多模态结构,否则纯文本""" + if not image_url: + return message + + content_parts = [{"type": "input_text", "text": message}] + if image_url.startswith("data:"): + try: + header, data = image_url.split(",", 1) + media_type = header.split(":")[1].split(";")[0] + content_parts.append({ + "type": "input_image", + "source": {"type": "base64", "media_type": media_type, "data": data} + }) + except (ValueError, IndexError): + return message + else: + content_parts.append({ + "type": "input_image", + "source": {"type": "url", "url": image_url} + }) + + return [{"type": "message", "role": "user", "content": content_parts}] + + def _extract_response(self, response_data: Dict[str, Any]) -> str: + """从 OpenClaw 响应中提取文本内容 + + OpenClaw /v1/responses 只返回 output_text 类型的内容。 + 图片信息(如有)由 OpenClaw Skill 以 Markdown 链接形式嵌入文本中返回。 + """ + output = response_data.get("output", []) + texts = [] + for item in output: + if item.get("type") == "message": + for content in item.get("content", []): + if content.get("type") == "output_text" and content.get("text"): + texts.append(content["text"]) + return "\n".join(texts) if texts else str(response_data) + + @staticmethod + def _format_result(text: str) -> str: + """格式化结果为 LLM 可读字符串""" + return text or "(OpenClaw 返回了空内容)" diff --git a/api/app/core/tools/builtin/operation_tool.py b/api/app/core/tools/builtin/operation_tool.py index 95e6fdf5..e8b7c77e 100644 --- a/api/app/core/tools/builtin/operation_tool.py +++ b/api/app/core/tools/builtin/operation_tool.py @@ -11,6 +11,11 @@ class OperationTool(BaseTool): self.base_tool = base_tool self.operation = operation super().__init__(base_tool.tool_id, base_tool.config) + + def set_runtime_context(self, **kwargs): + """转发运行时上下文到 base_tool""" + if hasattr(self.base_tool, 'set_runtime_context'): + self.base_tool.set_runtime_context(**kwargs) @property def name(self) -> str: @@ -32,6 +37,8 @@ class OperationTool(BaseTool): return self._get_datetime_params() elif self.base_tool.name == 'json_tool': return self._get_json_params() + elif self.base_tool.name == 'openclaw_tool': + return self._get_openclaw_params() else: # 默认返回除operation外的所有参数 return [p for p in self.base_tool.parameters if p.name != "operation"] @@ -232,6 +239,64 @@ class OperationTool(BaseTool): else: return base_params + def _get_openclaw_params(self) -> List[ToolParameter]: + """获取 openclaw_tool 特定操作的参数""" + if self.operation == "print_task": + return [ + ToolParameter( + name="message", + type=ParameterType.STRING, + description="发送给 OpenClaw 的打印任务描述,将用户的原始消息原封不动地传递给 OpenClaw,禁止改写、补充或润色用户的原文", + required=True + ), + ToolParameter( + name="image_url", + type=ParameterType.STRING, + description="可选,附带的设计图片或参考图,OpenClaw 可据此生成 3D 模型", + required=False + ) + ] + elif self.operation == "device_query": + return [ + ToolParameter( + name="message", + type=ParameterType.STRING, + description="发送给 OpenClaw 的设备查询指令", + required=True + ) + ] + elif self.operation == "image_understand": + return [ + ToolParameter( + name="message", + type=ParameterType.STRING, + description="发送给 OpenClaw 的图片理解任务,应描述需要对图片做什么(如描述内容、提取文字、分析信息)", + required=True + ), + ToolParameter( + name="image_url", + type=ParameterType.STRING, + description="要分析的图片 URL 或 base64 data URI", + required=False + ) + ] + else: + # general 及其他 + return [ + ToolParameter( + name="message", + type=ParameterType.STRING, + description="发送给 OpenClaw Agent 的任务描述,应包含完整的任务需求", + required=True + ), + ToolParameter( + name="image_url", + type=ParameterType.STRING, + description="可选,附带的图片 URL 或 base64 data URI", + required=False + ) + ] + async def execute(self, **kwargs) -> ToolResult: """执行特定操作""" # 添加operation参数 diff --git a/api/app/core/tools/configs/builtin/openclaw_tool.json b/api/app/core/tools/configs/builtin/openclaw_tool.json new file mode 100644 index 00000000..7c1f9629 --- /dev/null +++ b/api/app/core/tools/configs/builtin/openclaw_tool.json @@ -0,0 +1,15 @@ +{ + "name": "openclaw_tool", + "description": "调用OpenClaw Agent远程服务", + "tool_class": "OpenClawTool", + "category": "agent", + "requires_config": true, + "version": "1.0.0", + "enabled": true, + "parameters": { + "server_url": "", + "api_key": "", + "agent_id": "main" + }, + "tags": ["agent", "openclaw", "multimodal", "3d-printing", "builtin"] +} diff --git a/api/app/core/tools/configs/builtin_tools.json b/api/app/core/tools/configs/builtin_tools.json index 79206a5e..882a970a 100644 --- a/api/app/core/tools/configs/builtin_tools.json +++ b/api/app/core/tools/configs/builtin_tools.json @@ -30,5 +30,18 @@ "parameters": { "api_key": {"type": "string", "description": "百度搜索API密钥", "sensitive": true, "required": true} } + }, + "openclaw": { + "name": "OpenClaw远程Agent", + "description": "OpenClaw Agent远程服务", + "tool_class": "OpenClawTool", + "category": "agent", + "requires_config": true, + "version": "1.0.0", + "enabled": true, + "parameters": { + "server_url": {"type": "string", "description": "OpenClaw Gateway 地址", "required": true}, + "api_key": {"type": "string", "description": "OpenClaw API Key", "sensitive": true, "required": true} + } } } \ No newline at end of file diff --git a/api/app/core/tools/custom/base.py b/api/app/core/tools/custom/base.py index 3dfe4c93..c03fe206 100644 --- a/api/app/core/tools/custom/base.py +++ b/api/app/core/tools/custom/base.py @@ -30,7 +30,7 @@ class CustomTool(BaseTool): self.auth_config = config.get("auth_config", {}) self.base_url = config.get("base_url", "") self.timeout = config.get("timeout", 30) - + # 解析schema self._parsed_operations = self._parse_openapi_schema() diff --git a/api/app/core/tools/langchain_adapter.py b/api/app/core/tools/langchain_adapter.py index 51415732..859b6312 100644 --- a/api/app/core/tools/langchain_adapter.py +++ b/api/app/core/tools/langchain_adapter.py @@ -131,7 +131,7 @@ class LangchainAdapter: def _tool_supports_operations(tool: BaseTool) -> bool: """检查工具是否支持多操作""" # 内置工具中支持操作的工具 - builtin_operation_tools = ['datetime_tool', 'json_tool'] + builtin_operation_tools = ['datetime_tool', 'json_tool', 'openclaw_tool'] # 检查内置工具 if tool.tool_type.value == "builtin" and tool.name in builtin_operation_tools: diff --git a/api/app/core/workflow/adapters/base_adapter.py b/api/app/core/workflow/adapters/base_adapter.py index 2e24d085..41090983 100644 --- a/api/app/core/workflow/adapters/base_adapter.py +++ b/api/app/core/workflow/adapters/base_adapter.py @@ -40,6 +40,7 @@ class WorkflowParserResult(BaseModel): edges: list[EdgeDefinition] = Field(default_factory=list) nodes: list[NodeDefinition] = Field(default_factory=list) variables: list[VariableDefinition] = Field(default_factory=list) + features: dict[str, Any] = Field(default_factory=dict) warnings: list[ExceptionDefinition] = Field(default_factory=list) errors: list[ExceptionDefinition] = Field(default_factory=list) @@ -51,6 +52,7 @@ class WorkflowImportResult(BaseModel): edges: list[EdgeDefinition] = Field(default_factory=list) nodes: list[NodeDefinition] = Field(default_factory=list) variables: list[VariableDefinition] = Field(default_factory=list) + features: dict[str, Any] = Field(default_factory=dict) warnings: list[ExceptionDefinition] = Field(default_factory=list) errors: list[ExceptionDefinition] = Field(default_factory=list) diff --git a/api/app/core/workflow/adapters/dify/converter.py b/api/app/core/workflow/adapters/dify/converter.py index 61065c71..ad9312e1 100644 --- a/api/app/core/workflow/adapters/dify/converter.py +++ b/api/app/core/workflow/adapters/dify/converter.py @@ -15,7 +15,7 @@ from app.core.workflow.adapters.errors import ( ExceptionType ) from app.core.workflow.nodes.assigner.config import AssignmentItem -from app.core.workflow.nodes.base_config import VariableDefinition, BaseNodeConfig +from app.core.workflow.nodes.base_config import VariableDefinition as NodeVariableDefinition, BaseNodeConfig from app.core.workflow.nodes.code.config import InputVariable, OutputVariable from app.core.workflow.nodes.configs import ( StartNodeConfig, @@ -36,6 +36,7 @@ from app.core.workflow.nodes.configs import ( ListOperatorNodeConfig, DocExtractorNodeConfig, ) +from app.schemas.workflow_schema import VariableDefinition as SchemaVariableDefinition from app.core.workflow.nodes.cycle_graph.config import ( ConditionDetail as LoopConditionDetail, ConditionsConfig, @@ -98,6 +99,7 @@ class DifyConverter(BaseConverter): NodeType.CYCLE_START: lambda x: {}, NodeType.BREAK: lambda x: {}, } + self._file_vars_to_conv: list[SchemaVariableDefinition] = [] def get_node_convert(self, node_type): func = self.CONFIG_CONVERT_MAP.get(node_type, lambda x: {}) @@ -286,19 +288,25 @@ class DifyConverter(BaseConverter): ) continue - if var_type in ["file", "array[file]"]: - self.errors.append( - ExceptionDefinition( - type=ExceptionType.VARIABLE, - node_id=node["id"], - node_name=node_data["title"], - name=var["variable"], - detail=f"Unsupported Variable type for start node: {var_type}" - ) - ) + if var_type in [VariableType.FILE, VariableType.ARRAY_FILE]: + # 开始节点不支持文件变量,转为会话变量 + self._file_vars_to_conv.append(SchemaVariableDefinition( + name=var["variable"], + type=var_type.value, + required=var.get("required", False), + default=None, + description=var.get("label", ""), + )) + self.warnings.append(ExceptionDefinition( + type=ExceptionType.VARIABLE, + node_id=node["id"], + node_name=node_data["title"], + name=var["variable"], + detail=f"File variable '{var['variable']}' is not supported in start node, moved to conversation variables" + )) continue - var_def = VariableDefinition( + var_def = NodeVariableDefinition( name=var["variable"], type=var_type, required=var["required"], @@ -837,3 +845,76 @@ class DifyConverter(BaseConverter): ).model_dump() self.config_validate(node["id"], node["data"]["title"], DocExtractorNodeConfig, result) return result + + @staticmethod + def convert_features(features: dict) -> dict: + """Convert Dify features to MemoryBear FeaturesConfigForm format.""" + if not features: + return {} + + result: dict = {} + + # opening_statement + opening = features.get("opening_statement", "") + suggested = features.get("suggested_questions", []) + result["opening_statement"] = { + "enabled": bool(opening), + "statement": opening or None, + "suggested_questions": suggested, + } + + # citation (对应 Dify retriever_resource) + retriever = features.get("retriever_resource", {}) + result["citation"] = { + "enabled": retriever.get("enabled", False) if isinstance(retriever, dict) else False, + } + + # file_upload: Dify allowed_file_types 数组 -> 前端扁平字段 + file_upload = features.get("file_upload", {}) + allowed_types = file_upload.get("allowed_file_types", []) if file_upload else [] + allowed_methods = file_upload.get("allowed_file_upload_methods", ["local_file", "remote_url"]) + if isinstance(allowed_methods, list): + if len(allowed_methods) >= 2: + transfer_method = "both" + elif allowed_methods: + transfer_method = allowed_methods[0] + else: + transfer_method = "both" + else: + transfer_method = allowed_methods or "both" + + file_config = file_upload.get("fileUploadConfig", {}) + result["file_upload"] = { + "enabled": file_upload.get("enabled", False) if file_upload else False, + "image_enabled": "image" in allowed_types, + "image_max_size_mb": file_config.get("image_file_size_limit", 10) if file_config else 10, + "image_allowed_extensions": ["png", "jpg", "jpeg"], + "audio_enabled": "audio" in allowed_types, + "audio_max_size_mb": file_config.get("audio_file_size_limit", 50) if file_config else 50, + "audio_allowed_extensions": ["mp3", "wav", "m4a"], + "document_enabled": "document" in allowed_types, + "document_max_size_mb": file_config.get("file_size_limit", 100) if file_config else 100, + "document_allowed_extensions": ["pdf", "docx", "doc", "xlsx", "xls", "txt", "csv", "json", "md"], + "video_enabled": "video" in allowed_types, + "video_max_size_mb": file_config.get("video_file_size_limit", 100) if file_config else 100, + "video_allowed_extensions": ["mp4", "mov"], + "max_file_count": file_upload.get("number_limits", 1) if file_upload else 1, + "allowed_transfer_methods": transfer_method, + } + + # text_to_speech + tts = features.get("text_to_speech", {}) + result["text_to_speech"] = { + "enabled": tts.get("enabled", False) if isinstance(tts, dict) else False, + "voice": tts.get("voice") if isinstance(tts, dict) else None, + "language": tts.get("language") if isinstance(tts, dict) else None, + "autoplay": False, + } + + # suggested_questions_after_answer + sqa = features.get("suggested_questions_after_answer", {}) + result["suggested_questions_after_answer"] = { + "enabled": sqa.get("enabled", False) if isinstance(sqa, dict) else False, + } + + return result diff --git a/api/app/core/workflow/adapters/dify/dify_adapter.py b/api/app/core/workflow/adapters/dify/dify_adapter.py index d1126d08..c699f877 100644 --- a/api/app/core/workflow/adapters/dify/dify_adapter.py +++ b/api/app/core/workflow/adapters/dify/dify_adapter.py @@ -119,9 +119,12 @@ class DifyAdapter(BasePlatformAdapter, DifyConverter): if variable: self.conv_variables.append(con_var) - # for variables in config.get("workflow").get("environment_variables"): - # variable = self._convert_variable(variables) - # conv_variables.append(variable) + # 开始节点的文件变量合并到会话变量 + self.conv_variables.extend(self._file_vars_to_conv) + + features = self.convert_features( + self.config.get("workflow", {}).get("features", {}) + ) trigger = self._convert_trigger({}) execution_config = self._convert_execution({}) @@ -135,6 +138,7 @@ class DifyAdapter(BasePlatformAdapter, DifyConverter): edges=self.edges, nodes=self.nodes, variables=self.conv_variables, + features=features, warnings=self.warnings, errors=self.errors ) diff --git a/api/app/core/workflow/engine/graph_builder.py b/api/app/core/workflow/engine/graph_builder.py index 4f76d694..e0bdebf3 100644 --- a/api/app/core/workflow/engine/graph_builder.py +++ b/api/app/core/workflow/engine/graph_builder.py @@ -33,7 +33,7 @@ logger = logging.getLogger(__name__) # ["Hello ", "{{user.name}}", "!"] _OUTPUT_PATTERN = re.compile(r'\{\{.*?}}|[^{]+|{') # Strict variable format: {{ node_id.field_name }} -_VARIABLE_PATTERN = re.compile(r'\{\{\s*[a-zA-Z0-9_]+\.[a-zA-Z0-9_]+\s*}}') +_VARIABLE_PATTERN = re.compile(r'\{\{\s*[a-zA-Z0-9_]+\.[a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)?\s*}}') class GraphBuilder: diff --git a/api/app/core/workflow/engine/stream_output_coordinator.py b/api/app/core/workflow/engine/stream_output_coordinator.py index dcc92fdb..361f99d2 100644 --- a/api/app/core/workflow/engine/stream_output_coordinator.py +++ b/api/app/core/workflow/engine/stream_output_coordinator.py @@ -14,7 +14,7 @@ from app.core.workflow.engine.variable_pool import VariablePool logger = get_logger(__name__) SCOPE_PATTERN = re.compile( - r"\{\{\s*([a-zA-Z0-9_]+)\.[a-zA-Z0-9_]+\s*}}" + r"\{\{\s*([a-zA-Z0-9_]+)\.[a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)?\s*}}" ) diff --git a/api/app/core/workflow/engine/variable_pool.py b/api/app/core/workflow/engine/variable_pool.py index 86a85daf..08d10e22 100644 --- a/api/app/core/workflow/engine/variable_pool.py +++ b/api/app/core/workflow/engine/variable_pool.py @@ -34,19 +34,22 @@ class LazyVariableDict: return self._cache[key] var_struct = self._source.get(key) if var_struct is None: - raise KeyError(key) - value = var_struct.instance.to_literal() if self._literal else var_struct.instance.get_value() + return None + raw = var_struct.instance.get_value() + # literal 模式下 dict/list 保留结构,让 Jinja2 能继续访问子字段(如 .type) + value = raw if (not self._literal or isinstance(raw, (dict, list))) else var_struct.instance.to_literal() self._cache[key] = value return value def get(self, key, default=None): - try: - return self._resolve(key) - except KeyError: - return default + value = self._resolve(key) + return default if value is None else value def __getitem__(self, key): - return self._resolve(key) + value = self._resolve(key) + if value is None: + raise KeyError(key) + return value def __getattr__(self, key): if key.startswith('_'): @@ -164,7 +167,7 @@ class VariablePool: def transform_selector(selector): variable_literal = VARIABLE_PATTERN.sub(r"\1", selector).strip() selector = VariableSelector.from_string(variable_literal).path - if len(selector) != 2: + if len(selector) not in (2, 3): raise ValueError(f"Selector not valid - {selector}") return selector @@ -196,6 +199,16 @@ class VariablePool: return None return var_instance + @staticmethod + def _extract_field(struct: "VariableStruct", field: str | None) -> Any: + """If field is given, drill into a dict/object variable's value.""" + if field is None: + return struct.instance.get_value() + value = struct.instance.get_value() + if not isinstance(value, dict): + raise KeyError(f"Variable is not an object, cannot access field '{field}'") + return value.get(field) + def get_instance( self, selector: str, @@ -250,12 +263,14 @@ class VariablePool: Raises: KeyError: If strict is True and the variable does not exist. """ + path = self.transform_selector(selector) variable_struct = self._get_variable_struct(selector) if variable_struct is None: if strict: raise KeyError(f"{selector} not exist") return default - + if len(path) == 3: + return self._extract_field(variable_struct, path[2]) return variable_struct.instance.get_value() def get_literal( @@ -282,12 +297,15 @@ class VariablePool: Raises: KeyError: If strict is True and the variable does not exist. """ + path = self.transform_selector(selector) variable_struct = self._get_variable_struct(selector) if variable_struct is None: if strict: raise KeyError(f"{selector} not exist") return default - + if len(path) == 3: + value = self._extract_field(variable_struct, path[2]) + return str(value) if value is not None else "" return variable_struct.instance.to_literal() async def set( @@ -345,7 +363,14 @@ class VariablePool: Returns: 变量是否存在 """ - return self._get_variable_struct(selector) is not None + path = self.transform_selector(selector) + struct = self._get_variable_struct(selector) + if struct is None: + return False + if len(path) == 3: + value = struct.instance.get_value() + return isinstance(value, dict) and path[2] in value + return True def lazy_namespace(self, namespace: str, literal: bool = False) -> LazyVariableDict: return LazyVariableDict(self.variables.get(namespace, {}), literal) diff --git a/api/app/core/workflow/nodes/http_request/config.py b/api/app/core/workflow/nodes/http_request/config.py index e1b84f0c..72474436 100644 --- a/api/app/core/workflow/nodes/http_request/config.py +++ b/api/app/core/workflow/nodes/http_request/config.py @@ -72,8 +72,9 @@ class HttpContentTypeConfig(BaseModel): @classmethod def validate_data(cls, v, info): content_type = info.data.get("content_type") - if content_type == HttpContentType.FROM_DATA and not isinstance(v, HttpFormData): - raise ValueError("When content_type is 'form-data', data must be of type HttpFormData") + if content_type == HttpContentType.FROM_DATA and ( + not isinstance(v, list) or not all(isinstance(item, HttpFormData) for item in v)): + raise ValueError("When content_type is 'form-data', data must be a list of HttpFormData") elif content_type in [HttpContentType.JSON] and not isinstance(v, str): raise ValueError("When content_type is JSON, data must be of type str") elif content_type in [HttpContentType.WWW_FORM] and not isinstance(v, dict): diff --git a/api/app/core/workflow/nodes/http_request/node.py b/api/app/core/workflow/nodes/http_request/node.py index 086bee4a..783c230b 100644 --- a/api/app/core/workflow/nodes/http_request/node.py +++ b/api/app/core/workflow/nodes/http_request/node.py @@ -260,17 +260,22 @@ class HttpRequestNode(BaseNode): )) case HttpContentType.FROM_DATA: data = {} - content["files"] = {} + files = [] for item in self.typed_config.body.data: + key = self._render_template(item.key, variable_pool) if item.type == "text": - data[self._render_template(item.key, variable_pool)] = self._render_template(item.value, - variable_pool) + data[key] = self._render_template(item.value, variable_pool) elif item.type == "file": - content["files"][self._render_template(item.key, variable_pool)] = ( - uuid.uuid4().hex, - await variable_pool.get_instance(item.value).get_content() - ) + file_instance = variable_pool.get_instance(item.value) + if isinstance(file_instance, ArrayVariable): + for v in file_instance.value: + if isinstance(v, FileVariable): + files.append((key, (uuid.uuid4().hex, await v.get_content()))) + elif isinstance(file_instance, FileVariable): + files.append((key, (uuid.uuid4().hex, await file_instance.get_content()))) content["data"] = data + if files: + content["files"] = files case HttpContentType.BINARY: content["files"] = [] file_instence = variable_pool.get_instance(self.typed_config.body.data) diff --git a/api/app/core/workflow/variable/variable_objects.py b/api/app/core/workflow/variable/variable_objects.py index 94f87287..2b849c94 100644 --- a/api/app/core/workflow/variable/variable_objects.py +++ b/api/app/core/workflow/variable/variable_objects.py @@ -84,7 +84,7 @@ class FileVariable(BaseVariable): total_bytes = 0 chunks = [] - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(follow_redirects=True) as client: async with client.stream("GET", self.value.url) as resp: resp.raise_for_status() async for chunk in resp.aiter_bytes(8192): diff --git a/api/app/repositories/app_repository.py b/api/app/repositories/app_repository.py index 75a91fd6..c9d980e2 100644 --- a/api/app/repositories/app_repository.py +++ b/api/app/repositories/app_repository.py @@ -61,3 +61,15 @@ def get_apps_by_id(db: Session, app_id: uuid.UUID) -> App: """根据工作空间ID查询应用""" repo = AppRepository(db) return repo.get_apps_by_id(app_id) + + +def get_release_by_id(db: Session, app_id: uuid.UUID, release_id: uuid.UUID): + """根据发布版本ID查询发布快照(仅返回激活状态)""" + from app.models.app_release_model import AppRelease + return db.scalars( + select(AppRelease).where( + AppRelease.app_id == app_id, + AppRelease.id == release_id, + AppRelease.is_active.is_(True), + ) + ).first() diff --git a/api/app/repositories/implicit_emotions_storage_repository.py b/api/app/repositories/implicit_emotions_storage_repository.py index b6c40b40..b665924d 100644 --- a/api/app/repositories/implicit_emotions_storage_repository.py +++ b/api/app/repositories/implicit_emotions_storage_repository.py @@ -5,16 +5,9 @@ Implicit Emotions Storage Repository 事务由调用方控制,仓储层只使用 flush/refresh """ import logging -from datetime import date, datetime, timezone +from datetime import datetime, timedelta, timezone from typing import Generator, Optional - -class TimeFilterUnavailableError(Exception): - """redis_client 不可用,无法执行时间轴筛选。 - - 调用方捕获此异常后可选择回退到 get_all_user_ids 进行全量处理。 - """ - import redis from sqlalchemy import exists, not_, select from sqlalchemy.orm import Session @@ -25,6 +18,13 @@ from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage logger = logging.getLogger(__name__) +class TimeFilterUnavailableError(Exception): + """redis_client 不可用,无法执行时间轴筛选。 + + 调用方捕获此异常后可选择回退到 get_all_user_ids 进行全量处理。 + """ + + class ImplicitEmotionsStorageRepository: """隐性记忆和情绪存储仓储类""" @@ -216,9 +216,7 @@ class ImplicitEmotionsStorageRepository: """ from sqlalchemy import String as SAString from sqlalchemy import cast - CST = timezone(timedelta(hours=8)) - now_cst = datetime.now(CST) - today_start = now_cst.replace(hour=0, minute=0, second=0, microsecond=0).astimezone(timezone.utc).replace(tzinfo=None) + today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) tomorrow_start = today_start + timedelta(days=1) offset = 0 while True: diff --git a/api/app/repositories/neo4j/cypher_queries.py b/api/app/repositories/neo4j/cypher_queries.py index aa246829..daf04bcb 100644 --- a/api/app/repositories/neo4j/cypher_queries.py +++ b/api/app/repositories/neo4j/cypher_queries.py @@ -23,6 +23,7 @@ SET s += { end_user_id: statement.end_user_id, stmt_type: statement.stmt_type, statement: statement.statement, + speaker: statement.speaker, emotion_intensity: statement.emotion_intensity, emotion_target: statement.emotion_target, emotion_subject: statement.emotion_subject, @@ -56,6 +57,7 @@ SET c += { expired_at: chunk.expired_at, dialog_id: chunk.dialog_id, content: chunk.content, + speaker: chunk.speaker, chunk_embedding: chunk.chunk_embedding, sequence_number: chunk.sequence_number, start_index: chunk.start_index, @@ -91,6 +93,8 @@ SET e.name = CASE WHEN entity.name IS NOT NULL AND entity.name <> '' THEN entity END, e.statement_id = CASE WHEN entity.statement_id IS NOT NULL AND entity.statement_id <> '' THEN entity.statement_id ELSE e.statement_id END, e.aliases = CASE + // 用户实体的 aliases 由 PgSQL end_user_info 作为唯一权威源,知识抽取完全不写入 + WHEN entity.name IN ['用户', '我', 'User', 'I'] THEN e.aliases WHEN entity.aliases IS NOT NULL AND size(entity.aliases) > 0 THEN CASE WHEN e.aliases IS NULL THEN entity.aliases @@ -283,7 +287,7 @@ LIMIT $limit """ SEARCH_STATEMENTS_BY_KEYWORD = """ -CALL db.index.fulltext.queryNodes("statementsFulltext", $q) YIELD node AS s, score +CALL db.index.fulltext.queryNodes("statementsFulltext", $query) YIELD node AS s, score WHERE ($end_user_id IS NULL OR s.end_user_id = $end_user_id) OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s) OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity) @@ -307,7 +311,7 @@ LIMIT $limit """ # 查询实体名称包含指定字符串的实体 SEARCH_ENTITIES_BY_NAME = """ -CALL db.index.fulltext.queryNodes("entitiesFulltext", $q) YIELD node AS e, score +CALL db.index.fulltext.queryNodes("entitiesFulltext", $query) YIELD node AS e, score WHERE ($end_user_id IS NULL OR e.end_user_id = $end_user_id) OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e) OPTIONAL MATCH (c:Chunk)-[:CONTAINS]->(s) @@ -337,21 +341,21 @@ LIMIT $limit """ SEARCH_ENTITIES_BY_NAME_OR_ALIAS = """ -CALL db.index.fulltext.queryNodes("entitiesFulltext", $q) YIELD node AS e, score +CALL db.index.fulltext.queryNodes("entitiesFulltext", $query) YIELD node AS e, score WHERE ($end_user_id IS NULL OR e.end_user_id = $end_user_id) WITH e, score -WITH collect({entity: e, score: score}) AS fulltextResults +With collect({entity: e, score: score}) AS fulltextResults OPTIONAL MATCH (ae:ExtractedEntity) WHERE ($end_user_id IS NULL OR ae.end_user_id = $end_user_id) AND ae.aliases IS NOT NULL - AND ANY(alias IN ae.aliases WHERE toLower(alias) CONTAINS toLower($q)) + AND ANY(alias IN ae.aliases WHERE toLower(alias) CONTAINS toLower($query)) WITH fulltextResults, collect(ae) AS aliasEntities UNWIND (fulltextResults + [x IN aliasEntities | {entity: x, score: CASE - WHEN ANY(alias IN x.aliases WHERE toLower(alias) = toLower($q)) THEN 1.0 - WHEN ANY(alias IN x.aliases WHERE toLower(alias) STARTS WITH toLower($q)) THEN 0.9 + WHEN ANY(alias IN x.aliases WHERE toLower(alias) = toLower($query)) THEN 1.0 + WHEN ANY(alias IN x.aliases WHERE toLower(alias) STARTS WITH toLower($query)) THEN 0.9 ELSE 0.8 END }]) AS row @@ -384,7 +388,7 @@ LIMIT $limit SEARCH_CHUNKS_BY_CONTENT = """ -CALL db.index.fulltext.queryNodes("chunksFulltext", $q) YIELD node AS c, score +CALL db.index.fulltext.queryNodes("chunksFulltext", $query) YIELD node AS c, score WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id) OPTIONAL MATCH (c)-[:CONTAINS]->(s:Statement) OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(e:ExtractedEntity) @@ -501,7 +505,7 @@ LIMIT $limit """ SEARCH_STATEMENTS_BY_KEYWORD_TEMPORAL = """ -CALL db.index.fulltext.queryNodes("statementsFulltext", $q) YIELD node AS s, score +CALL db.index.fulltext.queryNodes("statementsFulltext", $query) YIELD node AS s, score WHERE ($end_user_id IS NULL OR s.end_user_id = $end_user_id) AND ((($start_date IS NULL OR (s.created_at IS NOT NULL AND datetime(s.created_at) >= datetime($start_date))) AND ($end_date IS NULL OR (s.created_at IS NOT NULL AND datetime(s.created_at) <= datetime($end_date)))) @@ -677,7 +681,7 @@ SET n.invalid_at = $new_invalid_at # MemorySummary keyword search using fulltext index SEARCH_MEMORY_SUMMARIES_BY_KEYWORD = """ -CALL db.index.fulltext.queryNodes("summariesFulltext", $q) YIELD node AS m, score +CALL db.index.fulltext.queryNodes("summariesFulltext", $query) YIELD node AS m, score WHERE ($end_user_id IS NULL OR m.end_user_id = $end_user_id) OPTIONAL MATCH (m)-[:DERIVED_FROM_STATEMENT]->(s:Statement) RETURN m.id AS id, @@ -1363,7 +1367,7 @@ RETURN c.community_id AS community_id # Community keyword search: matches name or summary via fulltext index SEARCH_COMMUNITIES_BY_KEYWORD = """ -CALL db.index.fulltext.queryNodes("communitiesFulltext", $q) YIELD node AS c, score +CALL db.index.fulltext.queryNodes("communitiesFulltext", $query) YIELD node AS c, score WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id) RETURN c.community_id AS id, c.name AS name, @@ -1451,7 +1455,7 @@ RETURN elementId(r) AS uuid """ SEARCH_PERCEPTUAL_BY_KEYWORD = """ -CALL db.index.fulltext.queryNodes("perceptualFulltext", $q) YIELD node AS p, score +CALL db.index.fulltext.queryNodes("perceptualFulltext", $query) YIELD node AS p, score WHERE p.end_user_id = $end_user_id RETURN p.id AS id, p.end_user_id AS end_user_id, diff --git a/api/app/repositories/neo4j/graph_saver.py b/api/app/repositories/neo4j/graph_saver.py index adc266fe..56feece2 100644 --- a/api/app/repositories/neo4j/graph_saver.py +++ b/api/app/repositories/neo4j/graph_saver.py @@ -186,6 +186,58 @@ async def save_dialog_and_statements_to_neo4j( Returns: bool: True if successful, False otherwise """ + # TODO 需要在去重消歧节阶段,做以下逻辑的处理 + # 预处理:对特殊实体("用户"、"AI助手")复用 Neo4j 中已有节点的 ID, + # 确保同一个 end_user_id 下只有一个"用户"节点和一个"AI助手"节点。 + if entity_nodes: + _SPECIAL_NAMES = {"用户", "我", "user", "i", "ai助手", "助手", "ai assistant", "assistant"} + end_user_id = entity_nodes[0].end_user_id if entity_nodes else None + if end_user_id: + try: + # 查询已有的特殊实体 + cypher = """ + MATCH (e:ExtractedEntity) + WHERE e.end_user_id = $end_user_id AND toLower(e.name) IN $names + RETURN e.id AS id, e.name AS name + """ + existing = await connector.execute_query( + cypher, + end_user_id=end_user_id, + names=list(_SPECIAL_NAMES), + ) + # 建立 name(lower) → existing_id 映射 + existing_id_map = {} + for record in (existing or []): + name_lower = (record.get("name") or "").strip().lower() + if name_lower and record.get("id"): + existing_id_map[name_lower] = record["id"] + + if existing_id_map: + # 替换新实体的 ID 为已有 ID,同时更新所有引用该 ID 的边 + for ent in entity_nodes: + name_lower = (ent.name or "").strip().lower() + if name_lower in existing_id_map: + old_id = ent.id + new_id = existing_id_map[name_lower] + if old_id != new_id: + ent.id = new_id + # 更新 statement_entity_edges 中的引用 + for edge in statement_entity_edges: + if edge.target == old_id: + edge.target = new_id + if edge.source == old_id: + edge.source = new_id + # 更新 entity_edges 中的引用 + for edge in entity_edges: + if edge.source == old_id: + edge.source = new_id + if edge.target == old_id: + edge.target = new_id + logger.info( + f"特殊实体 '{ent.name}' ID 复用: {old_id[:8]}... → {new_id[:8]}..." + ) + except Exception as e: + logger.warning(f"特殊实体 ID 复用查询失败(不影响写入): {e}") # 定义事务函数,将所有写操作放在一个事务中 async def _save_all_in_transaction(tx): diff --git a/api/app/repositories/neo4j/graph_search.py b/api/app/repositories/neo4j/graph_search.py index 32ec4474..a191dad6 100644 --- a/api/app/repositories/neo4j/graph_search.py +++ b/api/app/repositories/neo4j/graph_search.py @@ -2,6 +2,7 @@ import asyncio import logging from typing import Any, Dict, List, Optional +from app.core.memory.utils.data.text_utils import escape_lucene_query from app.repositories.neo4j.cypher_queries import ( CHUNK_EMBEDDING_SEARCH, COMMUNITY_EMBEDDING_SEARCH, @@ -87,7 +88,7 @@ async def _update_activation_values_batch( unique_node_ids.append(node_id) if not unique_node_ids: - logger.warning(f"批量更新激活值:没有有效的节点ID") + logger.warning("批量更新激活值:没有有效的节点ID") return nodes # 记录去重信息(仅针对具有有效 ID 的节点) @@ -223,7 +224,7 @@ async def _update_search_results_activation( async def search_graph( connector: Neo4jConnector, - q: str, + query: str, end_user_id: Optional[str] = None, limit: int = 50, include: List[str] = None, @@ -234,14 +235,14 @@ async def search_graph( OPTIMIZED: Runs all queries in parallel using asyncio.gather() INTEGRATED: Updates activation values for knowledge nodes before returning results - - Statements: matches s.statement CONTAINS q - - Entities: matches e.name CONTAINS q - - Chunks: matches s.content CONTAINS q (from Statement nodes) - - Summaries: matches ms.content CONTAINS q + - Statements: matches s.statement CONTAINS query + - Entities: matches e.name CONTAINS query + - Chunks: matches s.content CONTAINS query (from Statement nodes) + - Summaries: matches ms.content CONTAINS query Args: connector: Neo4j connector - q: Query text + query: Query text for full-text search end_user_id: Optional group filter limit: Max results per category include: List of categories to search (default: all) @@ -252,6 +253,9 @@ async def search_graph( if include is None: include = ["statements", "chunks", "entities", "summaries"] + # Escape Lucene special characters to prevent query parse errors + escaped_query = escape_lucene_query(query) + # Prepare tasks for parallel execution tasks = [] task_keys = [] @@ -260,7 +264,7 @@ async def search_graph( tasks.append(connector.execute_query( SEARCH_STATEMENTS_BY_KEYWORD, json_format=True, - q=q, + query=escaped_query, end_user_id=end_user_id, limit=limit, )) @@ -270,7 +274,7 @@ async def search_graph( tasks.append(connector.execute_query( SEARCH_ENTITIES_BY_NAME_OR_ALIAS, json_format=True, - q=q, + query=escaped_query, end_user_id=end_user_id, limit=limit, )) @@ -280,7 +284,7 @@ async def search_graph( tasks.append(connector.execute_query( SEARCH_CHUNKS_BY_CONTENT, json_format=True, - q=q, + query=escaped_query, end_user_id=end_user_id, limit=limit, )) @@ -290,7 +294,7 @@ async def search_graph( tasks.append(connector.execute_query( SEARCH_MEMORY_SUMMARIES_BY_KEYWORD, json_format=True, - q=q, + query=escaped_query, end_user_id=end_user_id, limit=limit, )) @@ -300,7 +304,7 @@ async def search_graph( tasks.append(connector.execute_query( SEARCH_COMMUNITIES_BY_KEYWORD, json_format=True, - q=q, + query=escaped_query, end_user_id=end_user_id, limit=limit, )) @@ -482,7 +486,7 @@ async def search_graph_by_embedding( update_time = time.time() - update_start logger.info(f"[PERF] Activation value updates took: {update_time:.4f}s") else: - logger.info(f"[PERF] Skipping activation updates (only summaries)") + logger.info("[PERF] Skipping activation updates (only summaries)") return results @@ -520,7 +524,7 @@ async def get_dedup_candidates_for_entities( # 适配新版查询:使用全 # 全文索引按名称检索(包含 CONTAINS 语义) rows = await connector.execute_query( SEARCH_ENTITIES_BY_NAME, - q=name, + query=escape_lucene_query(name), end_user_id=end_user_id, limit=100, ) @@ -544,7 +548,7 @@ async def get_dedup_candidates_for_entities( # 适配新版查询:使用全 try: rows = await connector.execute_query( SEARCH_ENTITIES_BY_NAME, - q=name.lower(), + query=escape_lucene_query(name.lower()), end_user_id=end_user_id, limit=100, ) @@ -593,11 +597,12 @@ async def search_graph_by_keyword_temporal( - Returns up to 'limit' statements """ if not query_text: - logger.warning(f"query_text不能为空") + logger.warning("query_text不能为空") return {"statements": []} + escaped_query = escape_lucene_query(query_text) statements = await connector.execute_query( SEARCH_STATEMENTS_BY_KEYWORD_TEMPORAL, - q=query_text, + query=escaped_query, end_user_id=end_user_id, start_date=start_date, end_date=end_date, @@ -671,7 +676,7 @@ async def search_graph_by_dialog_id( - Returns up to 'limit' dialogues """ if not dialog_id: - logger.warning(f"dialog_id不能为空") + logger.warning("dialog_id不能为空") return {"dialogues": []} dialogues = await connector.execute_query( @@ -690,7 +695,7 @@ async def search_graph_by_chunk_id( limit: int = 1, ) -> Dict[str, List[Dict[str, Any]]]: if not chunk_id: - logger.warning(f"chunk_id不能为空") + logger.warning("chunk_id不能为空") return {"chunks": []} chunks = await connector.execute_query( SEARCH_CHUNK_BY_CHUNK_ID, @@ -968,7 +973,7 @@ async def search_graph_l_valid_at( async def search_perceptual( connector: Neo4jConnector, - q: str, + query: str, end_user_id: Optional[str] = None, limit: int = 10, ) -> Dict[str, List[Dict[str, Any]]]: @@ -979,7 +984,7 @@ async def search_perceptual( Args: connector: Neo4j connector - q: Query text + query: Query text for full-text search end_user_id: Optional user filter limit: Max results @@ -989,7 +994,7 @@ async def search_perceptual( try: perceptuals = await connector.execute_query( SEARCH_PERCEPTUAL_BY_KEYWORD, - q=q, + query=escape_lucene_query(query), end_user_id=end_user_id, limit=limit, ) diff --git a/api/app/repositories/neo4j/neo4j_connector.py b/api/app/repositories/neo4j/neo4j_connector.py index ea8fa917..d20bf75f 100644 --- a/api/app/repositories/neo4j/neo4j_connector.py +++ b/api/app/repositories/neo4j/neo4j_connector.py @@ -77,11 +77,11 @@ class Neo4jConnector: """ await self.driver.close() - async def execute_query(self, query: str, json_format=False, **kwargs: Any) -> List[Dict[str, Any]]: + async def execute_query(self, cypher: str, json_format=False, **kwargs: Any) -> List[Dict[str, Any]]: """执行Cypher查询 Args: - query: Cypher查询语句 + cypher: Cypher查询语句 json_format: json格式化 **kwargs: 查询参数,将作为参数传递给Cypher查询 @@ -92,7 +92,7 @@ class Neo4jConnector: """ result = await self.driver.execute_query( - query, + cypher, database="neo4j", **kwargs ) diff --git a/api/app/repositories/tool_repository.py b/api/app/repositories/tool_repository.py index 1a9b0b87..1348c4e8 100644 --- a/api/app/repositories/tool_repository.py +++ b/api/app/repositories/tool_repository.py @@ -161,6 +161,17 @@ class BuiltinToolRepository: BuiltinToolConfig.id == tool_id ).first() + @staticmethod + def get_existing_tool_classes(db: Session, tenant_id: uuid.UUID) -> set: + """获取该租户已有的内置工具 tool_class 集合""" + rows = db.query(BuiltinToolConfig.tool_class).join( + ToolConfig, BuiltinToolConfig.id == ToolConfig.id + ).filter( + ToolConfig.tenant_id == tenant_id, + ToolConfig.tool_type == ToolType.BUILTIN.value + ).all() + return {row[0] for row in rows} + class CustomToolRepository: """自定义工具仓储类""" diff --git a/api/app/repositories/user_repository.py b/api/app/repositories/user_repository.py index af4449e5..6874f9bf 100644 --- a/api/app/repositories/user_repository.py +++ b/api/app/repositories/user_repository.py @@ -23,7 +23,7 @@ class UserRepository: db_logger.debug(f"根据 ID 查询用户:user_id={user_id}") try: - user = self.db.query(User).options(joinedload(User.tenant)).filter(User.id == user_id).first() + user = self.db.query(User).options(joinedload(User.tenant)).filter(User.id == user_id, User.is_active.is_(True)).first() if user: # 检查租户状态,租户禁用时返回 None if user.tenant and not user.tenant.is_active: @@ -297,6 +297,10 @@ def get_user_by_id(db: Session, user_id: uuid.UUID) -> Optional[User]: """根据ID获取用户""" return UserRepository(db).get_user_by_id(user_id) +def get_user_by_id_regardless_active(db: Session, user_id: uuid.UUID) -> Optional[User]: + """根据ID获取用户(不过滤 is_active,用于启用/禁用场景)""" + return db.query(User).filter(User.id == user_id).first() + def get_user_by_email(db: Session, email: str) -> Optional[User]: """根据邮箱获取用户""" return UserRepository(db).get_user_by_email(email) diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index 85cff671..5f73cde1 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -616,6 +616,7 @@ class AppChatRequest(BaseModel): stream: bool = Field(default=False, description="是否流式返回") thinking: bool = Field(default=False, description="是否启用深度思考(需Agent配置支持)") files: List[FileInput] = Field(default_factory=list, description="附件列表(支持多文件)") + version: Optional[uuid.UUID] = Field(default=None, description="指定发布版本ID,不传则使用当前生效版本") class DraftRunRequest(BaseModel): diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index fb4955b3..ec0c4b79 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -165,7 +165,14 @@ class AppChatService: multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件") - + # 为需要运行时上下文的工具注入上下文 + for t in tools: + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'): + t.tool_instance.set_runtime_context( + user_id=user_id or "anonymous", + conversation_id=str(conversation_id) if conversation_id else None, + uploaded_files=processed_files or [] + ) # 调用 Agent(支持多模态) result = await agent.chat( message=message, @@ -413,6 +420,15 @@ class AppChatService: processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件") + # 为需要运行时上下文的工具注入上下文 + for t in tools: + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'): + t.tool_instance.set_runtime_context( + user_id=user_id or "anonymous", + conversation_id=str(conversation_id) if conversation_id else None, + uploaded_files=processed_files or [] + ) + # 流式调用 Agent(支持多模态),同时并行启动 TTS full_content = "" full_reasoning = "" diff --git a/api/app/services/app_dsl_service.py b/api/app/services/app_dsl_service.py index 8c198be4..3a897109 100644 --- a/api/app/services/app_dsl_service.py +++ b/api/app/services/app_dsl_service.py @@ -73,15 +73,14 @@ class AppDslService: AppType.MULTI_AGENT: "multi_agent_config", AppType.WORKFLOW: "workflow" }.get(app.type, "config") - config_data = self._enrich_release_config(app.type, release.config or {}) + config_data = self._enrich_release_config(app.type, release.config or {}, release.default_model_config_id) dsl = {**meta, "app": app_meta, config_key: config_data} return yaml.dump(dsl, default_flow_style=False, allow_unicode=True), f"{release.name}_v{release.version_name}.yaml" - def _enrich_release_config(self, app_type: str, cfg: dict) -> dict: + def _enrich_release_config(self, app_type: str, cfg: dict, default_model_config_id=None) -> dict: if app_type == AppType.AGENT: enriched = {**cfg} - if "default_model_config_id" in cfg: - enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"]) + enriched["default_model_config_ref"] = self._model_ref(default_model_config_id) if "knowledge_retrieval" in cfg: enriched["knowledge_retrieval"] = self._enrich_knowledge_retrieval(cfg["knowledge_retrieval"]) if "tools" in cfg: @@ -91,8 +90,7 @@ class AppDslService: return enriched if app_type == AppType.MULTI_AGENT: enriched = {**cfg} - if "default_model_config_id" in cfg: - enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"]) + enriched["default_model_config_ref"] = self._model_ref(default_model_config_id) if "master_agent_id" in cfg: enriched["master_agent_ref"] = self._release_ref(cfg["master_agent_id"]) if "sub_agents" in cfg: diff --git a/api/app/services/app_service.py b/api/app/services/app_service.py index 5e26a629..534ab8d0 100644 --- a/api/app/services/app_service.py +++ b/api/app/services/app_service.py @@ -411,6 +411,7 @@ class AppService: edges=[edge.model_dump() for edge in data.edges] if data.edges else [], variables=[var.model_dump() for var in data.variables] if data.variables else [], execution_config=data.execution_config.model_dump() if data.execution_config else {}, + features=data.features if data.features else {}, triggers=[trigger.model_dump() for trigger in data.triggers] if data.triggers else [], is_active=True, created_at=now, @@ -619,6 +620,28 @@ class AppService: self._validate_app_accessible(app, workspace_id) return app + def get_release_by_id(self, app_id: uuid.UUID, release_id: uuid.UUID) -> AppRelease: + """按发布版本ID获取发布快照 + + Args: + app_id: 应用ID + release_id: 发布版本ID + + Returns: + AppRelease: 发布快照 + + Raises: + BusinessException: 版本不存在或已下线 + """ + from app.repositories.app_repository import get_release_by_id + release = get_release_by_id(self.db, app_id, release_id) + if not release: + raise BusinessException( + f"版本 {release_id} 不存在或已下线", + BizCode.RELEASE_NOT_FOUND, + ) + return release + def create_app( self, *, diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 978dfdab..5c10e4f8 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -640,7 +640,14 @@ class AgentRunService: multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件,provider={provider}") - + # 为需要运行时上下文的工具注入上下文 + for t in tools: + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'): + t.tool_instance.set_runtime_context( + user_id=user_id or "anonymous", + conversation_id=str(conversation_id) if conversation_id else None, + uploaded_files=processed_files or [] + ) # 7. 知识库检索 context = None @@ -890,7 +897,14 @@ class AgentRunService: multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(files) logger.info(f"处理了 {len(processed_files)} 个文件,provider={provider}") - + # 为需要运行时上下文的工具注入上下文 + for t in tools: + if hasattr(t, 'tool_instance') and hasattr(t.tool_instance, 'set_runtime_context'): + t.tool_instance.set_runtime_context( + user_id=user_id or "anonymous", + conversation_id=str(conversation_id) if conversation_id else None, + uploaded_files=processed_files or [] + ) # 7. 知识库检索 context = None diff --git a/api/app/services/emotion_analytics_service.py b/api/app/services/emotion_analytics_service.py index c226348e..9a215cd6 100644 --- a/api/app/services/emotion_analytics_service.py +++ b/api/app/services/emotion_analytics_service.py @@ -679,9 +679,9 @@ class EmotionAnalyticsService: # 查询用户的实体和标签 query = """ - MATCH (e:Entity) + MATCH (e:ExtractedEntity) WHERE e.end_user_id = $end_user_id - RETURN e.name as name, e.type as type + RETURN e.name as name, e.entity_type as type ORDER BY e.created_at DESC LIMIT 20 """ diff --git a/api/app/services/implicit_memory_service.py b/api/app/services/implicit_memory_service.py index 4bd11deb..7a186f33 100644 --- a/api/app/services/implicit_memory_service.py +++ b/api/app/services/implicit_memory_service.py @@ -34,6 +34,7 @@ from app.schemas.implicit_memory_schema import ( UserMemorySummary, ) from app.schemas.memory_config_schema import MemoryConfig +from app.services.memory_base_service import MIN_MEMORY_SUMMARY_COUNT from sqlalchemy.orm import Session logger = logging.getLogger(__name__) @@ -379,12 +380,59 @@ class ImplicitMemoryService: raise + def _build_empty_profile(self) -> dict: + """构建 MemorySummary 不足时返回的固定空白画像数据""" + now_ms = int(datetime.utcnow().timestamp() * 1000) + insufficient = "Insufficient data for analysis" + + def _empty_dimension(name: str) -> dict: + return { + "evidence": [insufficient], + "reasoning": f"No clear evidence found for {name} dimension", + "percentage": 0.0, + "dimension_name": name, + "confidence_level": 20, + } + + def _empty_category(name: str) -> dict: + return { + "evidence": [insufficient], + "percentage": 25.0, + "category_name": name, + "trending_direction": None, + } + + return { + "habits": [], + "portrait": { + "aesthetic": _empty_dimension("aesthetic"), + "creativity": _empty_dimension("creativity"), + "literature": _empty_dimension("literature"), + "technology": _empty_dimension("technology"), + "historical_trends": None, + "analysis_timestamp": now_ms, + "total_summaries_analyzed": 0, + }, + "preferences": [], + "interest_areas": { + "art": _empty_category("art"), + "tech": _empty_category("tech"), + "music": _empty_category("music"), + "lifestyle": _empty_category("lifestyle"), + "analysis_timestamp": now_ms, + "total_summaries_analyzed": 0, + }, + } + async def generate_complete_profile( self, user_id: str ) -> dict: """生成完整的用户画像(包含所有4个模块) + 需要该用户的 MemorySummary 节点数量 >= 5 才会真正调用 LLM 生成画像, + 否则返回固定的空白画像数据。 + Args: user_id: 用户ID @@ -394,6 +442,16 @@ class ImplicitMemoryService: logger.info(f"生成完整用户画像: user={user_id}") try: + # 前置检查:查询该用户有效的 MemorySummary 节点数量(排除孤立节点) + from app.services.memory_base_service import MemoryBaseService + base_service = MemoryBaseService() + memory_summary_count = await base_service.get_valid_memory_summary_count(user_id) + logger.info(f"用户 MemorySummary 节点数量: {memory_summary_count} (user={user_id})") + + if memory_summary_count < MIN_MEMORY_SUMMARY_COUNT: + logger.info(f"MemorySummary 数量不足 {MIN_MEMORY_SUMMARY_COUNT}(当前 {memory_summary_count}),返回空白画像: user={user_id}") + return self._build_empty_profile() + # 并行调用4个分析方法 preferences, portrait, interest_areas, habits = await asyncio.gather( self.get_preference_tags(user_id=user_id), diff --git a/api/app/services/memory_base_service.py b/api/app/services/memory_base_service.py index bc647752..e615af8b 100644 --- a/api/app/services/memory_base_service.py +++ b/api/app/services/memory_base_service.py @@ -265,12 +265,50 @@ async def Translation_English(modid, text, fields=None): # 其他类型(数字、布尔值、None等):原样返回 else: return text +# 隐性记忆画像生成所需的最低 MemorySummary 节点数量 +MIN_MEMORY_SUMMARY_COUNT = 5 + + class MemoryBaseService: """记忆服务基类,提供共享的辅助方法""" def __init__(self): self.neo4j_connector = Neo4jConnector() + async def get_valid_memory_summary_count( + self, + end_user_id: str + ) -> int: + """获取用户有效的 MemorySummary 节点数量(排除孤立节点)。 + + 只统计存在 DERIVED_FROM_STATEMENT 关系的 MemorySummary 节点。 + + Args: + end_user_id: 终端用户ID + + Returns: + 有效 MemorySummary 节点数量 + """ + try: + query = """ + MATCH (n:MemorySummary)-[:DERIVED_FROM_STATEMENT]->(:Statement) + WHERE n.end_user_id = $end_user_id + RETURN count(DISTINCT n) as count + """ + result = await self.neo4j_connector.execute_query( + query, end_user_id=end_user_id + ) + count = result[0]["count"] if result and len(result) > 0 else 0 + logger.debug( + f"有效 MemorySummary 节点数量: {count} (end_user_id={end_user_id})" + ) + return count + except Exception as e: + logger.error( + f"获取有效 MemorySummary 数量失败: {str(e)}", exc_info=True + ) + return 0 + @staticmethod def parse_timestamp(timestamp_value) -> Optional[int]: """ diff --git a/api/app/services/memory_dashboard_service.py b/api/app/services/memory_dashboard_service.py index b390aa10..a01b1d00 100644 --- a/api/app/services/memory_dashboard_service.py +++ b/api/app/services/memory_dashboard_service.py @@ -803,7 +803,6 @@ def get_rag_content( "page": { "page": page, "pagesize": pagesize, - "total": 0, "hasnext": False, }, "items": [] @@ -897,13 +896,12 @@ def get_rag_content( "page": { "page": page, "pagesize": pagesize, - "total": global_total, "hasnext": offset_end < global_total, }, "items": conversations } - business_logger.info(f"成功获取RAG内容: total={global_total}, page={page}, 返回={len(conversations)} 条对话") + business_logger.info(f"成功获取RAG内容: page={page}, 返回={len(conversations)} 条对话") return result except Exception as e: diff --git a/api/app/services/prompt_optimizer_service.py b/api/app/services/prompt_optimizer_service.py index fde8c4f9..30901111 100644 --- a/api/app/services/prompt_optimizer_service.py +++ b/api/app/services/prompt_optimizer_service.py @@ -227,10 +227,20 @@ class PromptOptimizerService: content = getattr(chunk, "content", chunk) if not content: continue - buffer += content + if isinstance(content, str): + buffer += content + elif isinstance(content, list): + for _ in content: + buffer += _["text"] + else: + logger.error(f"Unsupported content type - {content}") + raise Exception("Unsupported content type") cache = buffer[:-20] + last_idx = 19 + while cache and cache[-1] == '\\' and last_idx > 0: + cache = buffer[:-last_idx] + last_idx -= 1 - # 尝试找到 "prompt": " 开始位置 if prompt_finished: continue @@ -272,7 +282,7 @@ class PromptOptimizerService: def parser_prompt_variables(prompt: str): try: pattern = r'\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}' - matches = re.findall(pattern, prompt) + matches = re.findall(pattern, str(prompt)) variables = list(set(matches)) return variables except Exception as e: diff --git a/api/app/services/tool_service.py b/api/app/services/tool_service.py index 165b060f..9a59cd81 100644 --- a/api/app/services/tool_service.py +++ b/api/app/services/tool_service.py @@ -34,7 +34,8 @@ BUILTIN_TOOLS = { "JsonTool": "app.core.tools.builtin.json_tool", "BaiduSearchTool": "app.core.tools.builtin.baidu_search_tool", "MinerUTool": "app.core.tools.builtin.mineru_tool", - "TextInTool": "app.core.tools.builtin.textin_tool" + "TextInTool": "app.core.tools.builtin.textin_tool", + "OpenClawTool": "app.core.tools.builtin.openclaw_tool", } @@ -340,18 +341,18 @@ class ToolService: return {"success": False, "message": f"测试失败: {str(e)}"} def ensure_builtin_tools_initialized(self, tenant_id: uuid.UUID): - """确保内置工具已初始化""" - existing = self.tool_repo.exists_builtin_for_tenant(self.db, tenant_id) - - if existing: + """确保内置工具已初始化(支持增量补充新工具)""" + builtin_config = self._load_builtin_config() + if not builtin_config: return - # 从配置文件加载内置工具定义 - builtin_config = self._load_builtin_config() + existing_classes = self.builtin_repo.get_existing_tool_classes(self.db, tenant_id) + added = False for tool_key, tool_info in builtin_config.items(): + if tool_info['tool_class'] in existing_classes: + continue try: - # 创建工具配置 initial_status = self._determine_initial_status(tool_info) tool_config = ToolConfig( name=tool_info['name'], @@ -367,7 +368,6 @@ class ToolService: self.db.add(tool_config) self.db.flush() - # 创建内置工具配置 builtin_config_obj = BuiltinToolConfig( id=tool_config.id, tool_class=tool_info['tool_class'], @@ -375,12 +375,14 @@ class ToolService: requires_config=tool_info.get('requires_config', False) ) self.db.add(builtin_config_obj) + added = True except Exception as e: logger.error(f"初始化内置工具失败: {tool_key}, {e}") - self.db.commit() - logger.info(f"租户 {tenant_id} 内置工具初始化完成") + if added: + self.db.commit() + logger.info(f"租户 {tenant_id} 内置工具增量初始化完成") async def get_tool_methods(self, tool_id: str, tenant_id: uuid.UUID) -> Optional[List[Dict[str, Any]]]: """获取工具的所有方法 @@ -458,6 +460,9 @@ class ToolService: # 对于json_tool,根据操作类型返回相关参数 elif hasattr(tool_instance, 'name') and tool_instance.name == 'json_tool': return self._get_json_tool_params(operation) + # 对于openclaw_tool,根据操作类型返回不同描述的参数 + elif hasattr(tool_instance, 'name') and tool_instance.name == 'openclaw_tool': + return self._get_openclaw_tool_params(operation) # 其他工具的默认处理:返回除operation外的所有参数 return [{ @@ -710,6 +715,65 @@ class ToolService: return base_params + @staticmethod + def _get_openclaw_tool_params(operation: str) -> List[Dict[str, Any]]: + """获取 openclaw_tool 特定操作的参数""" + if operation == "print_task": + return [ + { + "name": "message", + "type": "string", + "description": "发送给 OpenClaw 的打印任务描述,将用户的原始消息原封不动地传递给 OpenClaw,禁止改写、补充或润色用户的原文", + "required": True + }, + { + "name": "image_url", + "type": "string", + "description": "可选,附带的设计图片或参考图,OpenClaw 可据此生成 3D 模型", + "required": False + } + ] + elif operation == "device_query": + return [ + { + "name": "message", + "type": "string", + "description": "发送给 OpenClaw 的设备查询指令", + "required": True + } + ] + elif operation == "image_understand": + return [ + { + "name": "message", + "type": "string", + "description": "发送给 OpenClaw 的图片理解任务,应描述需要对图片做什么(如描述内容、提取文字、分析信息)", + "required": True + }, + { + "name": "image_url", + "type": "string", + "description": "要分析的图片 URL 或 base64 data URI", + "required": False + } + ] + else: + # general 及其他 + return [ + { + "name": "message", + "type": "string", + "description": "发送给 OpenClaw Agent 的任务描述,应包含完整的任务需求", + "required": True + }, + { + "name": "image_url", + "type": "string", + "description": "可选,附带的图片 URL 或 base64 data URI", + "required": False + } + ] + async def _get_custom_tool_methods(self, config: ToolConfig) -> List[Dict[str, Any]]: """获取自定义工具的方法""" custom_config = self.custom_repo.find_by_tool_id(self.db, config.id) diff --git a/api/app/services/user_memory_service.py b/api/app/services/user_memory_service.py index ab51d922..9389ecfa 100644 --- a/api/app/services/user_memory_service.py +++ b/api/app/services/user_memory_service.py @@ -14,6 +14,7 @@ from pydantic import BaseModel, Field from sqlalchemy.orm import Session from app.core.logging_config import get_logger +from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES from app.core.memory.utils.llm.llm_utils import MemoryClientFactory from app.db import get_db_context from app.repositories.conversation_repository import ConversationRepository @@ -21,7 +22,7 @@ from app.repositories.end_user_repository import EndUserRepository from app.repositories.neo4j.cypher_queries import Graph_Node_query from app.repositories.neo4j.neo4j_connector import Neo4jConnector from app.schemas.memory_episodic_schema import EmotionSubject, EmotionType, type_mapping -from app.services.memory_base_service import MemoryBaseService +from app.services.memory_base_service import MemoryBaseService, MIN_MEMORY_SUMMARY_COUNT from app.services.memory_config_service import MemoryConfigService from app.services.memory_perceptual_service import MemoryPerceptualService from app.services.memory_short_service import ShortService @@ -473,7 +474,7 @@ class UserMemoryService: allowed_fields = {'other_name', 'aliases', 'meta_data'} # 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中 - _user_placeholder_names = {'用户', '我', 'User', 'I'} + _user_placeholder_names = _USER_PLACEHOLDER_NAMES # 过滤 other_name:不允许设置为占位名称 if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names: @@ -1500,7 +1501,7 @@ async def analytics_memory_types( 2. 工作记忆 (WORKING_MEMORY) = 会话数量(通过 ConversationRepository.get_conversation_by_user_id 获取) 3. 短期记忆 (SHORT_TERM_MEMORY) = /short_term 接口返回的问答对数量 4. 显性记忆 (EXPLICIT_MEMORY) = 情景记忆 + 语义记忆(通过 MemoryBaseService.get_explicit_memory_count 获取) - 5. 隐性记忆 (IMPLICIT_MEMORY) = Statement 节点数量的三分之一 + 5. 隐性记忆 (IMPLICIT_MEMORY) = MemorySummary 节点数量(需 >= MIN_MEMORY_SUMMARY_COUNT 才显示,否则为 0) 6. 情绪记忆 (EMOTIONAL_MEMORY) = 情绪标签统计总数(通过 MemoryBaseService.get_emotional_memory_count 获取) 7. 情景记忆 (EPISODIC_MEMORY) = memory_summary(通过 MemoryBaseService.get_episodic_memory_count 获取) 8. 遗忘记忆 (FORGET_MEMORY) = 激活值低于阈值的节点数(通过 MemoryBaseService.get_forget_memory_count 获取) @@ -1557,23 +1558,15 @@ async def analytics_memory_types( logger.warning(f"获取会话数量失败,工作记忆数量设为0: {str(e)}") work_count = 0 - # 获取隐性记忆数量(基于 Statement 节点数量的三分之一) + # 获取隐性记忆数量(基于有关联关系的 MemorySummary 节点数量,需 >= MIN_MEMORY_SUMMARY_COUNT 才计入) implicit_count = 0 if end_user_id: try: - # 查询 Statement 节点数量 - query = """ - MATCH (n:Statement) - WHERE n.end_user_id = $end_user_id - RETURN count(n) as count - """ - result = await _neo4j_connector.execute_query(query, end_user_id=end_user_id) - statement_count = result[0]["count"] if result and len(result) > 0 else 0 - # 取三分之一作为隐性记忆数量 - implicit_count = round(statement_count / 3) - logger.debug(f"隐性记忆数量(Statement数量的1/3): {implicit_count} (Statement总数={statement_count}, end_user_id={end_user_id})") + memory_summary_count = await base_service.get_valid_memory_summary_count(end_user_id) + implicit_count = memory_summary_count if memory_summary_count >= MIN_MEMORY_SUMMARY_COUNT else 0 + logger.debug(f"隐性记忆数量(有效MemorySummary节点数): {implicit_count} (有效MemorySummary总数={memory_summary_count}, end_user_id={end_user_id})") except Exception as e: - logger.warning(f"获取Statement数量失败,隐性记忆数量设为0: {str(e)}") + logger.warning(f"获取MemorySummary数量失败,隐性记忆数量设为0: {str(e)}") implicit_count = 0 # 原有的基于行为习惯的统计方式(已注释) @@ -1639,7 +1632,7 @@ async def analytics_memory_types( "WORKING_MEMORY": work_count, # 工作记忆(基于会话数量) "SHORT_TERM_MEMORY": short_term_count, # 短期记忆(基于问答对数量) "EXPLICIT_MEMORY": explicit_count, # 显性记忆(情景记忆 + 语义记忆) - "IMPLICIT_MEMORY": implicit_count, # 隐性记忆(Statement数量的1/3) + "IMPLICIT_MEMORY": implicit_count, # 隐性记忆(MemorySummary节点数,需>=MIN_MEMORY_SUMMARY_COUNT) "EMOTIONAL_MEMORY": emotion_count, # 情绪记忆(使用情绪标签统计) "EPISODIC_MEMORY": episodic_count, # 情景记忆 "FORGET_MEMORY": forget_count # 遗忘记忆(激活值低于阈值) diff --git a/api/app/services/user_service.py b/api/app/services/user_service.py index 3122d282..43a58c5f 100644 --- a/api/app/services/user_service.py +++ b/api/app/services/user_service.py @@ -285,7 +285,7 @@ def activate_user(db: Session, user_id_to_activate: uuid.UUID, current_user: Use try: # 查找用户 business_logger.debug(f"查找待激活用户: {user_id_to_activate}") - db_user = user_repository.get_user_by_id(db, user_id=user_id_to_activate) + db_user = user_repository.get_user_by_id_regardless_active(db, user_id=user_id_to_activate) if not db_user: business_logger.warning(f"用户不存在: {user_id_to_activate}") raise BusinessException("用户不存在", code=BizCode.USER_NOT_FOUND) diff --git a/api/app/services/workflow_import_service.py b/api/app/services/workflow_import_service.py index fd8f25f3..5a766a72 100644 --- a/api/app/services/workflow_import_service.py +++ b/api/app/services/workflow_import_service.py @@ -69,6 +69,7 @@ class WorkflowImportService: edges=workflow_config.edges, nodes=workflow_config.nodes, variables=workflow_config.variables, + features=workflow_config.features, warnings=workflow_config.warnings, errors=workflow_config.errors ) @@ -95,7 +96,8 @@ class WorkflowImportService: workflow_config=WorkflowConfigCreate( nodes=config["nodes"], edges=config["edges"], - variables=config["variables"] + variables=config["variables"], + features=config.get("features", {}) ) ) ) diff --git a/api/app/tasks.py b/api/app/tasks.py index f918743c..9afb6225 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -1,4 +1,5 @@ import asyncio +import json import os import re import shutil @@ -1001,7 +1002,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID): except Exception as e: print(f"\n\nError during fetch feishu: {e}") case _: # General - print(f"General: No synchronization needed\n") + print("General: No synchronization needed\n") result = f"sync knowledge '{db_knowledge.name}' processed successfully." return result @@ -1510,6 +1511,7 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: "status": "SUCCESS", "total_num": total_num, "end_user_count": len(end_users), + "end_user_details": end_user_details, "memory_increment_id": str(memory_increment.id), "created_at": memory_increment.created_at.isoformat(), }) @@ -2602,35 +2604,34 @@ def init_interest_distribution_for_users(self, end_user_ids: List[str]) -> Dict[ service = MemoryAgentService() - with get_db_context() as db: - for end_user_id in end_user_ids: - # 存在性检查:缓存有数据则跳过 - cached = await InterestMemoryCache.get_interest_distribution( + for end_user_id in end_user_ids: + # 存在性检查:缓存有数据则跳过 + cached = await InterestMemoryCache.get_interest_distribution( + end_user_id=end_user_id, + language=language, + ) + if cached is not None: + skipped += 1 + continue + + logger.info(f"用户 {end_user_id} 无兴趣分布缓存,开始生成") + try: + result = await service.get_interest_distribution_by_user( end_user_id=end_user_id, + limit=5, language=language, ) - if cached is not None: - skipped += 1 - continue - - logger.info(f"用户 {end_user_id} 无兴趣分布缓存,开始生成") - try: - result = await service.get_interest_distribution_by_user( - end_user_id=end_user_id, - limit=5, - language=language, - ) - await InterestMemoryCache.set_interest_distribution( - end_user_id=end_user_id, - language=language, - data=result, - expire=INTEREST_CACHE_EXPIRE, - ) - initialized += 1 - logger.info(f"用户 {end_user_id} 兴趣分布缓存生成成功") - except Exception as e: - failed += 1 - logger.error(f"用户 {end_user_id} 兴趣分布缓存生成失败: {e}") + await InterestMemoryCache.set_interest_distribution( + end_user_id=end_user_id, + language=language, + data=result, + expire=INTEREST_CACHE_EXPIRE, + ) + initialized += 1 + logger.info(f"用户 {end_user_id} 兴趣分布缓存生成成功") + except Exception as e: + failed += 1 + logger.error(f"用户 {end_user_id} 兴趣分布缓存生成失败: {e}") logger.info(f"兴趣分布按需初始化完成: 初始化={initialized}, 跳过={skipped}, 失败={failed}") return { @@ -2914,4 +2915,270 @@ def init_community_clustering_for_users(self, end_user_ids: List[str], workspace } +# ─── User Metadata Extraction Task ─────────────────────────────────────────── + + +def _update_timestamps(existing: dict, new: dict, updated_at: dict, now: str, prefix: str = "") -> None: + """对比新旧元数据,更新变更字段的 _updated_at 时间戳。""" + for key, new_val in new.items(): + if key == "_updated_at": + continue + path = f"{prefix}.{key}" if prefix else key + old_val = existing.get(key) + + if isinstance(new_val, dict) and isinstance(old_val, dict): + _update_timestamps(old_val, new_val, updated_at, now, prefix=path) + elif old_val != new_val: + updated_at[path] = now + +@celery_app.task( + bind=True, + name='app.tasks.extract_user_metadata', + ignore_result=False, + max_retries=0, + acks_late=True, + time_limit=300, + soft_time_limit=240, +) +def extract_user_metadata_task( + self, + end_user_id: str, + statements: List[str], + config_id: Optional[str] = None, + language: str = "zh", +) -> Dict[str, Any]: + """异步提取用户元数据并写入数据库。 + + 在去重消歧完成后由编排器触发,使用独立 LLM 调用提取元数据。 + LLM 配置优先使用 config_id 对应的应用配置,失败时回退到工作空间默认配置。 + + Args: + end_user_id: 终端用户 ID + statements: 用户相关的 statement 文本列表 + config_id: 应用配置 ID(可选) + language: 语言类型 ("zh" 中文, "en" 英文) + + Returns: + 包含任务执行结果的字典 + """ + start_time = time.time() + logger.info( + f"[CELERY METADATA] Starting metadata extraction - end_user_id={end_user_id}, " + f"statements_count={len(statements)}, config_id={config_id}, language={language}" + ) + + async def _run() -> Dict[str, Any]: + from app.core.memory.storage_services.extraction_engine.knowledge_extraction.metadata_extractor import MetadataExtractor + from app.repositories.end_user_info_repository import EndUserInfoRepository + from app.repositories.end_user_repository import EndUserRepository + from app.services.memory_config_service import MemoryConfigService + + # 1. 获取 LLM 配置(应用配置 → 工作空间配置兜底)并创建 LLM client + with get_db_context() as db: + end_user_uuid = uuid.UUID(end_user_id) + + # 获取 workspace_id from end_user + end_user = EndUserRepository(db).get_by_id(end_user_uuid) + if not end_user: + return {"status": "FAILURE", "error": f"End user not found: {end_user_id}"} + + workspace_id = end_user.workspace_id + + config_service = MemoryConfigService(db) + memory_config = config_service.get_config_with_fallback( + memory_config_id=uuid.UUID(config_id) if config_id else None, + workspace_id=workspace_id, + ) + if not memory_config: + return {"status": "FAILURE", "error": "No LLM config available (app + workspace fallback failed)"} + + # 2. 创建 LLM client + from app.core.memory.utils.llm.llm_utils import MemoryClientFactory + factory = MemoryClientFactory(db) + if not memory_config.llm_id: + return {"status": "FAILURE", "error": "Memory config has no LLM model configured"} + llm_client = factory.get_llm_client(memory_config.llm_id) + + # 2.5 读取已有元数据和别名,传给 extractor 作为上下文 + existing_metadata = None + existing_aliases = None + try: + info = EndUserInfoRepository(db).get_by_end_user_id(end_user_uuid) + if info: + if info.meta_data: + existing_metadata = info.meta_data + existing_aliases = info.aliases if info.aliases else [] + logger.info(f"[CELERY METADATA] 已读取已有元数据和别名(aliases={existing_aliases})") + except Exception as e: + logger.warning(f"[CELERY METADATA] 读取已有数据失败(继续无上下文提取): {e}") + + # 3. 提取元数据和别名(传入已有数据作为上下文) + extractor = MetadataExtractor(llm_client=llm_client, language=language) + extract_result = await extractor.extract_metadata( + statements, + existing_metadata=existing_metadata, + existing_aliases=existing_aliases, + ) + + if not extract_result: + logger.info(f"[CELERY METADATA] No metadata extracted for end_user_id={end_user_id}") + return {"status": "SUCCESS", "result": "no_metadata_extracted"} + + user_metadata, aliases_to_add, aliases_to_remove = extract_result + logger.info(f"[CELERY METADATA] LLM 别名新增: {aliases_to_add}, 移除: {aliases_to_remove}") + + # 4. 清洗元数据、覆盖写入元数据和别名 + def clean_metadata(raw: dict) -> dict: + """递归移除空字符串、空列表、空字典。""" + result = {} + for k, v in raw.items(): + if v == "" or v == []: + continue + if isinstance(v, dict): + cleaned = clean_metadata(v) + if cleaned: + result[k] = cleaned + else: + result[k] = v + return result + + raw_dict = user_metadata.model_dump(exclude_none=True) if user_metadata else {} + logger.info(f"[CELERY METADATA] LLM 输出完整元数据: {json.dumps(raw_dict, ensure_ascii=False)}") + + cleaned = clean_metadata(raw_dict) if raw_dict else {} + logger.info(f"[CELERY METADATA] 清洗后元数据: {json.dumps(cleaned, ensure_ascii=False)}") + + from datetime import datetime as dt, timezone as tz + now = dt.now(tz.utc).isoformat() + + # 过滤别名中的占位名称,执行增量增删 + _PLACEHOLDER_NAMES = {"用户", "我", "user", "i"} + + def _filter_aliases(aliases_list): + seen = set() + result = [] + for a in aliases_list: + a_stripped = a.strip() + if a_stripped and a_stripped.lower() not in _PLACEHOLDER_NAMES and a_stripped.lower() not in seen: + result.append(a_stripped) + seen.add(a_stripped.lower()) + return result + + filtered_add = _filter_aliases(aliases_to_add) + filtered_remove = _filter_aliases(aliases_to_remove) + remove_lower = {a.lower() for a in filtered_remove} + + with get_db_context() as db: + end_user_uuid = uuid.UUID(end_user_id) + info = EndUserInfoRepository(db).get_by_end_user_id(end_user_uuid) + end_user = EndUserRepository(db).get_by_id(end_user_uuid) + + if info: + # 元数据覆盖写入 + if cleaned: + existing_meta = info.meta_data if info.meta_data else {} + updated_at = dict(existing_meta.get("_updated_at", {})) + _update_timestamps(existing_meta, cleaned, updated_at, now) + final = dict(cleaned) + final["_updated_at"] = updated_at + info.meta_data = final + logger.info("[CELERY METADATA] 覆盖写入元数据") + + # 别名增量增删:(已有 - remove) + add + old_aliases = info.aliases if info.aliases else [] + # 先移除 + merged = [a for a in old_aliases if a.strip().lower() not in remove_lower] + # 再追加(去重) + existing_lower = {a.strip().lower() for a in merged} + for a in filtered_add: + if a.lower() not in existing_lower: + merged.append(a) + existing_lower.add(a.lower()) + + if merged != old_aliases: + info.aliases = merged + # other_name 更新逻辑 + if merged and ( + not info.other_name + or info.other_name.strip().lower() in _PLACEHOLDER_NAMES + or info.other_name.strip().lower() in remove_lower + ): + info.other_name = merged[0] + if end_user and merged and ( + not end_user.other_name + or end_user.other_name.strip().lower() in _PLACEHOLDER_NAMES + or end_user.other_name.strip().lower() in remove_lower + ): + end_user.other_name = merged[0] + logger.info( + f"[CELERY METADATA] 别名增量更新: {old_aliases} - {filtered_remove} + {filtered_add} → {merged}" + ) + else: + # 没有 end_user_info 记录,创建一条 + from app.models.end_user_info_model import EndUserInfo + initial_aliases = filtered_add # 新记录只有 add,没有 remove + first_alias = initial_aliases[0] if initial_aliases else "" + if first_alias or cleaned: + new_info = EndUserInfo( + end_user_id=end_user_uuid, + other_name=first_alias or "", + aliases=initial_aliases, + meta_data=cleaned if cleaned else None, + ) + db.add(new_info) + if end_user and first_alias and ( + not end_user.other_name or end_user.other_name.strip().lower() in _PLACEHOLDER_NAMES + ): + end_user.other_name = first_alias + logger.info(f"[CELERY METADATA] 创建 end_user_info: other_name={first_alias}, aliases={initial_aliases}") + else: + return {"status": "SUCCESS", "result": "no_data_to_write"} + + db.commit() + + # 同步 PgSQL aliases 到 Neo4j 用户实体(PgSQL 为权威源) + final_aliases = info.aliases if info else initial_aliases + if final_aliases: + try: + from app.repositories.neo4j.neo4j_connector import Neo4jConnector + neo4j_connector = Neo4jConnector() + cypher = """ + MATCH (e:ExtractedEntity) + WHERE e.end_user_id = $end_user_id AND e.name IN ['用户', '我', 'User', 'I'] + SET e.aliases = $aliases + """ + await neo4j_connector.execute_query( + cypher, end_user_id=end_user_id, aliases=final_aliases + ) + await neo4j_connector.close() + logger.info(f"[CELERY METADATA] Neo4j 用户实体 aliases 已同步: {final_aliases}") + except Exception as neo4j_err: + logger.warning(f"[CELERY METADATA] Neo4j aliases 同步失败(不影响主流程): {neo4j_err}") + + return {"status": "SUCCESS", "result": "metadata_and_aliases_written"} + + loop = None + try: + loop = set_asyncio_event_loop() + result = loop.run_until_complete(_run()) + elapsed = time.time() - start_time + result["elapsed_time"] = elapsed + result["task_id"] = self.request.id + logger.info(f"[CELERY METADATA] Task completed - elapsed={elapsed:.2f}s, result={result.get('result')}") + return result + + except Exception as e: + elapsed = time.time() - start_time + logger.error(f"[CELERY METADATA] Task failed - elapsed={elapsed:.2f}s, error={e}", exc_info=True) + return { + "status": "FAILURE", + "error": str(e), + "elapsed_time": elapsed, + "task_id": self.request.id, + } + finally: + if loop: + _shutdown_loop_gracefully(loop) + + # unused task \ No newline at end of file diff --git a/api/app/version_info.json b/api/app/version_info.json index b4f6976f..d07035e2 100644 --- a/api/app/version_info.json +++ b/api/app/version_info.json @@ -1,4 +1,36 @@ { + "v0.2.10": { + "introduction": { + "codeName": "炼剑", + "releaseDate": "2026-4-8", + "upgradePosition": "🐻 全面强化工作流引擎、引入 Agent 深度思考模式与多模态记忆读取,百炼成锋,剑指生产就绪", + "coreUpgrades": [ + "1. 工作流引擎增强
* 会话变量文件格式支持:支持文件类型值及本地/远程默认值配置
* 列表操作节点:新增专用列表操作节点
* 模板转换支持 HTML:扩展富内容渲染能力
* 表单返回与提交:工作流返回交互式表单,前端支持提交
* HTTP 节点 XML 响应:拓宽企业级 API 集成兼容性
* 开场白与文件引用:支持配置开场白及附件引用
* 模板转换三级变量:支持深层嵌套变量访问
* 节点连线添加按钮:连线处新增内联添加按钮", + "2. Agent 智能 🧠
* Agent 深度思考模式:支持更充分的推理以产出高质量回答
* 模型深度思考特性开关:模型级特性标识与应用级开关控制", + "3. 记忆系统升级 📚
* 用户记忆库分页:支持大规模记忆集合分页浏览
* RAG 用户记忆数据结构刷新:后端 API 数据结构重新设计
* 多模态记忆读取:支持检索图像、音频等非文本记忆
* 语义剪枝阈值提示文案:显示描述性区间标签", + "4. 前端与体验 🎨
* 技能工具删除状态展示:工具列表显示删除状态标识
* 仪表盘日环比数据:关键指标增加与昨日对比数据", + "5. 稳健性与缺陷修复 🔧
* 参数提取空值处理:优雅处理缺失数据
* Token 消耗展示优化:确保用量报告准确
* 模型参数负值修复:明确参数范围定义
* 应用共享删除同步:正确更新所有共享记录
* 记忆写入任务排序:按时间戳顺序执行
* 多模态模型缺失优雅处理:不再中断感知记忆写入
* 自定义工具 Number 变量传递:解决类型转换问题
* 集群子代理保存后显示:修复未反显问题
* 记忆开启后流式输出修复:解决字符串序列化问题", + "
", + "v0.2.10 标志着平台向生产成熟度迈出的重要一步。深度思考、交互式表单工作流与多模态记忆的结合展现了平台从记忆存储向综合认知基础设施的演进。我们期待 4 月 17 日 v0.3.0 发布会,届时将带来更深层的 Agent 推理能力、多智能体协作功能及记忆智能管线的进一步优化。剑已炼成,只待出鞘。", + "MemoryBear — 百炼成锋 🐻✨" + ] + }, + "introduction_en": { + "codeName": "LianJian", + "releaseDate": "2026-4-8", + "upgradePosition": "🐻 Comprehensive workflow engine enhancements, Agent deep thinking mode, and multimodal memory reading — forging the blade for production readiness", + "coreUpgrades": [ + "1. Workflow Engine Enhancements
* Session Variable File Support: File-type values with local/remote defaults
* List Operation Node: Dedicated node for array manipulation
* Template Conversion HTML Support: Rich-content rendering
* Form Return & Submission: Interactive forms in workflow conversations
* HTTP Node XML Response: Enterprise API integration compatibility
* Opening Remarks & File References: Configurable conversation openers
* Template Conversion Three-Level Variables: Deep nested variable access
* Node Connection Add Button: Inline add button on connections", + "2. Agent Intelligence 🧠
* Agent Deep Thinking Mode: Thorough reasoning for complex queries
* Model Deep Thinking Feature Toggle: Model-level flag with per-app control", + "3. Memory System Upgrades 📚
* User Memory Pagination: Paginated browsing for large collections
* RAG User Memory Data Structure Refresh: Redesigned backend API contracts
* Multimodal Memory Reading: Retrieval of image, audio, and non-text memory
* Semantic Pruning Threshold Hints: Descriptive range labels for configuration", + "4. Frontend & Usability 🎨
* Skill Tool Deletion Status Display: Deletion indicators in tool list
* Dashboard Day-over-Day Comparison: Key metrics with yesterday comparison", + "5. Robustness & Bug Fixes 🔧
* Parameter Extraction Null Handling: Graceful handling of missing data
* Token Consumption Display Optimization: Accurate usage reporting
* Model Parameter Negative Value Fix: Clear parameter range definitions
* App Share Deletion Sync: Correct update of all share records
* Memory Write Task Ordering: Chronological execution per end_user
* Multimodal Model Missing Graceful Handling: No more interrupted writes
* Custom Tool Number Variable Pass-through: Type coercion fix
* Cluster Sub-Agent Display After Save: Fixed UI reflection
* Memory-Enabled Streaming Output Fix: String serialization resolved", + "
", + "v0.2.10 marks a significant step toward production maturity. The combination of deep thinking, interactive form workflows, and multimodal memory demonstrates the platform's evolution from memory storage to comprehensive cognitive infrastructure. We look forward to the v0.3.0 launch on April 17, bringing deeper agent reasoning, multi-agent collaboration, and further memory intelligence refinements. The blade has been forged — now it's time to wield it.", + "MemoryBear — Forging the Blade 🐻✨" + ] + } + }, "v0.2.8": { "introduction": { "codeName": "景玉", diff --git a/web/src/assets/images/workflow/checkList.svg b/web/src/assets/images/workflow/checkList.svg new file mode 100644 index 00000000..169743dc --- /dev/null +++ b/web/src/assets/images/workflow/checkList.svg @@ -0,0 +1,16 @@ + + + 参与 + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/workflow/features.svg b/web/src/assets/images/workflow/features.svg index 2ff48584..bd31b107 100644 --- a/web/src/assets/images/workflow/features.svg +++ b/web/src/assets/images/workflow/features.svg @@ -1,12 +1,14 @@ 参与 - - + + - - + + + + diff --git a/web/src/components/Chat/ChatContent.tsx b/web/src/components/Chat/ChatContent.tsx index b06e1e88..5c722e45 100644 --- a/web/src/components/Chat/ChatContent.tsx +++ b/web/src/components/Chat/ChatContent.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2025-12-10 16:46:17 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-08 11:23:18 + * @Last Modified time: 2026-04-10 18:46:57 */ import { type FC, useRef, useEffect, useState } from 'react' import clsx from 'clsx' @@ -217,7 +217,7 @@ const ChatContent: FC = ({ {/* Message bubble */}
= ({source = 'manage'}) => { { key: '1', icon: - {/[\u4e00-\u9fa5]/.test(user.username) ? user.username.slice(0, 2) : user.username?.[0]} + {/[\u4e00-\u9fa5]/.test(user.username) ? user.username.slice(-2) : user.username[0]} , label: (<>
{user.username}
@@ -135,28 +135,30 @@ const AppHeader: FC<{source?: 'space' | 'manage';}> = ({source = 'manage'}) => { * - Disables navigation for the last breadcrumb item */ const formatBreadcrumbNames = () => { - return breadcrumbs.filter(item => item.type !== 'group').map((menu, index) => { + const filtered = breadcrumbs.filter(item => item.type !== 'group'); + return filtered.map((menu, index) => { + const label = menu.i18nKey ? t(menu.i18nKey) : menu.label; + const isLast = index === filtered.length - 1; const item: any = { - title: menu.i18nKey ? t(menu.i18nKey) : menu.label, + title: ( + + {label} + + ), }; - // If it's the last item, don't set path - if (index === breadcrumbs.length - 1) { - return item; + if (!isLast) { + if ((menu as any).onClick) { + item.onClick = (e: React.MouseEvent) => { + e.preventDefault(); + (menu as any).onClick(e); + }; + item.href = '#'; + } else if (menu.path && menu.path !== '#') { + item.path = menu.path; + } } - - // If has custom onClick, use onClick and set href to '#' to show pointer cursor - if ((menu as any).onClick) { - item.onClick = (e: React.MouseEvent) => { - e.preventDefault(); - (menu as any).onClick(e); - }; - item.href = '#'; - } else if (menu.path && menu.path !== '#') { - // Only set path when path is not '#' - item.path = menu.path; - } - + return item; }); } @@ -180,7 +182,7 @@ const AppHeader: FC<{source?: 'space' | 'manage';}> = ({source = 'manage'}) => { > - {/[\u4e00-\u9fa5]/.test(user.username) ? user.username.slice(user.username.length, -2) : user.username[0]} + {/[\u4e00-\u9fa5]/.test(user.username) ? user.username.slice(-2) : user.username[0]} {user.username}
+ setCheckResults: (appId: string, results: NodeCheckResult[]) => void + getCheckResults: (appId: string) => NodeCheckResult[] +} + +export const useWorkflowStore = create((set, get) => ({ + checkResults: {}, + setCheckResults: (appId, results) => + set(state => ({ checkResults: { ...state.checkResults, [appId]: results } })), + getCheckResults: (appId) => get().checkResults[appId] ?? [], +})) diff --git a/web/src/styles/index.css b/web/src/styles/index.css index 7c8abb4a..66051085 100644 --- a/web/src/styles/index.css +++ b/web/src/styles/index.css @@ -420,4 +420,7 @@ body { .ant-picker-outlined:focus, .ant-picker-outlined:focus-within { box-shadow: none; +} +.ͼ1.cm-focused { + outline: none; } \ No newline at end of file diff --git a/web/src/views/ApplicationConfig/Api.tsx b/web/src/views/ApplicationConfig/Api.tsx index 14e478dd..4fa19c3e 100644 --- a/web/src/views/ApplicationConfig/Api.tsx +++ b/web/src/views/ApplicationConfig/Api.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 16:29:29 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-26 15:31:36 + * @Last Modified time: 2026-04-10 18:09:56 */ import { type FC, useState, useRef, useEffect } from 'react'; import clsx from 'clsx'; @@ -18,6 +18,7 @@ import ApiKeyConfigModal from './components/ApiKeyConfigModal'; import { getApiKeyList, getApiKeyStats, deleteApiKey } from '@/api/apiKey'; import { maskApiKeys } from '@/utils/apiKeyReplacer' import RbCard from '@/components/RbCard/Card'; +import CodeMirrorEditor from '@/components/CodeMirrorEditor' /** * API configuration page component @@ -155,6 +156,21 @@ const Api: FC<{ application: Application | null }> = ({ application }) => { {t('common.copy')} + +
+ {t('application.body')} +
+ + + + + + ( diff --git a/web/src/views/ApplicationConfig/ReleasePage.tsx b/web/src/views/ApplicationConfig/ReleasePage.tsx index 3e516c88..ba573795 100644 --- a/web/src/views/ApplicationConfig/ReleasePage.tsx +++ b/web/src/views/ApplicationConfig/ReleasePage.tsx @@ -2,12 +2,13 @@ * @Author: ZhaoYing * @Date: 2026-02-03 16:29:41 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-26 15:24:41 + * @Last Modified time: 2026-04-10 17:02:07 */ import { type FC, useState, useEffect, useRef } from 'react'; import { useTranslation } from 'react-i18next'; import clsx from 'clsx'; import { Space, Input, Form, App, Flex } from 'antd'; +import copy from 'copy-to-clipboard'; import Tag, { type TagProps } from './components/Tag' import RbCard from '@/components/RbCard/Card' @@ -17,6 +18,7 @@ import ReleaseShareModal from './components/ReleaseShareModal' import AppSharingModal from './components/AppSharingModal' import type { Release, ReleaseModalRef, ReleaseShareModalRef, AppSharingModalRef } from './types' import type { Application } from '@/views/ApplicationManagement/types' +import { useWorkflowStore } from '@/store/workflow' import Empty from '@/components/Empty' import { formatDateTime } from '@/utils/format'; import Markdown from '@/components/Markdown' @@ -40,6 +42,7 @@ const heightClass = 'rb:max-h-[calc(100vh-140px)]' const ReleasePage: FC<{data: Application; refresh: () => void}> = ({data, refresh}) => { const { t } = useTranslation(); const { message } = App.useApp() + const { getCheckResults } = useWorkflowStore() const releaseModalRef = useRef(null) const releaseShareModalRef = useRef(null) const appSharingModalRef = useRef(null) @@ -75,6 +78,10 @@ const ReleasePage: FC<{data: Application; refresh: () => void}> = ({data, refres if (!selectedVersion) return appExport(data.id, data.name, { release_id: selectedVersion.id}) } + const handleCopy = (id: string) => { + copy(id) + message.success(t('common.copySuccess')) + } return (
@@ -102,7 +109,7 @@ const ReleasePage: FC<{data: Application; refresh: () => void}> = ({data, refres } } className={clsx("rb:hover:shadow-[0px_2px_8px_0px_rgba(0,0,0,0.2)]! rb:cursor-pointer rb:bg-white", { - 'rb:border-[#171719]!': version.id === selectedVersion.id, + 'rb:border! rb:border-[#171719]!': version.id === selectedVersion.id, 'rb:border-[#DFE4ED] ': version.id !== selectedVersion.id })} headerType="borderless" @@ -140,13 +147,30 @@ const ReleasePage: FC<{data: Application; refresh: () => void}> = ({data, refres releaseShareModalRef.current?.handleOpen()}>{t('application.share')} {data?.type !== 'multi_agent' && appSharingModalRef.current?.handleOpen()}>{t('application.sharing')}} } - releaseModalRef.current?.handleOpen()}>{t('application.release')} + { + if (data?.type === 'workflow') { + const errors = getCheckResults(data.id) + if (errors.length) { + message.error(t('workflow.checkListHasErrors')) + return + } + } + releaseModalRef.current?.handleOpen() + }}>{t('application.release')} {selectedVersion && {t('application.VersionInformation')} + + (ID: {selectedVersion.id} +
handleCopy(selectedVersion.id)} + >
+ ) +
+
} headerType="borderless" >
diff --git a/web/src/views/ApplicationConfig/components/Chat.tsx b/web/src/views/ApplicationConfig/components/Chat.tsx index c2abf17d..eb3a9ea0 100644 --- a/web/src/views/ApplicationConfig/components/Chat.tsx +++ b/web/src/views/ApplicationConfig/components/Chat.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 16:27:39 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-31 15:02:07 + * @Last Modified time: 2026-04-10 18:51:43 */ /** * Chat debugging component for application testing @@ -291,7 +291,6 @@ const Chat: FC = ({ addAssistantMessage() const handleStreamMessage = (data: SSEMessage[]) => { - setCompareLoading(false) data.map(item => { const { model_config_id, conversation_id, content, message_length, audio_url, citations } = item.data as { @@ -306,12 +305,21 @@ const Chat: FC = ({ switch (item.event) { case 'model_reasoning': + if (compareLoading) { + setCompareLoading(false) + } updateAssistantReasoningMessage(content, model_config_id, conversation_id) break; case 'model_message': + if (compareLoading) { + setCompareLoading(false) + } updateAssistantMessage(content, model_config_id, conversation_id, audio_url) break; case 'model_end': + if (compareLoading) { + setCompareLoading(false) + } const idToPoll = `${model_config_id}_${audio_url}` if (audio_url && !audioStatusMap[idToPoll]) { setAudioStatusMap(prev => ({ @@ -352,6 +360,9 @@ const Chat: FC = ({ updateErrorAssistantMessage(message_length, model_config_id) break; case 'compare_end': + if (compareLoading) { + setCompareLoading(false) + } setLoading(false); break; } @@ -473,7 +484,6 @@ const Chat: FC = ({ addClusterAssistantMessage() const handleStreamMessage = (data: SSEMessage[]) => { - setCompareLoading(false) data.map(item => { const { conversation_id, content, message_length } = item.data as { conversation_id: string, content: string, message_length: number }; @@ -485,15 +495,24 @@ const Chat: FC = ({ } break case 'message': + if (compareLoading) { + setCompareLoading(false) + } updateClusterAssistantMessage(content) if (conversation_id && conversationId !== conversation_id) { setConversationId(conversation_id); } break; case 'model_end': + if (compareLoading) { + setCompareLoading(false) + } updateClusterErrorAssistantMessage(message_length) break; case 'compare_end': + if (compareLoading) { + setCompareLoading(false) + } setLoading(false); break; } diff --git a/web/src/views/ApplicationConfig/components/ConfigHeader.tsx b/web/src/views/ApplicationConfig/components/ConfigHeader.tsx index d77ae27c..d38a657a 100644 --- a/web/src/views/ApplicationConfig/components/ConfigHeader.tsx +++ b/web/src/views/ApplicationConfig/components/ConfigHeader.tsx @@ -4,7 +4,7 @@ * @Last Modified by: ZhaoYing * @Last Modified time: 2026-04-07 16:28:33 */ -import { type FC, useRef, useMemo, useCallback } from 'react'; +import { type FC, useRef, useMemo } from 'react'; import { useNavigate, useParams } from 'react-router-dom'; import { Tabs, Dropdown, Flex, Popover } from 'antd'; import type { MenuProps } from 'antd'; @@ -18,6 +18,7 @@ import type { CopyModalRef, AgentRef, ClusterRef, WorkflowRef, FeaturesConfigFor import { deleteApplication, appExport } from '@/api/application' import CopyModal from './CopyModal' import PageHeader from '@/components/Layout/PageHeader' +import CheckList from '@/views/Workflow/components/CheckList' /** * Tab keys for application configuration @@ -206,6 +207,7 @@ const ConfigHeader: FC = ({ } extra={application?.type === 'workflow' && source !== 'sharing' && activeTab === 'arrangement' ? +
(); - console.log('chatVariables', chatVariables) - const handleClose = () => { setVisible(false); form.resetFields(); diff --git a/web/src/views/ApplicationConfig/types.ts b/web/src/views/ApplicationConfig/types.ts index 61d8d9be..83c11791 100644 --- a/web/src/views/ApplicationConfig/types.ts +++ b/web/src/views/ApplicationConfig/types.ts @@ -12,6 +12,7 @@ import type { ChatVariable, GraphRef, WorkflowConfig } from '@/views/Workflow/ty import type { ApiKey } from '@/views/ApiKeyManagement/types' import type { SkillConfigForm } from './components/Skill/types' import type { Capability } from '@/views/ModelManagement/types' +import { Node } from '@antv/x6'; /** * Model configuration parameters @@ -170,6 +171,7 @@ export interface WorkflowRef { features: WorkflowConfig['features']; handleFeaturesConfig?: () => void; handleSaveFeaturesConfig?: (value: FeaturesConfigForm) => void; + nodeClick: ({ node }: { node: Node }) => void; } /** diff --git a/web/src/views/ModelManagement/List.tsx b/web/src/views/ModelManagement/List.tsx index 10026aa5..7342ecd1 100644 --- a/web/src/views/ModelManagement/List.tsx +++ b/web/src/views/ModelManagement/List.tsx @@ -106,6 +106,10 @@ const ModelList = forwardRef void; handleEdit: (vo?: ModelListItem) => void; handleCloseConfig?: () => void; + query?: any; } /** * Model list detail drawer component */ -const ModelListDetail = forwardRef(({ refresh, handleEdit, handleCloseConfig }, ref) => { +const ModelListDetail = forwardRef(({ refresh, handleEdit, handleCloseConfig, query }, ref) => { const { t } = useTranslation(); const [open, setOpen] = useState(false); const [data, setData] = useState({} as ProviderModelItem) @@ -58,7 +59,8 @@ const ModelListDetail = forwardRef(({ if (!vo.provider) return getModelNewList({ - provider: vo.provider + provider: vo.provider, + ...query, }) .then(res => { const response = res as ProviderModelItem[] diff --git a/web/src/views/ToolManagement/Inner.tsx b/web/src/views/ToolManagement/Inner.tsx index b88428b0..67c3a6f5 100644 --- a/web/src/views/ToolManagement/Inner.tsx +++ b/web/src/views/ToolManagement/Inner.tsx @@ -101,13 +101,13 @@ const Inner: React.FC<{ getStatusTag: (status: string) => ReactNode; keyword?: s {InnerConfigData[item.config_data.tool_class].features?.slice(0, 2).map((type, i) => ( -
{type}
+
{t(`tool.${type}`)}
))}
{InnerConfigData[item.config_data.tool_class].features.length > 2 && ( {InnerConfigData[item.config_data.tool_class].features?.slice(2, InnerConfigData[item.config_data.tool_class].features.length).map((type, i) => ( -
{type}
+
{t(`tool.${type}`)}
))}
} color="white" placement="bottom" @@ -135,7 +135,7 @@ const Inner: React.FC<{ getStatusTag: (status: string) => ReactNode; keyword?: s {InnerConfigData[item.config_data.tool_class].eg} : -
{t('configStatus')}
+
{t('tool.configStatus')}
{t(`tool.${item.status}_desc`)} } diff --git a/web/src/views/ToolManagement/constant.ts b/web/src/views/ToolManagement/constant.ts index 6763a140..5641ed4d 100644 --- a/web/src/views/ToolManagement/constant.ts +++ b/web/src/views/ToolManagement/constant.ts @@ -186,5 +186,43 @@ export const InnerConfigData: Record = { 'multilingualSupport', 'highPrecisionRecognition' ], + }, + OpenClawTool: { + link: 'https://openclaw.ai/', + config: { + server_url: { + name: ['config', 'parameters', 'server_url'], + type: 'input', + desc: 'OpenClawTool_server_url_desc', + rules: [ + { required: true, message: 'common.pleaseEnter' } + ] + }, + api_key: { + name: ['config', 'parameters', 'api_key'], + type: 'input', + desc: 'OpenClawTool_api_key_desc', + rules: [ + { required: true, message: 'common.pleaseEnter' } + ] + }, + agent_id: { + name: ['config', 'parameters', 'agent_id'], + type: 'input', + desc: 'OpenClawTool_agent_id_desc', + defaultValue: 'main', + }, + OpenClawTool_enable: { + name: ['config', 'is_enabled'], + type: 'checkbox', + defaultValue: true, + }, + }, + features: [ + '3dPrinting', + 'deviceManagement', + 'multimodalInteraction', + 'remoteAgent' + ], } } \ No newline at end of file diff --git a/web/src/views/UserMemoryDetail/Neo4j.tsx b/web/src/views/UserMemoryDetail/Neo4j.tsx index 51be7c8d..3fdaaed3 100644 --- a/web/src/views/UserMemoryDetail/Neo4j.tsx +++ b/web/src/views/UserMemoryDetail/Neo4j.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 17:57:26 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-26 18:59:53 + * @Last Modified time: 2026-04-14 16:04:08 */ /** * Neo4j User Memory Detail View @@ -22,7 +22,7 @@ import InterestDistribution from './components/InterestDistribution' import NodeStatistics from './components/NodeStatistics' import RelationshipNetwork from './components/RelationshipNetwork' import MemoryInsight from './components/MemoryInsight' -import type { EndUserProfileRef, MemoryInsightRef, AboutMeRef } from './types' +import type { EndUserProfileRef, MemoryInsightRef, AboutMeRef, EndUser } from './types' import { analyticsRefresh, } from '@/api/memory' @@ -39,8 +39,10 @@ const Neo4j: FC = () => { const [selectedKey, setSelectedKey] = useState(null) /** Update displayed name */ - const handleNameUpdate = (data: { other_name?: string; id: string }) => { - setName(data.other_name && data.other_name !== '' ? data.other_name : data.id) + const handleNameUpdate = (data?: EndUser) => { + if (!data) return + let name = data.other_name && data.other_name !== '' ? data.other_name : data.id || data.end_user_id + setName(name) } /** Navigate back */ diff --git a/web/src/views/UserMemoryDetail/components/ConversationMemory.tsx b/web/src/views/UserMemoryDetail/components/ConversationMemory.tsx index cd080cc6..4aa87fe5 100644 --- a/web/src/views/UserMemoryDetail/components/ConversationMemory.tsx +++ b/web/src/views/UserMemoryDetail/components/ConversationMemory.tsx @@ -2,9 +2,9 @@ * @Author: ZhaoYing * @Date: 2026-02-03 18:34:04 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-31 15:35:13 + * @Last Modified time: 2026-04-10 16:32:52 */ -import { type FC, useState } from 'react' +import { type FC } from 'react' import { useTranslation } from 'react-i18next' import { useParams } from 'react-router-dom' import { Divider, Flex } from 'antd' @@ -23,7 +23,6 @@ interface DataItem { const ConversationMemory: FC = () => { const { t } = useTranslation() const { id } = useParams() - const [total, setTotal] = useState(0) return ( { headerClassName="rb:min-h-[54px]! rb:pt-0! rb:mb-0!" bodyClassName="rb:p-4! rb:pt-0! rb:pb-1! rb:h-[calc(100%-54px)]!" className="rb:h-full!" - extra={
{t('userMemory.totalRagMemory')}: {total}
} > url={getRagContentUrl} query={{ end_user_id: id }} column={1} gutter={0} - onTotalChange={setTotal} renderItem={(item, index) => (
{index !== 0 && } diff --git a/web/src/views/UserMemoryDetail/components/EndUserProfile.tsx b/web/src/views/UserMemoryDetail/components/EndUserProfile.tsx index c689bf72..4dee9d4f 100644 --- a/web/src/views/UserMemoryDetail/components/EndUserProfile.tsx +++ b/web/src/views/UserMemoryDetail/components/EndUserProfile.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 18:33:30 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-27 11:11:09 + * @Last Modified time: 2026-04-14 16:03:41 */ /** * End User Profile Component @@ -27,11 +27,11 @@ import type { EndUser, EndUserProfileModalRef, EndUserProfileRef } from '../type * Component props */ interface EndUserProfileProps { - onDataLoaded?: (data: { other_name?: string; id: string }) => void; + onDataLoaded?: (data?: EndUser) => void; className?: string; } -const EndUserProfile = forwardRef(({ className }, ref) => { +const EndUserProfile = forwardRef(({ className, onDataLoaded }, ref) => { const { t } = useTranslation() const { id } = useParams() const endUserProfileModalRef = useRef(null) @@ -51,6 +51,7 @@ const EndUserProfile = forwardRef(({ cla const userData = res as EndUser setData(userData) setLoading(false) + onDataLoaded?.(userData as EndUser) }) .finally(() => { setLoading(false) diff --git a/web/src/views/UserMemoryDetail/components/InterestAreas.tsx b/web/src/views/UserMemoryDetail/components/InterestAreas.tsx index 91554880..4d9be5b5 100644 --- a/web/src/views/UserMemoryDetail/components/InterestAreas.tsx +++ b/web/src/views/UserMemoryDetail/components/InterestAreas.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 18:32:53 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-16 14:27:12 + * @Last Modified time: 2026-04-13 13:37:43 */ import { useEffect, useState, forwardRef, useImperativeHandle, useRef } from 'react' import { useTranslation } from 'react-i18next' @@ -93,7 +93,7 @@ const InterestAreas = forwardRef<{ handleRefresh: () => void; }>((_props, ref) = ref={chartRef} option={{ color: Colors, - grid: { top: 8, left: 38, right: 8, bottom: 24 }, + grid: { top: 14, left: 38, right: 8, bottom: 24 }, xAxis: { type: 'category', data: keys.map(k => t(`implicitDetail.${k}`)), diff --git a/web/src/views/UserMemoryDetail/types.ts b/web/src/views/UserMemoryDetail/types.ts index 9e56bb5d..d8bc6f23 100644 --- a/web/src/views/UserMemoryDetail/types.ts +++ b/web/src/views/UserMemoryDetail/types.ts @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 17:57:15 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-24 17:58:54 + * @Last Modified time: 2026-04-14 16:03:16 */ /** * User Memory Detail Types @@ -172,6 +172,7 @@ export interface EndUser { other_name: string; aliases: string | null; meta_data: Record; + id?: string; end_user_info_id: string; end_user_id: string; created_at: string; diff --git a/web/src/views/Workflow/components/AddChatVariable/ChatVariableModal.tsx b/web/src/views/Workflow/components/AddChatVariable/ChatVariableModal.tsx index d718b2eb..e4f62432 100644 --- a/web/src/views/Workflow/components/AddChatVariable/ChatVariableModal.tsx +++ b/web/src/views/Workflow/components/AddChatVariable/ChatVariableModal.tsx @@ -2,10 +2,10 @@ * @Author: ZhaoYing * @Date: 2025-12-30 13:59:36 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-08 11:05:34 + * @Last Modified time: 2026-04-13 15:26:33 */ import { forwardRef, useImperativeHandle, useState, useRef, useMemo } from 'react'; -import { Form, Input, Select, InputNumber, Button, Row, Col, Flex, Spin } from 'antd'; +import { Form, Input, Select, InputNumber, Button, Row, Col, Flex } from 'antd'; import { PlusOutlined } from '@ant-design/icons'; import { useTranslation } from 'react-i18next'; @@ -45,6 +45,7 @@ const array_object_placeholder = `# example interface ChatVariableModalProps { refresh: (value: ChatVariable, editIndex?: number) => void; + variables?: ChatVariable[]; } const types = [ @@ -52,7 +53,7 @@ const types = [ 'number', 'boolean', 'object', - // 'file', + 'file', 'array[file]', 'array[string]', 'array[number]', @@ -61,7 +62,8 @@ const types = [ ] const ChatVariableModal = forwardRef(({ - refresh + refresh, + variables }, ref) => { const { t } = useTranslation(); const uploadFileListModalRef = useRef(null); @@ -122,7 +124,7 @@ const ChatVariableModal = forwardRef { const defaultValue = Array.isArray(values.defaultValue) ? values.defaultValue.filter((v: any) => v !== undefined && v !== null && v !== '') - : values.type.includes('object') + : values.type.includes('object') && values.defaultValue ? JSON.parse(values.defaultValue) : values.defaultValue; refresh({ ...values, defaultValue }, editIndex); @@ -244,6 +246,12 @@ const ChatVariableModal = forwardRef { + const duplicate = variables?.some((v, i) => v.name === value && i !== editIndex); + return duplicate ? Promise.reject(t('workflow.config.duplicateName')) : Promise.resolve(); + } + }, ]} > @@ -334,7 +342,20 @@ const ChatVariableModal = forwardRef ) : ( - + { + if (!value) return Promise.resolve(); + try { JSON.parse(value); return Promise.resolve(); } + catch { return Promise.reject(t('workflow.invalidJSON')); } + } + }] + : undefined + } + > {type === 'number' ? : type === 'boolean' diff --git a/web/src/views/Workflow/components/AddChatVariable/index.tsx b/web/src/views/Workflow/components/AddChatVariable/index.tsx index d7ab5cee..07e070d9 100644 --- a/web/src/views/Workflow/components/AddChatVariable/index.tsx +++ b/web/src/views/Workflow/components/AddChatVariable/index.tsx @@ -104,6 +104,7 @@ const AddChatVariable = forwardRef(({ ); diff --git a/web/src/views/Workflow/components/Chat/Runtime.tsx b/web/src/views/Workflow/components/Chat/Runtime.tsx index 68bdc452..4a5be793 100644 --- a/web/src/views/Workflow/components/Chat/Runtime.tsx +++ b/web/src/views/Workflow/components/Chat/Runtime.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-24 17:57:08 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-07 14:05:50 + * @Last Modified time: 2026-04-14 16:33:33 */ /* * Runtime Component @@ -161,8 +161,7 @@ const Runtime: FC<{ item: ChatItem; index: number;}> = ({ children: ( {/* Display error message for failed nodes */} - - {item.error && + {vo.content?.error && vo.content?.error !== '' && @@ -219,11 +218,11 @@ const Runtime: FC<{ item: ChatItem; index: number;}> = ({
} - /** Copy value to clipboard and show success message */ - const handleCopy = (value: string) => { - copy(value) - message.success(t('common.copySuccess')) - } + /** Copy value to clipboard and show success message */ + const handleCopy = (value: string) => { + copy(value) + message.success(t('common.copySuccess')) + } return (
= ({
) :
- {item.error && + {item.error && item.error !== '' && } {renderChild(item.subContent)} diff --git a/web/src/views/Workflow/components/CheckList/index.tsx b/web/src/views/Workflow/components/CheckList/index.tsx new file mode 100644 index 00000000..0256416a --- /dev/null +++ b/web/src/views/Workflow/components/CheckList/index.tsx @@ -0,0 +1,293 @@ +import { useState, useCallback, useEffect, useRef, type FC } from 'react' +import { Popover, Flex } from 'antd' +import { WarningFilled } from '@ant-design/icons' +import { useTranslation } from 'react-i18next' +import { Node } from '@antv/x6'; + +import type { WorkflowRef } from '@/views/ApplicationConfig/types' +import { nodeLibrary } from '../../constant' +import { getToolMethods } from '@/api/tools' +import RbDrawer from '@/components/RbDrawer' +import { useWorkflowStore } from '@/store/workflow' + +interface CheckListProps { + workflowRef: React.RefObject + appId: string +} + +export interface CheckError { + key: string + message: string +} + +export interface NodeCheckResult { + id: string + name: string + type: string + icon: string + errors: CheckError[] +} + +const allNodes = nodeLibrary.flatMap(c => c.nodes) +const nodeIconMap: Record = Object.fromEntries(allNodes.map(n => [n.type, n.icon])) +const nodeConfigMap: Record> = Object.fromEntries( + allNodes.filter(n => n.config).map(n => [n.type, n.config!]) +) + +// Special validators for fields that need deeper checks beyond simple empty check +const specialValidators: Record boolean> = { + // llm.messages: at least one message with non-empty content + 'llm.messages': (val: any[]) => !Array.isArray(val) || !val.some(m => m?.content && String(m.content).trim()), + // knowledge-retrieval.knowledge_retrieval: knowledge_bases array must be non-empty + 'knowledge-retrieval.knowledge_retrieval': (val: any) => !(val?.knowledge_bases?.length > 0), + 'memory-write.messages': (val: any[]) => !Array.isArray(val) || !val.some(m => m?.content && String(m.content).trim()), + // if-else.cases: every case must have at least one expression, and every expression must be fully set + 'if-else.cases': (val: any[]) => { + if (!Array.isArray(val) || !val.length) return true + return val.some(c => { + if (!c?.expressions?.length) return true + return c.expressions.some((expr: any) => { + if (!expr?.left) return true + if (['not_empty', 'empty'].includes(expr.operator)) return false + return !(!!expr.left && (!!expr.right || typeof expr.right === 'boolean' || typeof expr.right === 'number')) + }) + }) + }, + // question-classifier.categories: every category must have a value + 'question-classifier.categories': (val: any[]) => !Array.isArray(val) || !val.some(c => c?.class_name && String(c.class_name).trim()), + // var-aggregator.group_variables: must be non-empty array + 'var-aggregator.group_variables': (val: any[]) => !Array.isArray(val) || !val.length, + // assigner.assignments: every item needs variable_selector + operation; value required unless operation is 'clear' + 'assigner.assignments': (val: any[]) => { + if (!Array.isArray(val) || !val.length) return false + return val.some(a => { + if (!a?.variable_selector || !a?.operation) return true + if (a.operation === 'clear') return false + return a.value === undefined || a.value === null || a.value === '' + }) + }, + // http-request.body: binary content_type requires data + 'http-request.body': (val: any) => val?.content_type === 'binary' && !val?.data, + // tool.tool_parameters: validated async via API, placeholder always returns false + 'tool.tool_parameters': () => false, + // code.input_variables: if non-empty, every item must have both name and variable + 'code.input_variables': (val: any[]) => Array.isArray(val) && val.length > 0 && val.some(v => !v?.name || !v?.variable), + // code.output_variables: must be non-empty + 'code.output_variables': (val: any[]) => !Array.isArray(val) || !val.length, + // jinja-render.mapping: if non-empty, every item must have a name + 'jinja-render.mapping': (val: any[]) => Array.isArray(val) && val.length > 0 && val.some(v => !v?.name || !v?.value), +} + +function isEmpty(val: any): boolean { + if (val === undefined || val === null || val === '') return true + if (Array.isArray(val)) return val.length === 0 + return false +} + +function validateNode(type: string, config: Record): CheckError[] { + const errors: CheckError[] = [] + const nodeConfig = nodeConfigMap[type] + if (!nodeConfig) return errors + + const get = (key: string) => config[key]?.defaultValue + + Object.entries(nodeConfig).forEach(([field, fieldConfig]) => { + if (!fieldConfig?.required) return + const val = get(field) + const specialKey = `${type}.${field}` + const specialValidator = specialValidators[specialKey] + const isInvalid = specialValidator ? specialValidator(val) : isEmpty(val) + if (isInvalid) errors.push({ key: specialKey, message: '' }) + }) + + // http-request body.data (binary) — not a top-level required field, check separately + if (type === 'http-request') { + const body = get('body') + if (body?.content_type === 'binary' && !body?.data) { + errors.push({ key: 'http-request.body.data', message: '' }) + } + } + + // console.log('nodeConfig', nodeConfigMap, nodeConfig, errors) + return errors +} + +const CheckList: FC = ({ workflowRef, appId }) => { + const { t } = useTranslation() + const [open, setOpen] = useState(false) + const { setCheckResults, getCheckResults } = useWorkflowStore() + const results = getCheckResults(appId) + const timerRef = useRef>() + + const runCheck = useCallback(async () => { + const graph = workflowRef.current?.graphRef?.current + if (!graph) return [] + + const nodes = graph.getNodes() + const edges = graph.getEdges() + const sourceIds = new Set() + const targetIds = new Set() + // child-to-child edges within same parent (cycle) + const childTargetIds = new Set() + edges.forEach(e => { + sourceIds.add(e.getSourceCellId()) + targetIds.add(e.getTargetCellId()) + const srcData = graph.getCellById(e.getSourceCellId())?.getData() + const tgtData = graph.getCellById(e.getTargetCellId())?.getData() + if (srcData?.cycle && tgtData?.cycle && srcData.cycle === tgtData.cycle) { + childTargetIds.add(e.getTargetCellId()) + } + }) + + const checked: NodeCheckResult[] = [] + for (const node of nodes) { + const data = node.getData() + if (!data || ['add-node', 'notes', 'cycle-start', 'break'].includes(data.type)) continue + + const errors: CheckError[] = [] + + + // Check connectivity + const isChildNode = !!data.cycle + const hasIncoming = isChildNode ? childTargetIds.has(node.id) : !['start', 'cycle-start'].includes(data.type) ? targetIds.has(node.id) : true + if (!hasIncoming) { + errors.push({ key: 'notConnected', message: t('workflow.notConnected') }) + } + + // Validate config + const configErrors = validateNode(data.type, data.config ?? {}) + configErrors.forEach(e => { + errors.push({ key: e.key, message: `${t(`workflow.checkListErrors.${e.key}`)} ${t('workflow.cannotBeEmpty')}`.trim() }) + }) + + // Tool node: fetch parameters via API and check required fields + if (data.type === 'tool') { + const toolId = data.config?.tool_id?.defaultValue ?? data.config?.tool_id + const toolParameters = data.config?.tool_parameters?.defaultValue ?? data.config?.tool_parameters ?? {} + + if (typeof toolId === 'string') { + try { + const methods = await getToolMethods(toolId) as Array<{ name: string; parameters: Array<{ name: string; required: boolean }> }> + const operation = toolParameters?.operation + const method = operation ? methods.find(m => m.name === operation) : methods[0] + if (method) { + const missingParams = method.parameters.filter(p => p.required && (toolParameters[p.name] === undefined || toolParameters[p.name] === null || toolParameters[p.name] === '')) + missingParams.forEach(p => errors.push({ key: 'tool.tool_parameters', message: `${p.name} ${t('workflow.cannotBeEmpty')}` })) + } + } catch { + // ignore API errors + } + } + } + + if (errors.length) { + checked.push({ + id: node.id, + name: data.name || t(`workflow.${data.type}`), + type: data.type, + icon: nodeIconMap[data.type] ?? '', + errors, + }) + } + } + + return checked + }, [workflowRef.current?.graphRef?.current, t]) + + const scheduleCheckRef = useRef<() => void>() + + const scheduleCheck = useCallback(() => { + clearTimeout(timerRef.current) + timerRef.current = setTimeout(async () => { + setCheckResults(appId, await runCheck()) + }, 300) + }, [runCheck]) + + scheduleCheckRef.current = scheduleCheck + + useEffect(() => { + const graph = workflowRef.current?.graphRef?.current + console.log('graph') + if (!graph) return + const handler = () => scheduleCheckRef.current?.() + const events = ['node:added', 'node:removed', 'node:change:data', 'edge:added', 'edge:removed', 'edge:connected', 'edge:changed'] + events.forEach(e => graph.on(e, handler)) + scheduleCheckRef.current?.() + return () => { + events.forEach(e => graph.off(e, handler)) + clearTimeout(timerRef.current) + } + }, [workflowRef.current?.graphRef?.current]) + +const handleOpen = () => { + setOpen(true) + } + + const focusNode = (id: string) => { + const graph = workflowRef.current?.graphRef?.current + if (!graph) return + const node = graph.getCellById(id) + if (node) { + workflowRef.current?.nodeClick({node} as { node: Node }) + } + setOpen(false) + } + + return ( + <> + +
+
+ {results.length > 0 && ( + + {results.reduce((sum, n) => sum + n.errors.length, 0)} + + )} +
+ + + {t('workflow.checkList')}{results.length > 0 ? `(${results.reduce((sum, n) => sum + n.errors.length, 0)})` : ''} + + } + open={open} + onClose={() => setOpen(false)} + width={360} + styles={{ body: { padding: '12px 16px' } }} + > +

{t('workflow.checkListDesc')}

+ {results.length === 0 + ?
{t('workflow.checkListEmpty')}
+ : + {results.map(node => ( +
+ +
+ {node.name} + focusNode(node.id)} + > + {t('workflow.goto')} → + + + + + {node.errors.map((err, i) => ( + + + {err.message} + + ))} + +
+ ))} +
+ } + + + ) +} + +export default CheckList diff --git a/web/src/views/Workflow/components/Editor/nodes/VariableNode.tsx b/web/src/views/Workflow/components/Editor/nodes/VariableNode.tsx index 5688342c..72e73220 100644 --- a/web/src/views/Workflow/components/Editor/nodes/VariableNode.tsx +++ b/web/src/views/Workflow/components/Editor/nodes/VariableNode.tsx @@ -48,17 +48,13 @@ const VariableComponent: React.FC<{ nodeKey: NodeKey; data: Suggestion }> = ({ return ( - {data.isContext ? ( - 📄 - ) : data.group !== 'CONVERSATION' && !data.value.includes('conv') ? ( - - ) : } + {!data.isContext && data.group !== 'CONVERSATION' && !data.value.includes('conv') + ?
+ : null + } {!data.isContext && data.group !== 'CONVERSATION' && ( <> {!data.value.includes('conv') && <> @@ -73,7 +69,7 @@ const VariableComponent: React.FC<{ nodeKey: NodeKey; data: Suggestion }> = ({ )} )} - {data.label} + {data.label} ); }; diff --git a/web/src/views/Workflow/components/Editor/plugin/AutocompletePlugin.tsx b/web/src/views/Workflow/components/Editor/plugin/AutocompletePlugin.tsx index f9537032..6d3b7a4f 100644 --- a/web/src/views/Workflow/components/Editor/plugin/AutocompletePlugin.tsx +++ b/web/src/views/Workflow/components/Editor/plugin/AutocompletePlugin.tsx @@ -2,12 +2,13 @@ * @Author: ZhaoYing * @Date: 2025-12-23 16:22:51 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-07 16:51:04 + * @Last Modified time: 2026-04-13 14:00:07 */ import { useEffect, useLayoutEffect, useState, useRef, type FC } from 'react'; import { useLexicalComposerContext } from '@lexical/react/LexicalComposerContext'; import { $getSelection, $isRangeSelection, COMMAND_PRIORITY_HIGH, KEY_ENTER_COMMAND, KEY_ARROW_DOWN_COMMAND, KEY_ARROW_UP_COMMAND, KEY_ESCAPE_COMMAND } from 'lexical'; import { Space, Flex } from 'antd'; +import clsx from 'clsx'; import { INSERT_VARIABLE_COMMAND, CLOSE_AUTOCOMPLETE_COMMAND } from '../commands'; import type { NodeProperties } from '../../../types' @@ -284,23 +285,24 @@ const AutocompletePlugin: FC<{ options: Suggestion[] }> = ({ options }) => { ref={popupRef} data-autocomplete-popup="true" onMouseDown={(e) => e.preventDefault()} - className="rb:fixed rb:z-1000 rb:bg-white rb:rounded-xl rb:shadow-[0px_2px_12px_0px_rgba(23,23,25,0.12)]" + className="rb:fixed rb:z-1000 rb:bg-white rb:rounded-lg rb:border-[0.5px] rb:border-[#EBEBEB] rb:shadow-[0px_2px_6px_0px_rgba(0,0,0,0.1)] rb:py-3 rb:px-2" style={{ top: popupPosition.top, left: popupPosition.left, }} > -
+
{Object.entries(groupedSuggestions).map(([nodeId, nodeOptions]) => { const nodeName = nodeOptions[0]?.nodeData?.name || nodeId; - const nodeIcon = nodeOptions[0]?.nodeData?.icon; return ( -
- {nodeName !== 'undefined' && - {nodeIcon &&
} - {nodeName} - } +
+ {nodeName !== 'undefined' && +
+ {nodeName} +
+ } + {nodeOptions.map((option) => { const globalIndex = flatOptions.indexOf(option); const isExpanded = expandedParent?.key === option.key; @@ -310,14 +312,13 @@ const AutocompletePlugin: FC<{ options: Suggestion[] }> = ({ options }) => { key={option.key} ref={(el) => { if (el) itemRefs.current.set(option.key, el); }} data-selected={selectedIndex === globalIndex} - className="rb:pl-6! rb:pr-3! rb:py-2!" + className={clsx("rb:px-2! rb:py-0.75! rb:rounded-sm rb:leading-4.5 rb:text-[#5B6167] rb:hover:bg-[#F6F6F6]", { + 'rb:bg-[#F6F6F6]': selectedIndex === globalIndex || isExpanded, + 'rb:cursor-not-allowed rb:opacity-65': option.disabled, + 'rb:cursor-pointer': !option.disabled, + })} align="center" justify="space-between" - style={{ - cursor: option.disabled ? 'not-allowed' : 'pointer', - background: (selectedIndex === globalIndex || isExpanded) ? '#f0f8ff' : 'white', - opacity: option.disabled ? 0.5 : 1, - }} onClick={() => { if (option.disabled) return; insertMention(option); @@ -337,17 +338,19 @@ const AutocompletePlugin: FC<{ options: Suggestion[] }> = ({ options }) => { } }} > - {option.label && - {option.isContext ? '📄' : `{x}`} - {option.label} - } - - {option.dataType && {option.dataType}} - {hasChildren && } + {option.label && +
+ {`{x}`} {option.label} +
+ } + + {option.dataType && {option.dataType}} + {hasChildren &&
}
); })} +
); })} @@ -356,7 +359,7 @@ const AutocompletePlugin: FC<{ options: Suggestion[] }> = ({ options }) => { {/* Child variables panel - floats to the left */} {expandedParent?.children?.length && (
= ({ options }) => { }} onMouseEnter={() => setExpandedParent(expandedParent)} > - {/* Header */} -
- +
+ {expandedParent.nodeData.name}.{expandedParent.label} {expandedParent.dataType} @@ -377,19 +379,20 @@ const AutocompletePlugin: FC<{ options: Suggestion[] }> = ({ options }) => { !child.disabled && insertMention(child)} onMouseEnter={() => setSelectedIndex(childIndex)} > - {child.label} - {child.dataType && {child.dataType}} + + {`{x}`} {child.label} + + {child.dataType && {child.dataType}} ); })} diff --git a/web/src/views/Workflow/components/Nodes/ConditionNode.tsx b/web/src/views/Workflow/components/Nodes/ConditionNode.tsx index 79e8352c..625a1b4d 100644 --- a/web/src/views/Workflow/components/Nodes/ConditionNode.tsx +++ b/web/src/views/Workflow/components/Nodes/ConditionNode.tsx @@ -14,7 +14,7 @@ const caculateIsSet = (item: any, type: string) => { case 'cases': { if (!item.left) return false if (['not_empty', 'empty'].includes(item.operator)) return true - return !!item.left && (!!item.right || typeof item.right === 'boolean') + return !!item.left && (!!item.right || typeof item.right === 'boolean' || typeof item.right === 'number') } } } @@ -22,7 +22,7 @@ const ConditionNode: ReactShapeConfig['component'] = ({ node }) => { const data = node?.getData() || {}; const { t } = useTranslation() const graphRef = useRef(node?.model?.graph) - const variableList = useVariableList(node ?? null, graphRef, []) + const variableList = useVariableList(node ?? null, graphRef, data.chatVariables ?? []) const getLocaleField = (field: string, filedType: string) => { const key = filedType === 'boolean' ? `workflow.config.if-else..boolean.${field}` : filedType === 'number' ? `workflow.config.if-else.num.${field}` : `workflow.config.if-else.${field}` @@ -31,6 +31,8 @@ const ConditionNode: ReactShapeConfig['component'] = ({ node }) => { }; const labelRender = (value: string) => { const filterOption = variableList.find(vo => `{{${vo.value}}}` === value) + ?? variableList.flatMap(vo => vo.children ?? []).find(child => `{{${child.value}}}` === value) + ?? variableList.flatMap(vo => vo.children ?? []).flatMap((child: any) => child.children ?? []).find((grandchild: any) => `{{${grandchild.value}}}` === value) if (filterOption) { return ( diff --git a/web/src/views/Workflow/components/PortClickHandler.tsx b/web/src/views/Workflow/components/PortClickHandler.tsx index b556ffab..cb3e16c4 100644 --- a/web/src/views/Workflow/components/PortClickHandler.tsx +++ b/web/src/views/Workflow/components/PortClickHandler.tsx @@ -328,7 +328,7 @@ const PortClickHandler: React.FC = ({ graph }) => { }; const content = ( - + {nodeLibrary.map((category) => { const sourceNodeData = sourceNode?.getData(); const isChildOfLoop = sourceNodeData?.cycle && graph?.getNodes().find((n: any) => n.getData()?.id === sourceNodeData.cycle && n.getData()?.type === 'loop'); diff --git a/web/src/views/Workflow/components/Properties/AssignmentList/index.tsx b/web/src/views/Workflow/components/Properties/AssignmentList/index.tsx index 98f86ecf..e24d531d 100644 --- a/web/src/views/Workflow/components/Properties/AssignmentList/index.tsx +++ b/web/src/views/Workflow/components/Properties/AssignmentList/index.tsx @@ -30,6 +30,25 @@ const operationsObj = { ], } +const filterByDataType = (options: Suggestion[], dataType: string): Suggestion[] => + options.reduce((acc, vo) => { + if (vo.children?.length) { + const children = vo.children.reduce((cacc, child) => { + if (child.children?.length) { + const grandchildren = child.children.filter(gc => gc.dataType === dataType); + if (grandchildren.length) cacc.push({ ...child, children: grandchildren }); + } else if (child.dataType === dataType) { + cacc.push(child); + } + return cacc; + }, []); + if (children.length) acc.push({ ...vo, children }); + } else if (vo.dataType === dataType) { + acc.push(vo); + } + return acc; + }, []); + const AssignmentList: FC = ({ parentName, options = [], @@ -59,7 +78,9 @@ const AssignmentList: FC = ({ {fields.map(({ key, name, ...restField }) => { const variableSelector = form.getFieldValue([parentName, name, 'variable_selector']); - const selectedOption = options.find(option => `{{${option.value}}}` === variableSelector); + const selectedOption = options.find(option => `{{${option.value}}}` === variableSelector) + ?? options.flatMap(o => o.children ?? []).find(child => `{{${child.value}}}` === variableSelector) + ?? options.flatMap(o => o.children ?? []).flatMap((c: any) => c.children ?? []).find((gc: any) => `{{${gc.value}}}` === variableSelector); const dataType = selectedOption?.dataType; const operationOptions = dataType === 'number' ? operationsObj.number : operationsObj.default; @@ -119,7 +140,7 @@ const AssignmentList: FC = ({ {dataType === 'number' && operation === 'cover' ? vo.dataType === dataType) : options} + options={dataType ? filterByDataType(options, dataType) : options} size={size} className="rb:flex-1!" variant="filled" @@ -150,7 +171,7 @@ const AssignmentList: FC = ({ : vo.dataType === dataType) : options} + options={dataType ? filterByDataType(options, dataType) : options} size={size} className="rb:flex-1!" variant="filled" diff --git a/web/src/views/Workflow/components/Properties/CaseList/index.tsx b/web/src/views/Workflow/components/Properties/CaseList/index.tsx index e1583ca0..f0a58517 100644 --- a/web/src/views/Workflow/components/Properties/CaseList/index.tsx +++ b/web/src/views/Workflow/components/Properties/CaseList/index.tsx @@ -4,7 +4,7 @@ * @Last Modified by: ZhaoYing * @Last Modified time: 2026-03-25 15:23:45 */ -import { type FC } from 'react' +import { useMemo, type FC } from 'react' import clsx from 'clsx' import { useTranslation } from 'react-i18next'; import { Form, Button, Select, Space, Divider, InputNumber, type SelectProps, Flex, Row, Col } from 'antd' @@ -15,7 +15,7 @@ import Editor from '../../Editor' import { edgeAttrs, nodeWidth } from '../../../constant' import RbButton from '@/components/RbButton'; import RadioGroupBtn from '../RadioGroupBtn' -import { calcConditionNodeTotalHeight, getConditionNodeCasePortY } from '../../../utils' +import { calcConditionNodeTotalHeight, getConditionNodeCasePortY } from '../../../utils'; interface CaseListProps { value?: Array<{ logical_operator: 'and' | 'or'; expressions: { left: string; operator: string; right: string; input_type?: string; }[] }>; @@ -49,6 +49,34 @@ const operatorsObj: { [key: string]: SelectProps['options'] } = { boolean: [ { value: 'eq', label: 'workflow.config.if-else.boolean.eq' }, { value: 'ne', label: 'workflow.config.if-else.boolean.ne' }, + ], + object: [ + { value: 'eq', label: 'workflow.config.if-else.boolean.eq' }, + { value: 'ne', label: 'workflow.config.if-else.boolean.ne' }, + { value: 'empty', label: 'workflow.config.if-else.empty' }, + { value: 'not_empty', label: 'workflow.config.if-else.not_empty' }, + ], + file: [ + { value: 'empty', label: 'workflow.config.if-else.file.empty' }, + { value: 'not_empty', label: 'workflow.config.if-else.file.not_empty' }, + ], + // TODO:包含、不包含、全都是 + 'array[file]': [ + { value: 'empty', label: 'workflow.config.if-else.empty' }, + { value: 'not_empty', label: 'workflow.config.if-else.not_empty' }, + // { value: 'eq', label: 'workflow.config.if-else.eq' }, + // { value: 'contains', label: 'workflow.config.if-else.contains' }, + // { value: 'not_contains', label: 'workflow.config.if-else.not_contains' }, + ], + 'array': [ + { value: 'contains', label: 'workflow.config.if-else.contains' }, + { value: 'not_contains', label: 'workflow.config.if-else.not_contains' }, + { value: 'empty', label: 'workflow.config.if-else.empty' }, + { value: 'not_empty', label: 'workflow.config.if-else.not_empty' }, + ], + 'array[object]': [ + { value: 'empty', label: 'workflow.config.if-else.empty' }, + { value: 'not_empty', label: 'workflow.config.if-else.not_empty' }, ] } @@ -247,6 +275,22 @@ const CaseList: FC = ({ form.setFieldValue([name, caseIndex, 'expressions', conditionIndex, 'right'], undefined); }; + const filterNumberOptions = useMemo(() => { + const filterList: Suggestion[] = [] + options.forEach(vo => { + if (vo.children && vo.children?.length > 0) { + filterList.push({ + ...vo, + children: vo.children.filter(child => child.dataType === 'number') + }) + } else if (vo.dataType === 'number') { + filterList.push(vo) + } + }) + + return filterList + }, [options]) + return ( <> @@ -284,11 +328,17 @@ const CaseList: FC = ({ const currentCase = cases[caseIndex] || {}; const currentExpression = currentCase.expressions?.[conditionIndex] || {}; const currentOperator = currentExpression.operator; - const hideRightField = currentOperator === 'empty' || currentOperator === 'not_empty'; const leftFieldValue = currentExpression.left; - const leftFieldOption = options.find(option => `{{${option.value}}}` === leftFieldValue); + const leftFieldOption = options.find(option => `{{${option.value}}}` === leftFieldValue) + ?? options.flatMap(o => o.children ?? []).find(child => `{{${child.value}}}` === leftFieldValue) + ?? options.flatMap(o => o.children ?? []).flatMap((c: any) => c.children ?? []).find((gc: any) => `{{${gc.value}}}` === leftFieldValue); const leftFieldType = leftFieldOption?.dataType; - const operatorList = operatorsObj[leftFieldType || 'default'] || operatorsObj.default || []; + const hideRightField = currentOperator === 'empty' || currentOperator === 'not_empty' || leftFieldType === 'file' || leftFieldType === 'array[object]' || leftFieldType === 'array[file]'; + const operatorList = leftFieldType && operatorsObj[leftFieldType] + ? operatorsObj[leftFieldType] + : leftFieldType && leftFieldType?.includes('array') + ? operatorsObj.array + : operatorsObj.default; const inputType = leftFieldType === 'number' ? currentExpression.input_type : undefined; return ( @@ -312,7 +362,7 @@ const CaseList: FC = ({ = ({ {inputType === 'variable' ? vo.dataType === 'number')} + options={filterNumberOptions} allowClear={false} variant="borderless" size="small" /> : form.setFieldValue([name, caseIndex, 'expressions', conditionIndex, 'right'], value)} - /> + placeholder={t('common.pleaseEnter')} + variant="borderless" + className="rb:w-full!" + onChange={(value) => form.setFieldValue([name, caseIndex, 'expressions', conditionIndex, 'right'], value)} + /> } : ( - {leftFieldType === 'boolean' - ? + {['boolean', 'array[boolean]'].includes(leftFieldType as string) + ? : } diff --git a/web/src/views/Workflow/components/Properties/CodeExecution/OutputList.tsx b/web/src/views/Workflow/components/Properties/CodeExecution/OutputList.tsx index 6bb12d9b..0080c493 100644 --- a/web/src/views/Workflow/components/Properties/CodeExecution/OutputList.tsx +++ b/web/src/views/Workflow/components/Properties/CodeExecution/OutputList.tsx @@ -27,7 +27,8 @@ const OutputList: FC = ({ label, name, extra }) => { <>
- {label} + + *{label}
diff --git a/web/src/views/Workflow/components/Properties/ConditionList/index.tsx b/web/src/views/Workflow/components/Properties/ConditionList/index.tsx index d484da09..3e9f3261 100644 --- a/web/src/views/Workflow/components/Properties/ConditionList/index.tsx +++ b/web/src/views/Workflow/components/Properties/ConditionList/index.tsx @@ -1,4 +1,4 @@ -import { type FC } from 'react' +import { type FC, useMemo } from 'react' import clsx from 'clsx' import { useTranslation } from 'react-i18next'; import { Form, Button, Select, InputNumber, Input, Divider, type SelectProps, Flex, Space, Row, Col } from 'antd' @@ -47,6 +47,18 @@ const operatorsObj: { [key: string]: SelectProps['options'] } = { { value: 'ne', label: 'workflow.config.if-else.boolean.ne' }, { value: 'empty', label: 'workflow.config.if-else.empty' }, { value: 'not_empty', label: 'workflow.config.if-else.not_empty' }, + ], + // 为空、不为空 + object: [ + { value: 'empty', label: 'workflow.config.if-else.empty' }, + { value: 'not_empty', label: 'workflow.config.if-else.not_empty' }, + ], + // 包含、不包含、为空、不为空 + 'array': [ + { value: 'contains', label: 'workflow.config.if-else.contains' }, + { value: 'not_contains', label: 'workflow.config.if-else.not_contains' }, + { value: 'empty', label: 'workflow.config.if-else.empty' }, + { value: 'not_empty', label: 'workflow.config.if-else.not_empty' }, ] } @@ -58,7 +70,7 @@ const ConditionList: FC = ({ const { t } = useTranslation(); const form = Form.useFormInstance(); - const handleLeftFieldChange = (index: number, newValue: string) => { + const handleLeftFieldChange = (index: number, newValue?: string | string[]) => { form.setFieldsValue({ [parentName]: { expressions: { @@ -81,6 +93,23 @@ const ConditionList: FC = ({ const currentValue = form.getFieldValue([parentName, 'logical_operator']); form.setFieldValue([parentName, 'logical_operator'], currentValue === 'and' ? 'or' : 'and'); }; + + const getNumVariable = useMemo(() => { + const filterList: Suggestion[] = [] + options.forEach(variable => { + if (variable.dataType === 'number') { + filterList.push(variable) + } else if (variable.dataType === 'file') { + filterList.push({ + ...variable, + disabled: true, + children: variable.children?.filter(child => child.dataType === 'number') + }) + } + }) + + return filterList + }, [options]) return ( <> @@ -125,11 +154,21 @@ const ConditionList: FC = ({ const expressions = form.getFieldValue([parentName, 'expressions']) || []; const currentExpression = expressions[index] || {}; const currentOperator = currentExpression.operator; - const hideRightField = currentOperator === 'empty' || currentOperator === 'not_empty'; const leftFieldValue = currentExpression.left; - const leftFieldOption = options.find(option => `{{${option.value}}}` === leftFieldValue); + const leftFieldOption = options.find(option => `{{${option.value}}}` === leftFieldValue) + ?? options.flatMap(o => o.children ?? []).find(child => `{{${child.value}}}` === leftFieldValue) + ?? options.flatMap(o => o.children ?? []).flatMap((c: any) => c.children ?? []).find((gc: any) => `{{${gc.value}}}` === leftFieldValue); const leftFieldType = leftFieldOption?.dataType; - const operatorList = operatorsObj[leftFieldType || 'default'] || operatorsObj.default || []; + const hideRightField = currentOperator === 'empty' || currentOperator === 'not_empty' || ['array[object]', 'object'].includes(leftFieldType as string); + const operatorList = leftFieldType && ['array[object]', 'object'].includes(leftFieldType) + ? operatorsObj.object + : leftFieldType && ['array[boolean]', 'boolean'].includes(leftFieldType) + ? operatorsObj.boolean + : leftFieldType && operatorsObj[leftFieldType] + ? operatorsObj[leftFieldType] + : leftFieldType?.includes('array') + ? operatorsObj.array + : operatorsObj.default const inputType = leftFieldType === 'number' ? currentExpression.input_type : undefined; return ( = ({ - vo.value.includes('sys.') || + !['file', 'array[file]'].includes(vo.dataType) && + (vo.value.includes('sys.') || vo.value.includes('conv.') || vo.nodeData.type === 'loop' || - (vo.nodeData.cycle && vo.nodeData.cycle === selectedNode?.id) + (vo.nodeData.cycle && vo.nodeData.cycle === selectedNode?.id)) )} size="small" allowClear={false} @@ -163,7 +203,7 @@ const ConditionList: FC = ({ } diff --git a/web/src/views/Workflow/components/Properties/CycleVarsList/index.tsx b/web/src/views/Workflow/components/Properties/CycleVarsList/index.tsx index 09106a77..5d1138f0 100644 --- a/web/src/views/Workflow/components/Properties/CycleVarsList/index.tsx +++ b/web/src/views/Workflow/components/Properties/CycleVarsList/index.tsx @@ -6,6 +6,7 @@ import VariableSelect from '../VariableSelect' import type { Suggestion } from '../../Editor/plugin/AutocompletePlugin' import RadioGroupBtn from '../RadioGroupBtn' import { getChildNodeVariables } from '../hooks/useVariableList' +import CodeMirrorEditor from '@/components/CodeMirrorEditor'; interface CycleVar { name: string; @@ -28,11 +29,17 @@ const types = [ 'string', 'number', 'boolean', + 'object', 'array[string]', 'array[number]', 'array[boolean]', 'array[object]' ] +const object_placeholder = `# example +# { +# "name": "redbear", +# "age": 2 +# }` const CycleVarsList: FC = ({ value = [], @@ -144,6 +151,13 @@ const CycleVarsList: FC = ({ { value: true, label: 'True' }, { value: false, label: 'False' }]} /> + : currentType === 'object' + ? : ( = ({ */ useEffect(() => { if (!isCanAdd && value[0]) { - const firstVariable = options.find(opt => `{{${opt.value}}}` === value[0]); + const firstVariable = options.find(opt => `{{${opt.value}}}` === value[0]) + ?? options.flatMap(o => o.children ?? []).find(c => `{{${c.value}}}` === value[0]) + ?? options.flatMap(o => o.children ?? []).flatMap((c: any) => c.children ?? []).find((gc: any) => `{{${gc.value}}}` === value[0]); if (firstVariable) { form.setFieldValue(['group_type', 'output'], firstVariable.dataType); } } else if (isCanAdd) { value.forEach((item: any, index: number) => { if (item?.value?.[0]) { - const firstVariable = options.find(opt => `{{${opt.value}}}` === item.value[0]); + const firstVariable = options.find(opt => `{{${opt.value}}}` === item.value[0]) + ?? options.flatMap(o => o.children ?? []).find(c => `{{${c.value}}}` === item.value[0]) + ?? options.flatMap(o => o.children ?? []).flatMap((c: any) => c.children ?? []).find((gc: any) => `{{${gc.value}}}` === item.value[0]); if (firstVariable) { form.setFieldValue(['group_type', index], firstVariable.dataType); } diff --git a/web/src/views/Workflow/components/Properties/HttpRequest/EditableTable.tsx b/web/src/views/Workflow/components/Properties/HttpRequest/EditableTable.tsx index f5d3e649..e4b2cc29 100644 --- a/web/src/views/Workflow/components/Properties/HttpRequest/EditableTable.tsx +++ b/web/src/views/Workflow/components/Properties/HttpRequest/EditableTable.tsx @@ -38,12 +38,44 @@ const EditableTable: FC = ({ ...(typeOptions.length > 0 && { type: typeOptions[0].value }) }); - // Filter options based on boolean type if needed - const booleanFilterOptions = useMemo(() => { - return filterBooleanType - ? options.filter(option => option.dataType !== 'boolean') - : options - }, [options, filterBooleanType]) + const namefilterOptions = useMemo(() => { + const filterList: Suggestion[] = []; + options.forEach(vo => { + if (vo.dataType === 'file') { + filterList.push({ + ...vo, + disabled: true, + children: vo.children?.filter(child => child.dataType !== 'boolean') + }) + } else if (vo.dataType !== 'array[file]') { + filterList.push(vo) + } + }) + + return filterList + }, [options]) + const valueFilterOptions = (type?: string) => { + let filterOptions: Suggestion[] = [] + options.forEach(vo => { + if (type === 'file' && vo.dataType === 'file') { + filterOptions.push({ + ...vo, + children: [] + }) + } else if (type === 'file' && vo.dataType === 'array[file]') { + filterOptions.push(vo) + } else if (vo.dataType === 'file') { + filterOptions.push({ + ...vo, + disabled: true + }) + } else if (vo.dataType !== 'array[file]') { + filterOptions.push(vo) + } + }) + + return filterOptions + } const getColumns = (remove: (index: number) => void): TableProps['columns'] => { const hasType = typeOptions.length > 0; @@ -53,11 +85,11 @@ const EditableTable: FC = ({ return [ { title: t('workflow.config.name'), - dataIndex: 'name', + dataIndex: 'key', render: (_: any, __: TableRow, index: number) => ( - + !option.dataType.includes('file'))} + options={namefilterOptions} type="input" className={contentClassName} size={size} @@ -105,9 +137,7 @@ const EditableTable: FC = ({ > {(form) => { const currentType = form.getFieldValue([...Array.isArray(parentName) ? parentName : [parentName], index, 'type']); - const filteredOptions = currentType === 'file' - ? booleanFilterOptions.filter(option => option.dataType.includes('file')) - : booleanFilterOptions.filter(option => !option.dataType.includes('file')); + const filteredOptions = valueFilterOptions(currentType) return ( diff --git a/web/src/views/Workflow/components/Properties/HttpRequest/index.tsx b/web/src/views/Workflow/components/Properties/HttpRequest/index.tsx index 53714327..4cf7c150 100644 --- a/web/src/views/Workflow/components/Properties/HttpRequest/index.tsx +++ b/web/src/views/Workflow/components/Properties/HttpRequest/index.tsx @@ -2,9 +2,9 @@ * @Author: ZhaoYing * @Date: 2026-02-09 18:35:43 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-02 17:17:06 + * @Last Modified time: 2026-04-14 17:36:53 */ -import { type FC, useRef, useState } from "react"; +import { type FC, useMemo, useRef, useState } from "react"; import { useTranslation } from 'react-i18next' import { Form, Row, Col, Select, Button, Divider, InputNumber, Switch, Input, Flex, Radio } from 'antd' import { CaretDownOutlined, CaretRightOutlined, SettingOutlined } from '@ant-design/icons'; @@ -35,9 +35,8 @@ const HttpRequest: FC<{ options: Suggestion[]; selectedNode?: any; graphRef?: an form.setFieldsValue({ auth }) } - const handleChangeBodyContentType = (e: any) => { - const value = e.target.value || e.target.value - form.setFieldValue(['body', 'data'], ['form-data', 'x-www-form-urlencoded'].includes(value) ? [{}] : undefined) + const handleChangeBodyContentType = () => { + form.setFieldValue(['body', 'data'], undefined) } // Handle error handling method change and update node ports accordingly @@ -84,10 +83,70 @@ const HttpRequest: FC<{ options: Suggestion[]; selectedNode?: any; graphRef?: an setCollapsed((prev: boolean) => !prev) } + const filterVariables = useMemo(() => { + const filterList: Suggestion[] = [] + options.forEach(variable => { + if (['number', 'string'].includes(variable.dataType)) { + filterList.push(variable) + } else if (variable.dataType === 'file') { + filterList.push({ + ...variable, + disabled: true, + children: variable.children?.filter(child => ['number', 'string'].includes(child.dataType)) + }) + } + }) + + return filterList + }, [options]) + const filterVariablesWithFile = useMemo(() => { + const filterList: Suggestion[] = [] + options.forEach(variable => { + if (['number', 'string', 'file', 'array[file]'].includes(variable.dataType)) { + filterList.push(variable) + } + }) + + return filterList + }, [options]) + const jsonRawFilterVariables = useMemo(() => { + const filterList: Suggestion[] = [] + options.forEach(variable => { + if (['number', 'string', 'array[string]', 'array[number]'].includes(variable.dataType)) { + filterList.push(variable) + } else if (variable.dataType === 'file') { + filterList.push({ + ...variable, + disabled: true, + children: variable.children?.filter(child => ['number', 'string', 'file', 'array[string]', 'array[number]'].includes(child.dataType)) + }) + } + }) + + return filterList + }, [options]) + const fileFilterVariables = useMemo(() => { + const filterList: Suggestion[] = [] + options.forEach(variable => { + if (['array[file]'].includes(variable.dataType)) { + filterList.push(variable) + } else if (variable.dataType === 'file') { + filterList.push({ + ...variable, + children: [] + }) + } + }) + + return filterList + }, [options]) + return ( <> -
API
+
+ *API +