From c5ae82c3c2736b67a3ecec2a7c587820d62c3d2c Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Wed, 22 Apr 2026 16:50:06 +0800 Subject: [PATCH 01/26] refactor(core): migrate memory write tasks to centralized scheduler --- api/app/celery_app.py | 7 +- api/app/celery_task_scheduler.py | 241 ++++++++++++++++++ .../service/memory_api_controller.py | 2 +- .../langgraph_graph/routing/write_router.py | 56 ++-- api/app/core/memory/pipelines/memory_read.py | 4 +- .../read_services/generate_engine/__init__.py | 0 .../query_preprocessor.py | 0 .../retrieval_summary.py | 0 .../read_services/search_engine/__init__.py | 0 .../{ => search_engine}/content_search.py | 2 +- .../{ => search_engine}/result_builder.py | 0 api/app/core/workflow/nodes/memory/node.py | 25 +- api/app/services/memory_api_service.py | 28 +- api/app/tasks.py | 30 +-- api/docker-compose.yml | 17 ++ 15 files changed, 358 insertions(+), 54 deletions(-) create mode 100644 api/app/celery_task_scheduler.py create mode 100644 api/app/core/memory/read_services/generate_engine/__init__.py rename api/app/core/memory/read_services/{ => generate_engine}/query_preprocessor.py (100%) rename api/app/core/memory/read_services/{ => generate_engine}/retrieval_summary.py (100%) create mode 100644 api/app/core/memory/read_services/search_engine/__init__.py rename api/app/core/memory/read_services/{ => search_engine}/content_search.py (99%) rename api/app/core/memory/read_services/{ => search_engine}/result_builder.py (100%) diff --git a/api/app/celery_app.py b/api/app/celery_app.py index b0894eb8..717709da 100644 --- a/api/app/celery_app.py +++ b/api/app/celery_app.py @@ -17,6 +17,7 @@ def _mask_url(url: str) -> str: """隐藏 URL 中的密码部分,适用于 redis:// 和 amqp:// 等协议""" return re.sub(r'(://[^:]*:)[^@]+(@)', r'\1***\2', url) + # macOS fork() safety - must be set before any Celery initialization if platform.system() == 'Darwin': os.environ.setdefault('OBJC_DISABLE_INITIALIZE_FORK_SAFETY', 'YES') @@ -29,7 +30,7 @@ if platform.system() == 'Darwin': # 这些名称会被 Celery CLI 的 Click 框架劫持,详见 docs/celery-env-bug-report.md _broker_url = os.getenv("CELERY_BROKER_URL") or \ - f"redis://:{quote(settings.REDIS_PASSWORD)}@{settings.REDIS_HOST}:{settings.REDIS_PORT}/{settings.REDIS_DB_CELERY_BROKER}" + f"redis://:{quote(settings.REDIS_PASSWORD)}@{settings.REDIS_HOST}:{settings.REDIS_PORT}/{settings.REDIS_DB_CELERY_BROKER}" _backend_url = f"redis://:{quote(settings.REDIS_PASSWORD)}@{settings.REDIS_HOST}:{settings.REDIS_PORT}/{settings.REDIS_DB_CELERY_BACKEND}" os.environ["CELERY_BROKER_URL"] = _broker_url os.environ["CELERY_RESULT_BACKEND"] = _backend_url @@ -66,11 +67,11 @@ celery_app.conf.update( task_serializer='json', accept_content=['json'], result_serializer='json', - + # # 时区 # timezone='Asia/Shanghai', # enable_utc=False, - + # 任务追踪 task_track_started=True, task_ignore_result=False, diff --git a/api/app/celery_task_scheduler.py b/api/app/celery_task_scheduler.py new file mode 100644 index 00000000..88afe341 --- /dev/null +++ b/api/app/celery_task_scheduler.py @@ -0,0 +1,241 @@ +import json +import threading +import time + +import redis + +from app.core.config import settings +from celery_app import celery_app +from core.logging_config import get_named_logger + +logger = get_named_logger("task_scheduler") + +STREAM_KEY = "celery_task_stream" +PENDING_HASH = "scheduler:pending_tasks" +TASK_TIMEOUT = 7800 + + +def health_check_server(): + import uvicorn + from fastapi import FastAPI + + health_app = FastAPI() + + @health_app.get("/") + def health(): + return scheduler.health() + + threading.Thread( + target=uvicorn.run, + kwargs={ + "app": health_app, + "host": "0.0.0.0", + "port": 8001, + "log_config": None + }, + daemon=True + ).start() + logger.info(f"[Health] Server started at http://0.0.0.0:8001") + + +class RedisTaskScheduler: + def __init__(self): + self.redis = redis.Redis( + host=settings.REDIS_HOST, + port=settings.REDIS_PORT, + db=settings.REDIS_DB_CELERY_BACKEND, + password=settings.REDIS_PASSWORD, + decode_responses=True, + ) + self.running = False + self.dispatched = 0 + self.errors = 0 + self._leader = False + + def push_task(self, task_name, user_id, params): + try: + msg_id = self.redis.xadd( + STREAM_KEY, + fields={ + "task_name": task_name, + "user_id": user_id, + "params": json.dumps(params), + } + ) + return msg_id + except Exception as e: + logger.error("Push task exception %s", e, exc_info=True) + raise e + + def _cleanup_finished(self): + pending = self.redis.hgetall(PENDING_HASH) + if not pending: + return + + now = time.time() + task_ids = list(pending.keys()) + + pipe = self.redis.pipeline() + for task_id in task_ids: + pipe.get(f"celery-task-meta-{task_id}") + results = pipe.execute() + + cleanup_pipe = self.redis.pipeline() + has_cleanup = False + + for task_id, raw_result in zip(task_ids, results): + try: + meta = json.loads(pending[task_id]) + lock_key = meta["lock_key"] + dispatched_at = meta.get("dispatched_at", 0) + age = now - dispatched_at + + should_cleanup = False + if raw_result is not None: + result_data = json.loads(raw_result) + if result_data.get("status") in ("SUCCESS", "FAILURE", "REVOKED"): + should_cleanup = True + logger.info("Task finished: %s state=%s", task_id, result_data.get("status")) + elif age > TASK_TIMEOUT: + should_cleanup = True + logger.warning( + "Task expired or lost: %s age=%.0fs, force cleanup", + task_id, age, + ) + + if should_cleanup: + cleanup_pipe.delete(lock_key) + cleanup_pipe.hdel(PENDING_HASH, task_id) + has_cleanup = True + except Exception as e: + logger.error("Cleanup error for %s: %s", task_id, e, exc_info=True) + self.errors += 1 + if has_cleanup: + cleanup_pipe.execute() + + def _dispatch(self, msg_id, msg_data) -> bool: + user_id = msg_data['user_id'] + task_name = msg_data['task_name'] + params = json.loads(msg_data.get('params', "{}")) + + lock_key = f"{task_name}:{user_id}" + try: + task = celery_app.send_task(task_name, kwargs=params) + pipe = self.redis.pipeline() + pipe.set(lock_key, task.id, ex=3600) + pipe.hset(PENDING_HASH, task.id, json.dumps({ + "lock_key": lock_key, + "dispatched_at": time.time() + })) + pipe.xdel(STREAM_KEY, msg_id) + pipe.execute() + self.dispatched += 1 + logger.info("Task dispatched: %s", task.id) + return True + except Exception as e: + self.errors += 1 + logger.error("Task dispatch error for %s: %s", task_name, e, exc_info=True) + return False + + def _leader_lock_extend(self, lock, interval=20): + while self._leader: + try: + lock.extend(60) + except redis.exceptions.LockNotOwnedError: + logger.warning("Lost leader lock during extend") + self._leader = False + except Exception as e: + logger.error("Lock extend error: %s", e) + for _ in range(interval): + if not self._leader: + break + time.sleep(1) + + def schedule_loop(self): + self.running = True + self._cleanup_finished() + resp = self.redis.xread( + streams={STREAM_KEY: '0-0'}, + count=500, + block=5000, + ) + if not resp: + return + + messages = [] + for stream_key, msgs in resp: + messages.extend(msgs) + + lock_keys = [] + for msg_id, msg_data in messages: + lock_keys.append(f"{msg_data['task_name']}:{msg_data['user_id']}") + + pipe = self.redis.pipeline() + for key in lock_keys: + pipe.exists(key) + lock_exists = pipe.execute() + + deliver_keys = set() + for (msg_id, msg_data), locked in zip(messages, lock_exists): + user_id = msg_data['user_id'] + lock_key = f"{msg_data['task_name']}:{user_id}" + + if locked or lock_key in deliver_keys: + continue + + key = self._dispatch(msg_id, msg_data) + if key: + deliver_keys.add(lock_key) + time.sleep(0.1) + + def run_server(self): + health_check_server() + + lock = self.redis.lock( + "scheduler:leader", + timeout=60, + blocking_timeout=10, + thread_local=False + ) + while True: + try: + if lock.acquire(blocking=True): + self._leader = True + t = threading.Thread( + target=self._leader_lock_extend, + args=(lock, 20), + daemon=True + ) + t.start() + try: + while self._leader: + self.schedule_loop() + finally: + self._leader = False + t.join(timeout=30) + try: + lock.release() + except redis.exceptions.LockNotOwnedError: + pass + self.running = False + else: + time.sleep(5) + except Exception as e: + logger.error("Scheduler exception %s", e, exc_info=True) + time.sleep(5) + + def health(self) -> dict: + return { + "running": self.running, + "pending": self.redis.xlen(STREAM_KEY), + "dispatched": self.dispatched, + "errors": self.errors + } + + +scheduler: RedisTaskScheduler | None = None +if scheduler is None: + scheduler = RedisTaskScheduler() + +if __name__ == '__main__': + scheduler.run_server() diff --git a/api/app/controllers/service/memory_api_controller.py b/api/app/controllers/service/memory_api_controller.py index 313781d2..de56f56e 100644 --- a/api/app/controllers/service/memory_api_controller.py +++ b/api/app/controllers/service/memory_api_controller.py @@ -86,7 +86,7 @@ async def write_memory( user_rag_memory_id=payload.user_rag_memory_id, ) - logger.info(f"Memory write task submitted: task_id={result['task_id']}, end_user_id: {payload.end_user_id}") + logger.info(f"Memory write task submitted: end_user_id: {payload.end_user_id}") return success(data=MemoryWriteResponse(**result).model_dump(), msg="Memory write task submitted") diff --git a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py index 74fb6bae..50f7ddb9 100644 --- a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py +++ b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py @@ -12,9 +12,8 @@ from app.core.memory.utils.llm.llm_utils import MemoryClientFactory from app.db import get_db_context from app.repositories.memory_short_repository import LongTermMemoryRepository from app.schemas.memory_agent_schema import AgentMemory_Long_Term -from app.services.task_service import get_task_memory_write_result -from app.tasks import write_message_task from app.utils.config_utils import resolve_config_id +from celery_task_scheduler import scheduler logger = get_agent_logger(__name__) template_root = os.path.join(PROJECT_ROOT_, 'memory', 'agent', 'utils', 'prompt') @@ -86,16 +85,28 @@ async def write( logger.info( f"[WRITE] Submitting Celery task - user={actual_end_user_id}, messages={len(structured_messages)}, config={actual_config_id}") - write_id = write_message_task.delay( - actual_end_user_id, # end_user_id: User ID - structured_messages, # message: JSON string format message list - str(actual_config_id), # config_id: Configuration ID string - storage_type, # storage_type: "neo4j" - user_rag_memory_id or "" # user_rag_memory_id: RAG memory ID (not used in Neo4j mode) + # write_id = write_message_task.delay( + # actual_end_user_id, # end_user_id: User ID + # structured_messages, # message: JSON string format message list + # str(actual_config_id), # config_id: Configuration ID string + # storage_type, # storage_type: "neo4j" + # user_rag_memory_id or "" # user_rag_memory_id: RAG memory ID (not used in Neo4j mode) + # ) + scheduler.push_task( + "app.core.memory.agent.write_message", + end_user_id, + { + "end_user_id": end_user_id, + "message": structured_messages, + "config_id": str(actual_config_id), + "storage_type": storage_type, + "user_rag_memory_id": user_rag_memory_id or "" + } ) - logger.info(f"[WRITE] Celery task submitted - task_id={write_id}") - write_status = get_task_memory_write_result(str(write_id)) - logger.info(f'[WRITE] Task result - user={actual_end_user_id}, status={write_status}') + + # logger.info(f"[WRITE] Celery task submitted - task_id={write_id}") + # write_status = get_task_memory_write_result(str(write_id)) + # logger.info(f'[WRITE] Task result - user={actual_end_user_id}') async def term_memory_save(end_user_id, strategy_type, scope): @@ -164,13 +175,24 @@ async def window_dialogue(end_user_id, langchain_messages, memory_config, scope) else: config_id = memory_config - write_message_task.delay( - end_user_id, # end_user_id: User ID - redis_messages, # message: JSON string format message list - config_id, # config_id: Configuration ID string - AgentMemory_Long_Term.STORAGE_NEO4J, # storage_type: "neo4j" - "" # user_rag_memory_id: RAG memory ID (not used in Neo4j mode) + scheduler.push_task( + "app.core.memory.agent.write_message", + end_user_id, + { + "end_user_id": end_user_id, + "message": redis_messages, + "config_id": config_id, + "storage_type": AgentMemory_Long_Term.STORAGE_NEO4J, + "user_rag_memory_id": "" + } ) + # write_message_task.delay( + # end_user_id, # end_user_id: User ID + # redis_messages, # message: JSON string format message list + # config_id, # config_id: Configuration ID string + # AgentMemory_Long_Term.STORAGE_NEO4J, # storage_type: "neo4j" + # "" # user_rag_memory_id: RAG memory ID (not used in Neo4j mode) + # ) count_store.update_sessions_count(end_user_id, 0, []) diff --git a/api/app/core/memory/pipelines/memory_read.py b/api/app/core/memory/pipelines/memory_read.py index 96ff929a..35ace00d 100644 --- a/api/app/core/memory/pipelines/memory_read.py +++ b/api/app/core/memory/pipelines/memory_read.py @@ -1,8 +1,8 @@ from app.core.memory.enums import SearchStrategy, StorageType from app.core.memory.models.service_models import MemorySearchResult from app.core.memory.pipelines.base_pipeline import ModelClientMixin, DBRequiredPipeline -from app.core.memory.read_services.content_search import Neo4jSearchService, RAGSearchService -from app.core.memory.read_services.query_preprocessor import QueryPreprocessor +from core.memory.read_services.search_engine.content_search import Neo4jSearchService, RAGSearchService +from core.memory.read_services.generate_engine.query_preprocessor import QueryPreprocessor class ReadPipeLine(ModelClientMixin, DBRequiredPipeline): diff --git a/api/app/core/memory/read_services/generate_engine/__init__.py b/api/app/core/memory/read_services/generate_engine/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/api/app/core/memory/read_services/query_preprocessor.py b/api/app/core/memory/read_services/generate_engine/query_preprocessor.py similarity index 100% rename from api/app/core/memory/read_services/query_preprocessor.py rename to api/app/core/memory/read_services/generate_engine/query_preprocessor.py diff --git a/api/app/core/memory/read_services/retrieval_summary.py b/api/app/core/memory/read_services/generate_engine/retrieval_summary.py similarity index 100% rename from api/app/core/memory/read_services/retrieval_summary.py rename to api/app/core/memory/read_services/generate_engine/retrieval_summary.py diff --git a/api/app/core/memory/read_services/search_engine/__init__.py b/api/app/core/memory/read_services/search_engine/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/api/app/core/memory/read_services/content_search.py b/api/app/core/memory/read_services/search_engine/content_search.py similarity index 99% rename from api/app/core/memory/read_services/content_search.py rename to api/app/core/memory/read_services/search_engine/content_search.py index ef4e90f1..16c23f91 100644 --- a/api/app/core/memory/read_services/content_search.py +++ b/api/app/core/memory/read_services/search_engine/content_search.py @@ -8,7 +8,7 @@ from neo4j import Session from app.core.memory.enums import Neo4jNodeType from app.core.memory.memory_service import MemoryContext from app.core.memory.models.service_models import Memory, MemorySearchResult -from app.core.memory.read_services.result_builder import data_builder_factory +from core.memory.read_services.search_engine.result_builder import data_builder_factory from app.core.models import RedBearEmbeddings from app.core.rag.nlp.search import knowledge_retrieval from app.repositories import knowledge_repository diff --git a/api/app/core/memory/read_services/result_builder.py b/api/app/core/memory/read_services/search_engine/result_builder.py similarity index 100% rename from api/app/core/memory/read_services/result_builder.py rename to api/app/core/memory/read_services/search_engine/result_builder.py diff --git a/api/app/core/workflow/nodes/memory/node.py b/api/app/core/workflow/nodes/memory/node.py index bcdc80c7..b74af6b9 100644 --- a/api/app/core/workflow/nodes/memory/node.py +++ b/api/app/core/workflow/nodes/memory/node.py @@ -11,7 +11,7 @@ from app.core.workflow.variable.base_variable import VariableType from app.core.workflow.variable.variable_objects import FileVariable, ArrayVariable from app.db import get_db_read from app.schemas import FileInput -from app.tasks import write_message_task +from celery_task_scheduler import scheduler class MemoryReadNode(BaseNode): @@ -126,12 +126,23 @@ class MemoryWriteNode(BaseNode): "files": file_info }) - write_message_task.delay( - end_user_id=end_user_id, - message=messages, - config_id=str(self.typed_config.config_id), - storage_type=state["memory_storage_type"], - user_rag_memory_id=state["user_rag_memory_id"] + scheduler.push_task( + "app.core.memory.agent.write_message", + end_user_id, + { + "end_user_id": end_user_id, + "message": messages, + "config_id": str(self.typed_config.config_id), + "storage_type": state["memory_storage_type"], + "user_rag_memory_id": state["user_rag_memory_id"] + } ) + # write_message_task.delay( + # end_user_id=end_user_id, + # message=messages, + # config_id=str(self.typed_config.config_id), + # storage_type=state["memory_storage_type"], + # user_rag_memory_id=state["user_rag_memory_id"] + # ) return "success" diff --git a/api/app/services/memory_api_service.py b/api/app/services/memory_api_service.py index 330b84ad..221ca4cf 100644 --- a/api/app/services/memory_api_service.py +++ b/api/app/services/memory_api_service.py @@ -10,6 +10,7 @@ from typing import Any, Dict, Optional from sqlalchemy.orm import Session +from app.celery_task_scheduler import scheduler from app.core.error_codes import BizCode from app.core.exceptions import BusinessException, ResourceNotFoundException from app.core.logging_config import get_logger @@ -166,19 +167,30 @@ class MemoryAPIService: # Convert to message list format expected by write_message_task messages = message if isinstance(message, list) else [{"role": "user", "content": message}] - from app.tasks import write_message_task - task = write_message_task.delay( + # from app.tasks import write_message_task + # task = write_message_task.delay( + # end_user_id, + # messages, + # config_id, + # storage_type, + # user_rag_memory_id or "", + # ) + scheduler.push_task( + "app.core.memory.agent.write_message", end_user_id, - messages, - config_id, - storage_type, - user_rag_memory_id or "", + { + "end_user_id": end_user_id, + "message": messages, + "config_id": config_id, + "storage_type": storage_type, + "user_rag_memory_id": user_rag_memory_id or "" + } ) - logger.info(f"Memory write task submitted: task_id={task.id}, end_user_id={end_user_id}") + logger.info(f"Memory write task submitted, end_user_id={end_user_id}") return { - "task_id": task.id, + # "task_id": task.id, "status": "PENDING", "end_user_id": end_user_id, } diff --git a/api/app/tasks.py b/api/app/tasks.py index 8bbbdc6e..5978af4d 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -34,7 +34,7 @@ from app.core.rag.prompts.generator import question_proposal from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ( ElasticSearchVectorFactory, ) -from app.db import get_db, get_db_context +from app.db import get_db_context from app.models import Document, File, Knowledge from app.models.end_user_model import EndUser from app.schemas import document_schema, file_schema @@ -1993,7 +1993,7 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di end_users = db.query(EndUser).all() if not end_users: logger.info("没有终端用户,跳过遗忘周期") - return {"status": "SUCCESS", "message": "没有终端用户", + return {"status": "SUCCESS", "message": "没有终端用户", "report": {"merged_count": 0, "failed_count": 0, "processed_users": 0}, "duration_seconds": time.time() - start_time} @@ -2007,7 +2007,7 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di # 获取用户配置(自动回退到工作空间默认配置) connected_config = get_end_user_connected_config(str(end_user.id), db) user_config_id = resolve_config_id(connected_config.get("memory_config_id"), db) - + if not user_config_id: failed_users.append({"end_user_id": str(end_user.id), "error": "无法获取配置"}) continue @@ -2016,13 +2016,13 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di report = await forget_service.trigger_forgetting_cycle( db=db, end_user_id=str(end_user.id), config_id=user_config_id ) - + total_merged += report.get('merged_count', 0) total_failed += report.get('failed_count', 0) processed_users += 1 - + logger.info(f"用户 {end_user.id}: 融合 {report.get('merged_count', 0)} 对节点") - + except Exception as e: logger.error(f"处理用户 {end_user.id} 失败: {e}", exc_info=True) failed_users.append({"end_user_id": str(end_user.id), "error": str(e)}) @@ -2769,18 +2769,18 @@ def run_incremental_clustering( 包含任务执行结果的字典 """ start_time = time.time() - + async def _run() -> Dict[str, Any]: from app.core.logging_config import get_logger from app.repositories.neo4j.neo4j_connector import Neo4jConnector from app.core.memory.storage_services.clustering_engine.label_propagation import LabelPropagationEngine - + logger = get_logger(__name__) logger.info( f"[IncrementalClustering] 开始增量聚类任务 - end_user_id={end_user_id}, " f"实体数={len(new_entity_ids)}, llm_model_id={llm_model_id}" ) - + connector = Neo4jConnector() try: engine = LabelPropagationEngine( @@ -2788,12 +2788,12 @@ def run_incremental_clustering( llm_model_id=llm_model_id, embedding_model_id=embedding_model_id, ) - + # 执行增量聚类 await engine.run(end_user_id=end_user_id, new_entity_ids=new_entity_ids) - + logger.info(f"[IncrementalClustering] 增量聚类完成 - end_user_id={end_user_id}") - + return { "status": "SUCCESS", "end_user_id": end_user_id, @@ -2804,18 +2804,18 @@ def run_incremental_clustering( raise finally: await connector.close() - + try: loop = set_asyncio_event_loop() result = loop.run_until_complete(_run()) result["elapsed_time"] = time.time() - start_time result["task_id"] = self.request.id - + logger.info( f"[IncrementalClustering] 任务完成 - task_id={self.request.id}, " f"elapsed_time={result['elapsed_time']:.2f}s" ) - + return result except Exception as e: elapsed_time = time.time() - start_time diff --git a/api/docker-compose.yml b/api/docker-compose.yml index 5d358f2c..a0fd4791 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -63,6 +63,23 @@ services: networks: - celery + celery-task-scheduler: + image: redbear-mem-open:latest + container_name: celery-task-scheduler + env_file: + - .env + volumes: + - /etc/localtime:/etc/localtime:ro + command: python app/celery_task_scheduler.py + restart: unless-stopped + healthcheck: + test: CMD curl -f 127.0.0.1:8001 || exit 1 + interval: 30s + timeout: 5s + retries: 3 + networks: + - celery + # Celery Beat - scheduler beat: image: redbear-mem-open:latest From f93ec8d6096af6d792fe13877c6e55b37242aade Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Wed, 22 Apr 2026 17:46:38 +0800 Subject: [PATCH 02/26] fix(core): fix end_user_id reference and add task status tracking - Fix write_router to use actual_end_user_id instead of end_user_id - Add task status tracking via Redis in scheduler - Expose task_id in memory write response - Fix logging import path in scheduler --- api/app/celery_task_scheduler.py | 50 +++++++++++++++++-- .../service/memory_api_controller.py | 6 +-- .../langgraph_graph/routing/write_router.py | 4 +- api/app/schemas/memory_api_schema.py | 8 +-- api/app/services/memory_api_service.py | 6 +-- 5 files changed, 58 insertions(+), 16 deletions(-) diff --git a/api/app/celery_task_scheduler.py b/api/app/celery_task_scheduler.py index 88afe341..8cdce658 100644 --- a/api/app/celery_task_scheduler.py +++ b/api/app/celery_task_scheduler.py @@ -6,7 +6,7 @@ import redis from app.core.config import settings from celery_app import celery_app -from core.logging_config import get_named_logger +from app.core.logging_config import get_named_logger logger = get_named_logger("task_scheduler") @@ -62,11 +62,34 @@ class RedisTaskScheduler: "params": json.dumps(params), } ) + self.redis.set( + f"task_tracker:{msg_id}", + json.dumps({"status": "QUEUED", "task_id": None}), + ex=86400 + ) return msg_id except Exception as e: logger.error("Push task exception %s", e, exc_info=True) raise e + def get_task_status(self, msg_id: str) -> dict: + raw = self.redis.get(f"task_tracker:{msg_id}") + if raw is None: + return {"status": "NOT_FOUND"} + + tracker = json.loads(raw) + status = tracker["status"] + task_id = tracker.get("task_id") + result_content = tracker.get("result") or {} + if status == "DISPATCHED" and task_id: + result_raw = self.redis.get(f"celery-task-meta-{task_id}") + if result_raw: + result_data = json.loads(result_raw) + status = result_data.get("status", status) + result_content = result_data.get("result") + + return {"status": status, "task_id": task_id, "result": result_content} + def _cleanup_finished(self): pending = self.redis.hgetall(PENDING_HASH) if not pending: @@ -91,6 +114,7 @@ class RedisTaskScheduler: age = now - dispatched_at should_cleanup = False + result_data = None if raw_result is not None: result_data = json.loads(raw_result) if result_data.get("status") in ("SUCCESS", "FAILURE", "REVOKED"): @@ -104,8 +128,20 @@ class RedisTaskScheduler: ) if should_cleanup: + final_status = result_data.get("status", "UNKNOWN") if result_data else "EXPIRED" cleanup_pipe.delete(lock_key) cleanup_pipe.hdel(PENDING_HASH, task_id) + tracker_msg_id = meta.get("msg_id") + if tracker_msg_id: + cleanup_pipe.set( + f"task_tracker:{tracker_msg_id}", + json.dumps({ + "status": final_status, + "task_id": task_id, + "result": result_data.get("result") or {} + }), + ex=86400, + ) has_cleanup = True except Exception as e: logger.error("Cleanup error for %s: %s", task_id, e, exc_info=True) @@ -125,9 +161,15 @@ class RedisTaskScheduler: pipe.set(lock_key, task.id, ex=3600) pipe.hset(PENDING_HASH, task.id, json.dumps({ "lock_key": lock_key, - "dispatched_at": time.time() + "dispatched_at": time.time(), + "msg_id": msg_id })) pipe.xdel(STREAM_KEY, msg_id) + pipe.set( + f"task_tracker:{msg_id}", + json.dumps({"status": "DISPATCHED", "task_id": task.id}), + ex=86400, + ) pipe.execute() self.dispatched += 1 logger.info("Task dispatched: %s", task.id) @@ -183,8 +225,8 @@ class RedisTaskScheduler: if locked or lock_key in deliver_keys: continue - key = self._dispatch(msg_id, msg_data) - if key: + dispatched_successfully = self._dispatch(msg_id, msg_data) + if dispatched_successfully: deliver_keys.add(lock_key) time.sleep(0.1) diff --git a/api/app/controllers/service/memory_api_controller.py b/api/app/controllers/service/memory_api_controller.py index de56f56e..57664e4e 100644 --- a/api/app/controllers/service/memory_api_controller.py +++ b/api/app/controllers/service/memory_api_controller.py @@ -18,6 +18,7 @@ from app.schemas.memory_api_schema import ( MemoryWriteSyncResponse, ) from app.services.memory_api_service import MemoryAPIService +from celery_task_scheduler import scheduler router = APIRouter(prefix="/memory", tags=["V1 - Memory API"]) logger = get_business_logger() @@ -86,7 +87,7 @@ async def write_memory( user_rag_memory_id=payload.user_rag_memory_id, ) - logger.info(f"Memory write task submitted: end_user_id: {payload.end_user_id}") + logger.info(f"Memory write task submitted: task_id: {result['task_id']} end_user_id: {payload.end_user_id}") return success(data=MemoryWriteResponse(**result).model_dump(), msg="Memory write task submitted") @@ -105,8 +106,7 @@ async def get_write_task_status( """ logger.info(f"Write task status check - task_id: {task_id}") - from app.services.task_service import get_task_memory_write_result - result = get_task_memory_write_result(task_id) + result = scheduler.get_task_status(task_id) return success(data=_sanitize_task_result(result), msg="Task status retrieved") diff --git a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py index 50f7ddb9..7ef4ed12 100644 --- a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py +++ b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py @@ -94,9 +94,9 @@ async def write( # ) scheduler.push_task( "app.core.memory.agent.write_message", - end_user_id, + actual_end_user_id, { - "end_user_id": end_user_id, + "end_user_id": actual_end_user_id, "message": structured_messages, "config_id": str(actual_config_id), "storage_type": storage_type, diff --git a/api/app/schemas/memory_api_schema.py b/api/app/schemas/memory_api_schema.py index 4cc548f3..7e4ca74a 100644 --- a/api/app/schemas/memory_api_schema.py +++ b/api/app/schemas/memory_api_schema.py @@ -112,12 +112,12 @@ class MemoryWriteResponse(BaseModel): """Response schema for memory write operation. Attributes: - task_id: Celery task ID for status polling - status: Initial task status (PENDING) + task_id: task ID for status polling + status: Initial task status (QUEUED) end_user_id: End user ID the write was submitted for """ - task_id: str = Field(..., description="Celery task ID for polling") - status: str = Field(..., description="Task status: PENDING") + task_id: str = Field(..., description="task ID for polling") + status: str = Field(..., description="Task status: QUEUED") end_user_id: str = Field(..., description="End user ID") diff --git a/api/app/services/memory_api_service.py b/api/app/services/memory_api_service.py index 221ca4cf..a1ceef86 100644 --- a/api/app/services/memory_api_service.py +++ b/api/app/services/memory_api_service.py @@ -175,7 +175,7 @@ class MemoryAPIService: # storage_type, # user_rag_memory_id or "", # ) - scheduler.push_task( + task_id = scheduler.push_task( "app.core.memory.agent.write_message", end_user_id, { @@ -190,8 +190,8 @@ class MemoryAPIService: logger.info(f"Memory write task submitted, end_user_id={end_user_id}") return { - # "task_id": task.id, - "status": "PENDING", + "task_id": task_id, + "status": "QUEUED", "end_user_id": end_user_id, } From 5c836c90c94e46dde6bf98fe0c56482b0b716c81 Mon Sep 17 00:00:00 2001 From: miao <1468212639@qq.com> Date: Thu, 23 Apr 2026 12:05:31 +0800 Subject: [PATCH 03/26] feat(memory): add episodic memory pagination and semantic memory list API Split explicit memory overview into two independent endpoints: - GET /memory/explicit-memory/episodics: episodic memory paginated query with date range filter (millisecond timestamp) and episodic type filter using Neo4j datetime() for precise time comparison - GET /memory/explicit-memory/semantics: semantic memory full list query returns data as array directly Modified files: - api/app/controllers/memory_explicit_controller.py - api/app/services/memory_explicit_service.py --- .../controllers/memory_explicit_controller.py | 139 +++++++++++- api/app/services/memory_explicit_service.py | 206 +++++++++++++++++- 2 files changed, 342 insertions(+), 3 deletions(-) diff --git a/api/app/controllers/memory_explicit_controller.py b/api/app/controllers/memory_explicit_controller.py index c52f308c..90758dc7 100644 --- a/api/app/controllers/memory_explicit_controller.py +++ b/api/app/controllers/memory_explicit_controller.py @@ -4,7 +4,10 @@ 处理显性记忆相关的API接口,包括情景记忆和语义记忆的查询。 """ -from fastapi import APIRouter, Depends +from datetime import date +from typing import Optional + +from fastapi import APIRouter, Depends, Query from app.core.logging_config import get_api_logger from app.core.response_utils import success, fail @@ -69,6 +72,140 @@ async def get_explicit_memory_overview_api( return fail(BizCode.INTERNAL_ERROR, "显性记忆总览查询失败", str(e)) +@router.get("/episodics", response_model=ApiResponse) +async def get_episodic_memory_list_api( + end_user_id: str = Query(..., description="end user ID"), + page: int = Query(1, gt=0, description="page number, starting from 1"), + pagesize: int = Query(10, gt=0, le=100, description="number of items per page, max 100"), + start_date: Optional[int] = Query(None, description="start timestamp (ms)"), + end_date: Optional[int] = Query(None, description="end timestamp (ms)"), + episodic_type: str = Query("all", description="episodic type :all/conversation/project_work/learning/decision/important_event"), + current_user: User = Depends(get_current_user), +) -> dict: + """ + 获取情景记忆分页列表 + + 返回指定用户的情景记忆列表,支持分页、时间范围筛选和情景类型筛选。 + + Args: + end_user_id: 终端用户ID(必填) + page: 页码(从1开始,默认1) + pagesize: 每页数量(默认10,最大100) + start_date: 开始时间戳(可选,毫秒),自动扩展到当天 00:00:00 + end_date: 结束时间戳(可选,毫秒),自动扩展到当天 23:59:59 + episodic_type: 情景类型筛选(可选,默认all) + current_user: 当前用户 + + Returns: + ApiResponse: 包含情景记忆分页列表 + + Examples: + - 基础分页查询:GET /episodic-list?end_user_id=xxx&page=1&pagesize=5 + 返回第1页,每页5条数据 + - 按时间范围筛选:GET /episodic-list?end_user_id=xxx&page=1&pagesize=5&start_date=1738684800000&end_date=1738771199000 + 返回指定时间范围内的数据 + - 按情景类型筛选:GET /episodic-list?end_user_id=xxx&page=1&pagesize=5&episodic_type=important_event + 返回类型为"重要事件"的数据 + + Notes: + - start_date 和 end_date 必须同时提供或同时不提供 + - start_date 不能大于 end_date + - episodic_type 可选值:all, conversation, project_work, learning, decision, important_event + - total 为该用户情景记忆总数(不受筛选条件影响) + - page.total 为筛选后的总条数 + """ + workspace_id = current_user.current_workspace_id + + # 检查用户是否已选择工作空间 + if workspace_id is None: + api_logger.warning(f"用户 {current_user.username} 尝试查询情景记忆列表但未选择工作空间") + return fail(BizCode.INVALID_PARAMETER, "请先切换到一个工作空间", "current_workspace_id is None") + + api_logger.info( + f"情景记忆分页查询: end_user_id={end_user_id}, " + f"start_date={start_date}, end_date={end_date}, episodic_type={episodic_type}, " + f"page={page}, pagesize={pagesize}, username={current_user.username}" + ) + + # 1. 参数校验 + if page < 1 or pagesize < 1: + api_logger.warning(f"分页参数错误: page={page}, pagesize={pagesize}") + return fail(BizCode.INVALID_PARAMETER, "分页参数必须大于0") + + valid_episodic_types = ["all", "conversation", "project_work", "learning", "decision", "important_event"] + if episodic_type not in valid_episodic_types: + api_logger.warning(f"无效的情景类型参数: {episodic_type}") + return fail(BizCode.INVALID_PARAMETER, f"无效的情景类型参数,可选值:{', '.join(valid_episodic_types)}") + + # 时间戳参数校验 + if (start_date is not None and end_date is None) or (end_date is not None and start_date is None): + return fail(BizCode.INVALID_PARAMETER, "start_date和end_date必须同时提供") + + if start_date is not None and end_date is not None and start_date > end_date: + return fail(BizCode.INVALID_PARAMETER, "start_date不能大于end_date") + + # 2. 执行查询 + try: + result = await memory_explicit_service.get_episodic_memory_list( + end_user_id=end_user_id, + page=page, + pagesize=pagesize, + start_date=start_date, + end_date=end_date, + episodic_type=episodic_type, + ) + api_logger.info( + f"情景记忆分页查询成功: end_user_id={end_user_id}, " + f"total={result['total']}, 返回={len(result['items'])}条" + ) + except Exception as e: + api_logger.error(f"情景记忆分页查询失败: end_user_id={end_user_id}, error={str(e)}") + return fail(BizCode.INTERNAL_ERROR, "情景记忆分页查询失败", str(e)) + + # 3. 返回结构化响应 + return success(data=result, msg="查询成功") + +@router.get("/semantics", response_model=ApiResponse) +async def get_semantic_memory_list_api( + end_user_id: str = Query(..., description="终端用户ID"), + current_user: User = Depends(get_current_user), +) -> dict: + """ + 获取语义记忆列表 + + 返回指定用户的全量语义记忆列表。 + + Args: + end_user_id: 终端用户ID(必填) + current_user: 当前用户 + + Returns: + ApiResponse: 包含语义记忆全量列表 + """ + workspace_id = current_user.current_workspace_id + + if workspace_id is None: + api_logger.warning(f"用户 {current_user.username} 尝试查询语义记忆列表但未选择工作空间") + return fail(BizCode.INVALID_PARAMETER, "请先切换到一个工作空间", "current_workspace_id is None") + + api_logger.info( + f"语义记忆列表查询: end_user_id={end_user_id}, username={current_user.username}" + ) + + try: + result = await memory_explicit_service.get_semantic_memory_list( + end_user_id=end_user_id + ) + api_logger.info( + f"语义记忆列表查询成功: end_user_id={end_user_id}, total={len(result)}" + ) + except Exception as e: + api_logger.error(f"语义记忆列表查询失败: end_user_id={end_user_id}, error={str(e)}") + return fail(BizCode.INTERNAL_ERROR, "语义记忆列表查询失败", str(e)) + + return success(data=result, msg="查询成功") + + @router.post("/details", response_model=ApiResponse) async def get_explicit_memory_details_api( request: ExplicitMemoryDetailsRequest, diff --git a/api/app/services/memory_explicit_service.py b/api/app/services/memory_explicit_service.py index f8d39ae8..c5ab2e38 100644 --- a/api/app/services/memory_explicit_service.py +++ b/api/app/services/memory_explicit_service.py @@ -4,7 +4,8 @@ 处理显性记忆相关的业务逻辑,包括情景记忆和语义记忆的查询。 """ -from typing import Any, Dict +from datetime import date +from typing import Any, Dict, Optional from app.core.logging_config import get_logger from app.services.memory_base_service import MemoryBaseService @@ -104,7 +105,7 @@ class MemoryExplicitService(MemoryBaseService): e.description AS core_definition ORDER BY e.name ASC """ - + semantic_result = await self.neo4j_connector.execute_query( semantic_query, end_user_id=end_user_id @@ -146,6 +147,207 @@ class MemoryExplicitService(MemoryBaseService): logger.error(f"获取显性记忆总览时出错: {str(e)}", exc_info=True) raise + + async def get_episodic_memory_list( + self, + end_user_id: str, + page: int, + pagesize: int, + start_date: Optional[int] = None, + end_date: Optional[int] = None, + episodic_type: str = "all", + ) -> Dict[str, Any]: + """ + 获取情景记忆分页列表 + + Args: + end_user_id: 终端用户ID + page: 页码 + pagesize: 每页数量 + start_date: 开始时间戳(毫秒),可选 + end_date: 结束时间戳(毫秒),可选 + episodic_type: 情景类型筛选 + + Returns: + { + "total": int, # 该用户情景记忆总数(不受筛选影响) + "items": [...], # 当前页数据 + "page": { + "page": int, + "pagesize": int, + "total": int, # 筛选后总数 + "hasnext": bool + } + } + """ + try: + logger.info( + f"情景记忆分页查询: end_user_id={end_user_id}, " + f"start_date={start_date}, end_date={end_date}, " + f"episodic_type={episodic_type}, page={page}, pagesize={pagesize}" + ) + + # 1. 查询情景记忆总数(不受筛选条件限制) + total_all_query = """ + MATCH (s:MemorySummary) + WHERE s.end_user_id = $end_user_id + RETURN count(s) AS total + """ + total_all_result = await self.neo4j_connector.execute_query( + total_all_query, end_user_id=end_user_id + ) + total_all = total_all_result[0]["total"] if total_all_result else 0 + + # 2. 构建筛选条件 + where_clauses = ["s.end_user_id = $end_user_id"] + params = {"end_user_id": end_user_id} + + # 时间戳筛选(毫秒时间戳转为 ISO 字符串,使用 Neo4j datetime() 精确比较) + if start_date is not None and end_date is not None: + from datetime import datetime + start_dt = datetime.fromtimestamp(start_date / 1000) + end_dt = datetime.fromtimestamp(end_date / 1000) + # 开始时间取当天 00:00:00,结束时间取当天 23:59:59.999999 + start_iso = start_dt.strftime("%Y-%m-%dT") + "00:00:00.000000" + end_iso = end_dt.strftime("%Y-%m-%dT") + "23:59:59.999999" + + where_clauses.append("datetime(s.created_at) >= datetime($start_iso) AND datetime(s.created_at) <= datetime($end_iso)") + params["start_iso"] = start_iso + params["end_iso"] = end_iso + + # 类型筛选下推到 Cypher(兼容中英文) + if episodic_type != "all": + type_mapping = { + "conversation": "对话", + "project_work": "项目/工作", + "learning": "学习", + "decision": "决策", + "important_event": "重要事件" + } + chinese_type = type_mapping.get(episodic_type) + if chinese_type: + where_clauses.append( + "(s.memory_type = $episodic_type OR s.memory_type = $chinese_type)" + ) + params["episodic_type"] = episodic_type + params["chinese_type"] = chinese_type + else: + where_clauses.append("s.memory_type = $episodic_type") + params["episodic_type"] = episodic_type + + where_str = " AND ".join(where_clauses) + + # 3. 查询筛选后的总数 + count_query = f""" + MATCH (s:MemorySummary) + WHERE {where_str} + RETURN count(s) AS total + """ + count_result = await self.neo4j_connector.execute_query(count_query, **params) + filtered_total = count_result[0]["total"] if count_result else 0 + + # 4. 查询分页数据 + skip = (page - 1) * pagesize + data_query = f""" + MATCH (s:MemorySummary) + WHERE {where_str} + RETURN elementId(s) AS id, + s.name AS title, + s.memory_type AS memory_type, + s.content AS content, + s.created_at AS created_at + ORDER BY s.created_at DESC + SKIP {skip} LIMIT {pagesize} + """ + + result = await self.neo4j_connector.execute_query(data_query, **params) + + # 5. 处理结果 + items = [] + if result: + for record in result: + raw_created_at = record.get("created_at") + created_at_timestamp = self.parse_timestamp(raw_created_at) + items.append({ + "id": record["id"], + "title": record.get("title") or "未命名", + "memory_type": record.get("memory_type") or "其他", + "content": record.get("content") or "", + "created_at": created_at_timestamp + }) + + # 6. 构建返回结果 + return { + "total": total_all, + "items": items, + "page": { + "page": page, + "pagesize": pagesize, + "total": filtered_total, + "hasnext": (page * pagesize) < filtered_total + } + } + + except Exception as e: + logger.error(f"情景记忆分页查询出错: {str(e)}", exc_info=True) + raise + + async def get_semantic_memory_list( + self, + end_user_id: str + ) -> list: + """ + 获取语义记忆全量列表 + + Args: + end_user_id: 终端用户ID + + Returns: + [ + { + "id": str, + "name": str, + "entity_type": str, + "core_definition": str + } + ] + """ + try: + logger.info(f"语义记忆列表查询: end_user_id={end_user_id}") + + semantic_query = """ + MATCH (e:ExtractedEntity) + WHERE e.end_user_id = $end_user_id + AND e.is_explicit_memory = true + RETURN elementId(e) AS id, + e.name AS name, + e.entity_type AS entity_type, + e.description AS core_definition + ORDER BY e.name ASC + """ + + result = await self.neo4j_connector.execute_query( + semantic_query, end_user_id=end_user_id + ) + + items = [] + if result: + for record in result: + items.append({ + "id": record["id"], + "name": record.get("name") or "未命名", + "entity_type": record.get("entity_type") or "未分类", + "core_definition": record.get("core_definition") or "" + }) + + logger.info(f"语义记忆列表查询成功: end_user_id={end_user_id}, total={len(items)}") + + return items + + except Exception as e: + logger.error(f"语义记忆列表查询出错: {str(e)}", exc_info=True) + raise + async def get_explicit_memory_details( self, end_user_id: str, From bf9a3503de19ec3291b423cbf4d6d80648eff6cd Mon Sep 17 00:00:00 2001 From: miao <1468212639@qq.com> Date: Thu, 23 Apr 2026 15:23:39 +0800 Subject: [PATCH 04/26] feat(memory-api): add memory detail external service APIs Add external service APIs for memory detail queries Provides memory data access endpoints for external service integration Add utility functions for API key user resolution and end_user validation Modified files: - api/app/controllers/service/user_memory_api_controller.py - api/app/core/api_key_utils.py - api/app/controllers/service/__init__.py --- api/app/controllers/service/__init__.py | 2 + .../service/user_memory_api_controller.py | 197 ++++++++++++++++++ api/app/core/api_key_utils.py | 66 ++++++ 3 files changed, 265 insertions(+) create mode 100644 api/app/controllers/service/user_memory_api_controller.py diff --git a/api/app/controllers/service/__init__.py b/api/app/controllers/service/__init__.py index 52d4b732..850b496d 100644 --- a/api/app/controllers/service/__init__.py +++ b/api/app/controllers/service/__init__.py @@ -14,6 +14,7 @@ from . import ( rag_api_document_controller, rag_api_file_controller, rag_api_knowledge_controller, + user_memory_api_controller, ) # 创建 V1 API 路由器 @@ -28,5 +29,6 @@ service_router.include_router(rag_api_chunk_controller.router) service_router.include_router(memory_api_controller.router) service_router.include_router(end_user_api_controller.router) service_router.include_router(memory_config_api_controller.router) +service_router.include_router(user_memory_api_controller.router) __all__ = ["service_router"] diff --git a/api/app/controllers/service/user_memory_api_controller.py b/api/app/controllers/service/user_memory_api_controller.py new file mode 100644 index 00000000..6d2498c0 --- /dev/null +++ b/api/app/controllers/service/user_memory_api_controller.py @@ -0,0 +1,197 @@ +"""User Memory 服务接口 — 基于 API Key 认证 + +包装 user_memory_controllers.py 和 memory_agent_controller.py 中的内部接口, +提供基于 API Key 认证的对外服务: +1./analytics/graph_data - 知识图谱数据接口 +2./analytics/community_graph - 社区图谱接口 +3./analytics/node_statistics - 记忆节点统计接口 +4./analytics/user_summary - 用户摘要接口 +5./analytics/memory_insight - 记忆洞察接口 +6./analytics/interest_distribution - 兴趣分布接口 +7./analytics/end_user_info - 终端用户信息接口 + + +路由前缀: /memory +子路径: /analytics/... +最终路径: /v1/memory/analytics/... +认证方式: API Key (@require_api_key) +""" + +from typing import Optional + +from fastapi import APIRouter, Depends, Header, Query, Request +from sqlalchemy.orm import Session + +from app.core.api_key_auth import require_api_key +from app.core.api_key_utils import get_current_user_from_api_key, validate_end_user_in_workspace +from app.core.logging_config import get_business_logger +from app.db import get_db +from app.schemas.api_key_schema import ApiKeyAuth + +# 包装内部服务 controller +from app.controllers import user_memory_controllers, memory_agent_controller + +router = APIRouter(prefix="/memory", tags=["V1 - User Memory API"]) +logger = get_business_logger() + + +# ==================== 知识图谱 ==================== + + +@router.get("/analytics/graph_data") +@require_api_key(scopes=["memory"]) +async def get_graph_data( + request: Request, + end_user_id: str = Query(..., description="End user ID"), + node_types: Optional[str] = Query(None, description="Comma-separated node types filter"), + limit: int = Query(100, description="Max nodes to return, capped at 1000"), + depth: int = Query(1, description="Graph traversal depth, capped at 3"), + center_node_id: Optional[str] = Query(None, description="Center node for subgraph"), + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """Get knowledge graph data (nodes + edges) for an end user.""" + current_user = get_current_user_from_api_key(db, api_key_auth) + validate_end_user_in_workspace(db, end_user_id, api_key_auth.workspace_id) + + return await user_memory_controllers.get_graph_data_api( + end_user_id=end_user_id, + node_types=node_types, + limit=limit, + depth=depth, + center_node_id=center_node_id, + current_user=current_user, + db=db, + ) + + +@router.get("/analytics/community_graph") +@require_api_key(scopes=["memory"]) +async def get_community_graph( + request: Request, + end_user_id: str = Query(..., description="End user ID"), + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """Get community clustering graph for an end user.""" + current_user = get_current_user_from_api_key(db, api_key_auth) + validate_end_user_in_workspace(db, end_user_id, api_key_auth.workspace_id) + + return await user_memory_controllers.get_community_graph_data_api( + end_user_id=end_user_id, + current_user=current_user, + db=db, + ) + + +# ==================== 节点统计 ==================== + + +@router.get("/analytics/node_statistics") +@require_api_key(scopes=["memory"]) +async def get_node_statistics( + request: Request, + end_user_id: str = Query(..., description="End user ID"), + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """Get memory node type statistics for an end user.""" + current_user = get_current_user_from_api_key(db, api_key_auth) + validate_end_user_in_workspace(db, end_user_id, api_key_auth.workspace_id) + + return await user_memory_controllers.get_node_statistics_api( + end_user_id=end_user_id, + current_user=current_user, + db=db, + ) + + +# ==================== 用户摘要 & 洞察 ==================== + + +@router.get("/analytics/user_summary") +@require_api_key(scopes=["memory"]) +async def get_user_summary( + request: Request, + end_user_id: str = Query(..., description="End user ID"), + language_type: str = Header(default=None, alias="X-Language-Type"), + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """Get cached user summary for an end user.""" + current_user = get_current_user_from_api_key(db, api_key_auth) + validate_end_user_in_workspace(db, end_user_id, api_key_auth.workspace_id) + + return await user_memory_controllers.get_user_summary_api( + end_user_id=end_user_id, + language_type=language_type, + current_user=current_user, + db=db, + ) + + +@router.get("/analytics/memory_insight") +@require_api_key(scopes=["memory"]) +async def get_memory_insight( + request: Request, + end_user_id: str = Query(..., description="End user ID"), + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """Get cached memory insight report for an end user.""" + current_user = get_current_user_from_api_key(db, api_key_auth) + validate_end_user_in_workspace(db, end_user_id, api_key_auth.workspace_id) + + return await user_memory_controllers.get_memory_insight_report_api( + end_user_id=end_user_id, + current_user=current_user, + db=db, + ) + + +# ==================== 兴趣分布 ==================== + + +@router.get("/analytics/interest_distribution") +@require_api_key(scopes=["memory"]) +async def get_interest_distribution( + request: Request, + end_user_id: str = Query(..., description="End user ID"), + limit: int = Query(5, le=5, description="Max interest tags to return"), + language_type: str = Header(default=None, alias="X-Language-Type"), + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """Get interest distribution tags for an end user.""" + current_user = get_current_user_from_api_key(db, api_key_auth) + validate_end_user_in_workspace(db, end_user_id, api_key_auth.workspace_id) + + return await memory_agent_controller.get_interest_distribution_by_user_api( + end_user_id=end_user_id, + limit=limit, + language_type=language_type, + current_user=current_user, + db=db, + ) + + +# ==================== 终端用户信息 ==================== + + +@router.get("/analytics/end_user_info") +@require_api_key(scopes=["memory"]) +async def get_end_user_info( + request: Request, + end_user_id: str = Query(..., description="End user ID"), + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), +): + """Get end user basic information (name, aliases, metadata).""" + current_user = get_current_user_from_api_key(db, api_key_auth) + validate_end_user_in_workspace(db, end_user_id, api_key_auth.workspace_id) + + return await user_memory_controllers.get_end_user_info( + end_user_id=end_user_id, + current_user=current_user, + db=db, + ) \ No newline at end of file diff --git a/api/app/core/api_key_utils.py b/api/app/core/api_key_utils.py index fb6b9552..290630ce 100644 --- a/api/app/core/api_key_utils.py +++ b/api/app/core/api_key_utils.py @@ -3,6 +3,12 @@ import secrets from typing import Optional, Union from datetime import datetime +from sqlalchemy.orm import Session as _Session +from app.core.error_codes import BizCode as _BizCode +from app.core.exceptions import BusinessException as _BusinessException +from app.models.end_user_model import EndUser as _EndUser +from app.repositories.end_user_repository import EndUserRepository as _EndUserRepository + from app.models.api_key_model import ApiKeyType from fastapi import Response from fastapi.responses import JSONResponse @@ -65,3 +71,63 @@ def datetime_to_timestamp(dt: Optional[datetime]) -> Optional[int]: return None return int(dt.timestamp() * 1000) + + +def get_current_user_from_api_key(db: _Session, api_key_auth): + """通过 API Key 构造 current_user 对象。 + + 从 API Key 反查创建者(管理员用户),并设置其 workspace 上下文。 + 与内部接口的 Depends(get_current_user) (JWT) 等价。 + + Args: + db: 数据库会话 + api_key_auth: API Key 认证信息(ApiKeyAuth) + + Returns: + User ORM 对象,已设置 current_workspace_id + """ + from app.services import api_key_service + + api_key = api_key_service.ApiKeyService.get_api_key( + db, api_key_auth.api_key_id, api_key_auth.workspace_id + ) + current_user = api_key.creator + current_user.current_workspace_id = api_key_auth.workspace_id + return current_user + + +def validate_end_user_in_workspace( + db: _Session, + end_user_id: str, + workspace_id, +) -> _EndUser: + """校验 end_user 是否存在且属于指定 workspace。 + + Args: + db: 数据库会话 + end_user_id: 终端用户 ID + workspace_id: 工作空间 ID(UUID 或字符串均可) + + Returns: + EndUser ORM 对象(校验通过时) + + Raises: + BusinessException(USER_NOT_FOUND): end_user 不存在 + BusinessException(PERMISSION_DENIED): end_user 不属于该 workspace + """ + end_user_repo = _EndUserRepository(db) + end_user = end_user_repo.get_end_user_by_id(end_user_id) + + if end_user is None: + raise _BusinessException( + "End user not found", + _BizCode.USER_NOT_FOUND, + ) + + if str(end_user.workspace_id) != str(workspace_id): + raise _BusinessException( + "End user does not belong to this workspace", + _BizCode.PERMISSION_DENIED, + ) + + return end_user \ No newline at end of file From aac89b172fe0aa7ba66ccb449ba8ba7cc67f060b Mon Sep 17 00:00:00 2001 From: miao <1468212639@qq.com> Date: Thu, 23 Apr 2026 15:26:49 +0800 Subject: [PATCH 05/26] fix(memory): remove unused date import and fix docstring route paths Remove unused rom datetime import date in controller and service Fix Examples route paths from /episodic-list to /episodics to match actual router --- api/app/controllers/memory_explicit_controller.py | 7 +++---- api/app/services/memory_explicit_service.py | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/api/app/controllers/memory_explicit_controller.py b/api/app/controllers/memory_explicit_controller.py index 90758dc7..88877de3 100644 --- a/api/app/controllers/memory_explicit_controller.py +++ b/api/app/controllers/memory_explicit_controller.py @@ -4,7 +4,6 @@ 处理显性记忆相关的API接口,包括情景记忆和语义记忆的查询。 """ -from datetime import date from typing import Optional from fastapi import APIRouter, Depends, Query @@ -100,11 +99,11 @@ async def get_episodic_memory_list_api( ApiResponse: 包含情景记忆分页列表 Examples: - - 基础分页查询:GET /episodic-list?end_user_id=xxx&page=1&pagesize=5 + - 基础分页查询:GET /episodics?end_user_id=xxx&page=1&pagesize=5 返回第1页,每页5条数据 - - 按时间范围筛选:GET /episodic-list?end_user_id=xxx&page=1&pagesize=5&start_date=1738684800000&end_date=1738771199000 + - 按时间范围筛选:GET /episodics?end_user_id=xxx&page=1&pagesize=5&start_date=1738684800000&end_date=1738771199000 返回指定时间范围内的数据 - - 按情景类型筛选:GET /episodic-list?end_user_id=xxx&page=1&pagesize=5&episodic_type=important_event + - 按情景类型筛选:GET /episodics?end_user_id=xxx&page=1&pagesize=5&episodic_type=important_event 返回类型为"重要事件"的数据 Notes: diff --git a/api/app/services/memory_explicit_service.py b/api/app/services/memory_explicit_service.py index c5ab2e38..8da3b167 100644 --- a/api/app/services/memory_explicit_service.py +++ b/api/app/services/memory_explicit_service.py @@ -4,7 +4,6 @@ 处理显性记忆相关的业务逻辑,包括情景记忆和语义记忆的查询。 """ -from datetime import date from typing import Any, Dict, Optional from app.core.logging_config import get_logger From 7ac0eff0b89c1bbec4ef0b1c1e52031da1de62ad Mon Sep 17 00:00:00 2001 From: miao <1468212639@qq.com> Date: Thu, 23 Apr 2026 16:29:22 +0800 Subject: [PATCH 06/26] fix(memory): fix problems - Parameterize SKIP/LIMIT in Cypher query instead of f-string interpolation - Add UUID format validation in validate_end_user_in_workspace before DB query - Update limit/depth Query descriptions to clarify auto-cap behavior in service layer - Move uuid import to module level in api_key_utils.py Modified files: - api/app/services/memory_explicit_service.py - api/app/core/api_key_utils.py - api/app/controllers/service/user_memory_api_controller.py --- .../controllers/service/user_memory_api_controller.py | 4 ++-- api/app/core/api_key_utils.py | 10 ++++++++++ api/app/services/memory_explicit_service.py | 4 +++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/api/app/controllers/service/user_memory_api_controller.py b/api/app/controllers/service/user_memory_api_controller.py index 6d2498c0..ecbdec50 100644 --- a/api/app/controllers/service/user_memory_api_controller.py +++ b/api/app/controllers/service/user_memory_api_controller.py @@ -44,8 +44,8 @@ async def get_graph_data( request: Request, end_user_id: str = Query(..., description="End user ID"), node_types: Optional[str] = Query(None, description="Comma-separated node types filter"), - limit: int = Query(100, description="Max nodes to return, capped at 1000"), - depth: int = Query(1, description="Graph traversal depth, capped at 3"), + limit: int = Query(100, description="Max nodes to return (auto-capped at 1000 in service layer)"), + depth: int = Query(1, description="Graph traversal depth (auto-capped at 3 in service layer)"), center_node_id: Optional[str] = Query(None, description="Center node for subgraph"), api_key_auth: ApiKeyAuth = None, db: Session = Depends(get_db), diff --git a/api/app/core/api_key_utils.py b/api/app/core/api_key_utils.py index 290630ce..7687d8af 100644 --- a/api/app/core/api_key_utils.py +++ b/api/app/core/api_key_utils.py @@ -1,5 +1,6 @@ """API Key 工具函数""" import secrets +import uuid as _uuid from typing import Optional, Union from datetime import datetime @@ -112,9 +113,18 @@ def validate_end_user_in_workspace( EndUser ORM 对象(校验通过时) Raises: + BusinessException(INVALID_PARAMETER): end_user_id 格式无效 BusinessException(USER_NOT_FOUND): end_user 不存在 BusinessException(PERMISSION_DENIED): end_user 不属于该 workspace """ + try: + _uuid.UUID(end_user_id) + except (ValueError, AttributeError): + raise _BusinessException( + f"Invalid end_user_id format: {end_user_id}", + _BizCode.INVALID_PARAMETER, + ) + end_user_repo = _EndUserRepository(db) end_user = end_user_repo.get_end_user_by_id(end_user_id) diff --git a/api/app/services/memory_explicit_service.py b/api/app/services/memory_explicit_service.py index 8da3b167..e640124e 100644 --- a/api/app/services/memory_explicit_service.py +++ b/api/app/services/memory_explicit_service.py @@ -256,8 +256,10 @@ class MemoryExplicitService(MemoryBaseService): s.content AS content, s.created_at AS created_at ORDER BY s.created_at DESC - SKIP {skip} LIMIT {pagesize} + SKIP $skip LIMIT $limit """ + params["skip"] = skip + params["limit"] = pagesize result = await self.neo4j_connector.execute_query(data_query, **params) From 5960b5add8341de9eab2747f24aa15a8cd033b88 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Thu, 23 Apr 2026 16:58:07 +0800 Subject: [PATCH 07/26] feat(web): document-extractor add images output variable --- .../Workflow/components/Properties/hooks/useVariableList.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/web/src/views/Workflow/components/Properties/hooks/useVariableList.ts b/web/src/views/Workflow/components/Properties/hooks/useVariableList.ts index 994a0ae1..f979d924 100644 --- a/web/src/views/Workflow/components/Properties/hooks/useVariableList.ts +++ b/web/src/views/Workflow/components/Properties/hooks/useVariableList.ts @@ -56,6 +56,8 @@ const NODE_VARIABLES = { ], 'document-extractor': [ { label: 'text', dataType: 'string', field: 'text' }, + // { label: 'chunks', dataType: 'array[string]', field: 'chunks' }, + { label: 'images', dataType: 'array[file]', field: 'images' }, ], 'list-operator': [ { label: 'result', dataType: 'array[string]', field: 'result' }, From 4619b40d031c97308148419b55d2abcb43d84a70 Mon Sep 17 00:00:00 2001 From: miao <1468212639@qq.com> Date: Thu, 23 Apr 2026 19:32:13 +0800 Subject: [PATCH 08/26] =?UTF-8?q?fix(memory):=20fix=20timezone=20and=20add?= =?UTF-8?q?=20generate=5Fcache=20API=20endpoint=20=EF=BB=BF=20-=20Fix=20ep?= =?UTF-8?q?isodic=20memory=20time=20filter=20to=20use=20UTC=20(datetime.fr?= =?UTF-8?q?omtimestamp=20with=20tz=3Dtimezone.utc)=20=20=20to=20match=20Ne?= =?UTF-8?q?o4j=20stored=20UTC=20timestamps=20-=20Add=20POST=20/v1/memory/a?= =?UTF-8?q?nalytics/generate=5Fcache=20endpoint=20for=20cache=20generation?= =?UTF-8?q?=20via=20API=20Key=20=EF=BB=BF=20Modified=20files:=20-=20api/ap?= =?UTF-8?q?p/services/memory=5Fexplicit=5Fservice.py=20-=20api/app/control?= =?UTF-8?q?lers/service/user=5Fmemory=5Fapi=5Fcontroller.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/user_memory_api_controller.py | 37 ++++++++++++++++++- api/app/services/memory_explicit_service.py | 10 ++--- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/api/app/controllers/service/user_memory_api_controller.py b/api/app/controllers/service/user_memory_api_controller.py index ecbdec50..19a3a92f 100644 --- a/api/app/controllers/service/user_memory_api_controller.py +++ b/api/app/controllers/service/user_memory_api_controller.py @@ -9,6 +9,7 @@ 5./analytics/memory_insight - 记忆洞察接口 6./analytics/interest_distribution - 兴趣分布接口 7./analytics/end_user_info - 终端用户信息接口 +8./analytics/generate_cache - 缓存生成接口 路由前缀: /memory @@ -19,7 +20,7 @@ from typing import Optional -from fastapi import APIRouter, Depends, Header, Query, Request +from fastapi import APIRouter, Depends, Header, Query, Request, Body from sqlalchemy.orm import Session from app.core.api_key_auth import require_api_key @@ -27,6 +28,7 @@ from app.core.api_key_utils import get_current_user_from_api_key, validate_end_u from app.core.logging_config import get_business_logger from app.db import get_db from app.schemas.api_key_schema import ApiKeyAuth +from app.schemas.memory_storage_schema import GenerateCacheRequest # 包装内部服务 controller from app.controllers import user_memory_controllers, memory_agent_controller @@ -194,4 +196,35 @@ async def get_end_user_info( end_user_id=end_user_id, current_user=current_user, db=db, - ) \ No newline at end of file + ) + + +# ==================== 缓存生成 ==================== + + +@router.post("/analytics/generate_cache") +@require_api_key(scopes=["memory"]) +async def generate_cache( + request: Request, + api_key_auth: ApiKeyAuth = None, + db: Session = Depends(get_db), + message: str = Body(None, description="Request body"), + language_type: str = Header(default=None, alias="X-Language-Type"), +): + """Trigger cache generation (user summary + memory insight) for an end user or all workspace users.""" + body = await request.json() + cache_request = GenerateCacheRequest(**body) + + current_user = get_current_user_from_api_key(db, api_key_auth) + + if cache_request.end_user_id: + validate_end_user_in_workspace(db, cache_request.end_user_id, api_key_auth.workspace_id) + + return await user_memory_controllers.generate_cache_api( + request=cache_request, + language_type=language_type, + current_user=current_user, + db=db, + ) + + diff --git a/api/app/services/memory_explicit_service.py b/api/app/services/memory_explicit_service.py index e640124e..4d9a5c2b 100644 --- a/api/app/services/memory_explicit_service.py +++ b/api/app/services/memory_explicit_service.py @@ -201,12 +201,12 @@ class MemoryExplicitService(MemoryBaseService): where_clauses = ["s.end_user_id = $end_user_id"] params = {"end_user_id": end_user_id} - # 时间戳筛选(毫秒时间戳转为 ISO 字符串,使用 Neo4j datetime() 精确比较) + # 时间戳筛选(毫秒时间戳转为 UTC ISO 字符串,使用 Neo4j datetime() 精确比较) if start_date is not None and end_date is not None: - from datetime import datetime - start_dt = datetime.fromtimestamp(start_date / 1000) - end_dt = datetime.fromtimestamp(end_date / 1000) - # 开始时间取当天 00:00:00,结束时间取当天 23:59:59.999999 + from datetime import datetime, timezone + start_dt = datetime.fromtimestamp(start_date / 1000, tz=timezone.utc) + end_dt = datetime.fromtimestamp(end_date / 1000, tz=timezone.utc) + # 开始时间取当天 UTC 00:00:00,结束时间取当天 UTC 23:59:59.999999 start_iso = start_dt.strftime("%Y-%m-%dT") + "00:00:00.000000" end_iso = end_dt.strftime("%Y-%m-%dT") + "23:59:59.999999" From 5c89acced68717d96ed5f941fd8b1139b19c2231 Mon Sep 17 00:00:00 2001 From: wwq Date: Fri, 24 Apr 2026 10:29:41 +0800 Subject: [PATCH 09/26] fix(api_key): validate application publication status before key generation - Ensure the application exists and is published when resource_id is present; raise an exception otherwise. --- api/app/services/api_key_service.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/app/services/api_key_service.py b/api/app/services/api_key_service.py index 49b07121..e4367e98 100644 --- a/api/app/services/api_key_service.py +++ b/api/app/services/api_key_service.py @@ -65,6 +65,11 @@ class ApiKeyService: BizCode.BAD_REQUEST ) + if data.resource_id: + app = db.get(App, data.resource_id) + if not app or not app.current_release_id: + raise BusinessException("该应用未发布", BizCode.APP_NOT_PUBLISHED) + # 生成 API Key api_key = generate_api_key(data.type) From 0f7a7263ebad9f7b91267244ea4a837b5d58c5b5 Mon Sep 17 00:00:00 2001 From: wwq Date: Fri, 24 Apr 2026 11:39:33 +0800 Subject: [PATCH 10/26] fix(workflow): rectify error handling and bolster execution logging - Rectify exception propagation during node execution failures to ensure errors are correctly raised. - Bolster workflow logging to support failed status records and persist node execution data, including loop nodes. --- api/app/core/workflow/nodes/base_node.py | 29 ++++----- api/app/services/app_log_service.py | 56 ++++++++--------- api/app/services/workflow_service.py | 78 +++++++++++++++++++++++- 3 files changed, 117 insertions(+), 46 deletions(-) diff --git a/api/app/core/workflow/nodes/base_node.py b/api/app/core/workflow/nodes/base_node.py index 5458a80c..85f4bc5f 100644 --- a/api/app/core/workflow/nodes/base_node.py +++ b/api/app/core/workflow/nodes/base_node.py @@ -228,19 +228,21 @@ class BaseNode(ABC): logger.error( f"Node {self.node_id} execution timed out ({timeout} seconds)." ) - return self._wrap_error( + self._wrap_error( f"Node execution timed out ({timeout} seconds).", elapsed_time, state, variable_pool, ) + raise except Exception as e: elapsed_time = (time.time() - start_time) * 1000 logger.error( f"Node {self.node_id} execution failed: {e}", exc_info=True, ) - return self._wrap_error(str(e), elapsed_time, state, variable_pool) + self._wrap_error(str(e), elapsed_time, state, variable_pool) + raise async def run_stream( self, state: WorkflowState, @@ -351,11 +353,13 @@ class BaseNode(ABC): variable_pool ) yield error_output + raise except Exception as e: elapsed_time = (time.time() - start_time) * 1000 logger.error(f"Node {self.node_id} execution failed: {e}", exc_info=True) error_output = self._wrap_error(str(e), elapsed_time, state, variable_pool) yield error_output + raise def _wrap_output( self, @@ -447,26 +451,19 @@ class BaseNode(ABC): "error": error_message } - # if error_edge: - # # If an error edge exists, log a warning and continue to error node - # logger.warning( - # f"Node {self.node_id} execution failed, redirecting to error node: {error_edge['target']}" - # ) - # return { - # "node_outputs": { - # self.node_id: node_output - # }, - # "error": error_message, - # "error_node": self.node_id - # } - # else: writer = get_stream_writer() writer({ "type": "node_error", **node_output }) logger.error(f"Node {self.node_id} execution failed, stopping workflow: {error_message}") - raise Exception(f"Node {self.node_id} execution failed: {error_message}") + return { + "node_outputs": { + self.node_id: node_output + }, + "error": error_message, + "error_node": self.node_id + } def _extract_input(self, state: WorkflowState, variable_pool: VariablePool) -> dict[str, Any]: """Extracts the input data for this node (used for logging or audit). diff --git a/api/app/services/app_log_service.py b/api/app/services/app_log_service.py index 8f5052e6..b2801df3 100644 --- a/api/app/services/app_log_service.py +++ b/api/app/services/app_log_service.py @@ -163,7 +163,7 @@ class AppLogService: # 查询该会话关联的所有工作流执行记录(按时间正序) stmt = select(WorkflowExecution).where( WorkflowExecution.conversation_id == conversation_id, - WorkflowExecution.status == "completed" + WorkflowExecution.status.in_(["completed", "failed"]) ).order_by(WorkflowExecution.started_at.asc()) executions = self.db.scalars(stmt).all() @@ -188,10 +188,33 @@ class AppLogService: used_message_ids: set[str] = set() for execution in executions: - if not execution.output_data: + # 构建节点执行记录列表 + execution_nodes = [] + for node_exec in execution.node_executions: + node_execution = AppLogNodeExecution( + node_id=node_exec.node_id, + node_type=node_exec.node_type, + node_name=node_exec.node_name, + status=node_exec.status, + error=node_exec.error_message, + input=node_exec.input_data, + process=None, + output=node_exec.output_data, + elapsed_time=node_exec.elapsed_time, + token_usage=node_exec.token_usage, + ) + node_executions.append(node_execution) + execution_nodes.append(node_execution) + + if not execution_nodes: continue - # 找到该 execution 对应的 assistant message + # 失败的执行没有 assistant message,直接用 execution id 作为 key + if execution.status == "failed": + node_executions_map[f"execution_{str(execution.id)}"] = execution_nodes + continue + + # completed:通过时序匹配关联到对应的 assistant message # 逻辑:找 execution.started_at 之后最近的、未使用的 assistant message best_msg = None best_dt = None @@ -210,31 +233,6 @@ class AppLogService: msg_id_str = str(best_msg.id) used_message_ids.add(msg_id_str) - - # 提取节点输出 - output_data = execution.output_data - if isinstance(output_data, dict): - node_outputs = output_data.get("node_outputs", {}) - execution_nodes = [] - for node_id, node_data in node_outputs.items(): - if not isinstance(node_data, dict): - continue - node_execution = AppLogNodeExecution( - node_id=node_data.get("node_id", node_id), - node_type=node_data.get("node_type", "unknown"), - node_name=node_data.get("node_name"), - status=node_data.get("status", "unknown"), - error=node_data.get("error"), - input=node_data.get("input"), - process=node_data.get("process"), - output=node_data.get("output"), - elapsed_time=node_data.get("elapsed_time"), - token_usage=node_data.get("token_usage"), - ) - node_executions.append(node_execution) - execution_nodes.append(node_execution) - - # 将节点记录关联到 message_id - node_executions_map[msg_id_str] = execution_nodes + node_executions_map[msg_id_str] = execution_nodes return node_executions, node_executions_map diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py index 0d282d78..6c93893a 100644 --- a/api/app/services/workflow_service.py +++ b/api/app/services/workflow_service.py @@ -17,8 +17,9 @@ from app.core.workflow.executor import execute_workflow, execute_workflow_stream from app.core.workflow.nodes.enums import NodeType from app.core.workflow.validator import validate_workflow_config from app.db import get_db +from sqlalchemy import select from app.models import App -from app.models.workflow_model import WorkflowConfig, WorkflowExecution +from app.models.workflow_model import WorkflowConfig, WorkflowExecution, WorkflowNodeExecution from app.repositories import knowledge_repository from app.repositories.workflow_repository import ( WorkflowConfigRepository, @@ -909,6 +910,8 @@ class WorkflowService: input_data["conv_messages"] = conv_messages init_message_length = len(input_data.get("conv_messages", [])) message_id = uuid.uuid4() + _node_order_counter = 0 + _cycle_items: dict[str, list] = {} # 新会话时写入开场白 is_new_conversation = init_message_length == 0 @@ -939,6 +942,52 @@ class WorkflowService: memory_storage_type=storage_type, user_rag_memory_id=user_rag_memory_id ): + event_type = event.get("event") + event_data = event.get("data", {}) + + # 持久化节点执行记录 + if event_type == "node_end": + node_id = event_data.get("node_id") + node_cfg = next((n for n in config.nodes if n.get("id") == node_id), {}) + self.db.add(WorkflowNodeExecution( + execution_id=execution.id, + node_id=node_id, + node_type=node_cfg.get("type", "unknown"), + node_name=node_cfg.get("data", {}).get("label") or node_id, + execution_order=_node_order_counter, + status="completed", + input_data=event_data.get("input"), + output_data=event_data.get("output"), + elapsed_time=event_data.get("elapsed_time"), + token_usage=event_data.get("token_usage"), + )) + self.db.commit() + _node_order_counter += 1 + + elif event_type == "node_error": + node_id = event_data.get("node_id") + node_cfg = next((n for n in config.nodes if n.get("id") == node_id), {}) + self.db.add(WorkflowNodeExecution( + execution_id=execution.id, + node_id=node_id, + node_type=node_cfg.get("type", "unknown"), + node_name=node_cfg.get("data", {}).get("label") or node_id, + execution_order=_node_order_counter, + status="failed", + input_data=event_data.get("input"), + output_data=None, + error_message=event_data.get("error"), + elapsed_time=event_data.get("elapsed_time"), + )) + self.db.commit() + _node_order_counter += 1 + + elif event_type == "cycle_item": + cycle_id = event_data.get("cycle_id") + if cycle_id not in _cycle_items: + _cycle_items[cycle_id] = [] + _cycle_items[cycle_id].append(event_data) + if event.get("event") == "workflow_end": status = event.get("data", {}).get("status") token_usage = event.get("data", {}).get("token_usage", {}) or {} @@ -1003,6 +1052,33 @@ class WorkflowService: ) else: logger.error(f"unexpect workflow run status, status: {status}") + # 把积累的 cycle_item 写入对应循环节点的 output_data + if _cycle_items: + for cycle_node_id, items in _cycle_items.items(): + node_exec = self.db.execute( + select(WorkflowNodeExecution).where( + WorkflowNodeExecution.execution_id == execution.id, + WorkflowNodeExecution.node_id == cycle_node_id + ) + ).scalar_one_or_none() + if node_exec: + node_exec.output_data = { + **(node_exec.output_data or {}), + "cycle_items": items + } + else: + node_cfg = next((n for n in config.nodes if n.get("id") == cycle_node_id), {}) + self.db.add(WorkflowNodeExecution( + execution_id=execution.id, + node_id=cycle_node_id, + node_type=node_cfg.get("type", "cycle"), + node_name=node_cfg.get("data", {}).get("label") or cycle_node_id, + execution_order=_node_order_counter, + status="completed", + output_data={"cycle_items": items}, + )) + _node_order_counter += 1 + self.db.commit() elif event.get("event") == "workflow_start": event["data"]["message_id"] = str(message_id) event = self._emit(public, event) From 2d120a64b1d9975f05dd5df4482ddce5723e1950 Mon Sep 17 00:00:00 2001 From: wwq Date: Fri, 24 Apr 2026 11:50:48 +0800 Subject: [PATCH 11/26] fix(workflow): rectify error handling and bolster execution logging - Rectify exception propagation during node execution failures to ensure errors are correctly raised. - Bolster workflow logging to support failed status records and persist node execution data, including loop nodes. --- api/app/core/workflow/nodes/base_node.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/api/app/core/workflow/nodes/base_node.py b/api/app/core/workflow/nodes/base_node.py index 85f4bc5f..a0f1f01e 100644 --- a/api/app/core/workflow/nodes/base_node.py +++ b/api/app/core/workflow/nodes/base_node.py @@ -228,21 +228,19 @@ class BaseNode(ABC): logger.error( f"Node {self.node_id} execution timed out ({timeout} seconds)." ) - self._wrap_error( + return self._wrap_error( f"Node execution timed out ({timeout} seconds).", elapsed_time, state, variable_pool, ) - raise except Exception as e: elapsed_time = (time.time() - start_time) * 1000 logger.error( f"Node {self.node_id} execution failed: {e}", exc_info=True, ) - self._wrap_error(str(e), elapsed_time, state, variable_pool) - raise + return self._wrap_error(str(e), elapsed_time, state, variable_pool) async def run_stream( self, state: WorkflowState, @@ -353,13 +351,11 @@ class BaseNode(ABC): variable_pool ) yield error_output - raise except Exception as e: elapsed_time = (time.time() - start_time) * 1000 logger.error(f"Node {self.node_id} execution failed: {e}", exc_info=True) error_output = self._wrap_error(str(e), elapsed_time, state, variable_pool) yield error_output - raise def _wrap_output( self, @@ -457,13 +453,7 @@ class BaseNode(ABC): **node_output }) logger.error(f"Node {self.node_id} execution failed, stopping workflow: {error_message}") - return { - "node_outputs": { - self.node_id: node_output - }, - "error": error_message, - "error_node": self.node_id - } + raise Exception(f"Node {self.node_id} execution failed: {error_message}") def _extract_input(self, state: WorkflowState, variable_pool: VariablePool) -> dict[str, Any]: """Extracts the input data for this node (used for logging or audit). From 279353e1ce51da15aa23cc9bcc33b2a7b8ea8e94 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 24 Apr 2026 11:52:11 +0800 Subject: [PATCH 12/26] feat(web): file upload add document_image_recognition config --- web/src/i18n/en.ts | 3 +++ web/src/i18n/zh.ts | 3 +++ .../FeaturesConfig/FileUploadSettingModal.tsx | 22 ++++++++++++++----- .../OpenStatementSettingModal.tsx | 1 + .../components/ModelConfigModal.tsx | 3 ++- web/src/views/ApplicationConfig/types.ts | 2 ++ 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts index 31354817..e5a80431 100644 --- a/web/src/i18n/en.ts +++ b/web/src/i18n/en.ts @@ -1460,6 +1460,7 @@ export const en = { maxCount: 'Max Files', singleMaxSize: 'Max Size', unix: 'items', + document_image_recognition: 'Enable image recognition in documents', text_to_speech: 'Text to Speech', text_to_speech_desc: 'Text can be converted to speech', opening_statement: 'Conversation Opening', @@ -1536,6 +1537,7 @@ export const en = { json_output: 'Support JSON formatted output', thinking_budget_tokens: 'thinking budget tokens', thinking_budget_tokens_max_error: "Cannot exceed the max tokens limit ({{max}})", + logSearchPlaceholder: 'Search log content', }, userMemory: { userMemory: 'User Memory', @@ -2529,6 +2531,7 @@ Memory Bear: After the rebellion, regional warlordism intensified for several re input_result: 'Input', output_result: 'Output', + process_result: 'Data Processing', error: 'Error Message', loopNum: ' loops', iterationNum: ' iterations', diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts index d802b731..8d71876e 100644 --- a/web/src/i18n/zh.ts +++ b/web/src/i18n/zh.ts @@ -790,6 +790,7 @@ export const zh = { maxCount: '最大文件数', singleMaxSize: '单文件最大大小', unix: '个', + document_image_recognition: '是否识别文档中的图片', text_to_speech: '文字转语音', text_to_speech_desc: '文本可以转换成语音', opening_statement: '对话开场白', @@ -866,6 +867,7 @@ export const zh = { json_output: '支持JSON格式化输出', thinking_budget_tokens: '深度思考预算Token数', thinking_budget_tokens_max_error: "不能超过 最大令牌数 ({{max}})", + logSearchPlaceholder: '搜索日志内容', }, table: { totalRecords: '共 {{total}} 条记录' @@ -2493,6 +2495,7 @@ export const zh = { input_result: '输入', output_result: '输出', + process_result: '数据处理', error: '错误信息', loopNum: '个循环', iterationNum: '个迭代', diff --git a/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx b/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx index 5c17aa53..2ae09a5e 100644 --- a/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx +++ b/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx @@ -97,6 +97,7 @@ export const defaultValues: FileUpload = { "json", "md", ], + document_image_recognition: false, video_enabled: false, video_max_size_mb: 100, video_allowed_extensions: [ @@ -219,11 +220,22 @@ const FileUploadSettingModal = forwardRef {isEnabled && ( - -
{t('application.singleMaxSize')}:
- - - + +
+
{t('application.singleMaxSize')}
+ + + +
+ {option.type === 'document' && +
+
{t('application.document_image_recognition')}
+ + + +
+ } +
)} diff --git a/web/src/views/ApplicationConfig/components/FeaturesConfig/OpenStatementSettingModal.tsx b/web/src/views/ApplicationConfig/components/FeaturesConfig/OpenStatementSettingModal.tsx index a46d973a..ed9204da 100644 --- a/web/src/views/ApplicationConfig/components/FeaturesConfig/OpenStatementSettingModal.tsx +++ b/web/src/views/ApplicationConfig/components/FeaturesConfig/OpenStatementSettingModal.tsx @@ -104,6 +104,7 @@ const OpenStatementSettingModal = forwardRef {source === 'workflow' ? diff --git a/web/src/views/ApplicationConfig/components/ModelConfigModal.tsx b/web/src/views/ApplicationConfig/components/ModelConfigModal.tsx index a7f9eb57..bda18571 100644 --- a/web/src/views/ApplicationConfig/components/ModelConfigModal.tsx +++ b/web/src/views/ApplicationConfig/components/ModelConfigModal.tsx @@ -184,7 +184,8 @@ const ModelConfigModal = forwardRef( { validator: (_, value) => { const maxTokens = values?.max_tokens - if (value !== undefined && maxTokens !== undefined && value > maxTokens) { + const deep_thinking = values?.deep_thinking; + if (deep_thinking && value !== undefined && maxTokens !== undefined && value > maxTokens) { return Promise.reject(t('application.thinking_budget_tokens_max_error', { max: maxTokens })) } return Promise.resolve() diff --git a/web/src/views/ApplicationConfig/types.ts b/web/src/views/ApplicationConfig/types.ts index 1b54d3aa..c34f882f 100644 --- a/web/src/views/ApplicationConfig/types.ts +++ b/web/src/views/ApplicationConfig/types.ts @@ -438,6 +438,7 @@ interface FileSetttings { document_enabled: boolean; document_max_size_mb: number; document_allowed_extensions: string[]; + document_image_recognition: boolean; video_enabled: boolean; video_max_size_mb: number; video_allowed_extensions: string[]; @@ -499,6 +500,7 @@ export interface LogItem { is_draft: boolean; created_at: number; updated_at: number; + node_executions_map?: Record } export interface LogDetailModalRef { handleOpen: (vo: LogItem) => void; From b33f5951d854d60ee635cc47b562a720a72e3e62 Mon Sep 17 00:00:00 2001 From: wwq Date: Fri, 24 Apr 2026 11:52:15 +0800 Subject: [PATCH 13/26] fix(workflow): rectify error handling and bolster execution logging - Rectify exception propagation during node execution failures to ensure errors are correctly raised. - Bolster workflow logging to support failed status records and persist node execution data, including loop nodes. --- api/app/core/workflow/nodes/base_node.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/api/app/core/workflow/nodes/base_node.py b/api/app/core/workflow/nodes/base_node.py index a0f1f01e..5458a80c 100644 --- a/api/app/core/workflow/nodes/base_node.py +++ b/api/app/core/workflow/nodes/base_node.py @@ -447,6 +447,19 @@ class BaseNode(ABC): "error": error_message } + # if error_edge: + # # If an error edge exists, log a warning and continue to error node + # logger.warning( + # f"Node {self.node_id} execution failed, redirecting to error node: {error_edge['target']}" + # ) + # return { + # "node_outputs": { + # self.node_id: node_output + # }, + # "error": error_message, + # "error_node": self.node_id + # } + # else: writer = get_stream_writer() writer({ "type": "node_error", From be10bab763e55b492a77afa9a6124bf8df1878fb Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Fri, 24 Apr 2026 13:56:32 +0800 Subject: [PATCH 14/26] refactor(core): migrate task scheduler to per-user queue with dynamic sharding --- api/app/celery_task_scheduler.py | 431 +++++++++++++----- .../generate_engine/retrieval_summary.py | 2 +- api/app/services/memory_api_service.py | 2 +- 3 files changed, 326 insertions(+), 109 deletions(-) diff --git a/api/app/celery_task_scheduler.py b/api/app/celery_task_scheduler.py index 8cdce658..e7f946b6 100644 --- a/api/app/celery_task_scheduler.py +++ b/api/app/celery_task_scheduler.py @@ -1,21 +1,70 @@ +import hashlib import json +import os +import socket import threading import time +import uuid import redis from app.core.config import settings -from celery_app import celery_app from app.core.logging_config import get_named_logger +from app.celery_app import celery_app logger = get_named_logger("task_scheduler") -STREAM_KEY = "celery_task_stream" +# per-user queue scheduler:uq:{user_id} +USER_QUEUE_PREFIX = "scheduler:uq:" +# User Collection of Pending Messages +ACTIVE_USERS = "scheduler:active_users" +# Set of users that can dispatch (ready signal) +READY_SET = "scheduler:ready_users" +# Metadata of tasks that have been dispatched and are pending completion PENDING_HASH = "scheduler:pending_tasks" -TASK_TIMEOUT = 7800 +# Dynamic Sharding: Instance Registry +REGISTRY_KEY = "scheduler:instances" + +TASK_TIMEOUT = 7800 # Task timeout (seconds), considered lost if exceeded +HEARTBEAT_INTERVAL = 10 # Heartbeat interval (seconds) +INSTANCE_TTL = 30 # Instance timeout (seconds) + +LUA_ATOMIC_LOCK = """ +local dispatch_lock = KEYS[1] +local lock_key = KEYS[2] +local instance_id = ARGV[1] +local dispatch_ttl = tonumber(ARGV[2]) +local lock_ttl = tonumber(ARGV[3]) + +if redis.call('SET', dispatch_lock, instance_id, 'NX', 'EX', dispatch_ttl) == false then + return 0 +end + +if redis.call('EXISTS', lock_key) == 1 then + redis.call('DEL', dispatch_lock) + return -1 +end + +redis.call('SET', lock_key, 'dispatching', 'EX', lock_ttl) +return 1 +""" + +LUA_SAFE_DELETE = """ +if redis.call('GET', KEYS[1]) == ARGV[1] then + return redis.call('DEL', KEYS[1]) +end +return 0 +""" -def health_check_server(): +def stable_hash(value: str) -> int: + return int.from_bytes( + hashlib.md5(value.encode("utf-8")).digest(), + "big" + ) + + +def health_check_server(scheduler_ref): import uvicorn from fastapi import FastAPI @@ -23,19 +72,20 @@ def health_check_server(): @health_app.get("/") def health(): - return scheduler.health() + return scheduler_ref.health() + port = int(os.environ.get("SCHEDULER_HEALTH_PORT", "8001")) threading.Thread( target=uvicorn.run, kwargs={ "app": health_app, "host": "0.0.0.0", - "port": 8001, - "log_config": None + "port": port, + "log_config": None, }, - daemon=True + daemon=True, ).start() - logger.info(f"[Health] Server started at http://0.0.0.0:8001") + logger.info("[Health] Server started at http://0.0.0.0:%s", port) class RedisTaskScheduler: @@ -50,27 +100,43 @@ class RedisTaskScheduler: self.running = False self.dispatched = 0 self.errors = 0 - self._leader = False + + self.instance_id = f"{socket.gethostname()}-{os.getpid()}" + self._shard_index = 0 + self._shard_count = 1 + self._last_heartbeat = 0.0 def push_task(self, task_name, user_id, params): try: - msg_id = self.redis.xadd( - STREAM_KEY, - fields={ - "task_name": task_name, - "user_id": user_id, - "params": json.dumps(params), - } - ) - self.redis.set( + msg_id = str(uuid.uuid4()) + msg = json.dumps({ + "msg_id": msg_id, + "task_name": task_name, + "user_id": user_id, + "params": json.dumps(params), + }) + + lock_key = f"{task_name}:{user_id}" + queue_key = f"{USER_QUEUE_PREFIX}{user_id}" + + pipe = self.redis.pipeline() + pipe.rpush(queue_key, msg) + pipe.sadd(ACTIVE_USERS, user_id) + pipe.set( f"task_tracker:{msg_id}", json.dumps({"status": "QUEUED", "task_id": None}), - ex=86400 + ex=86400, ) + pipe.execute() + + if not self.redis.exists(lock_key): + self.redis.sadd(READY_SET, user_id) + + logger.info("Task pushed: msg_id=%s task=%s user=%s", msg_id, task_name, user_id) return msg_id except Exception as e: logger.error("Push task exception %s", e, exc_info=True) - raise e + raise def get_task_status(self, msg_id: str) -> dict: raw = self.redis.get(f"task_tracker:{msg_id}") @@ -81,6 +147,7 @@ class RedisTaskScheduler: status = tracker["status"] task_id = tracker.get("task_id") result_content = tracker.get("result") or {} + if status == "DISPATCHED" and task_id: result_raw = self.redis.get(f"celery-task-meta-{task_id}") if result_raw: @@ -105,6 +172,7 @@ class RedisTaskScheduler: cleanup_pipe = self.redis.pipeline() has_cleanup = False + ready_user_ids = set() for task_id, raw_result in zip(task_ids, results): try: @@ -114,12 +182,16 @@ class RedisTaskScheduler: age = now - dispatched_at should_cleanup = False - result_data = None + result_data = {} + if raw_result is not None: result_data = json.loads(raw_result) if result_data.get("status") in ("SUCCESS", "FAILURE", "REVOKED"): should_cleanup = True - logger.info("Task finished: %s state=%s", task_id, result_data.get("status")) + logger.info( + "Task finished: %s state=%s", task_id, + result_data.get("status"), + ) elif age > TASK_TIMEOUT: should_cleanup = True logger.warning( @@ -128,9 +200,14 @@ class RedisTaskScheduler: ) if should_cleanup: - final_status = result_data.get("status", "UNKNOWN") if result_data else "EXPIRED" - cleanup_pipe.delete(lock_key) + final_status = ( + result_data.get("status", "UNKNOWN") if result_data else "EXPIRED" + ) + + self.redis.eval(LUA_SAFE_DELETE, 1, lock_key, task_id) + cleanup_pipe.hdel(PENDING_HASH, task_id) + tracker_msg_id = meta.get("msg_id") if tracker_msg_id: cleanup_pipe.set( @@ -138,146 +215,286 @@ class RedisTaskScheduler: json.dumps({ "status": final_status, "task_id": task_id, - "result": result_data.get("result") or {} + "result": result_data.get("result") or {}, }), ex=86400, ) has_cleanup = True + + parts = lock_key.split(":", 1) + if len(parts) == 2: + ready_user_ids.add(parts[1]) + except Exception as e: logger.error("Cleanup error for %s: %s", task_id, e, exc_info=True) self.errors += 1 + if has_cleanup: cleanup_pipe.execute() + if ready_user_ids: + self.redis.sadd(READY_SET, *ready_user_ids) + + def _heartbeat(self): + now = time.time() + if now - self._last_heartbeat < HEARTBEAT_INTERVAL: + return + self._last_heartbeat = now + + self.redis.hset(REGISTRY_KEY, self.instance_id, str(now)) + + all_instances = self.redis.hgetall(REGISTRY_KEY) + + alive = [] + dead = [] + for iid, ts in all_instances.items(): + if now - float(ts) < INSTANCE_TTL: + alive.append(iid) + else: + dead.append(iid) + + if dead: + pipe = self.redis.pipeline() + for iid in dead: + pipe.hdel(REGISTRY_KEY, iid) + pipe.execute() + logger.info("Cleaned dead instances: %s", dead) + + alive.sort() + self._shard_count = max(len(alive), 1) + self._shard_index = ( + alive.index(self.instance_id) if self.instance_id in alive else 0 + ) + logger.debug( + "Shard: %s/%s (instance=%s, alive=%d)", + self._shard_index, self._shard_count, + self.instance_id, len(alive), + ) + + def _is_mine(self, user_id: str) -> bool: + if self._shard_count <= 1: + return True + return stable_hash(user_id) % self._shard_count == self._shard_index + def _dispatch(self, msg_id, msg_data) -> bool: - user_id = msg_data['user_id'] - task_name = msg_data['task_name'] - params = json.loads(msg_data.get('params', "{}")) + user_id = msg_data["user_id"] + task_name = msg_data["task_name"] + params = json.loads(msg_data.get("params", "{}")) lock_key = f"{task_name}:{user_id}" + dispatch_lock = f"dispatch:{msg_id}" + + result = self.redis.eval( + LUA_ATOMIC_LOCK, 2, + dispatch_lock, lock_key, + self.instance_id, str(300), str(3600), + ) + + if result == 0: + return False + if result == -1: + return False + try: task = celery_app.send_task(task_name, kwargs=params) + except Exception as e: + pipe = self.redis.pipeline() + pipe.delete(dispatch_lock) + pipe.delete(lock_key) + pipe.execute() + self.errors += 1 + logger.error( + "send_task failed for %s:%s msg=%s: %s", + task_name, user_id, msg_id, e, exc_info=True, + ) + return False + + try: pipe = self.redis.pipeline() pipe.set(lock_key, task.id, ex=3600) pipe.hset(PENDING_HASH, task.id, json.dumps({ "lock_key": lock_key, "dispatched_at": time.time(), - "msg_id": msg_id + "msg_id": msg_id, })) - pipe.xdel(STREAM_KEY, msg_id) + pipe.delete(dispatch_lock) pipe.set( f"task_tracker:{msg_id}", json.dumps({"status": "DISPATCHED", "task_id": task.id}), ex=86400, ) pipe.execute() - self.dispatched += 1 - logger.info("Task dispatched: %s", task.id) - return True except Exception as e: + logger.error( + "Post-dispatch state update failed for %s: %s", + task.id, e, exc_info=True, + ) self.errors += 1 - logger.error("Task dispatch error for %s: %s", task_name, e, exc_info=True) - return False - def _leader_lock_extend(self, lock, interval=20): - while self._leader: - try: - lock.extend(60) - except redis.exceptions.LockNotOwnedError: - logger.warning("Lost leader lock during extend") - self._leader = False - except Exception as e: - logger.error("Lock extend error: %s", e) - for _ in range(interval): - if not self._leader: - break - time.sleep(1) + self.dispatched += 1 + logger.info("Task dispatched: %s (msg=%s)", task.id, msg_id) + return True - def schedule_loop(self): - self.running = True - self._cleanup_finished() - resp = self.redis.xread( - streams={STREAM_KEY: '0-0'}, - count=500, - block=5000, - ) - if not resp: + def _process_batch(self, user_ids): + if not user_ids: return - messages = [] - for stream_key, msgs in resp: - messages.extend(msgs) + pipe = self.redis.pipeline() + for uid in user_ids: + pipe.lindex(f"{USER_QUEUE_PREFIX}{uid}", 0) + heads = pipe.execute() - lock_keys = [] - for msg_id, msg_data in messages: - lock_keys.append(f"{msg_data['task_name']}:{msg_data['user_id']}") + candidates = [] # (user_id, msg_dict) + empty_users = [] + + for uid, head in zip(user_ids, heads): + if head is None: + empty_users.append(uid) + else: + try: + candidates.append((uid, json.loads(head))) + except (json.JSONDecodeError, TypeError) as e: + logger.error("Bad message in queue for user %s: %s", uid, e) + self.redis.lpop(f"{USER_QUEUE_PREFIX}{uid}") + + if empty_users: + pipe = self.redis.pipeline() + for uid in empty_users: + pipe.srem(ACTIVE_USERS, uid) + pipe.execute() + + if not candidates: + return + + for uid, msg in candidates: + if self._dispatch(msg["msg_id"], msg): + self.redis.lpop(f"{USER_QUEUE_PREFIX}{uid}") + + def schedule_loop(self): + self._heartbeat() + self._cleanup_finished() pipe = self.redis.pipeline() - for key in lock_keys: - pipe.exists(key) - lock_exists = pipe.execute() + pipe.smembers(READY_SET) + pipe.delete(READY_SET) + results = pipe.execute() + ready_users = results[0] or set() - deliver_keys = set() - for (msg_id, msg_data), locked in zip(messages, lock_exists): - user_id = msg_data['user_id'] - lock_key = f"{msg_data['task_name']}:{user_id}" + my_users = [uid for uid in ready_users if self._is_mine(uid)] - if locked or lock_key in deliver_keys: - continue + if not my_users: + time.sleep(0.5) + return - dispatched_successfully = self._dispatch(msg_id, msg_data) - if dispatched_successfully: - deliver_keys.add(lock_key) + self._process_batch(my_users) time.sleep(0.1) - def run_server(self): - health_check_server() + def _full_scan(self): + cursor = 0 + ready_batch = [] + while True: + cursor, user_ids = self.redis.sscan( + ACTIVE_USERS, cursor=cursor, count=1000, + ) + if user_ids: + my_users = [uid for uid in user_ids if self._is_mine(uid)] + if my_users: + pipe = self.redis.pipeline() + for uid in my_users: + pipe.lindex(f"{USER_QUEUE_PREFIX}{uid}", 0) + heads = pipe.execute() - lock = self.redis.lock( - "scheduler:leader", - timeout=60, - blocking_timeout=10, - thread_local=False + for uid, head in zip(my_users, heads): + if head is None: + continue + try: + msg = json.loads(head) + lock_key = f"{msg['task_name']}:{uid}" + ready_batch.append((uid, lock_key)) + except (json.JSONDecodeError, TypeError): + continue + + if cursor == 0: + break + + if not ready_batch: + return + + pipe = self.redis.pipeline() + for _, lock_key in ready_batch: + pipe.exists(lock_key) + lock_exists = pipe.execute() + + ready_uids = [ + uid for (uid, _), locked in zip(ready_batch, lock_exists) + if not locked + ] + + if ready_uids: + self.redis.sadd(READY_SET, *ready_uids) + logger.info("Full scan found %d ready users", len(ready_uids)) + + def run_server(self): + health_check_server(self) + self.running = True + + last_full_scan = 0.0 + full_scan_interval = 30.0 + + logger.info( + "Scheduler started: instance=%s", self.instance_id, ) + while True: try: - if lock.acquire(blocking=True): - self._leader = True - t = threading.Thread( - target=self._leader_lock_extend, - args=(lock, 20), - daemon=True - ) - t.start() - try: - while self._leader: - self.schedule_loop() - finally: - self._leader = False - t.join(timeout=30) - try: - lock.release() - except redis.exceptions.LockNotOwnedError: - pass - self.running = False - else: - time.sleep(5) + self.schedule_loop() + + now = time.time() + if now - last_full_scan > full_scan_interval: + self._full_scan() + last_full_scan = now + except Exception as e: logger.error("Scheduler exception %s", e, exc_info=True) + self.errors += 1 time.sleep(5) def health(self) -> dict: return { "running": self.running, - "pending": self.redis.xlen(STREAM_KEY), + "active_users": self.redis.scard(ACTIVE_USERS), + "ready_users": self.redis.scard(READY_SET), + "pending_tasks": self.redis.hlen(PENDING_HASH), "dispatched": self.dispatched, - "errors": self.errors + "errors": self.errors, + "shard": f"{self._shard_index}/{self._shard_count}", + "instance": self.instance_id, } + def shutdown(self): + logger.info("Scheduler shutting down: instance=%s", self.instance_id) + self.running = False + try: + self.redis.hdel(REGISTRY_KEY, self.instance_id) + except Exception as e: + logger.error("Shutdown cleanup error: %s", e) + scheduler: RedisTaskScheduler | None = None if scheduler is None: scheduler = RedisTaskScheduler() -if __name__ == '__main__': +if __name__ == "__main__": + import signal + import sys + + + def _signal_handler(signum, frame): + scheduler.shutdown() + sys.exit(0) + + + signal.signal(signal.SIGTERM, _signal_handler) + signal.signal(signal.SIGINT, _signal_handler) + scheduler.run_server() diff --git a/api/app/core/memory/read_services/generate_engine/retrieval_summary.py b/api/app/core/memory/read_services/generate_engine/retrieval_summary.py index 6b166cf2..c46e93f0 100644 --- a/api/app/core/memory/read_services/generate_engine/retrieval_summary.py +++ b/api/app/core/memory/read_services/generate_engine/retrieval_summary.py @@ -8,4 +8,4 @@ class RetrievalSummaryProcessor: @staticmethod def verify(content: str, llm_client: RedBearLLM): - return \ No newline at end of file + return diff --git a/api/app/services/memory_api_service.py b/api/app/services/memory_api_service.py index a1ceef86..82d1c463 100644 --- a/api/app/services/memory_api_service.py +++ b/api/app/services/memory_api_service.py @@ -187,7 +187,7 @@ class MemoryAPIService: } ) - logger.info(f"Memory write task submitted, end_user_id={end_user_id}") + logger.info(f"Memory write task submitted, task_id={task_id} end_user_id={end_user_id}") return { "task_id": task_id, From c556995f3a65e85445273a105d1ce4cdbdec131a Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 24 Apr 2026 15:10:32 +0800 Subject: [PATCH 15/26] feat(web): app citation features add allow_download --- web/src/assets/images/application/export.svg | 8 ++--- web/src/assets/images/application/import.svg | 8 ++--- web/src/components/Chat/ChatContent.tsx | 24 +++++++++----- web/src/components/Chat/types.ts | 3 +- web/src/i18n/en.ts | 1 + web/src/i18n/zh.ts | 1 + web/src/views/ApplicationConfig/Agent.tsx | 32 ++++++++++--------- .../FeaturesConfig/FeaturesConfigModal.tsx | 6 ++++ 8 files changed, 51 insertions(+), 32 deletions(-) diff --git a/web/src/assets/images/application/export.svg b/web/src/assets/images/application/export.svg index c07a346d..6dde8f3c 100644 --- a/web/src/assets/images/application/export.svg +++ b/web/src/assets/images/application/export.svg @@ -1,12 +1,12 @@ - 导出 + 导入 - - + + - + diff --git a/web/src/assets/images/application/import.svg b/web/src/assets/images/application/import.svg index 6dde8f3c..c07a346d 100644 --- a/web/src/assets/images/application/import.svg +++ b/web/src/assets/images/application/import.svg @@ -1,12 +1,12 @@ - 导入 + 导出 - - + + - + diff --git a/web/src/components/Chat/ChatContent.tsx b/web/src/components/Chat/ChatContent.tsx index f28b5dce..f4c89578 100644 --- a/web/src/components/Chat/ChatContent.tsx +++ b/web/src/components/Chat/ChatContent.tsx @@ -272,14 +272,22 @@ const ChatContent: FC = ({
{t('memoryConversation.citations')}
{item.meta_data?.citations?.map((citation, idx) => ( -
{ - const params = new URLSearchParams({ documentId: citation.document_id, parentId: citation.knowledge_id }); - window.open(`/#/knowledge-base/${citation.knowledge_id}/DocumentDetails?${params}`, '_blank'); - }} - >{citation.file_name}
+ +
{ + const params = new URLSearchParams({ documentId: citation.document_id, parentId: citation.knowledge_id }); + window.open(`/#/knowledge-base/${citation.knowledge_id}/DocumentDetails?${params}`, '_blank'); + }} + >{citation.file_name}
+ + {citation.download_url && +
handleDownload({ url: citation.download_url })} + >
+ } +
))}
} diff --git a/web/src/components/Chat/types.ts b/web/src/components/Chat/types.ts index e7967bad..f251db3a 100644 --- a/web/src/components/Chat/types.ts +++ b/web/src/components/Chat/types.ts @@ -24,7 +24,7 @@ export interface ChatItem { subContent?: Record[]; error?: string; meta_data?: { - audio_url?: string; + audio_url?: string | null; audio_status?: string; files?: any[]; suggested_questions?: string[]; @@ -33,6 +33,7 @@ export interface ChatItem { file_name: string; knowledge_id: string; score: string; + download_url?: string; }[]; reasoning_content?: string; }, diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts index e5a80431..2a7534c4 100644 --- a/web/src/i18n/en.ts +++ b/web/src/i18n/en.ts @@ -1470,6 +1470,7 @@ export const en = { add_questions: 'Add Option', citation: 'Citation and Attribution', citation_desc: 'Display the attribution of source documents and generated content', + allow_download: 'Allow downloading cited source text', invalidVariablesTitle: "The following undefined variables are referenced in the conversation opening. Do you want to save the opening configuration?", deep_thinking: 'Enable Deep Thinking', diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts index 8d71876e..6989cf3f 100644 --- a/web/src/i18n/zh.ts +++ b/web/src/i18n/zh.ts @@ -800,6 +800,7 @@ export const zh = { add_questions: '添加选项', citation: '引用和归属', citation_desc: '显示源文档和生成内容的归属部分', + allow_download: '允许下载引用原文', invalidVariablesTitle: "对话开场白中引用了以下未定义的变量,是否保存开场白配置?", deep_thinking: '开启深度思考', diff --git a/web/src/views/ApplicationConfig/Agent.tsx b/web/src/views/ApplicationConfig/Agent.tsx index 4d86fba7..338f50f9 100644 --- a/web/src/views/ApplicationConfig/Agent.tsx +++ b/web/src/views/ApplicationConfig/Agent.tsx @@ -7,7 +7,7 @@ import { useEffect, useRef, useState, forwardRef, useImperativeHandle, useMemo } from 'react'; import { useTranslation } from 'react-i18next' import { useParams } from 'react-router-dom'; -import { Row, Col, Space, Form, Input, Button, App, Spin, Flex } from 'antd' +import { Row, Col, Space, Form, Input, Button, App, Flex } from 'antd' import Chat from './components/Chat' import RbCard from '@/components/RbCard/Card' @@ -357,21 +357,23 @@ const Agent = forwardRef m[1]))] - const variables = values?.variables - const validNames = new Set(variables.map(v => v.name)) - const invalid = usedVars.filter(v => !validNames.has(v)) - if (invalid.length > 0) { - const newVars = invalid.map((name, i) => ({ - index: variables.length + i, - name, - display_name: name, - type: 'text', - required: true, - max_length: 48, - })) + if (value.opening_statement.enabled) { + const usedVars = [...new Set([...(statement?.matchAll(/\{\{(\w+)\}\}/g) ?? [])].map(m => m[1]))] + const variables = values?.variables + const validNames = new Set(variables.map(v => v.name)) + const invalid = usedVars.filter(v => !validNames.has(v)) + if (invalid.length > 0) { + const newVars = invalid.map((name, i) => ({ + index: variables.length + i, + name, + display_name: name, + type: 'text', + required: true, + max_length: 48, + })) - form.setFieldValue('variables', [...variables, ...newVars]) + form.setFieldValue('variables', [...variables, ...newVars]) + } } } const modelLogo = useMemo(() => { diff --git a/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx b/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx index 57d11295..e3664a03 100644 --- a/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx +++ b/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx @@ -155,6 +155,12 @@ const FeaturesConfigModal = forwardRef +
From c7c1570d406472d82dcb2c17157ee086664df01d Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 24 Apr 2026 15:18:14 +0800 Subject: [PATCH 16/26] feat(web): app citations --- web/src/components/Chat/ChatContent.tsx | 3 +-- web/src/views/ApplicationConfig/Agent.tsx | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/web/src/components/Chat/ChatContent.tsx b/web/src/components/Chat/ChatContent.tsx index f4c89578..a785ea49 100644 --- a/web/src/components/Chat/ChatContent.tsx +++ b/web/src/components/Chat/ChatContent.tsx @@ -272,9 +272,8 @@ const ChatContent: FC = ({
{t('memoryConversation.citations')}
{item.meta_data?.citations?.map((citation, idx) => ( - +
{ const params = new URLSearchParams({ documentId: citation.document_id, parentId: citation.knowledge_id }); diff --git a/web/src/views/ApplicationConfig/Agent.tsx b/web/src/views/ApplicationConfig/Agent.tsx index 338f50f9..d7455793 100644 --- a/web/src/views/ApplicationConfig/Agent.tsx +++ b/web/src/views/ApplicationConfig/Agent.tsx @@ -357,7 +357,7 @@ const Agent = forwardRef m[1]))] const variables = values?.variables const validNames = new Set(variables.map(v => v.name)) From cedf47b3bca5bf999b35f7b1bd74e6028e63d1f4 Mon Sep 17 00:00:00 2001 From: wwq Date: Fri, 24 Apr 2026 15:29:33 +0800 Subject: [PATCH 17/26] fix(workflow): rectify error handling and bolster execution logging --- api/app/services/workflow_service.py | 39 +--------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py index 6c93893a..ea302a9c 100644 --- a/api/app/services/workflow_service.py +++ b/api/app/services/workflow_service.py @@ -945,44 +945,7 @@ class WorkflowService: event_type = event.get("event") event_data = event.get("data", {}) - # 持久化节点执行记录 - if event_type == "node_end": - node_id = event_data.get("node_id") - node_cfg = next((n for n in config.nodes if n.get("id") == node_id), {}) - self.db.add(WorkflowNodeExecution( - execution_id=execution.id, - node_id=node_id, - node_type=node_cfg.get("type", "unknown"), - node_name=node_cfg.get("data", {}).get("label") or node_id, - execution_order=_node_order_counter, - status="completed", - input_data=event_data.get("input"), - output_data=event_data.get("output"), - elapsed_time=event_data.get("elapsed_time"), - token_usage=event_data.get("token_usage"), - )) - self.db.commit() - _node_order_counter += 1 - - elif event_type == "node_error": - node_id = event_data.get("node_id") - node_cfg = next((n for n in config.nodes if n.get("id") == node_id), {}) - self.db.add(WorkflowNodeExecution( - execution_id=execution.id, - node_id=node_id, - node_type=node_cfg.get("type", "unknown"), - node_name=node_cfg.get("data", {}).get("label") or node_id, - execution_order=_node_order_counter, - status="failed", - input_data=event_data.get("input"), - output_data=None, - error_message=event_data.get("error"), - elapsed_time=event_data.get("elapsed_time"), - )) - self.db.commit() - _node_order_counter += 1 - - elif event_type == "cycle_item": + if event_type == "cycle_item": cycle_id = event_data.get("cycle_id") if cycle_id not in _cycle_items: _cycle_items[cycle_id] = [] From cf8db47389f3d0db856fc2a9be6ab26f27b1e78c Mon Sep 17 00:00:00 2001 From: wwq Date: Fri, 24 Apr 2026 17:02:03 +0800 Subject: [PATCH 18/26] feat(workflow): augment logging capabilities with execution status and loop support - Augment workflow logs with execution status fields and loop node information. - Refactor log service to handle distinct processing logic for workflows and agents. - Construct message and node logs derived from workflow_executions data. --- api/app/controllers/app_log_controller.py | 27 ++- .../workflow/nodes/cycle_graph/iteration.py | 2 + .../core/workflow/nodes/cycle_graph/loop.py | 2 + api/app/schemas/app_log_schema.py | 2 + api/app/services/app_log_service.py | 165 ++++++++++++++---- 5 files changed, 158 insertions(+), 40 deletions(-) diff --git a/api/app/controllers/app_log_controller.py b/api/app/controllers/app_log_controller.py index dea555b9..ea7962c1 100644 --- a/api/app/controllers/app_log_controller.py +++ b/api/app/controllers/app_log_controller.py @@ -9,7 +9,7 @@ from app.core.logging_config import get_business_logger from app.core.response_utils import success from app.db import get_db from app.dependencies import get_current_user, cur_workspace_access_guard -from app.schemas.app_log_schema import AppLogConversation, AppLogConversationDetail +from app.schemas.app_log_schema import AppLogConversation, AppLogConversationDetail, AppLogMessage from app.schemas.response_schema import PageData, PageMeta from app.services.app_service import AppService from app.services.app_log_service import AppLogService @@ -78,17 +78,32 @@ def get_app_log_detail( # 验证应用访问权限 app_service = AppService(db) - app_service.get_app(app_id, workspace_id) + app = app_service.get_app(app_id, workspace_id) # 使用 Service 层查询 log_service = AppLogService(db) - conversation, node_executions_map = log_service.get_conversation_detail( + conversation, messages, node_executions_map = log_service.get_conversation_detail( app_id=app_id, conversation_id=conversation_id, - workspace_id=workspace_id + workspace_id=workspace_id, + app_type=app.type ) - detail = AppLogConversationDetail.model_validate(conversation) - detail.node_executions_map = node_executions_map + # 构建基础会话信息(不经过 ORM relationship) + base = AppLogConversation.model_validate(conversation) + + # 单独处理 messages,避免触发 SQLAlchemy relationship 校验 + if messages and isinstance(messages[0], AppLogMessage): + # 工作流:已经是 AppLogMessage 实例 + msg_list = messages + else: + # Agent:ORM Message 对象逐个转换 + msg_list = [AppLogMessage.model_validate(m) for m in messages] + + detail = AppLogConversationDetail( + **base.model_dump(), + messages=msg_list, + node_executions_map=node_executions_map, + ) return success(data=detail) diff --git a/api/app/core/workflow/nodes/cycle_graph/iteration.py b/api/app/core/workflow/nodes/cycle_graph/iteration.py index 1633b9c7..1ac46180 100644 --- a/api/app/core/workflow/nodes/cycle_graph/iteration.py +++ b/api/app/core/workflow/nodes/cycle_graph/iteration.py @@ -180,6 +180,8 @@ class IterationRuntime: "cycle_id": self.node_id, "cycle_idx": idx, "node_id": node_name, + "node_type": node_type, + "node_name": node_cfg.get("data", {}).get("label") if node_cfg else node_name, "input": result.get("node_outputs", {}).get(node_name, {}).get("input") if not cycle_variable else cycle_variable, "output": result.get("node_outputs", {}).get(node_name, {}).get("output") diff --git a/api/app/core/workflow/nodes/cycle_graph/loop.py b/api/app/core/workflow/nodes/cycle_graph/loop.py index e555a228..ca432a04 100644 --- a/api/app/core/workflow/nodes/cycle_graph/loop.py +++ b/api/app/core/workflow/nodes/cycle_graph/loop.py @@ -210,6 +210,8 @@ class LoopRuntime: "cycle_id": self.node_id, "cycle_idx": idx, "node_id": node_name, + "node_type": node_type, + "node_name": node_name, "input": result.get("node_outputs", {}).get(node_name, {}).get("input") if not cycle_variable else cycle_variable, "output": result.get("node_outputs", {}).get(node_name, {}).get("output") diff --git a/api/app/schemas/app_log_schema.py b/api/app/schemas/app_log_schema.py index a60a8428..ce9ddd44 100644 --- a/api/app/schemas/app_log_schema.py +++ b/api/app/schemas/app_log_schema.py @@ -14,6 +14,7 @@ class AppLogMessage(BaseModel): conversation_id: uuid.UUID role: str = Field(description="角色: user / assistant / system") content: str + status: Optional[str] = Field(default=None, description="执行状态(工作流专用): completed / failed") meta_data: Optional[Dict[str, Any]] = None created_at: datetime.datetime @@ -58,6 +59,7 @@ class AppLogNodeExecution(BaseModel): input: Optional[Any] = None process: Optional[Any] = None output: Optional[Any] = None + cycle_items: Optional[List[Any]] = None elapsed_time: Optional[float] = None token_usage: Optional[Dict[str, Any]] = None diff --git a/api/app/services/app_log_service.py b/api/app/services/app_log_service.py index b2801df3..e8a80d40 100644 --- a/api/app/services/app_log_service.py +++ b/api/app/services/app_log_service.py @@ -1,16 +1,17 @@ """应用日志服务层""" import uuid +import datetime as dt from typing import Optional, Tuple -from datetime import datetime from sqlalchemy import select from sqlalchemy.orm import Session from app.core.logging_config import get_business_logger +from app.models.app_model import AppType from app.models.conversation_model import Conversation, Message from app.models.workflow_model import WorkflowExecution from app.repositories.conversation_repository import ConversationRepository, MessageRepository -from app.schemas.app_log_schema import AppLogNodeExecution +from app.schemas.app_log_schema import AppLogMessage, AppLogNodeExecution logger = get_business_logger() @@ -83,51 +84,40 @@ class AppLogService: self, app_id: uuid.UUID, conversation_id: uuid.UUID, - workspace_id: uuid.UUID - ) -> Tuple[Conversation, dict[str, list[AppLogNodeExecution]]]: + workspace_id: uuid.UUID, + app_type: str = AppType.AGENT + ) -> Tuple[Conversation, list, dict[str, list[AppLogNodeExecution]]]: """ - 查询会话详情(包含消息和工作流节点执行记录) - - Args: - app_id: 应用 ID - conversation_id: 会话 ID - workspace_id: 工作空间 ID + 查询会话详情 Returns: - Tuple[Conversation, dict[str, list[AppLogNodeExecution]]]: - (包含消息的会话对象, 按消息ID分组的节点执行记录) - - Raises: - ResourceNotFoundException: 当会话不存在时 + Tuple[Conversation, list[AppLogMessage|Message], dict[str, list[AppLogNodeExecution]]] """ logger.info( "查询应用日志会话详情", extra={ "app_id": str(app_id), "conversation_id": str(conversation_id), - "workspace_id": str(workspace_id) + "workspace_id": str(workspace_id), + "app_type": app_type } ) - # 查询会话 conversation = self.conversation_repository.get_conversation_for_app_log( conversation_id=conversation_id, app_id=app_id, workspace_id=workspace_id ) - # 查询消息(按时间正序) - messages = self.message_repository.get_messages_by_conversation( - conversation_id=conversation_id - ) - - # 将消息附加到会话对象 - conversation.messages = messages - - # 查询工作流节点执行记录(按消息分组) - _, node_executions_map = self._get_workflow_node_executions_with_map( - conversation_id, messages - ) + if app_type == AppType.WORKFLOW: + messages, node_executions_map = self._get_workflow_messages_and_nodes(conversation_id) + else: + messages = self.message_repository.get_messages_by_conversation( + conversation_id=conversation_id + ) + _, node_executions_map = self._get_workflow_node_executions_with_map( + conversation_id, messages + ) logger.info( "查询应用日志会话详情成功", @@ -139,7 +129,97 @@ class AppLogService: } ) - return conversation, node_executions_map + return conversation, messages, node_executions_map + + def _get_workflow_messages_and_nodes( + self, + conversation_id: uuid.UUID, + ) -> Tuple[list[AppLogMessage], dict[str, list[AppLogNodeExecution]]]: + """ + 工作流应用专用:从 workflow_executions 构建 messages 和节点日志。 + + 每条 WorkflowExecution 对应一轮对话: + - user message:来自 execution.input_data + - assistant message:来自 execution.output_data(失败时内容为错误信息) + 节点日志以 execution id 为 key 分组。 + + Returns: + (messages 列表, node_executions_map) + """ + stmt = ( + select(WorkflowExecution) + .where( + WorkflowExecution.conversation_id == conversation_id, + WorkflowExecution.status.in_(["completed", "failed"]) + ) + .order_by(WorkflowExecution.started_at.asc()) + ) + executions = list(self.db.scalars(stmt).all()) + + messages: list[AppLogMessage] = [] + node_executions_map: dict[str, list[AppLogNodeExecution]] = {} + + for execution in executions: + started_at = execution.started_at or dt.datetime.now() + completed_at = execution.completed_at or started_at + + # assistant message 的 id,同时作为 node_executions_map 的 key + assistant_msg_id = uuid.uuid5(execution.id, "assistant") + + # --- user message(输入)--- + input_content = _extract_text(execution.input_data) + user_msg = AppLogMessage( + id=uuid.uuid5(execution.id, "user"), + conversation_id=conversation_id, + role="user", + content=input_content, + meta_data=None, + created_at=started_at, + ) + messages.append(user_msg) + + # --- assistant message(输出)--- + if execution.status == "completed": + output_content = _extract_text(execution.output_data) + meta = {"usage": execution.token_usage or {}, "elapsed_time": execution.elapsed_time} + else: + output_content = _extract_text(execution.output_data) or "" + meta = {"error": execution.error_message, "error_node_id": execution.error_node_id} + + assistant_msg = AppLogMessage( + id=assistant_msg_id, + conversation_id=conversation_id, + role="assistant", + content=output_content, + status=execution.status, + meta_data=meta, + created_at=completed_at, + ) + messages.append(assistant_msg) + + # --- 节点执行记录,key 与 assistant message id 一致 --- + execution_nodes = [] + for node_exec in execution.node_executions: + output_data = dict(node_exec.output_data or {}) + cycle_items = output_data.pop("cycle_items", None) + execution_nodes.append(AppLogNodeExecution( + node_id=node_exec.node_id, + node_type=node_exec.node_type, + node_name=node_exec.node_name, + status=node_exec.status, + error=node_exec.error_message, + input=node_exec.input_data, + process=None, + output=output_data, + cycle_items=cycle_items, + elapsed_time=node_exec.elapsed_time, + token_usage=node_exec.token_usage, + )) + + if execution_nodes: + node_executions_map[str(assistant_msg_id)] = execution_nodes + + return messages, node_executions_map def _get_workflow_node_executions_with_map( self, @@ -191,6 +271,8 @@ class AppLogService: # 构建节点执行记录列表 execution_nodes = [] for node_exec in execution.node_executions: + output_data = dict(node_exec.output_data or {}) + cycle_items = output_data.pop("cycle_items", None) node_execution = AppLogNodeExecution( node_id=node_exec.node_id, node_type=node_exec.node_type, @@ -199,7 +281,8 @@ class AppLogService: error=node_exec.error_message, input=node_exec.input_data, process=None, - output=node_exec.output_data, + output=output_data, + cycle_items=cycle_items, elapsed_time=node_exec.elapsed_time, token_usage=node_exec.token_usage, ) @@ -223,9 +306,9 @@ class AppLogService: if msg_id_str in used_message_ids: continue if msg.created_at and msg.created_at >= execution.started_at: - dt = (msg.created_at - execution.started_at).total_seconds() - if best_dt is None or dt < best_dt: - best_dt = dt + delta = (msg.created_at - execution.started_at).total_seconds() + if best_dt is None or delta < best_dt: + best_dt = delta best_msg = msg if not best_msg: @@ -236,3 +319,17 @@ class AppLogService: node_executions_map[msg_id_str] = execution_nodes return node_executions, node_executions_map + + +def _extract_text(data: Optional[dict]) -> str: + """从 workflow execution 的 input_data / output_data 中提取可读文本。 + + 优先取 'text'、'content'、'output' 字段;若都没有则 JSON 序列化整个 dict。 + """ + if not data: + return "" + for key in ("text", "content", "output", "result", "answer"): + if key in data and isinstance(data[key], str): + return data[key] + import json + return json.dumps(data, ensure_ascii=False) From 32dfee803a8f69b6bfa8f39ede9e725272754195 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 24 Apr 2026 18:05:01 +0800 Subject: [PATCH 19/26] feat(web): workflow app logs --- web/src/views/ApplicationConfig/Logs.tsx | 18 +++++++- .../components/LogDetailModal.tsx | 38 ++++++++++++++-- .../views/Workflow/components/Chat/Chat.tsx | 6 +-- .../Workflow/components/Chat/Runtime.tsx | 45 ++++++++++--------- .../views/Workflow/hooks/useWorkflowGraph.ts | 4 +- 5 files changed, 80 insertions(+), 31 deletions(-) diff --git a/web/src/views/ApplicationConfig/Logs.tsx b/web/src/views/ApplicationConfig/Logs.tsx index cf56059c..75a5bdec 100644 --- a/web/src/views/ApplicationConfig/Logs.tsx +++ b/web/src/views/ApplicationConfig/Logs.tsx @@ -7,7 +7,7 @@ import { type FC, useRef } from 'react'; import { useTranslation } from 'react-i18next'; import { useParams } from 'react-router-dom'; -import { Flex, Button } from 'antd'; +import { Flex, Button, Form } from 'antd'; import type { ColumnsType } from 'antd/es/table'; import { getAppLogsUrl } from '@/api/application'; @@ -15,11 +15,14 @@ import Table from '@/components/Table' import { formatDateTime } from '@/utils/format'; import type { LogItem, LogDetailModalRef } from './types' import LogDetailModal from './components/LogDetailModal' +import SearchInput from '@/components/SearchInput' const Statistics: FC = () => { const { t } = useTranslation(); const { id } = useParams(); const logDetailRef = useRef(null); + const [form] = Form.useForm(); + const values = Form.useWatch([], form); const handleViewDetail = (item: LogItem) => { logDetailRef.current?.handleOpen(item); @@ -62,15 +65,26 @@ const Statistics: FC = () => { ]; return (
+ +
+ + + +
+
apiUrl={getAppLogsUrl(id || '')} apiParams={{ is_draft: false, + ...(values ?? {}) }} columns={columns} rowKey="id" isScroll={true} - scrollY="calc(100vh - 214px)" + scrollY="calc(100vh - 242px)" />
diff --git a/web/src/views/ApplicationConfig/components/LogDetailModal.tsx b/web/src/views/ApplicationConfig/components/LogDetailModal.tsx index 26d8741b..b37c3ae2 100644 --- a/web/src/views/ApplicationConfig/components/LogDetailModal.tsx +++ b/web/src/views/ApplicationConfig/components/LogDetailModal.tsx @@ -1,8 +1,8 @@ /* * @Author: ZhaoYing * @Date: 2026-03-24 16:31:24 - * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-24 16:31:24 + * @Last Modified by: ZhaoYing + * @Last Modified time: 2026-04-24 17:49:58 */ import { forwardRef, useImperativeHandle, useState, useEffect } from 'react'; import { Flex, Button, Empty, Skeleton } from 'antd'; @@ -14,6 +14,12 @@ import { getAppLogDetail } from '@/api/application' import ChatContent from '@/components/Chat/ChatContent' import { formatDateTime } from '@/utils/format' import type { ChatItem } from '@/components/Chat/types' +import Runtime from '@/views/Workflow/components/Chat/Runtime' +import { nodeLibrary } from '@/views/Workflow/constant' + +const nodeIconMap = Object.fromEntries( + nodeLibrary.flatMap(c => c.nodes.map(n => [n.type, n.icon])) +) /** Log detail data with conversation messages */ type Data = LogItem & { @@ -54,7 +60,30 @@ const LogDetailModal = forwardRef((_props, ref) => { if (!vo) return setLoading(true) getAppLogDetail(vo.app_id, vo.id).then(res => { - setData(res as Data) + const { node_executions_map, messages, ...rest } = res as Data; + let hasSubContentMessages = messages + if (messages && messages.length > 0 && node_executions_map && Object.keys(node_executions_map).length > 0) { + hasSubContentMessages = messages.map(item => { + if (item.id && node_executions_map[item.id]) { + item.subContent = node_executions_map[item.id]?.map(({ input, output, cycle_items = [], error, process, ...node }: any) => { + const converted: any = { ...node, icon: nodeIconMap[node.node_type], content: { input, output, process, error } } + if (node.node_type === 'loop' && Array.isArray(cycle_items) && cycle_items.length > 0) { + converted.subContent = cycle_items.map(({ input: cInput, output: cOutput, error: cError, process: cProcess, ...cNode }: any) => ({ + ...cNode, + icon: nodeIconMap[cNode.node_type], + content: { input: cInput, output: cOutput, process: cProcess, error: cError } + })) + } + return converted + }) + } + return { ...item } + }) + } + setData({ + ...rest, + messages: hasSubContentMessages + }) }) .finally(() => { setLoading(false) @@ -66,6 +95,8 @@ const LogDetailModal = forwardRef((_props, ref) => { handleClose })); + console.log('data', data) + return ( @@ -92,6 +123,7 @@ const LogDetailModal = forwardRef((_props, ref) => { data={data.messages || []} streamLoading={false} labelFormat={(item) => formatDateTime(item.created_at)} + renderRuntime={(item, index) => } /> ) } diff --git a/web/src/views/Workflow/components/Chat/Chat.tsx b/web/src/views/Workflow/components/Chat/Chat.tsx index a6b4a2a8..820fe8c7 100644 --- a/web/src/views/Workflow/components/Chat/Chat.tsx +++ b/web/src/views/Workflow/components/Chat/Chat.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-06 21:10:56 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-21 14:59:13 + * @Last Modified time: 2026-04-24 17:34:51 */ /** * Workflow Chat Component @@ -66,7 +66,7 @@ const Chat = forwardRef([]) const [message, setMessage] = useState(undefined) - console.log('abortRef', abortRef) + console.log('abortRef', abortRef, chatList) /** * Opens the chat drawer and loads workflow variables from the start node @@ -305,7 +305,7 @@ const Chat = forwardRef = ({ )} {/* Display input and output data as JSON code blocks */} - {['input', 'output'].map(key => ( -
-
- {isLoop ? t(`workflow.runtime.${key}_cycle_vars`) : t(`workflow.${key}_result`)} - + {['input', 'process', 'output'].map(key => { + if (vo.node_type !== 'http-request' && key === 'process') return null + return ( +
+
+ {isLoop ? t(`workflow.runtime.${key}_cycle_vars`) : t(`workflow.${key}_result`)} + +
+
+ +
-
- -
-
- ))} + ) + })} ) }]} diff --git a/web/src/views/Workflow/hooks/useWorkflowGraph.ts b/web/src/views/Workflow/hooks/useWorkflowGraph.ts index f30db10f..a22ee6c0 100644 --- a/web/src/views/Workflow/hooks/useWorkflowGraph.ts +++ b/web/src/views/Workflow/hooks/useWorkflowGraph.ts @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 15:17:48 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-20 16:00:26 + * @Last Modified time: 2026-04-24 17:21:09 */ import { Clipboard, Graph, Keyboard, MiniMap, Node, Snapline, History, type Edge } from '@antv/x6'; import type { HistoryCommand as Command } from '@antv/x6/lib/plugin/history/type'; @@ -1492,7 +1492,7 @@ export const useWorkflowGraph = ({ // Reset all node execution status first nodes.forEach(node => { const data = node.getData(); - if (typeof data.status === 'string') { + if (typeof data.executionStatus === 'string') { node.setData({ ...data, executionStatus: undefined }); } }); From 2c864f63375fac9a4a7d9dfa5d74d695ac368970 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 24 Apr 2026 18:15:01 +0800 Subject: [PATCH 20/26] feat(web): http request add process --- web/src/views/ApplicationConfig/TestChat/index.tsx | 9 ++++++--- web/src/views/Workflow/components/Chat/Chat.tsx | 7 +++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/web/src/views/ApplicationConfig/TestChat/index.tsx b/web/src/views/ApplicationConfig/TestChat/index.tsx index b62efc6b..2fc66aa6 100644 --- a/web/src/views/ApplicationConfig/TestChat/index.tsx +++ b/web/src/views/ApplicationConfig/TestChat/index.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-03-13 17:27:52 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-07 21:48:30 + * @Last Modified time: 2026-04-24 18:14:25 */ import { type FC, useState, useRef, useEffect } from 'react' import { useTranslation } from 'react-i18next' @@ -59,6 +59,7 @@ interface NodeData { node_type?: string; input?: any; output?: any; + process?: any; elapsed_time?: string; error?: any; state: Record; @@ -485,7 +486,7 @@ const TestChat: FC = ({ } const updateWorkflowNodeEndMessage = (data: NodeData) => { - const { node_id, input, output, error, elapsed_time, status } = data; + const { node_id, input, output, process, error, elapsed_time, status } = data; setChatList(prev => { const newList = [...prev] const lastIndex = newList.length - 1 @@ -498,6 +499,7 @@ const TestChat: FC = ({ content: { input, output, + process, error, }, status: status || 'completed', @@ -514,7 +516,7 @@ const TestChat: FC = ({ } const updateWorkflowCycleMessage = (data: NodeData) => { - const { node_id, cycle_id, cycle_idx, input, output, error, elapsed_time, status } = data; + const { node_id, cycle_id, cycle_idx, input, output, process, error, elapsed_time, status } = data; const { nodes } = config as WorkflowConfig const node = nodes.find(n => n.id === node_id); const { name, type } = node || {} @@ -538,6 +540,7 @@ const TestChat: FC = ({ cycle_idx, input, output, + process, error, }, status: status || 'completed', diff --git a/web/src/views/Workflow/components/Chat/Chat.tsx b/web/src/views/Workflow/components/Chat/Chat.tsx index 820fe8c7..863825ba 100644 --- a/web/src/views/Workflow/components/Chat/Chat.tsx +++ b/web/src/views/Workflow/components/Chat/Chat.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-06 21:10:56 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-04-24 17:34:51 + * @Last Modified time: 2026-04-24 18:13:22 */ /** * Workflow Chat Component @@ -185,7 +185,7 @@ const Chat = forwardRef { data.forEach(item => { - const { content, conversation_id, node_id, cycle_id, cycle_idx, input, output, error, elapsed_time, status, citations } = item.data as { + const { content, conversation_id, node_id, cycle_id, cycle_idx, input, output, process, error, elapsed_time, status, citations } = item.data as { content: string; conversation_id: string | null; cycle_id: string; @@ -193,6 +193,7 @@ const Chat = forwardRef Date: Fri, 24 Apr 2026 18:20:14 +0800 Subject: [PATCH 21/26] refactor(workflow): streamline node execution handling and log service logic - Consolidate node data retrieval from workflow_executions.output_data to unify storage access. - Optimize the construction of messages and execution records to support opening suggestions. - Eliminate redundant queries and storage logic to simplify the overall codebase structure. --- .../workflow/nodes/cycle_graph/iteration.py | 1 + .../core/workflow/nodes/cycle_graph/loop.py | 1 + api/app/services/app_log_service.py | 157 ++++++++++++------ api/app/services/workflow_service.py | 34 +--- 4 files changed, 121 insertions(+), 72 deletions(-) diff --git a/api/app/core/workflow/nodes/cycle_graph/iteration.py b/api/app/core/workflow/nodes/cycle_graph/iteration.py index 1ac46180..3ee7774f 100644 --- a/api/app/core/workflow/nodes/cycle_graph/iteration.py +++ b/api/app/core/workflow/nodes/cycle_graph/iteration.py @@ -182,6 +182,7 @@ class IterationRuntime: "node_id": node_name, "node_type": node_type, "node_name": node_cfg.get("data", {}).get("label") if node_cfg else node_name, + "status": result.get("node_outputs", {}).get(node_name, {}).get("status", "completed"), "input": result.get("node_outputs", {}).get(node_name, {}).get("input") if not cycle_variable else cycle_variable, "output": result.get("node_outputs", {}).get(node_name, {}).get("output") diff --git a/api/app/core/workflow/nodes/cycle_graph/loop.py b/api/app/core/workflow/nodes/cycle_graph/loop.py index ca432a04..93f1a1e4 100644 --- a/api/app/core/workflow/nodes/cycle_graph/loop.py +++ b/api/app/core/workflow/nodes/cycle_graph/loop.py @@ -212,6 +212,7 @@ class LoopRuntime: "node_id": node_name, "node_type": node_type, "node_name": node_name, + "status": result.get("node_outputs", {}).get(node_name, {}).get("status", "completed"), "input": result.get("node_outputs", {}).get(node_name, {}).get("input") if not cycle_variable else cycle_variable, "output": result.get("node_outputs", {}).get(node_name, {}).get("output") diff --git a/api/app/services/app_log_service.py b/api/app/services/app_log_service.py index e8a80d40..7ca05d42 100644 --- a/api/app/services/app_log_service.py +++ b/api/app/services/app_log_service.py @@ -115,7 +115,7 @@ class AppLogService: messages = self.message_repository.get_messages_by_conversation( conversation_id=conversation_id ) - _, node_executions_map = self._get_workflow_node_executions_with_map( + node_executions_map = self._get_workflow_node_executions_with_map( conversation_id, messages ) @@ -139,9 +139,9 @@ class AppLogService: 工作流应用专用:从 workflow_executions 构建 messages 和节点日志。 每条 WorkflowExecution 对应一轮对话: - - user message:来自 execution.input_data + - user message:来自 execution.input_data(content 取 message 字段,files 放 meta_data) - assistant message:来自 execution.output_data(失败时内容为错误信息) - 节点日志以 execution id 为 key 分组。 + 开场白的 suggested_questions 合并到第一条 assistant message 的 meta_data 里。 Returns: (messages 列表, node_executions_map) @@ -156,9 +156,44 @@ class AppLogService: ) executions = list(self.db.scalars(stmt).all()) + # 查开场白:Message 表里 meta_data 含 suggested_questions 的第一条 assistant 消息 + opening_stmt = ( + select(Message) + .where( + Message.conversation_id == conversation_id, + Message.role == "assistant", + ) + .order_by(Message.created_at.asc()) + .limit(10) + ) + early_messages = list(self.db.scalars(opening_stmt).all()) + suggested_questions: list = [] + for m in early_messages: + if isinstance(m.meta_data, dict) and "suggested_questions" in m.meta_data: + suggested_questions = m.meta_data.get("suggested_questions") or [] + break + messages: list[AppLogMessage] = [] node_executions_map: dict[str, list[AppLogNodeExecution]] = {} + # 如果有开场白,作为第一条 assistant 消息插入 + if suggested_questions or early_messages: + opening_msg = next( + (m for m in early_messages + if isinstance(m.meta_data, dict) and "suggested_questions" in m.meta_data), + None + ) + if opening_msg: + messages.append(AppLogMessage( + id=opening_msg.id, + conversation_id=conversation_id, + role="assistant", + content=opening_msg.content, + status=None, + meta_data={"suggested_questions": suggested_questions}, + created_at=opening_msg.created_at, + )) + for execution in executions: started_at = execution.started_at or dt.datetime.now() completed_at = execution.completed_at or started_at @@ -167,13 +202,20 @@ class AppLogService: assistant_msg_id = uuid.uuid5(execution.id, "assistant") # --- user message(输入)--- - input_content = _extract_text(execution.input_data) + input_data = execution.input_data or {} + input_content = input_data.get("message") or _extract_text(input_data) + + # 跳过没有用户输入的 execution(如开场白触发的记录) + if not input_content or not input_content.strip(): + continue + + files = input_data.get("files") or [] user_msg = AppLogMessage( id=uuid.uuid5(execution.id, "user"), conversation_id=conversation_id, role="user", content=input_content, - meta_data=None, + meta_data={"files": files} if files else None, created_at=started_at, ) messages.append(user_msg) @@ -197,24 +239,8 @@ class AppLogService: ) messages.append(assistant_msg) - # --- 节点执行记录,key 与 assistant message id 一致 --- - execution_nodes = [] - for node_exec in execution.node_executions: - output_data = dict(node_exec.output_data or {}) - cycle_items = output_data.pop("cycle_items", None) - execution_nodes.append(AppLogNodeExecution( - node_id=node_exec.node_id, - node_type=node_exec.node_type, - node_name=node_exec.node_name, - status=node_exec.status, - error=node_exec.error_message, - input=node_exec.input_data, - process=None, - output=output_data, - cycle_items=cycle_items, - elapsed_time=node_exec.elapsed_time, - token_usage=node_exec.token_usage, - )) + # --- 节点执行记录,从 workflow_executions.output_data["node_outputs"] 读取 --- + execution_nodes = _build_nodes_from_output_data(execution.output_data) if execution_nodes: node_executions_map[str(assistant_msg_id)] = execution_nodes @@ -225,7 +251,7 @@ class AppLogService: self, conversation_id: uuid.UUID, messages: list[Message] - ) -> Tuple[list[AppLogNodeExecution], dict[str, list[AppLogNodeExecution]]]: + ) -> dict[str, list[AppLogNodeExecution]]: """ 从 workflow_executions 表中提取节点执行记录,并按 assistant message 分组 @@ -237,7 +263,6 @@ class AppLogService: Tuple[list[AppLogNodeExecution], dict[str, list[AppLogNodeExecution]]]: (所有节点执行记录列表, 按 message_id 分组的节点执行记录字典) """ - node_executions = [] node_executions_map: dict[str, list[AppLogNodeExecution]] = {} # 查询该会话关联的所有工作流执行记录(按时间正序) @@ -268,26 +293,8 @@ class AppLogService: used_message_ids: set[str] = set() for execution in executions: - # 构建节点执行记录列表 - execution_nodes = [] - for node_exec in execution.node_executions: - output_data = dict(node_exec.output_data or {}) - cycle_items = output_data.pop("cycle_items", None) - node_execution = AppLogNodeExecution( - node_id=node_exec.node_id, - node_type=node_exec.node_type, - node_name=node_exec.node_name, - status=node_exec.status, - error=node_exec.error_message, - input=node_exec.input_data, - process=None, - output=output_data, - cycle_items=cycle_items, - elapsed_time=node_exec.elapsed_time, - token_usage=node_exec.token_usage, - ) - node_executions.append(node_execution) - execution_nodes.append(node_execution) + # 构建节点执行记录列表,从 workflow_executions.output_data["node_outputs"] 读取 + execution_nodes = _build_nodes_from_output_data(execution.output_data) if not execution_nodes: continue @@ -318,7 +325,7 @@ class AppLogService: used_message_ids.add(msg_id_str) node_executions_map[msg_id_str] = execution_nodes - return node_executions, node_executions_map + return node_executions_map def _extract_text(data: Optional[dict]) -> str: @@ -328,8 +335,64 @@ def _extract_text(data: Optional[dict]) -> str: """ if not data: return "" - for key in ("text", "content", "output", "result", "answer"): + for key in ("message", "text", "content", "output", "result", "answer"): if key in data and isinstance(data[key], str): return data[key] import json return json.dumps(data, ensure_ascii=False) + + +def _build_nodes_from_output_data(output_data: Optional[dict]) -> list[AppLogNodeExecution]: + """从 workflow_executions.output_data["node_outputs"] 构建节点执行记录列表。 + + output_data 结构: + { + "node_outputs": { + "": { + "node_type": ..., + "node_name": ..., + "status": ..., + "input": ..., + "output": ..., + "elapsed_time": ..., + "token_usage": ..., + "error": ..., + "cycle_items": [...], + ... + } + }, + "error": ..., + ... + } + """ + if not output_data: + return [] + node_outputs: dict = output_data.get("node_outputs") or {} + result = [] + for node_id, node_data in node_outputs.items(): + if not isinstance(node_data, dict): + continue + output = dict(node_data) + cycle_items = output.pop("cycle_items", None) + # 把已知的顶层字段剥离,剩余的作为 output + node_type = output.pop("node_type", "unknown") + node_name = output.pop("node_name", None) + status = output.pop("status", "completed") + error = output.pop("error", None) + inp = output.pop("input", None) + elapsed_time = output.pop("elapsed_time", None) + token_usage = output.pop("token_usage", None) + result.append(AppLogNodeExecution( + node_id=node_id, + node_type=node_type, + node_name=node_name, + status=status, + error=error, + input=inp, + process=None, + output=output if output else None, + cycle_items=cycle_items, + elapsed_time=elapsed_time, + token_usage=token_usage, + )) + return result diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py index ea302a9c..aa91de89 100644 --- a/api/app/services/workflow_service.py +++ b/api/app/services/workflow_service.py @@ -910,7 +910,6 @@ class WorkflowService: input_data["conv_messages"] = conv_messages init_message_length = len(input_data.get("conv_messages", [])) message_id = uuid.uuid4() - _node_order_counter = 0 _cycle_items: dict[str, list] = {} # 新会话时写入开场白 @@ -1015,32 +1014,17 @@ class WorkflowService: ) else: logger.error(f"unexpect workflow run status, status: {status}") - # 把积累的 cycle_item 写入对应循环节点的 output_data - if _cycle_items: + # 把积累的 cycle_item 写入 workflow_executions.output_data["node_outputs"] + if _cycle_items and execution.output_data: + import copy + new_output_data = copy.deepcopy(execution.output_data) + node_outputs = new_output_data.setdefault("node_outputs", {}) for cycle_node_id, items in _cycle_items.items(): - node_exec = self.db.execute( - select(WorkflowNodeExecution).where( - WorkflowNodeExecution.execution_id == execution.id, - WorkflowNodeExecution.node_id == cycle_node_id - ) - ).scalar_one_or_none() - if node_exec: - node_exec.output_data = { - **(node_exec.output_data or {}), - "cycle_items": items - } + if cycle_node_id in node_outputs: + node_outputs[cycle_node_id]["cycle_items"] = items else: - node_cfg = next((n for n in config.nodes if n.get("id") == cycle_node_id), {}) - self.db.add(WorkflowNodeExecution( - execution_id=execution.id, - node_id=cycle_node_id, - node_type=node_cfg.get("type", "cycle"), - node_name=node_cfg.get("data", {}).get("label") or cycle_node_id, - execution_order=_node_order_counter, - status="completed", - output_data={"cycle_items": items}, - )) - _node_order_counter += 1 + node_outputs[cycle_node_id] = {"cycle_items": items} + execution.output_data = new_output_data self.db.commit() elif event.get("event") == "workflow_start": event["data"]["message_id"] = str(message_id) From caef0fe44e37c2c9b8345518248615e827fa0ba9 Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Fri, 24 Apr 2026 18:36:27 +0800 Subject: [PATCH 22/26] fix(api): correct import paths in memory_read and celery task command - Fix relative imports in memory_read.py to use absolute app paths - Change celery scheduler command from `python app/celery_task_scheduler.py` to `python -m app.celery_task_scheduler` --- api/app/core/memory/pipelines/memory_read.py | 4 ++-- api/docker-compose.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/app/core/memory/pipelines/memory_read.py b/api/app/core/memory/pipelines/memory_read.py index 35ace00d..0bd57b08 100644 --- a/api/app/core/memory/pipelines/memory_read.py +++ b/api/app/core/memory/pipelines/memory_read.py @@ -1,8 +1,8 @@ from app.core.memory.enums import SearchStrategy, StorageType from app.core.memory.models.service_models import MemorySearchResult from app.core.memory.pipelines.base_pipeline import ModelClientMixin, DBRequiredPipeline -from core.memory.read_services.search_engine.content_search import Neo4jSearchService, RAGSearchService -from core.memory.read_services.generate_engine.query_preprocessor import QueryPreprocessor +from app.core.memory.read_services.search_engine.content_search import Neo4jSearchService, RAGSearchService +from app.core.memory.read_services.generate_engine.query_preprocessor import QueryPreprocessor class ReadPipeLine(ModelClientMixin, DBRequiredPipeline): diff --git a/api/docker-compose.yml b/api/docker-compose.yml index a0fd4791..a3937add 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -70,7 +70,7 @@ services: - .env volumes: - /etc/localtime:/etc/localtime:ro - command: python app/celery_task_scheduler.py + command: python -m app.celery_task_scheduler restart: unless-stopped healthcheck: test: CMD curl -f 127.0.0.1:8001 || exit 1 From f63bcd632105ca47eb9a23b432ce29303fc8fdf1 Mon Sep 17 00:00:00 2001 From: wxy Date: Fri, 24 Apr 2026 18:49:55 +0800 Subject: [PATCH 23/26] refactor(tool): flatten request body parameters for model exposure - Refactor the extraction logic in tool service to flatten request body parameters into independent arguments exposed to the model. --- api/app/core/tools/custom/base.py | 18 ++++++++++++++++++ api/app/services/tool_service.py | 7 ++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/api/app/core/tools/custom/base.py b/api/app/core/tools/custom/base.py index c03fe206..06237d32 100644 --- a/api/app/core/tools/custom/base.py +++ b/api/app/core/tools/custom/base.py @@ -73,6 +73,7 @@ class CustomTool(BaseTool): # 添加通用参数(基于第一个操作的参数) if self._parsed_operations: first_operation = next(iter(self._parsed_operations.values())) + # path/query 参数 for param_name, param_info in first_operation.get("parameters", {}).items(): params.append(ToolParameter( name=param_name, @@ -85,6 +86,23 @@ class CustomTool(BaseTool): maximum=param_info.get("maximum"), pattern=param_info.get("pattern") )) + # requestBody 参数 — 将 body 字段平铺为独立参数暴露给模型 + request_body = first_operation.get("request_body") + if request_body: + body_schema = request_body.get("properties", {}) + required_fields = request_body.get("required", []) + for prop_name, prop_schema in body_schema.items(): + params.append(ToolParameter( + name=prop_name, + type=self._convert_openapi_type(prop_schema.get("type", "string")), + description=prop_schema.get("description", ""), + required=prop_name in required_fields, + default=prop_schema.get("default"), + enum=prop_schema.get("enum"), + minimum=prop_schema.get("minimum"), + maximum=prop_schema.get("maximum"), + pattern=prop_schema.get("pattern") + )) return params diff --git a/api/app/services/tool_service.py b/api/app/services/tool_service.py index 9a59cd81..ff734c9d 100644 --- a/api/app/services/tool_service.py +++ b/api/app/services/tool_service.py @@ -815,11 +815,12 @@ class ToolService: "default": param_info.get("default") }) - # 请求体参数 + # 请求体参数 — _extract_request_body 返回 {"schema": {...}, "required": bool, ...} request_body = operation.get("request_body") if request_body: - schema_props = request_body.get("schema", {}).get("properties", {}) - required_props = request_body.get("schema", {}).get("required", []) + body_schema = request_body.get("schema", {}) + schema_props = body_schema.get("properties", {}) + required_props = body_schema.get("required", []) for prop_name, prop_schema in schema_props.items(): parameters.append({ From 8dee2eae6ad66a9f61c162b7e9f09975e7d31d1e Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Fri, 24 Apr 2026 18:50:58 +0800 Subject: [PATCH 24/26] fix(api): correct import paths in memory_read and celery task command - Fix relative imports in memory_read.py to use absolute app paths - Change celery scheduler command from `python app/celery_task_scheduler.py` to `python -m app.celery_task_scheduler` --- .../core/memory/read_services/search_engine/content_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/app/core/memory/read_services/search_engine/content_search.py b/api/app/core/memory/read_services/search_engine/content_search.py index 16c23f91..4ba4dce7 100644 --- a/api/app/core/memory/read_services/search_engine/content_search.py +++ b/api/app/core/memory/read_services/search_engine/content_search.py @@ -8,7 +8,7 @@ from neo4j import Session from app.core.memory.enums import Neo4jNodeType from app.core.memory.memory_service import MemoryContext from app.core.memory.models.service_models import Memory, MemorySearchResult -from core.memory.read_services.search_engine.result_builder import data_builder_factory +from app.core.memory.read_services.search_engine.result_builder import data_builder_factory from app.core.models import RedBearEmbeddings from app.core.rag.nlp.search import knowledge_retrieval from app.repositories import knowledge_repository From 422af699048e524175f1833901f784618b8d08f3 Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Fri, 24 Apr 2026 19:09:18 +0800 Subject: [PATCH 25/26] fix(api): correct import paths in memory_read and celery task command - Fix relative imports in memory_read.py to use absolute app paths - Change celery scheduler command from `python app/celery_task_scheduler.py` to `python -m app.celery_task_scheduler` --- api/app/controllers/service/memory_api_controller.py | 2 +- .../core/memory/agent/langgraph_graph/routing/write_router.py | 2 +- api/app/core/workflow/nodes/memory/node.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/app/controllers/service/memory_api_controller.py b/api/app/controllers/service/memory_api_controller.py index 57664e4e..43a8824a 100644 --- a/api/app/controllers/service/memory_api_controller.py +++ b/api/app/controllers/service/memory_api_controller.py @@ -3,6 +3,7 @@ from fastapi import APIRouter, Body, Depends, Query, Request from sqlalchemy.orm import Session +from app.celery_task_scheduler import scheduler from app.core.api_key_auth import require_api_key from app.core.logging_config import get_business_logger from app.core.quota_stub import check_end_user_quota @@ -18,7 +19,6 @@ from app.schemas.memory_api_schema import ( MemoryWriteSyncResponse, ) from app.services.memory_api_service import MemoryAPIService -from celery_task_scheduler import scheduler router = APIRouter(prefix="/memory", tags=["V1 - Memory API"]) logger = get_business_logger() diff --git a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py index 7ef4ed12..d016f2e0 100644 --- a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py +++ b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py @@ -1,6 +1,7 @@ import json import os +from app.celery_task_scheduler import scheduler from app.core.logging_config import get_agent_logger from app.core.memory.agent.langgraph_graph.tools.write_tool import format_parsing, messages_parse from app.core.memory.agent.models.write_aggregate_model import WriteAggregateModel @@ -13,7 +14,6 @@ from app.db import get_db_context from app.repositories.memory_short_repository import LongTermMemoryRepository from app.schemas.memory_agent_schema import AgentMemory_Long_Term from app.utils.config_utils import resolve_config_id -from celery_task_scheduler import scheduler logger = get_agent_logger(__name__) template_root = os.path.join(PROJECT_ROOT_, 'memory', 'agent', 'utils', 'prompt') diff --git a/api/app/core/workflow/nodes/memory/node.py b/api/app/core/workflow/nodes/memory/node.py index b74af6b9..6d9fcdad 100644 --- a/api/app/core/workflow/nodes/memory/node.py +++ b/api/app/core/workflow/nodes/memory/node.py @@ -1,6 +1,7 @@ import re from typing import Any +from app.celery_task_scheduler import scheduler from app.core.memory.enums import SearchStrategy from app.core.memory.memory_service import MemoryService from app.core.workflow.engine.state_manager import WorkflowState @@ -11,7 +12,6 @@ from app.core.workflow.variable.base_variable import VariableType from app.core.workflow.variable.variable_objects import FileVariable, ArrayVariable from app.db import get_db_read from app.schemas import FileInput -from celery_task_scheduler import scheduler class MemoryReadNode(BaseNode): From 0fc463036ef92a03ab1b04275a11d5db1baa0693 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 24 Apr 2026 19:12:35 +0800 Subject: [PATCH 26/26] fix(web): node status ui --- web/src/views/Workflow/components/Nodes/ConditionNode.tsx | 4 ++-- web/src/views/Workflow/components/Nodes/LoopNode.tsx | 4 ++-- web/src/views/Workflow/components/Nodes/NormalNode.tsx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/web/src/views/Workflow/components/Nodes/ConditionNode.tsx b/web/src/views/Workflow/components/Nodes/ConditionNode.tsx index 19966823..b431ddd4 100644 --- a/web/src/views/Workflow/components/Nodes/ConditionNode.tsx +++ b/web/src/views/Workflow/components/Nodes/ConditionNode.tsx @@ -65,8 +65,8 @@ const ConditionNode: ReactShapeConfig['component'] = ({ node }) => { return (
diff --git a/web/src/views/Workflow/components/Nodes/LoopNode.tsx b/web/src/views/Workflow/components/Nodes/LoopNode.tsx index c540db76..cffb62dd 100644 --- a/web/src/views/Workflow/components/Nodes/LoopNode.tsx +++ b/web/src/views/Workflow/components/Nodes/LoopNode.tsx @@ -131,8 +131,8 @@ const LoopNode: ReactShapeConfig['component'] = ({ node, graph }) => { return (
diff --git a/web/src/views/Workflow/components/Nodes/NormalNode.tsx b/web/src/views/Workflow/components/Nodes/NormalNode.tsx index ce936be9..cb3ef134 100644 --- a/web/src/views/Workflow/components/Nodes/NormalNode.tsx +++ b/web/src/views/Workflow/components/Nodes/NormalNode.tsx @@ -12,8 +12,8 @@ const NormalNode: ReactShapeConfig['component'] = ({ node }) => { return (