diff --git a/api/Dockerfile b/api/Dockerfile index f6c082d2..a38739ca 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -45,7 +45,8 @@ RUN --mount=type=cache,id=mem_apt,target=/var/cache/apt,sharing=locked \ apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \ apt install -y libjemalloc-dev && \ apt install -y python3-pip pipx nginx unzip curl wget git vim less && \ - apt install -y ghostscript + apt install -y ghostscript && \ + apt install -y libmagic1 RUN if [ "$NEED_MIRROR" == "1" ]; then \ pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ diff --git a/api/alembic.ini b/api/alembic.ini index 71a6553f..7cb2e801 100644 --- a/api/alembic.ini +++ b/api/alembic.ini @@ -60,7 +60,12 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # are written from script.py.mako # output_encoding = utf-8 -sqlalchemy.url = postgresql://user:password@localhost/dbname +# Database connection URL - DO NOT hardcode credentials here! 
+# Connection string is set dynamically from environment variables in migrations/env.py +# Required env vars: DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME +# Example: postgresql://user:password@localhost:5432/dbname +; sqlalchemy.url = postgresql://user:password@host:port/dbname +sqlalchemy.url = driver://user:password@host:port/dbname [post_write_hooks] diff --git a/api/app/aioRedis.py b/api/app/aioRedis.py index f758dd15..aac2aa84 100644 --- a/api/app/aioRedis.py +++ b/api/app/aioRedis.py @@ -1,10 +1,11 @@ -import os import asyncio import json import logging from typing import Dict, Any, Optional + import redis.asyncio as redis from redis.asyncio import ConnectionPool + from app.core.config import settings # 设置日志记录器 diff --git a/api/app/celery_app.py b/api/app/celery_app.py index 0319e079..807c59f4 100644 --- a/api/app/celery_app.py +++ b/api/app/celery_app.py @@ -63,56 +63,60 @@ celery_app.conf.update( accept_content=['json'], result_serializer='json', - # 时区 - timezone='Asia/Shanghai', - enable_utc=True, + # # 时区 + # timezone='Asia/Shanghai', + # enable_utc=False, # 任务追踪 task_track_started=True, task_ignore_result=False, - + # 超时设置 task_time_limit=3600, # 60分钟硬超时 task_soft_time_limit=3000, # 50分钟软超时 - + # Worker 设置 (per-worker settings are in docker-compose command line) worker_prefetch_multiplier=1, # Don't hoard tasks, fairer distribution - + # 结果过期时间 result_expires=3600, # 结果保存1小时 - + # 任务确认设置 task_acks_late=True, task_reject_on_worker_lost=True, worker_disable_rate_limits=True, - + # FLower setting worker_send_task_events=True, task_send_sent_event=True, - + # task routing task_routes={ # Memory tasks → memory_tasks queue (threads worker) 'app.core.memory.agent.read_message_priority': {'queue': 'memory_tasks'}, 'app.core.memory.agent.read_message': {'queue': 'memory_tasks'}, 'app.core.memory.agent.write_message': {'queue': 'memory_tasks'}, - + 'app.tasks.write_perceptual_memory': {'queue': 'memory_tasks'}, + # Long-term storage tasks → memory_tasks queue 
(batched write strategies) 'app.core.memory.agent.long_term_storage.window': {'queue': 'memory_tasks'}, 'app.core.memory.agent.long_term_storage.time': {'queue': 'memory_tasks'}, 'app.core.memory.agent.long_term_storage.aggregate': {'queue': 'memory_tasks'}, - + # Document tasks → document_tasks queue (prefork worker) 'app.core.rag.tasks.parse_document': {'queue': 'document_tasks'}, 'app.core.rag.tasks.build_graphrag_for_kb': {'queue': 'document_tasks'}, 'app.core.rag.tasks.sync_knowledge_for_kb': {'queue': 'document_tasks'}, - + # Beat/periodic tasks → periodic_tasks queue (dedicated periodic worker) 'app.tasks.workspace_reflection_task': {'queue': 'periodic_tasks'}, 'app.tasks.regenerate_memory_cache': {'queue': 'periodic_tasks'}, 'app.tasks.run_forgetting_cycle_task': {'queue': 'periodic_tasks'}, 'app.tasks.write_all_workspaces_memory_task': {'queue': 'periodic_tasks'}, 'app.tasks.update_implicit_emotions_storage': {'queue': 'periodic_tasks'}, + 'app.tasks.init_implicit_emotions_for_users': {'queue': 'periodic_tasks'}, + 'app.tasks.init_interest_distribution_for_users': {'queue': 'periodic_tasks'}, + 'app.tasks.init_community_clustering_for_users': {'queue': 'periodic_tasks'}, }, ) @@ -129,7 +133,7 @@ implicit_emotions_update_schedule = crontab( minute=settings.IMPLICIT_EMOTIONS_UPDATE_MINUTE, ) -#构建定时任务配置 +# 构建定时任务配置 beat_schedule_config = { "run-workspace-reflection": { "task": "app.tasks.workspace_reflection_task", diff --git a/api/app/celery_worker.py b/api/app/celery_worker.py index 7d3ee686..4ea4fee1 100644 --- a/api/app/celery_worker.py +++ b/api/app/celery_worker.py @@ -13,4 +13,4 @@ logger.info("Celery worker logging initialized") # 导入任务模块以注册任务 import app.tasks -__all__ = ['celery_app'] \ No newline at end of file +__all__ = ['celery_app'] diff --git a/api/app/controllers/__init__.py b/api/app/controllers/__init__.py index 85550f94..585de2ed 100644 --- a/api/app/controllers/__init__.py +++ b/api/app/controllers/__init__.py @@ -16,6 +16,7 @@ from . 
import ( file_controller, file_storage_controller, home_page_controller, + i18n_controller, implicit_memory_controller, knowledge_controller, knowledgeshare_controller, @@ -94,5 +95,6 @@ manager_router.include_router(memory_working_controller.router) manager_router.include_router(file_storage_controller.router) manager_router.include_router(ontology_controller.router) manager_router.include_router(skill_controller.router) +manager_router.include_router(i18n_controller.router) __all__ = ["manager_router"] diff --git a/api/app/controllers/app_controller.py b/api/app/controllers/app_controller.py index cdf94345..e9b539df 100644 --- a/api/app/controllers/app_controller.py +++ b/api/app/controllers/app_controller.py @@ -1,10 +1,12 @@ import uuid +import io from typing import Optional, Annotated import yaml from fastapi import APIRouter, Depends, Path, Form, UploadFile, File from fastapi.responses import StreamingResponse from sqlalchemy.orm import Session +from urllib.parse import quote from app.core.error_codes import BizCode from app.core.logging_config import get_business_logger @@ -25,6 +27,7 @@ from app.services.app_service import AppService from app.services.app_statistics_service import AppStatisticsService from app.services.workflow_import_service import WorkflowImportService from app.services.workflow_service import WorkflowService, get_workflow_service +from app.services.app_dsl_service import AppDslService router = APIRouter(prefix="/apps", tags=["Apps"]) logger = get_business_logger() @@ -50,6 +53,7 @@ def list_apps( status: str | None = None, search: str | None = None, include_shared: bool = True, + shared_only: bool = False, page: int = 1, pagesize: int = 10, ids: Optional[str] = None, @@ -81,6 +85,7 @@ def list_apps( status=status, search=search, include_shared=include_shared, + shared_only=shared_only, page=page, pagesize=pagesize, ) @@ -90,6 +95,37 @@ def list_apps( return success(data=PageData(page=meta, items=items)) +@router.get("/my-shared-out", 
summary="列出本工作空间主动分享出去的记录") +@cur_workspace_access_guard() +def list_my_shared_out( + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """列出本工作空间主动分享给其他工作空间的所有记录(我的共享)""" + workspace_id = current_user.current_workspace_id + service = app_service.AppService(db) + shares = service.list_my_shared_out(workspace_id=workspace_id) + data = [app_schema.AppShare.model_validate(s) for s in shares] + return success(data=data) + + +@router.delete("/share/{target_workspace_id}", summary="取消对某工作空间的所有应用分享") +@cur_workspace_access_guard() +def unshare_all_apps_to_workspace( + target_workspace_id: uuid.UUID, + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """Cancel all app shares from current workspace to a target workspace.""" + workspace_id = current_user.current_workspace_id + service = app_service.AppService(db) + count = service.unshare_all_apps_to_workspace( + target_workspace_id=target_workspace_id, + workspace_id=workspace_id + ) + return success(msg=f"已取消 {count} 个应用的分享", data={"count": count}) + + @router.get("/{app_id}", summary="获取应用详情") @cur_workspace_access_guard() def get_app( @@ -158,6 +194,7 @@ def delete_app( def copy_app( app_id: uuid.UUID, new_name: Optional[str] = None, + payload: app_schema.CopyAppRequest = None, db: Session = Depends(get_db), current_user=Depends(get_current_user), ): @@ -169,6 +206,8 @@ def copy_app( - 不影响原应用 """ workspace_id = current_user.current_workspace_id + # body takes precedence over query param for backward compatibility + new_name = (payload.new_name if payload else None) or new_name logger.info( "用户请求复制应用", extra={ @@ -218,6 +257,27 @@ def get_agent_config( return success(data=app_schema.AgentConfig.model_validate(cfg)) +@router.get("/{app_id}/opening", summary="获取应用开场白配置") +@cur_workspace_access_guard() +def get_opening( + app_id: uuid.UUID, + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """返回开场白文本和预设问题,供前端对话界面初始化时展示""" + workspace_id 
= current_user.current_workspace_id + cfg = app_service.get_agent_config(db, app_id=app_id, workspace_id=workspace_id) + features = cfg.features or {} + if hasattr(features, "model_dump"): + features = features.model_dump() + opening = features.get("opening_statement", {}) + return success(data=app_schema.OpeningResponse( + enabled=opening.get("enabled", False), + statement=opening.get("statement"), + suggested_questions=opening.get("suggested_questions", []), + )) + + @router.post("/{app_id}/publish", summary="发布应用(生成不可变快照)") @cur_workspace_access_guard() def publish_app( @@ -299,7 +359,8 @@ def share_app( app_id=app_id, target_workspace_ids=payload.target_workspace_ids, user_id=current_user.id, - workspace_id=workspace_id + workspace_id=workspace_id, + permission=payload.permission ) data = [app_schema.AppShare.model_validate(s) for s in shares] @@ -330,6 +391,32 @@ def unshare_app( return success(msg="应用分享已取消") +@router.patch("/{app_id}/share/{target_workspace_id}", summary="更新共享权限") +@cur_workspace_access_guard() +def update_share_permission( + app_id: uuid.UUID, + target_workspace_id: uuid.UUID, + payload: app_schema.UpdateSharePermissionRequest, + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """更新共享权限(readonly <-> editable) + + - 只能修改自己工作空间应用的共享权限 + """ + workspace_id = current_user.current_workspace_id + + service = app_service.AppService(db) + share = service.update_share_permission( + app_id=app_id, + target_workspace_id=target_workspace_id, + permission=payload.permission, + workspace_id=workspace_id + ) + + return success(data=app_schema.AppShare.model_validate(share)) + + @router.get("/{app_id}/shares", summary="列出应用的分享记录") @cur_workspace_access_guard() def list_app_shares( @@ -353,6 +440,46 @@ def list_app_shares( return success(data=data) +@router.delete("/shared/{source_workspace_id}", summary="批量移除某来源工作空间的所有共享应用") +@cur_workspace_access_guard() +def remove_all_shared_apps_from_workspace( + source_workspace_id: 
uuid.UUID, + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """Remove all shared apps from a specific source workspace (recipient operation).""" + workspace_id = current_user.current_workspace_id + service = app_service.AppService(db) + count = service.remove_all_shared_apps_from_workspace( + source_workspace_id=source_workspace_id, + workspace_id=workspace_id + ) + return success(msg=f"已移除 {count} 个共享应用", data={"count": count}) + + +@router.delete("/{app_id}/shared", summary="移除共享给我的应用") +@cur_workspace_access_guard() +def remove_shared_app( + app_id: uuid.UUID, + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """被共享者从自己的工作空间移除共享应用 + + - 不会删除源应用,只删除共享记录 + - 只能移除共享给自己工作空间的应用 + """ + workspace_id = current_user.current_workspace_id + + service = app_service.AppService(db) + service.remove_shared_app( + app_id=app_id, + workspace_id=workspace_id + ) + + return success(msg="已移除共享应用") + + @router.post("/{app_id}/draft/run", summary="试运行 Agent(使用当前草稿配置)") @cur_workspace_access_guard() async def draft_run( @@ -393,7 +520,7 @@ async def draft_run( # 提前验证和准备(在流式响应开始前完成) from app.services.app_service import AppService from app.services.multi_agent_service import MultiAgentService - from app.models import AgentConfig, ModelConfig + from app.models import AgentConfig, ModelConfig, AppRelease from sqlalchemy import select from app.core.exceptions import BusinessException from app.services.draft_run_service import AgentRunService @@ -410,11 +537,12 @@ async def draft_run( service._validate_app_accessible(app, workspace_id) if payload.user_id is None: + # 先获取 app 的 workspace_id end_user_repo = EndUserRepository(db) new_end_user = end_user_repo.get_or_create_end_user( app_id=app_id, + workspace_id=app.workspace_id, other_id=str(current_user.id), - original_user_id=str(current_user.id) # Save original user_id to other_id ) payload.user_id = str(new_end_user.id) @@ -431,18 +559,29 @@ async def draft_run( 
service._check_agent_config(app_id) # 2. 获取 Agent 配置 - stmt = select(AgentConfig).where(AgentConfig.app_id == app_id) - agent_cfg = db.scalars(stmt).first() - if not agent_cfg: - raise BusinessException("Agent 配置不存在", BizCode.AGENT_CONFIG_MISSING) + # 共享应用:从最新发布版本读配置快照,而非草稿 + is_shared = app.workspace_id != workspace_id + if is_shared: + if not app.current_release_id: + raise BusinessException("该应用尚未发布,无法使用", BizCode.AGENT_CONFIG_MISSING) + release = db.get(AppRelease, app.current_release_id) + if not release: + raise BusinessException("发布版本不存在", BizCode.AGENT_CONFIG_MISSING) + agent_cfg = service._agent_config_from_release(release) + model_config = db.get(ModelConfig, release.default_model_config_id) if release.default_model_config_id else None + else: + stmt = select(AgentConfig).where(AgentConfig.app_id == app_id) + agent_cfg = db.scalars(stmt).first() + if not agent_cfg: + raise BusinessException("Agent 配置不存在", BizCode.AGENT_CONFIG_MISSING) - # 3. 获取模型配置 - model_config = None - if agent_cfg.default_model_config_id: - model_config = db.get(ModelConfig, agent_cfg.default_model_config_id) - if not model_config: - from app.core.exceptions import ResourceNotFoundException - raise ResourceNotFoundException("模型配置", str(agent_cfg.default_model_config_id)) + # 3. 
获取模型配置 + model_config = None + if agent_cfg.default_model_config_id: + model_config = db.get(ModelConfig, agent_cfg.default_model_config_id) + if not model_config: + from app.core.exceptions import ResourceNotFoundException + raise ResourceNotFoundException("模型配置", str(agent_cfg.default_model_config_id)) # 流式返回 if payload.stream: @@ -598,7 +737,17 @@ async def draft_run( msg="多 Agent 任务执行成功" ) elif app.type == AppType.WORKFLOW: # 工作流 - config = workflow_service.check_config(app_id) + # 共享应用:从最新发布版本读配置快照,而非草稿 + is_shared = app.workspace_id != workspace_id + if is_shared: + if not app.current_release_id: + raise BusinessException("该应用尚未发布,无法使用", BizCode.AGENT_CONFIG_MISSING) + release = db.get(AppRelease, app.current_release_id) + if not release: + raise BusinessException("发布版本不存在", BizCode.AGENT_CONFIG_MISSING) + config = service._workflow_config_from_release(release) + else: + config = workflow_service.check_config(app_id) # 3. 流式返回 if payload.stream: logger.debug( @@ -741,6 +890,16 @@ async def draft_run_compare( raise BusinessException("只有 Agent 类型应用支持试运行", BizCode.APP_TYPE_NOT_SUPPORTED) service._validate_app_accessible(app, workspace_id) + if payload.user_id is None: + # 先获取 app 的 workspace_id + end_user_repo = EndUserRepository(db) + new_end_user = end_user_repo.get_or_create_end_user( + app_id=app_id, + workspace_id=app.workspace_id, + other_id=str(current_user.id), + ) + payload.user_id = str(new_end_user.id) + # 2. 
获取 Agent 配置 from sqlalchemy import select from app.models import AgentConfig @@ -786,6 +945,13 @@ async def draft_run_compare( "conversation_id": model_item.conversation_id # 传递每个模型的 conversation_id }) + # 从 features 中读取功能开关(与 draft_run 保持一致) + features_config: dict = agent_cfg.features or {} + if hasattr(features_config, 'model_dump'): + features_config = features_config.model_dump() + web_search_feature = features_config.get("web_search", {}) + web_search = isinstance(web_search_feature, dict) and web_search_feature.get("enabled", False) + # 流式返回 if payload.stream: async def event_generator(): @@ -797,11 +963,11 @@ async def draft_run_compare( message=payload.message, workspace_id=workspace_id, conversation_id=payload.conversation_id, - user_id=payload.user_id or str(current_user.id), + user_id=payload.user_id, variables=payload.variables, storage_type=storage_type, user_rag_memory_id=user_rag_memory_id, - web_search=True, + web_search=web_search, memory=True, parallel=payload.parallel, timeout=payload.timeout or 60, @@ -828,11 +994,11 @@ async def draft_run_compare( message=payload.message, workspace_id=workspace_id, conversation_id=payload.conversation_id, - user_id=payload.user_id or str(current_user.id), + user_id=payload.user_id, variables=payload.variables, storage_type=storage_type, user_rag_memory_id=user_rag_memory_id, - web_search=True, + web_search=web_search, memory=True, parallel=payload.parallel, timeout=payload.timeout or 60, @@ -1010,3 +1176,57 @@ def get_workspace_api_statistics( ) return success(data=result) + + +@router.get("/{app_id}/export", summary="导出应用配置为 YAML 文件") +@cur_workspace_access_guard() +async def export_app( + app_id: uuid.UUID, + db: Annotated[Session, Depends(get_db)], + current_user: Annotated[User, Depends(get_current_user)], + release_id: Optional[uuid.UUID] = None +): + """导出 agent / multi_agent / workflow 应用配置为 YAML 文件流。 + release_id: 指定发布版本id,不传则导出当前草稿配置。 + """ + yaml_str, filename = AppDslService(db).export_dsl(app_id, 
release_id) + encoded = quote(filename, safe=".") + yaml_bytes = yaml_str.encode("utf-8") + file_stream = io.BytesIO(yaml_bytes) + file_stream.seek(0) + return StreamingResponse( + file_stream, + media_type="application/octet-stream; charset=utf-8", + headers={"Content-Disposition": f"attachment; filename={encoded}", + "Content-Length": str(len(yaml_bytes))} + ) + + +@router.post("/import", summary="从 YAML 文件导入应用") +@cur_workspace_access_guard() +async def import_app( + file: UploadFile = File(...), + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """从 YAML 文件导入 agent / multi_agent / workflow 应用。 + 跨空间/跨租户导入时,模型/工具/知识库会按名称匹配,匹配不到则置空并返回 warnings。 + """ + if not file.filename.lower().endswith((".yaml", ".yml")): + return fail(msg="仅支持 YAML 文件", code=BizCode.BAD_REQUEST) + + raw = (await file.read()).decode("utf-8") + dsl = yaml.safe_load(raw) + if not dsl or "app" not in dsl: + return fail(msg="YAML 格式无效,缺少 app 字段", code=BizCode.BAD_REQUEST) + + new_app, warnings = AppDslService(db).import_dsl( + dsl=dsl, + workspace_id=current_user.current_workspace_id, + tenant_id=current_user.tenant_id, + user_id=current_user.id, + ) + return success( + data={"app": app_schema.App.model_validate(new_app), "warnings": warnings}, + msg="应用导入成功" + (",但部分资源需手动配置" if warnings else "") + ) diff --git a/api/app/controllers/auth_controller.py b/api/app/controllers/auth_controller.py index 708cbaa2..2cc72a3b 100644 --- a/api/app/controllers/auth_controller.py +++ b/api/app/controllers/auth_controller.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta, timezone +from typing import Callable from fastapi import APIRouter, Depends from sqlalchemy.orm import Session @@ -16,6 +17,7 @@ from app.core.exceptions import BusinessException from app.core.error_codes import BizCode from app.dependencies import get_current_user, oauth2_scheme from app.models.user_model import User +from app.i18n.dependencies import get_translator # 获取专用日志器 auth_logger = 
get_auth_logger() @@ -26,7 +28,8 @@ router = APIRouter(tags=["Authentication"]) @router.post("/token", response_model=ApiResponse) async def login_for_access_token( form_data: TokenRequest, - db: Session = Depends(get_db) + db: Session = Depends(get_db), + t: Callable = Depends(get_translator) ): """用户登录获取token""" auth_logger.info(f"用户登录请求: {form_data.email}") @@ -40,10 +43,10 @@ async def login_for_access_token( invite_info = workspace_service.validate_invite_token(db, form_data.invite) if not invite_info.is_valid: - raise BusinessException("邀请码无效或已过期", code=BizCode.BAD_REQUEST) + raise BusinessException(t("auth.invite.invalid"), code=BizCode.BAD_REQUEST) if invite_info.email != form_data.email: - raise BusinessException("邀请邮箱与登录邮箱不匹配", code=BizCode.BAD_REQUEST) + raise BusinessException(t("auth.invite.email_mismatch"), code=BizCode.BAD_REQUEST) auth_logger.info(f"邀请码验证成功: workspace={invite_info.workspace_name}") try: # 尝试认证用户 @@ -69,7 +72,7 @@ async def login_for_access_token( elif e.code == BizCode.PASSWORD_ERROR: # 用户存在但密码错误 auth_logger.warning(f"接受邀请失败,密码验证错误: {form_data.email}") - raise BusinessException("接受邀请失败,密码验证错误", BizCode.LOGIN_FAILED) + raise BusinessException(t("auth.invite.password_verification_failed"), BizCode.LOGIN_FAILED) else: # 其他认证失败情况,直接抛出 raise @@ -82,7 +85,7 @@ async def login_for_access_token( except BusinessException as e: # 其他认证失败情况,直接抛出 - raise BusinessException(e.message,BizCode.LOGIN_FAILED) + raise BusinessException(e.message, BizCode.LOGIN_FAILED) # 创建 tokens access_token, access_token_id = security.create_access_token(subject=user.id) @@ -110,14 +113,15 @@ async def login_for_access_token( expires_at=access_expires_at, refresh_expires_at=refresh_expires_at ), - msg="登录成功" + msg=t("auth.login.success") ) @router.post("/refresh", response_model=ApiResponse) async def refresh_token( refresh_request: RefreshTokenRequest, - db: Session = Depends(get_db) + db: Session = Depends(get_db), + t: Callable = Depends(get_translator) ): 
"""刷新token""" auth_logger.info("收到token刷新请求") @@ -125,18 +129,18 @@ async def refresh_token( # 验证 refresh token userId = security.verify_token(refresh_request.refresh_token, "refresh") if not userId: - raise BusinessException("无效的refresh token", code=BizCode.TOKEN_INVALID) + raise BusinessException(t("auth.token.invalid_refresh_token"), code=BizCode.TOKEN_INVALID) # 检查用户是否存在 user = auth_service.get_user_by_id(db, userId) if not user: - raise BusinessException("用户不存在", code=BizCode.USER_NOT_FOUND) + raise BusinessException(t("auth.user.not_found"), code=BizCode.USER_NOT_FOUND) # 检查 refresh token 黑名单 if settings.ENABLE_SINGLE_SESSION: refresh_token_id = security.get_token_id(refresh_request.refresh_token) if refresh_token_id and await SessionService.is_token_blacklisted(refresh_token_id): - raise BusinessException("Refresh token已失效", code=BizCode.TOKEN_BLACKLISTED) + raise BusinessException(t("auth.token.refresh_token_blacklisted"), code=BizCode.TOKEN_BLACKLISTED) # 生成新 tokens new_access_token, new_access_token_id = security.create_access_token(subject=user.id) @@ -167,7 +171,7 @@ async def refresh_token( expires_at=access_expires_at, refresh_expires_at=refresh_expires_at ), - msg="token刷新成功" + msg=t("auth.token.refresh_success") ) @@ -175,14 +179,15 @@ async def refresh_token( async def logout( token: str = Depends(oauth2_scheme), current_user: User = Depends(get_current_user), - db: Session = Depends(get_db) + db: Session = Depends(get_db), + t: Callable = Depends(get_translator) ): """登出当前用户:加入token黑名单并清理会话""" auth_logger.info(f"用户 {current_user.username} 请求登出") token_id = security.get_token_id(token) if not token_id: - raise BusinessException("无效的access token", code=BizCode.TOKEN_INVALID) + raise BusinessException(t("auth.token.invalid"), code=BizCode.TOKEN_INVALID) # 加入黑名单 await SessionService.blacklist_token(token_id) @@ -192,5 +197,5 @@ async def logout( await SessionService.clear_user_session(current_user.username) auth_logger.info(f"用户 
{current_user.username} 登出成功") - return success(msg="登出成功") + return success(msg=t("auth.logout.success")) diff --git a/api/app/controllers/file_storage_controller.py b/api/app/controllers/file_storage_controller.py index b79035c0..ff284f39 100644 --- a/api/app/controllers/file_storage_controller.py +++ b/api/app/controllers/file_storage_controller.py @@ -15,7 +15,7 @@ import os import uuid from typing import Any -from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status +from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status from fastapi.responses import FileResponse, RedirectResponse from sqlalchemy.orm import Session @@ -47,6 +47,19 @@ router = APIRouter( ) +def _match_scheme(request: Request, url: str) -> str: + """ + 将 presigned URL 的协议替换为与当前请求一致的协议(http/https)。 + 解决反向代理场景下 presigned URL 协议与请求协议不匹配的问题。 + """ + incoming_scheme = request.headers.get("x-forwarded-proto") or request.url.scheme + if url.startswith("http://") and incoming_scheme == "https": + return "https://" + url[7:] + if url.startswith("https://") and incoming_scheme == "http": + return "http://" + url[8:] + return url + + @router.post("/files", response_model=ApiResponse) async def upload_file( file: UploadFile = File(...), @@ -280,6 +293,7 @@ async def upload_file_with_share_token( @router.get("/files/{file_id}", response_model=Any) async def download_file( + request: Request, file_id: uuid.UUID, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), @@ -327,6 +341,7 @@ async def download_file( else: try: presigned_url = await storage_service.get_file_url(file_key, expires=3600) + presigned_url = _match_scheme(request, presigned_url) api_logger.info(f"Redirecting to presigned URL: file_key={file_key}") return RedirectResponse(url=presigned_url, status_code=status.HTTP_302_FOUND) except FileNotFoundError: @@ -400,6 +415,7 @@ async def delete_file( @router.get("/files/{file_id}/url", response_model=ApiResponse) async 
def get_file_url( + request: Request, file_id: uuid.UUID, expires: int = None, permanent: bool = False, @@ -463,6 +479,7 @@ async def get_file_url( else: # For remote storage (OSS/S3), get presigned URL url = await storage_service.get_file_url(file_key, expires=expires) + url = _match_scheme(request, url) api_logger.info(f"Generated file URL: file_id={file_id}") return success( @@ -484,6 +501,7 @@ async def get_file_url( @router.get("/public/{file_id}", response_model=Any) async def public_download_file( + request: Request, file_id: uuid.UUID, expires: int = 0, signature: str = "", @@ -555,6 +573,7 @@ async def public_download_file( # For remote storage, redirect to presigned URL try: presigned_url = await storage_service.get_file_url(file_key, expires=3600) + presigned_url = _match_scheme(request, presigned_url) return RedirectResponse(url=presigned_url, status_code=status.HTTP_302_FOUND) except Exception as e: api_logger.error(f"Failed to get presigned URL: {e}") @@ -566,6 +585,7 @@ async def public_download_file( @router.get("/permanent/{file_id}", response_model=Any) async def permanent_download_file( + request: Request, file_id: uuid.UUID, db: Session = Depends(get_db), storage_service: FileStorageService = Depends(get_file_storage_service), @@ -625,6 +645,7 @@ async def permanent_download_file( try: # Use a very long expiration (7 days max for most cloud providers) presigned_url = await storage_service.get_file_url(file_key, expires=604800) + presigned_url = _match_scheme(request, presigned_url) return RedirectResponse(url=presigned_url, status_code=status.HTTP_302_FOUND) except Exception as e: api_logger.error(f"Failed to get presigned URL: {e}") diff --git a/api/app/controllers/i18n_controller.py b/api/app/controllers/i18n_controller.py new file mode 100644 index 00000000..5dd07797 --- /dev/null +++ b/api/app/controllers/i18n_controller.py @@ -0,0 +1,833 @@ +""" +I18n Management API Controller + +This module provides management APIs for: +- Language 
management (list, get, add, update languages) +- Translation management (get, update, reload translations) +""" + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy.orm import Session +from typing import Callable, Optional + +from app.core.logging_config import get_api_logger +from app.core.response_utils import success +from app.db import get_db +from app.dependencies import get_current_user, get_current_superuser +from app.i18n.dependencies import get_translator +from app.i18n.service import get_translation_service +from app.models.user_model import User +from app.schemas.i18n_schema import ( + LanguageInfo, + LanguageListResponse, + LanguageCreateRequest, + LanguageUpdateRequest, + TranslationResponse, + TranslationUpdateRequest, + MissingTranslationsResponse, + ReloadResponse +) +from app.schemas.response_schema import ApiResponse + +api_logger = get_api_logger() + +router = APIRouter( + prefix="/i18n", + tags=["I18n Management"], +) + + +# ============================================================================ +# Language Management APIs +# ============================================================================ + +@router.get("/languages", response_model=ApiResponse) +def get_languages( + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_user) +): + """ + Get list of all supported languages. 
+ + Returns: + List of language information including code, name, and status + """ + api_logger.info(f"Get languages request from user: {current_user.username}") + + from app.core.config import settings + translation_service = get_translation_service() + + # Get available locales from translation service + available_locales = translation_service.get_available_locales() + + # Build language info list + languages = [] + for locale in available_locales: + is_default = locale == settings.I18N_DEFAULT_LANGUAGE + is_enabled = locale in settings.I18N_SUPPORTED_LANGUAGES + + # Get native names + native_names = { + "zh": "中文(简体)", + "en": "English", + "ja": "日本語", + "ko": "한국어", + "fr": "Français", + "de": "Deutsch", + "es": "Español" + } + + language_info = LanguageInfo( + code=locale, + name=f"{locale.upper()}", + native_name=native_names.get(locale, locale), + is_enabled=is_enabled, + is_default=is_default + ) + languages.append(language_info) + + response = LanguageListResponse(languages=languages) + + api_logger.info(f"Returning {len(languages)} languages") + return success(data=response.dict(), msg=t("common.success.retrieved")) + + +@router.get("/languages/{locale}", response_model=ApiResponse) +def get_language( + locale: str, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_user) +): + """ + Get information about a specific language. 
+ + Args: + locale: Language code (e.g., 'zh', 'en') + + Returns: + Language information + """ + api_logger.info(f"Get language info request: locale={locale}, user={current_user.username}") + + from app.core.config import settings + translation_service = get_translation_service() + + # Check if locale exists + available_locales = translation_service.get_available_locales() + if locale not in available_locales: + api_logger.warning(f"Language not found: {locale}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=t("i18n.language.not_found", locale=locale) + ) + + # Build language info + is_default = locale == settings.I18N_DEFAULT_LANGUAGE + is_enabled = locale in settings.I18N_SUPPORTED_LANGUAGES + + native_names = { + "zh": "中文(简体)", + "en": "English", + "ja": "日本語", + "ko": "한국어", + "fr": "Français", + "de": "Deutsch", + "es": "Español" + } + + language_info = LanguageInfo( + code=locale, + name=f"{locale.upper()}", + native_name=native_names.get(locale, locale), + is_enabled=is_enabled, + is_default=is_default + ) + + api_logger.info(f"Returning language info for: {locale}") + return success(data=language_info.dict(), msg=t("common.success.retrieved")) + + +@router.post("/languages", response_model=ApiResponse) +def add_language( + request: LanguageCreateRequest, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Add a new language (admin only). + + Note: This endpoint validates the request but actual language addition + requires creating translation files in the locales directory. 
+ + Args: + request: Language creation request + + Returns: + Success message + """ + api_logger.info( + f"Add language request: code={request.code}, admin={current_user.username}" + ) + + from app.core.config import settings + translation_service = get_translation_service() + + # Check if language already exists + available_locales = translation_service.get_available_locales() + if request.code in available_locales: + api_logger.warning(f"Language already exists: {request.code}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=t("i18n.language.already_exists", locale=request.code) + ) + + # Note: Actual language addition requires creating translation files + # This endpoint serves as a validation and documentation point + + api_logger.info( + f"Language addition validated: {request.code}. " + "Translation files need to be created manually." + ) + + return success( + msg=t( + "i18n.language.add_instructions", + locale=request.code, + dir=settings.I18N_CORE_LOCALES_DIR + ) + ) + + +@router.put("/languages/{locale}", response_model=ApiResponse) +def update_language( + locale: str, + request: LanguageUpdateRequest, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Update language configuration (admin only). + + Note: This endpoint validates the request but actual configuration + changes require updating environment variables or config files. 
+ + Args: + locale: Language code + request: Language update request + + Returns: + Success message + """ + api_logger.info( + f"Update language request: locale={locale}, admin={current_user.username}" + ) + + translation_service = get_translation_service() + + # Check if language exists + available_locales = translation_service.get_available_locales() + if locale not in available_locales: + api_logger.warning(f"Language not found: {locale}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=t("i18n.language.not_found", locale=locale) + ) + + # Note: Actual configuration changes require updating settings + # This endpoint serves as a validation and documentation point + + api_logger.info( + f"Language update validated: {locale}. " + "Configuration changes require environment variable updates." + ) + + return success(msg=t("i18n.language.update_instructions", locale=locale)) + + +# ============================================================================ +# Translation Management APIs +# ============================================================================ + +@router.get("/translations", response_model=ApiResponse) +def get_all_translations( + locale: Optional[str] = None, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_user) +): + """ + Get all translations for all or specific locale. 
+ + Args: + locale: Optional locale filter + + Returns: + All translations organized by locale and namespace + """ + api_logger.info( + f"Get all translations request: locale={locale}, user={current_user.username}" + ) + + translation_service = get_translation_service() + + if locale: + # Get translations for specific locale + available_locales = translation_service.get_available_locales() + if locale not in available_locales: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=t("i18n.language.not_found", locale=locale) + ) + + translations = { + locale: translation_service._cache.get(locale, {}) + } + else: + # Get all translations + translations = translation_service._cache + + response = TranslationResponse(translations=translations) + + api_logger.info(f"Returning translations for: {locale or 'all locales'}") + return success(data=response.dict(), msg=t("common.success.retrieved")) + + +@router.get("/translations/{locale}", response_model=ApiResponse) +def get_locale_translations( + locale: str, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_user) +): + """ + Get all translations for a specific locale. 
+ + Args: + locale: Language code + + Returns: + All translations for the locale organized by namespace + """ + api_logger.info( + f"Get locale translations request: locale={locale}, user={current_user.username}" + ) + + translation_service = get_translation_service() + + # Check if locale exists + available_locales = translation_service.get_available_locales() + if locale not in available_locales: + api_logger.warning(f"Language not found: {locale}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=t("i18n.language.not_found", locale=locale) + ) + + translations = translation_service._cache.get(locale, {}) + + api_logger.info(f"Returning {len(translations)} namespaces for locale: {locale}") + return success(data={"locale": locale, "translations": translations}, msg=t("common.success.retrieved")) + + +@router.get("/translations/{locale}/{namespace}", response_model=ApiResponse) +def get_namespace_translations( + locale: str, + namespace: str, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_user) +): + """ + Get translations for a specific namespace in a locale. 
+ + Args: + locale: Language code + namespace: Translation namespace (e.g., 'common', 'auth') + + Returns: + Translations for the specified namespace + """ + api_logger.info( + f"Get namespace translations request: locale={locale}, " + f"namespace={namespace}, user={current_user.username}" + ) + + translation_service = get_translation_service() + + # Check if locale exists + available_locales = translation_service.get_available_locales() + if locale not in available_locales: + api_logger.warning(f"Language not found: {locale}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=t("i18n.language.not_found", locale=locale) + ) + + # Get namespace translations + locale_translations = translation_service._cache.get(locale, {}) + namespace_translations = locale_translations.get(namespace, {}) + + if not namespace_translations: + api_logger.warning(f"Namespace not found: {namespace} in locale: {locale}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=t("i18n.namespace.not_found", namespace=namespace, locale=locale) + ) + + api_logger.info( + f"Returning translations for namespace: {namespace} in locale: {locale}" + ) + return success( + data={ + "locale": locale, + "namespace": namespace, + "translations": namespace_translations + }, + msg=t("common.success.retrieved") + ) + + +@router.put("/translations/{locale}/{key:path}", response_model=ApiResponse) +def update_translation( + locale: str, + key: str, + request: TranslationUpdateRequest, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Update a single translation (admin only). + + Note: This endpoint validates the request but actual translation updates + require modifying translation files in the locales directory. 
+ + Args: + locale: Language code + key: Translation key (format: "namespace.key.subkey") + request: Translation update request + + Returns: + Success message + """ + api_logger.info( + f"Update translation request: locale={locale}, key={key}, " + f"admin={current_user.username}" + ) + + translation_service = get_translation_service() + + # Check if locale exists + available_locales = translation_service.get_available_locales() + if locale not in available_locales: + api_logger.warning(f"Language not found: {locale}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=t("i18n.language.not_found", locale=locale) + ) + + # Validate key format + if "." not in key: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=t("i18n.translation.invalid_key_format", key=key) + ) + + # Note: Actual translation updates require modifying JSON files + # This endpoint serves as a validation and documentation point + + api_logger.info( + f"Translation update validated: {locale}/{key}. " + "Translation files need to be updated manually." + ) + + return success( + msg=t("i18n.translation.update_instructions", locale=locale, key=key) + ) + + +@router.get("/translations/missing", response_model=ApiResponse) +def get_missing_translations( + locale: Optional[str] = None, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_user) +): + """ + Get list of missing translations. + + Compares translations across locales to find missing keys. 
+ + Args: + locale: Optional locale to check (defaults to checking all non-default locales) + + Returns: + List of missing translation keys + """ + api_logger.info( + f"Get missing translations request: locale={locale}, user={current_user.username}" + ) + + from app.core.config import settings + translation_service = get_translation_service() + + default_locale = settings.I18N_DEFAULT_LANGUAGE + available_locales = translation_service.get_available_locales() + + # Get default locale translations as reference + default_translations = translation_service._cache.get(default_locale, {}) + + # Collect all keys from default locale + def collect_keys(data, prefix=""): + keys = [] + for key, value in data.items(): + full_key = f"{prefix}.{key}" if prefix else key + if isinstance(value, dict): + keys.extend(collect_keys(value, full_key)) + else: + keys.append(full_key) + return keys + + default_keys = set() + for namespace, translations in default_translations.items(): + namespace_keys = collect_keys(translations, namespace) + default_keys.update(namespace_keys) + + # Find missing keys in target locale(s) + missing_by_locale = {} + + target_locales = [locale] if locale else [ + loc for loc in available_locales if loc != default_locale + ] + + for target_locale in target_locales: + if target_locale not in available_locales: + continue + + target_translations = translation_service._cache.get(target_locale, {}) + target_keys = set() + + for namespace, translations in target_translations.items(): + namespace_keys = collect_keys(translations, namespace) + target_keys.update(namespace_keys) + + missing_keys = default_keys - target_keys + if missing_keys: + missing_by_locale[target_locale] = sorted(list(missing_keys)) + + response = MissingTranslationsResponse(missing_translations=missing_by_locale) + + total_missing = sum(len(keys) for keys in missing_by_locale.values()) + api_logger.info(f"Found {total_missing} missing translations across {len(missing_by_locale)} locales") + + 
return success(data=response.dict(), msg=t("common.success.retrieved")) + + +@router.post("/reload", response_model=ApiResponse) +def reload_translations( + locale: Optional[str] = None, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Trigger hot reload of translation files (admin only). + + Args: + locale: Optional locale to reload (defaults to reloading all locales) + + Returns: + Reload status and statistics + """ + api_logger.info( + f"Reload translations request: locale={locale or 'all'}, " + f"admin={current_user.username}" + ) + + from app.core.config import settings + + if not settings.I18N_ENABLE_HOT_RELOAD: + api_logger.warning("Hot reload is disabled in configuration") + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=t("i18n.reload.disabled") + ) + + translation_service = get_translation_service() + + try: + # Reload translations + translation_service.reload(locale) + + # Get statistics + available_locales = translation_service.get_available_locales() + reloaded_locales = [locale] if locale else available_locales + + response = ReloadResponse( + success=True, + reloaded_locales=reloaded_locales, + total_locales=len(available_locales) + ) + + api_logger.info( + f"Successfully reloaded translations for: {', '.join(reloaded_locales)}" + ) + + return success(data=response.dict(), msg=t("i18n.reload.success")) + + except Exception as e: + api_logger.error(f"Failed to reload translations: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=t("i18n.reload.failed", error=str(e)) + ) + + +# ============================================================================ +# Performance Monitoring APIs +# ============================================================================ + +@router.get("/metrics", response_model=ApiResponse) +def get_metrics( + t: Callable = Depends(get_translator), + current_user: User = 
Depends(get_current_superuser) +): + """ + Get i18n performance metrics (admin only). + + Returns: + Performance metrics including: + - Request counts + - Missing translations + - Timing statistics + - Locale usage + - Error counts + """ + api_logger.info(f"Get metrics request: admin={current_user.username}") + + translation_service = get_translation_service() + metrics = translation_service.get_metrics_summary() + + api_logger.info("Returning i18n metrics") + return success(data=metrics, msg=t("common.success.retrieved")) + + +@router.get("/metrics/cache", response_model=ApiResponse) +def get_cache_stats( + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Get cache statistics (admin only). + + Returns: + Cache statistics including: + - Hit/miss rates + - LRU cache performance + - Loaded locales + - Memory usage + """ + api_logger.info(f"Get cache stats request: admin={current_user.username}") + + translation_service = get_translation_service() + cache_stats = translation_service.get_cache_stats() + memory_usage = translation_service.get_memory_usage() + + data = { + "cache": cache_stats, + "memory": memory_usage + } + + api_logger.info("Returning cache statistics") + return success(data=data, msg=t("common.success.retrieved")) + + +@router.get("/metrics/prometheus") +def get_prometheus_metrics( + current_user: User = Depends(get_current_superuser) +): + """ + Get metrics in Prometheus format (admin only). 
+ + Returns: + Prometheus-formatted metrics as plain text + """ + api_logger.info(f"Get Prometheus metrics request: admin={current_user.username}") + + from app.i18n.metrics import get_metrics + metrics = get_metrics() + prometheus_output = metrics.export_prometheus() + + from fastapi.responses import PlainTextResponse + return PlainTextResponse(content=prometheus_output) + + +@router.post("/metrics/reset", response_model=ApiResponse) +def reset_metrics( + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Reset all metrics (admin only). + + Returns: + Success message + """ + api_logger.info(f"Reset metrics request: admin={current_user.username}") + + from app.i18n.metrics import get_metrics + metrics = get_metrics() + metrics.reset() + + translation_service = get_translation_service() + translation_service.cache.reset_stats() + + api_logger.info("Metrics reset completed") + return success(msg=t("i18n.metrics.reset_success")) + + +# ============================================================================ +# Missing Translation Logging and Reporting APIs +# ============================================================================ + +@router.get("/logs/missing", response_model=ApiResponse) +def get_missing_translation_logs( + locale: Optional[str] = None, + limit: Optional[int] = 100, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Get missing translation logs (admin only). + + Returns logged missing translations with context information. 
+ + Args: + locale: Optional locale filter + limit: Maximum number of entries to return (default: 100) + + Returns: + Missing translation logs with context + """ + api_logger.info( + f"Get missing translation logs request: locale={locale}, " + f"limit={limit}, admin={current_user.username}" + ) + + translation_service = get_translation_service() + translation_logger = translation_service.translation_logger + + # Get missing translations + missing_translations = translation_logger.get_missing_translations(locale) + + # Get missing with context + missing_with_context = translation_logger.get_missing_with_context(locale, limit) + + # Get statistics + statistics = translation_logger.get_statistics() + + data = { + "missing_translations": missing_translations, + "recent_context": missing_with_context, + "statistics": statistics + } + + api_logger.info( + f"Returning {statistics['total_missing']} missing translations" + ) + return success(data=data, msg=t("common.success.retrieved")) + + +@router.get("/logs/missing/report", response_model=ApiResponse) +def generate_missing_translation_report( + locale: Optional[str] = None, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Generate a comprehensive missing translation report (admin only). 
+ + Args: + locale: Optional locale filter + + Returns: + Comprehensive report with missing translations and statistics + """ + api_logger.info( + f"Generate missing translation report request: locale={locale}, " + f"admin={current_user.username}" + ) + + translation_service = get_translation_service() + translation_logger = translation_service.translation_logger + + # Generate report + report = translation_logger.generate_report(locale) + + api_logger.info( + f"Generated report with {report['total_missing']} missing translations" + ) + return success(data=report, msg=t("common.success.retrieved")) + + +@router.post("/logs/missing/export", response_model=ApiResponse) +def export_missing_translations( + locale: Optional[str] = None, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Export missing translations to JSON file (admin only). + + Args: + locale: Optional locale filter + + Returns: + Export status and file path + """ + api_logger.info( + f"Export missing translations request: locale={locale}, " + f"admin={current_user.username}" + ) + + from datetime import datetime + translation_service = get_translation_service() + translation_logger = translation_service.translation_logger + + # Generate filename with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + locale_suffix = f"_{locale}" if locale else "_all" + output_file = f"logs/i18n/missing_translations{locale_suffix}_{timestamp}.json" + + # Export to file + translation_logger.export_to_json(output_file) + + api_logger.info(f"Missing translations exported to: {output_file}") + return success( + data={"file_path": output_file}, + msg=t("i18n.logs.export_success", file=output_file) + ) + + +@router.delete("/logs/missing", response_model=ApiResponse) +def clear_missing_translation_logs( + locale: Optional[str] = None, + t: Callable = Depends(get_translator), + current_user: User = Depends(get_current_superuser) +): + """ + Clear missing 
translation logs (admin only). + + Args: + locale: Optional locale to clear (clears all if not specified) + + Returns: + Success message + """ + api_logger.info( + f"Clear missing translation logs request: locale={locale or 'all'}, " + f"admin={current_user.username}" + ) + + translation_service = get_translation_service() + translation_logger = translation_service.translation_logger + + # Clear logs + translation_logger.clear(locale) + + api_logger.info(f"Cleared missing translation logs for: {locale or 'all locales'}") + return success(msg=t("i18n.logs.clear_success")) diff --git a/api/app/controllers/mcp_market_config_controller.py b/api/app/controllers/mcp_market_config_controller.py index 7f73663e..0f2da3b0 100644 --- a/api/app/controllers/mcp_market_config_controller.py +++ b/api/app/controllers/mcp_market_config_controller.py @@ -19,7 +19,7 @@ from app.models import mcp_market_config_model from app.models.user_model import User from app.schemas import mcp_market_config_schema from app.schemas.response_schema import ApiResponse -from app.services import mcp_market_config_service +from app.services import mcp_market_config_service, mcp_market_service # Obtain a dedicated API logger api_logger = get_api_logger() @@ -55,6 +55,12 @@ async def get_mcp_servers( status_code=status.HTTP_400_BAD_REQUEST, detail="The paging parameter must be greater than 0" ) + if page * pagesize > 100: + api_logger.warning(f"Paging parameters exceed ModelScope limit: page={page}, pagesize={pagesize}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"The maximum number of MCP services can view is 100. Please visit the ModelScope MCP Plaza." + ) # 2. 
Query mcp market config information from the database api_logger.debug(f"Query mcp market config: {mcp_market_config_id}") @@ -64,14 +70,16 @@ async def get_mcp_servers( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') # 3. Execute paged query - api = MCPApi() token = db_mcp_market_config.token + if not token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="MCP market config token is not configured" + ) + api = MCPApi() api.login(token) body = { @@ -115,6 +123,17 @@ async def get_mcp_servers( "has_next": True if page * pagesize < total else False } } + # 5. Update mck_market.mcp_count + db_mcp_market = mcp_market_service.get_mcp_market_by_id(db, mcp_market_id=db_mcp_market_config.mcp_market_id, current_user=current_user) + if not db_mcp_market: + api_logger.warning(f"The mcp market does not exist or access is denied: mcp_market_id={db_mcp_market_config.mcp_market_id}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The mcp market does not exist or access is denied" + ) + db_mcp_market.mcp_count = total + db.commit() + db.refresh(db_mcp_market) return success(data=result, msg="Query of mcp servers list successful") @@ -140,14 +159,16 @@ async def get_operational_mcp_servers( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') # 2. 
Execute paged query - api = MCPApi() token = db_mcp_market_config.token + if not token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="MCP market config token is not configured" + ) + api = MCPApi() api.login(token) url = f'{api.mcp_base_url}/operational' @@ -198,14 +219,16 @@ async def get_mcp_server( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') # 2. Get detailed information for a specific MCP Server - api = MCPApi() token = db_mcp_market_config.token + if not token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="MCP market config token is not configured" + ) + api = MCPApi() api.login(token) result = api.get_mcp_server(server_id=server_id) @@ -226,7 +249,26 @@ async def create_mcp_market_config( try: api_logger.debug(f"Start creating the mcp market config: {create_data.mcp_market_id}") - # 1. Check if the mcp market name already exists + # 1. 
Validate token can access ModelScope MCP market + if not create_data.token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Token is required to access ModelScope MCP market" + ) + try: + api = MCPApi() + api.login(create_data.token) + body = {'filter': {}, 'page_number': 1, 'page_size': 1, 'search': None} + cookies = api.get_cookies(create_data.token) + r = api.session.put(url=api.mcp_base_url, headers=api.builder_headers(api.headers), json=body, cookies=cookies) + raise_for_http_status(r) + except Exception as e: + api_logger.warning(f"Token validation failed for ModelScope MCP market: {str(e)}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Unable to access ModelScope MCP market with the provided token: {str(e)}" + ) + # 2. Check if the mcp market name already exists db_mcp_market_config_exist = mcp_market_config_service.get_mcp_market_config_by_mcp_market_id(db, mcp_market_id=create_data.mcp_market_id, current_user=current_user) if db_mcp_market_config_exist: api_logger.warning(f"The mcp market id already exists: {create_data.mcp_market_id}") @@ -234,6 +276,30 @@ async def create_mcp_market_config( status_code=status.HTTP_400_BAD_REQUEST, detail=f"The mcp market id already exists: {create_data.mcp_market_id}" ) + # 2. verify token + create_data.status = 1 + try: + api = MCPApi() + token = create_data.token + api.login(token) + + body = { + 'filter': {}, + 'page_number': 1, + 'page_size': 20, + 'search': "" + } + cookies = api.get_cookies(token) + r = api.session.put( + url=api.mcp_base_url, + headers=api.builder_headers(api.headers), + json=body, + cookies=cookies) + raise_for_http_status(r) + except requests.exceptions.RequestException as e: + api_logger.error(f"Failed to get MCP servers: {str(e)}") + create_data.status = 0 + # 3. 
create mcp_market_config db_mcp_market_config = mcp_market_config_service.create_mcp_market_config(db=db, mcp_market_config=create_data, current_user=current_user) api_logger.info( f"The mcp market config has been successfully created: (ID: {db_mcp_market_config.id})") @@ -262,10 +328,7 @@ async def get_mcp_market_config( db_mcp_market_config = mcp_market_config_service.get_mcp_market_config_by_id(db, mcp_market_config_id=mcp_market_config_id, current_user=current_user) if not db_mcp_market_config: api_logger.warning(f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') api_logger.info(f"mcp market config query successful: (ID: {db_mcp_market_config.id})") return success(data=jsonable_encoder(mcp_market_config_schema.McpMarketConfig.model_validate(db_mcp_market_config)), @@ -295,10 +358,7 @@ async def get_mcp_market_config_by_mcp_market_id( db_mcp_market_config = mcp_market_config_service.get_mcp_market_config_by_mcp_market_id(db, mcp_market_id=mcp_market_id, current_user=current_user) if not db_mcp_market_config: api_logger.warning(f"The mcp market config does not exist or access is denied: mcp_market_id={mcp_market_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') api_logger.info(f"mcp market config query successful: (ID: {db_mcp_market_config.id})") return success(data=jsonable_encoder(mcp_market_config_schema.McpMarketConfig.model_validate(db_mcp_market_config)), @@ -324,12 +384,25 @@ async def update_mcp_market_config( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or you do not have 
permission to access it: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or you do not have permission to access it" - ) + return success(msg='The mcp market config does not exist or access is denied') - # 2. Update fields (only update non-null fields) + # 2. Validate new token if provided + if update_data.token is not None: + try: + api = MCPApi() + api.login(update_data.token) + body = {'filter': {}, 'page_number': 1, 'page_size': 1, 'search': None} + cookies = api.get_cookies(update_data.token) + r = api.session.put(url=api.mcp_base_url, headers=api.builder_headers(api.headers), json=body, cookies=cookies) + raise_for_http_status(r) + except Exception as e: + api_logger.warning(f"Token validation failed for ModelScope MCP market: {str(e)}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Unable to access ModelScope MCP market with the provided token: {str(e)}" + ) + + # 3. Update fields (only update non-null fields) api_logger.debug(f"Start updating the mcp market config fields: {mcp_market_config_id}") update_dict = update_data.dict(exclude_unset=True) updated_fields = [] @@ -344,7 +417,7 @@ async def update_mcp_market_config( if updated_fields: api_logger.debug(f"updated fields: {', '.join(updated_fields)}") - # 3. Save to database + # 4. Save to database try: db.commit() db.refresh(db_mcp_market_config) @@ -357,7 +430,7 @@ async def update_mcp_market_config( detail=f"The mcp market config update failed: {str(e)}" ) - # 4. Return the updated mcp market config + # 5. 
Return the updated mcp market config return success(data=jsonable_encoder(mcp_market_config_schema.McpMarketConfig.model_validate(db_mcp_market_config)), msg="The mcp market config information updated successfully") @@ -381,10 +454,7 @@ async def delete_mcp_market_config( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or you do not have permission to access it: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or you do not have permission to access it" - ) + return success(msg='The mcp market config does not exist or access is denied') # 2. Deleting mcp market config mcp_market_config_service.delete_mcp_market_config_by_id(db, mcp_market_config_id=mcp_market_config_id, current_user=current_user) diff --git a/api/app/controllers/memory_dashboard_controller.py b/api/app/controllers/memory_dashboard_controller.py index 1b5b45fb..2c979435 100644 --- a/api/app/controllers/memory_dashboard_controller.py +++ b/api/app/controllers/memory_dashboard_controller.py @@ -1,4 +1,5 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query +from pydantic import BaseModel, Field from sqlalchemy.orm import Session from typing import Optional from app.core.response_utils import success @@ -149,6 +150,21 @@ async def get_workspace_end_users( return {uid: {"total": 0} for uid in end_user_ids} + # 触发按需初始化:为 implicit_emotions_storage 中没有记录的用户异步生成数据 + try: + from app.celery_app import celery_app as _celery_app + _celery_app.send_task( + "app.tasks.init_implicit_emotions_for_users", + kwargs={"end_user_ids": end_user_ids}, + ) + _celery_app.send_task( + "app.tasks.init_interest_distribution_for_users", + kwargs={"end_user_ids": end_user_ids}, + ) + api_logger.info(f"已触发按需初始化任务,候选用户数: {len(end_user_ids)}") + except Exception as e: + api_logger.warning(f"触发按需初始化任务失败(不影响主流程): {e}") + # 并发执行配置查询和记忆数量查询 memory_configs_map, 
memory_nums_map = await asyncio.gather( get_memory_configs(), @@ -177,7 +193,16 @@ async def get_workspace_end_users( await aio_redis_set(cache_key, json.dumps(result), expire=30) except Exception as e: api_logger.warning(f"Redis 缓存写入失败: {str(e)}") - + + # 触发社区聚类补全任务(异步,不阻塞接口响应) + # 对有 ExtractedEntity 但无 Community 节点的存量用户自动补跑全量聚类 + try: + from app.tasks import init_community_clustering_for_users + init_community_clustering_for_users.delay(end_user_ids=end_user_ids) + api_logger.info(f"已触发社区聚类补全任务,候选用户数: {len(end_user_ids)}") + except Exception as e: + api_logger.warning(f"触发社区聚类补全任务失败(不影响主流程): {str(e)}") + api_logger.info(f"成功获取 {len(end_users)} 个宿主记录") return success(data=result, msg="宿主列表获取成功") @@ -387,14 +412,15 @@ def get_current_user_rag_total_num( @router.get("/rag_content", response_model=ApiResponse) def get_rag_content( end_user_id: str = Query(..., description="宿主ID"), - limit: int = Query(15, description="返回记录数"), + page: int = Query(1, gt=0, description="页码,从1开始"), + pagesize: int = Query(15, gt=0, le=100, description="每页返回记录数"), db: Session = Depends(get_db), current_user: User = Depends(get_current_user), ): """ - 获取当前宿主知识库中的chunk内容 + 获取当前宿主知识库中的chunk内容(分页) """ - data = memory_dashboard_service.get_rag_content(end_user_id, limit, db, current_user) + data = memory_dashboard_service.get_rag_content(end_user_id, page, pagesize, db, current_user) return success(data=data, msg="宿主RAGchunk数据获取成功") @@ -407,26 +433,18 @@ async def get_chunk_summary_tag( current_user: User = Depends(get_current_user), ): """ - 获取chunk总结、提取的标签和人物形象 - + 读取RAG摘要、标签和人物形象(纯读库,不触发生成)。 + 返回格式: { - "summary": "chunk内容的总结", - "tags": [ - {"tag": "标签1", "frequency": 5}, - {"tag": "标签2", "frequency": 3}, - ... - ], - "personas": [ - "产品设计师", - "旅行爱好者", - "摄影发烧友", - ... 
- ] + "summary": "用户摘要", + "tags": [{"tag": "标签1", "frequency": 5}, ...], + "personas": ["产品设计师", ...], + "generated": true/false // false表示尚未生产,请调用 /generate_rag_profile } """ - api_logger.info(f"用户 {current_user.username} 请求获取宿主 {end_user_id} 的chunk摘要、标签和人物形象") - + api_logger.info(f"用户 {current_user.username} 读取宿主 {end_user_id} 的RAG摘要/标签/人物形象") + data = await memory_dashboard_service.get_chunk_summary_and_tags( end_user_id=end_user_id, limit=limit, @@ -434,9 +452,8 @@ async def get_chunk_summary_tag( db=db, current_user=current_user ) - - api_logger.info(f"成功获取chunk摘要、{len(data.get('tags', []))} 个标签和 {len(data.get('personas', []))} 个人物形象") - return success(data=data, msg="chunk摘要、标签和人物形象获取成功") + + return success(data=data, msg="获取成功") @router.get("/chunk_insight", response_model=ApiResponse) @@ -447,24 +464,57 @@ async def get_chunk_insight( current_user: User = Depends(get_current_user), ): """ - 获取chunk的洞察内容 - + 读取RAG洞察报告(纯读库,不触发生成)。 + 返回格式: { - "insight": "对chunk内容的深度洞察分析" + "insight": "总体概述", + "behavior_pattern": "行为模式", + "key_findings": "关键发现", + "growth_trajectory": "成长轨迹", + "generated": true/false // false表示尚未生产,请调用 /generate_rag_profile } """ - api_logger.info(f"用户 {current_user.username} 请求获取宿主 {end_user_id} 的chunk洞察") - + api_logger.info(f"用户 {current_user.username} 读取宿主 {end_user_id} 的RAG洞察") + data = await memory_dashboard_service.get_chunk_insight( end_user_id=end_user_id, limit=limit, db=db, current_user=current_user ) - - api_logger.info("成功获取chunk洞察") - return success(data=data, msg="chunk洞察获取成功") + + return success(data=data, msg="获取成功") + + +class GenerateRagProfileRequest(BaseModel): + end_user_id: str = Field(..., description="宿主ID") + limit: int = Field(15, description="参与生成的chunk数量上限") + max_tags: int = Field(10, description="最大标签数量") + + +@router.post("/generate_rag_profile", response_model=ApiResponse) +async def generate_rag_profile( + body: GenerateRagProfileRequest, + db: Session = Depends(get_db), + current_user: User = 
Depends(get_current_user), +): + """ + 生产接口:为RAG存储模式的宿主全量重新生成完整画像并持久化到end_user表。 + 每次请求都会重新生成,覆盖已有数据。 + """ + api_logger.info(f"用户 {current_user.username} 触发RAG画像生产: end_user_id={body.end_user_id}") + + data = await memory_dashboard_service.generate_rag_profile( + end_user_id=body.end_user_id, + limit=body.limit, + max_tags=body.max_tags, + db=db, + current_user=current_user, + ) + + api_logger.info(f"RAG画像生产完成: {data}") + return success(data=data, msg="RAG画像生产完成") @router.get("/dashboard_data", response_model=ApiResponse) @@ -553,9 +603,12 @@ async def dashboard_data( ) neo4j_data["total_memory"] = total_memory_data.get("total_memory_count", 0) # total_app: 统计当前空间下的所有app数量 - from app.repositories import app_repository - apps_orm = app_repository.get_apps_by_workspace_id(db, workspace_id) - neo4j_data["total_app"] = len(apps_orm) + # 包含自有app + 被分享给本工作空间的app + from app.services import app_service as _app_svc + _, total_app = _app_svc.AppService(db).list_apps( + workspace_id=workspace_id, include_shared=True, pagesize=1 + ) + neo4j_data["total_app"] = total_app api_logger.info(f"成功获取记忆总量: {neo4j_data['total_memory']}, 应用数量: {neo4j_data['total_app']}") except Exception as e: api_logger.warning(f"获取记忆总量失败: {str(e)}") diff --git a/api/app/controllers/memory_reflection_controller.py b/api/app/controllers/memory_reflection_controller.py index 5a32372a..f827eaaf 100644 --- a/api/app/controllers/memory_reflection_controller.py +++ b/api/app/controllers/memory_reflection_controller.py @@ -1,3 +1,19 @@ +""" +Memory Reflection Controller + +This module provides REST API endpoints for managing memory reflection configurations +and operations. It handles reflection engine setup, configuration management, and +execution of self-reflection processes across memory systems. 
+ +Key Features: +- Reflection configuration management (save, retrieve, update) +- Workspace-wide reflection execution across multiple applications +- Individual configuration-based reflection runs +- Multi-language support for reflection outputs +- Integration with Neo4j memory storage and LLM models +- Comprehensive error handling and logging +""" + import asyncio import time import uuid @@ -28,9 +44,13 @@ from sqlalchemy.orm import Session from app.utils.config_utils import resolve_config_id +# Load environment variables for configuration load_dotenv() + +# Initialize API logger for request tracking and debugging api_logger = get_api_logger() +# Configure router with prefix and tags for API organization router = APIRouter( prefix="/memory", tags=["Memory"], @@ -43,7 +63,38 @@ async def save_reflection_config( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict: - """Save reflection configuration to data_comfig table""" + """ + Save reflection configuration to memory config table + + Persists reflection engine configuration settings to the memory_config table, + including reflection parameters, model settings, and evaluation criteria. + Validates configuration parameters and ensures data consistency.
+ + Args: + request: Memory reflection configuration data including: + - config_id: Configuration identifier to update + - reflection_enabled: Whether reflection is enabled + - reflection_period_in_hours: Reflection execution interval + - reflexion_range: Scope of reflection (partial/all) + - baseline: Reflection strategy (time/fact/hybrid) + - reflection_model_id: LLM model for reflection operations + - memory_verify: Enable memory verification checks + - quality_assessment: Enable quality assessment evaluation + current_user: Authenticated user saving the configuration + db: Database session for data operations + + Returns: + dict: Success response with saved reflection configuration data + + Raises: + HTTPException 400: If config_id is missing or parameters are invalid + HTTPException 500: If configuration save operation fails + + Database Operations: + - Updates memory_config table with reflection settings + - Commits transaction and refreshes entity + - Maintains configuration consistency + """ try: config_id = request.config_id config_id = resolve_config_id(config_id, db) @@ -54,6 +105,7 @@ async def save_reflection_config( ) api_logger.info(f"用户 {current_user.username} 保存反思配置,config_id: {config_id}") + # Update reflection configuration in database memory_config = MemoryConfigRepository.update_reflection_config( db, config_id=config_id, @@ -66,6 +118,7 @@ async def save_reflection_config( quality_assessment=request.quality_assessment ) + # Commit transaction and refresh entity db.commit() db.refresh(memory_config) @@ -102,13 +155,55 @@ async def start_workspace_reflection( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict: - """启动工作空间中所有匹配应用的反思功能""" + """ + Start reflection functionality for all matching applications in workspace + + Initiates reflection processes across all applications within the user's current + workspace that have valid memory configurations. 
Processes each application's + configurations and associated end users, executing reflection operations + with proper error isolation and transaction management. + + This endpoint serves as a workspace-wide reflection orchestrator, ensuring + that reflection failures for individual users don't affect other operations. + + Args: + current_user: Authenticated user initiating workspace reflection + db: Database session for configuration queries + + Returns: + dict: Success response with reflection results for all processed applications: + - app_id: Application identifier + - config_id: Memory configuration identifier + - end_user_id: End user identifier + - reflection_result: Individual reflection operation result + + Processing Logic: + 1. Retrieve all applications in the current workspace + 2. Filter applications with valid memory configurations + 3. For each configuration, find matching releases + 4. Execute reflection for each end user with isolated transactions + 5. Aggregate results with error handling per user + + Error Handling: + - Individual user reflection failures are isolated + - Failed operations are logged and included in results + - Database transactions are isolated per user to prevent cascading failures + - Comprehensive error reporting for debugging + + Raises: + HTTPException 500: If workspace reflection initialization fails + + Performance Notes: + - Uses independent database sessions for each user operation + - Prevents transaction failures from affecting other users + - Comprehensive logging for operation tracking + """ workspace_id = current_user.current_workspace_id try: api_logger.info(f"用户 {current_user.username} 启动workspace反思,workspace_id: {workspace_id}") - # 使用独立的数据库会话来获取工作空间应用详情,避免事务失败 + # Use independent database session to get workspace app details, avoiding transaction failures from app.db import get_db_context with get_db_context() as query_db: service = WorkspaceAppService(query_db) @@ -116,8 +211,9 @@ async def 
start_workspace_reflection( reflection_results = [] + # Process each application in the workspace for data in result['apps_detailed_info']: - # 跳过没有配置的应用 + # Skip applications without configurations if not data['memory_configs']: api_logger.debug(f"应用 {data['id']} 没有memory_configs,跳过") continue @@ -126,22 +222,22 @@ async def start_workspace_reflection( memory_configs = data['memory_configs'] end_users = data['end_users'] - # 为每个配置和用户组合执行反思 + # Execute reflection for each configuration and user combination for config in memory_configs: config_id_str = str(config['config_id']) - # 找到匹配此配置的所有release + # Find all releases matching this configuration matching_releases = [r for r in releases if str(r['config']) == config_id_str] if not matching_releases: api_logger.debug(f"配置 {config_id_str} 没有匹配的release") continue - # 为每个用户执行反思 - 使用独立的数据库会话 + # Execute reflection for each user - using independent database sessions for user in end_users: api_logger.info(f"为用户 {user['id']} 启动反思,config_id: {config_id_str}") - # 为每个用户创建独立的数据库会话,避免事务失败影响其他用户 + # Create independent database session for each user to avoid transaction failure impact with get_db_context() as user_db: try: reflection_service = MemoryReflectionService(user_db) @@ -184,14 +280,51 @@ async def start_reflection_configs( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict: - """通过config_id查询memory_config表中的反思配置信息""" + """ + Query reflection configuration information by config_id + + Retrieves detailed reflection configuration settings from the memory_config + table for a specific configuration ID. Provides comprehensive reflection + parameters including model settings, evaluation criteria, and operational flags. 
+ + Args: + config_id: Configuration identifier (UUID or integer) to query + current_user: Authenticated user making the request + db: Database session for data operations + + Returns: + dict: Success response with detailed reflection configuration: + - config_id: Resolved configuration identifier + - reflection_enabled: Whether reflection is enabled for this config + - reflection_period_in_hours: Reflection execution interval + - reflexion_range: Scope of reflection operations (partial/all) + - baseline: Reflection strategy (time/fact/hybrid) + - reflection_model_id: LLM model identifier for reflection + - memory_verify: Memory verification flag + - quality_assessment: Quality assessment flag + + Database Operations: + - Queries memory_config table by resolved config_id + - Retrieves all reflection-related configuration fields + - Resolves configuration ID for consistent formatting + + Raises: + HTTPException 404: If configuration with specified ID is not found + HTTPException 500: If configuration query operation fails + + ID Resolution: + - Supports both UUID and integer config_id formats + - Automatically resolves to appropriate internal format + - Maintains consistency across different ID representations + """ config_id = resolve_config_id(config_id, db) try: config_id=resolve_config_id(config_id,db) api_logger.info(f"用户 {current_user.username} 查询反思配置,config_id: {config_id}") result = MemoryConfigRepository.query_reflection_config_by_id(db, config_id) memory_config_id = resolve_config_id(result.config_id, db) - # 构建返回数据 + + # Build response data with comprehensive configuration details reflection_config = { "config_id": memory_config_id, "reflection_enabled": result.enable_self_reflexion, @@ -204,10 +337,12 @@ async def start_reflection_configs( } api_logger.info(f"成功查询反思配置,config_id: {config_id}") return success(data=reflection_config, msg="反思配置查询成功") - + api_logger.info(f"Successfully queried reflection config, config_id: {config_id}") + return 
success(data=reflection_config, msg="Reflection configuration query successful") + except HTTPException: - # 重新抛出HTTP异常 + # Re-raise HTTP exceptions without modification raise except Exception as e: api_logger.error(f"查询反思配置失败: {str(e)}") @@ -223,13 +358,66 @@ async def reflection_run( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict: - """Activate the reflection function for all matching applications in the workspace""" - # 使用集中化的语言校验 + """ + Execute reflection engine with specified configuration + + Runs the reflection engine using configuration parameters from the database. + Validates model availability, sets up the reflection engine with proper + configuration, and executes the reflection process with multi-language support. + + This endpoint provides a test run capability for reflection configurations, + allowing users to validate their reflection settings and see results before + deploying to production environments. + + Args: + config_id: Configuration identifier (UUID or integer) for reflection settings + language_type: Language preference header for output localization (optional) + current_user: Authenticated user executing the reflection + db: Database session for configuration queries + + Returns: + dict: Success response with reflection execution results including: + - baseline: Reflection strategy used + - source_data: Input data processed + - memory_verifies: Memory verification results (if enabled) + - quality_assessments: Quality assessment results (if enabled) + - reflexion_data: Generated reflection insights and solutions + + Configuration Validation: + - Verifies configuration exists in database + - Validates LLM model availability + - Falls back to default model if specified model is unavailable + - Ensures all required parameters are properly set + + Reflection Engine Setup: + - Creates ReflectionConfig with database parameters + - Initializes Neo4j connector for memory access + - Sets up 
ReflectionEngine with validated model + - Configures language preferences for output + + Error Handling: + - Model validation with fallback to default + - Configuration validation and error reporting + - Comprehensive logging for debugging + - Graceful handling of missing configurations + + Raises: + HTTPException 404: If configuration is not found + HTTPException 500: If reflection execution fails + + Performance Notes: + - Direct database query for configuration retrieval + - Model validation to prevent runtime failures + - Efficient reflection engine initialization + - Language-aware output processing + """ + # Use centralized language validation for consistent localization language = get_language_from_header(language_type) api_logger.info(f"用户 {current_user.username} 查询反思配置,config_id: {config_id}") config_id = resolve_config_id(config_id, db) - # 使用MemoryConfigRepository查询反思配置 + + # Query reflection configuration using MemoryConfigRepository result = MemoryConfigRepository.query_reflection_config_by_id(db, config_id) if not result: raise HTTPException( @@ -239,7 +427,7 @@ async def reflection_run( api_logger.info(f"成功查询反思配置,config_id: {config_id}") - # 验证模型ID是否存在 + # Validate model ID existence model_id = result.reflection_model_id if model_id: try: @@ -250,6 +438,7 @@ async def reflection_run( # 可以设置为None,让反思引擎使用默认模型 model_id = None + # Create reflection configuration with database parameters config = ReflectionConfig( enabled=result.enable_self_reflexion, iteration_period=result.iteration_period, @@ -262,11 +451,13 @@ async def reflection_run( model_id=model_id, language_type=language_type ) + + # Initialize Neo4j connector and reflection engine connector = Neo4jConnector() engine = ReflectionEngine( config=config, neo4j_connector=connector, - llm_client=model_id # 传入验证后的 model_id + llm_client=model_id # Pass validated model_id ) result=await (engine.reflection_run()) diff --git a/api/app/controllers/memory_short_term_controller.py 
b/api/app/controllers/memory_short_term_controller.py index 0acac6ce..b69406a8 100644 --- a/api/app/controllers/memory_short_term_controller.py +++ b/api/app/controllers/memory_short_term_controller.py @@ -1,3 +1,18 @@ +""" +Memory Short Term Controller + +This module provides REST API endpoints for managing short-term and long-term memory +data retrieval and analysis. It handles memory system statistics, data aggregation, +and provides comprehensive memory insights for end users. + +Key Features: +- Short-term memory data retrieval and statistics +- Long-term memory data aggregation +- Entity count integration +- Multi-language response support +- Memory system analytics and reporting +""" + from typing import Optional from dotenv import load_dotenv @@ -13,9 +28,13 @@ from app.models.user_model import User from app.services.memory_short_service import LongService, ShortService from app.services.memory_storage_service import search_entity +# Load environment variables for configuration load_dotenv() + +# Initialize API logger for request tracking and debugging api_logger = get_api_logger() +# Configure router with prefix and tags for API organization router = APIRouter( prefix="/memory/short", tags=["Memory"], @@ -27,24 +46,73 @@ async def short_term_configs( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): - # 使用集中化的语言校验 + """ + Retrieve comprehensive short-term and long-term memory statistics + + Provides a comprehensive overview of memory system data for a specific end user, + including short-term memory entries, long-term memory aggregations, entity counts, + and retrieval statistics. Supports multi-language responses based on request headers. + + This endpoint serves as a central dashboard for memory system analytics, combining + data from multiple memory subsystems to provide a holistic view of user memory state. 
+ + Args: + end_user_id: Unique identifier for the end user whose memory data to retrieve + language_type: Language preference header for response localization (optional) + current_user: Authenticated user making the request (injected by dependency) + db: Database session for data operations (injected by dependency) + + Returns: + dict: Success response containing comprehensive memory statistics: + - short_term: List of short-term memory entries with detailed data + - long_term: List of long-term memory aggregations and summaries + - entity: Count of entities associated with the end user + - retrieval_number: Total count of short-term memory retrievals + - long_term_number: Total count of long-term memory entries + + Response Structure: + { + "code": 200, + "msg": "短期记忆系统数据获取成功", + "data": { + "short_term": [...], # Short-term memory entries + "long_term": [...], # Long-term memory data + "entity": 42, # Entity count + "retrieval_number": 156, # Short-term retrieval count + "long_term_number": 23 # Long-term memory count + } + } + + Raises: + Exception: Errors from the underlying services propagate unchanged; this handler does no validation or error translation + + Performance Notes: + - Combines multiple service calls for comprehensive data + - Entity search is performed asynchronously for better performance + - Response time depends on memory data volume for the specified user + """ + # Use centralized language validation for consistent localization language = get_language_from_header(language_type) - # 获取短期记忆数据 - short_term=ShortService(end_user_id, db) - short_result=short_term.get_short_databasets() - short_count=short_term.get_short_count() + # Retrieve short-term memory data and statistics + short_term = ShortService(end_user_id, db) + short_result = short_term.get_short_databasets() # Get short-term memory entries + short_count = short_term.get_short_count() # Get short-term retrieval count - long_term=LongService(end_user_id, db) -
long_result=long_term.get_long_databasets() + # Retrieve long-term memory data and aggregations + long_term = LongService(end_user_id, db) + long_result = long_term.get_long_databasets() # Get long-term memory entries + # Get entity count for the specified end user entity_result = await search_entity(end_user_id) + + # Compile comprehensive memory statistics response result = { - 'short_term': short_result, - 'long_term': long_result, - 'entity': entity_result.get('num', 0), - "retrieval_number":short_count, - "long_term_number":len(long_result) + 'short_term': short_result, # Short-term memory entries + 'long_term': long_result, # Long-term memory data + 'entity': entity_result.get('num', 0), # Entity count (default to 0 if not found) + "retrieval_number": short_count, # Short-term retrieval statistics + "long_term_number": len(long_result) # Long-term memory entry count } return success(data=result, msg="短期记忆系统数据获取成功") \ No newline at end of file diff --git a/api/app/controllers/memory_working_controller.py b/api/app/controllers/memory_working_controller.py index e5de3c04..8aab039a 100644 --- a/api/app/controllers/memory_working_controller.py +++ b/api/app/controllers/memory_working_controller.py @@ -8,6 +8,7 @@ from app.core.response_utils import success from app.db import get_db from app.dependencies import get_current_user from app.models import User +from app.schemas import conversation_schema from app.schemas.response_schema import ApiResponse from app.services.conversation_service import ConversationService @@ -90,11 +91,7 @@ def get_messages( conversation_id, ) messages = [ - { - "role": message.role, - "content": message.content, - "created_at": int(message.created_at.timestamp() * 1000), - } + conversation_schema.Message.model_validate(message) for message in messages_obj ] return success(data=messages, msg="get conversation history success") diff --git a/api/app/controllers/public_share_controller.py b/api/app/controllers/public_share_controller.py 
index 3c634ae0..33d7b60c 100644 --- a/api/app/controllers/public_share_controller.py +++ b/api/app/controllers/public_share_controller.py @@ -13,7 +13,6 @@ from app.core.logging_config import get_business_logger from app.core.response_utils import success, fail from app.db import get_db, get_db_read from app.dependencies import get_share_user_id, ShareTokenData -from app.models.app_model import App from app.models.app_model import AppType from app.repositories import knowledge_repository from app.repositories.end_user_repository import EndUserRepository @@ -22,6 +21,7 @@ from app.schemas import release_share_schema, conversation_schema from app.schemas.response_schema import PageData, PageMeta from app.services import workspace_service from app.services.app_chat_service import AppChatService, get_app_chat_service +from app.services.app_service import AppService from app.services.auth_service import create_access_token from app.services.conversation_service import ConversationService from app.services.release_share_service import ReleaseShareService @@ -215,8 +215,11 @@ def list_conversations( service = SharedChatService(db) share, release = service.get_release_by_share_token(share_data.share_token, password) end_user_repo = EndUserRepository(db) + app_service = AppService(db) + app = app_service._get_app_or_404(share.app_id) new_end_user = end_user_repo.get_or_create_end_user( app_id=share.app_id, + workspace_id=app.workspace_id, other_id=other_id ) logger.debug(new_end_user.id) @@ -308,25 +311,29 @@ async def chat( # Store end_user_id in database with original user_id end_user_repo = EndUserRepository(db) + app_service = AppService(db) + app = app_service._get_app_or_404(share.app_id) + workspace_id = app.workspace_id new_end_user = end_user_repo.get_or_create_end_user( app_id=share.app_id, + workspace_id=workspace_id, other_id=other_id, - original_user_id=user_id # Save original user_id to other_id + original_user_id=user_id ) end_user_id = str(new_end_user.id) - 
appid = share.app_id + # appid = share.app_id """获取存储类型和工作空间的ID""" # 直接通过 SQLAlchemy 查询 app(仅查询未删除的应用) - app = db.query(App).filter( - App.id == appid, - App.is_active.is_(True) - ).first() - if not app: - raise BusinessException("应用不存在", BizCode.APP_NOT_FOUND) + # app = db.query(App).filter( + # App.id == appid, + # App.is_active.is_(True) + # ).first() + # if not app: + # raise BusinessException("应用不存在", BizCode.APP_NOT_FOUND) - workspace_id = app.workspace_id + # workspace_id = app.workspace_id # 直接从 workspace 获取 storage_type(公开分享场景无需权限检查) storage_type = workspace_service.get_workspace_storage_type_without_auth( @@ -610,11 +617,11 @@ async def chat( # 多 Agent 非流式返回 result = await app_chat_service.workflow_chat( - message=payload.message, conversation_id=conversation.id, # 使用已创建的会话 ID user_id=end_user_id, # 转换为字符串 variables=payload.variables, + files=payload.files, config=config, web_search=payload.web_search, memory=payload.memory, @@ -654,17 +661,21 @@ async def config_query( workflow_service = WorkflowService(db) content = { "app_type": release.app.type, - "variables": workflow_service.get_start_node_variables(release.config) + "variables": workflow_service.get_start_node_variables(release.config), + "memory": workflow_service.is_memory_enable(release.config), + "features": release.config.get("features") } elif release.app.type == AppType.AGENT: content = { "app_type": release.app.type, - "variables": release.config.get("variables") + "variables": release.config.get("variables"), + "features": release.config.get("features") } elif release.app.type == AppType.MULTI_AGENT: content = { "app_type": release.app.type, - "variables": [] + "variables": [], + "features": release.config.get("features") } else: return fail(msg="Unsupported app type", code=BizCode.APP_TYPE_NOT_SUPPORTED) diff --git a/api/app/controllers/service/app_api_controller.py b/api/app/controllers/service/app_api_controller.py index 64143f57..32a911f9 100644 --- 
a/api/app/controllers/service/app_api_controller.py +++ b/api/app/controllers/service/app_api_controller.py @@ -95,8 +95,8 @@ async def chat( end_user_repo = EndUserRepository(db) new_end_user = end_user_repo.get_or_create_end_user( app_id=app.id, + workspace_id=workspace_id, other_id=other_id, - original_user_id=other_id # Save original user_id to other_id ) end_user_id = str(new_end_user.id) web_search = True @@ -280,6 +280,7 @@ async def chat( memory=memory, storage_type=storage_type, user_rag_memory_id=user_rag_memory_id, + files=payload.files, app_id=app.id, workspace_id=workspace_id, release_id=app.current_release.id diff --git a/api/app/controllers/tool_controller.py b/api/app/controllers/tool_controller.py index ce5b15c0..5563b9d7 100644 --- a/api/app/controllers/tool_controller.py +++ b/api/app/controllers/tool_controller.py @@ -3,8 +3,11 @@ from typing import Optional from fastapi import APIRouter, Depends, HTTPException, Query from sqlalchemy.orm import Session + +from app.core.error_codes import BizCode from app.schemas.tool_schema import ( - ToolCreateRequest, ToolUpdateRequest, ToolExecuteRequest, ParseSchemaRequest, CustomToolTestRequest + ToolCreateRequest, ToolUpdateRequest, ToolExecuteRequest, ParseSchemaRequest, + CustomToolTestRequest, ToolActiveUpdate ) from app.core.response_utils import success @@ -14,6 +17,7 @@ from app.models import User from app.models.tool_model import ToolType, ToolStatus, AuthType from app.services.tool_service import ToolService from app.schemas.response_schema import ApiResponse +from app.core.exceptions import BusinessException router = APIRouter(prefix="/tools", tags=["Tool System"]) @@ -103,7 +107,7 @@ async def create_tool( val = getattr(request, key, None) if val is not None: request.config[key] = val - tool_id = service.create_tool( + tool_id = await service.create_tool( name=request.name, tool_type=request.tool_type, tenant_id=current_user.tenant_id, @@ -113,6 +117,8 @@ async def create_tool( tags=request.tags 
) return success(data={"tool_id": tool_id}, msg="工具创建成功") + except BusinessException as e: + raise HTTPException(status_code=400, detail=e.message) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) except Exception as e: @@ -153,7 +159,7 @@ async def delete_tool( current_user: User = Depends(get_current_user), service: ToolService = Depends(get_tool_service) ): - """删除工具""" + """删除工具(逻辑删除,is_active=False)""" try: success_flag = service.delete_tool(tool_id, current_user.tenant_id) if not success_flag: @@ -165,6 +171,30 @@ async def delete_tool( raise HTTPException(status_code=500, detail=str(e)) +@router.patch("/{tool_id}/active", response_model=ApiResponse) +async def set_tool_active( + tool_id: str, + request: ToolActiveUpdate, + current_user: User = Depends(get_current_user), + service: ToolService = Depends(get_tool_service) +): + """设置工具可用状态(启用/禁用) + + - is_active=true: 启用工具 + - is_active=false: 禁用工具(等同于删除,但可恢复) + """ + try: + success_flag = service.set_tool_active(tool_id, current_user.tenant_id, request.is_active) + if not success_flag: + raise HTTPException(status_code=404, detail="工具不存在") + action = "启用" if request.is_active else "禁用" + return success(msg=f"工具已{action}") + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @router.post("/execution/execute", response_model=ApiResponse) async def execute_tool( request: ToolExecuteRequest, @@ -222,8 +252,10 @@ async def sync_mcp_tools( try: result = await service.sync_mcp_tools(tool_id, current_user.tenant_id) if not result.get("success", False): - raise HTTPException(status_code=400, detail=result.get("message", "同步失败")) + raise BusinessException(result.get("message", "工具列表同步失败"), BizCode.BAD_REQUEST) return success(data=result, msg="MCP工具列表同步完成") + except BusinessException: + raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -246,8 +278,10 
@@ async def test_tool_connection( # 普通连接测试 result = await service.test_connection(tool_id, current_user.tenant_id) if result["success"] is False: - raise HTTPException(status_code=400, detail=result["message"]) + raise BusinessException(result["message"], BizCode.SERVICE_UNAVAILABLE) return success(data=result, msg="连接测试完成") + except BusinessException: + raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) diff --git a/api/app/controllers/user_controller.py b/api/app/controllers/user_controller.py index 2806da1a..16213690 100644 --- a/api/app/controllers/user_controller.py +++ b/api/app/controllers/user_controller.py @@ -1,6 +1,7 @@ from fastapi import APIRouter, Depends from sqlalchemy.orm import Session import uuid +from typing import Callable from app.core.error_codes import BizCode from app.core.exceptions import BusinessException @@ -19,6 +20,7 @@ from app.services import user_service from app.core.logging_config import get_api_logger from app.core.response_utils import success from app.core.security import verify_password +from app.i18n.dependencies import get_translator # 获取API专用日志器 api_logger = get_api_logger() @@ -33,7 +35,8 @@ router = APIRouter( def create_superuser( user: user_schema.UserCreate, db: Session = Depends(get_db), - current_superuser: User = Depends(get_current_superuser) + current_superuser: User = Depends(get_current_superuser), + t: Callable = Depends(get_translator) ): """创建超级管理员(仅超级管理员可访问)""" api_logger.info(f"超级管理员创建请求: {user.username}, email: {user.email}") @@ -42,7 +45,7 @@ def create_superuser( api_logger.info(f"超级管理员创建成功: {result.username} (ID: {result.id})") result_schema = user_schema.User.model_validate(result) - return success(data=result_schema, msg="超级管理员创建成功") + return success(data=result_schema, msg=t("users.create.superuser_success")) @router.delete("/{user_id}", response_model=ApiResponse) @@ -50,6 +53,7 @@ def delete_user( user_id: uuid.UUID, db: Session = Depends(get_db), current_user: User 
= Depends(get_current_user), + t: Callable = Depends(get_translator) ): """停用用户(软删除)""" api_logger.info(f"用户停用请求: user_id={user_id}, 操作者: {current_user.username}") @@ -57,13 +61,14 @@ def delete_user( db=db, user_id_to_deactivate=user_id, current_user=current_user ) api_logger.info(f"用户停用成功: {result.username} (ID: {result.id})") - return success(msg="用户停用成功") + return success(msg=t("users.delete.deactivate_success")) @router.post("/{user_id}/activate", response_model=ApiResponse) def activate_user( user_id: uuid.UUID, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) ): """激活用户""" api_logger.info(f"用户激活请求: user_id={user_id}, 操作者: {current_user.username}") @@ -74,13 +79,14 @@ def activate_user( api_logger.info(f"用户激活成功: {result.username} (ID: {result.id})") result_schema = user_schema.User.model_validate(result) - return success(data=result_schema, msg="用户激活成功") + return success(data=result_schema, msg=t("users.activate.success")) @router.get("", response_model=ApiResponse) def get_current_user_info( db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) ): """获取当前用户信息""" api_logger.info(f"当前用户信息请求: {current_user.username}") @@ -105,7 +111,7 @@ def get_current_user_info( break api_logger.info(f"当前用户信息获取成功: {result.username}, 角色: {result_schema.role}, 工作空间: {result_schema.current_workspace_name}") - return success(data=result_schema, msg="用户信息获取成功") + return success(data=result_schema, msg=t("users.info.get_success")) @router.get("/superusers", response_model=ApiResponse) @@ -113,6 +119,7 @@ def get_tenant_superusers( include_inactive: bool = False, db: Session = Depends(get_db), current_user: User = Depends(get_current_superuser), + t: Callable = Depends(get_translator) ): """获取当前租户下的超管账号列表(仅超级管理员可访问)""" api_logger.info(f"获取租户超管列表请求: {current_user.username}") @@ -125,7 +132,7 @@ def get_tenant_superusers( api_logger.info(f"租户超管列表获取成功: 
count={len(superusers)}") superusers_schema = [user_schema.User.model_validate(u) for u in superusers] - return success(data=superusers_schema, msg="租户超管列表获取成功") + return success(data=superusers_schema, msg=t("users.list.superusers_success")) @@ -134,6 +141,7 @@ def get_user_info_by_id( user_id: uuid.UUID, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) ): """根据用户ID获取用户信息""" api_logger.info(f"获取用户信息请求: user_id={user_id}, 操作者: {current_user.username}") @@ -144,7 +152,7 @@ def get_user_info_by_id( api_logger.info(f"用户信息获取成功: {result.username}") result_schema = user_schema.User.model_validate(result) - return success(data=result_schema, msg="用户信息获取成功") + return success(data=result_schema, msg=t("users.info.get_success")) @router.put("/change-password", response_model=ApiResponse) @@ -152,6 +160,7 @@ async def change_password( request: ChangePasswordRequest, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) ): """修改当前用户密码""" api_logger.info(f"用户密码修改请求: {current_user.username}") @@ -164,7 +173,7 @@ async def change_password( current_user=current_user ) api_logger.info(f"用户密码修改成功: {current_user.username}") - return success(msg="密码修改成功") + return success(msg=t("auth.password.change_success")) @router.put("/admin/change-password", response_model=ApiResponse) @@ -172,6 +181,7 @@ async def admin_change_password( request: AdminChangePasswordRequest, db: Session = Depends(get_db), current_user: User = Depends(get_current_superuser), + t: Callable = Depends(get_translator) ): """超级管理员修改指定用户的密码""" api_logger.info(f"管理员密码修改请求: 管理员 {current_user.username} 修改用户 {request.user_id}") @@ -186,16 +196,17 @@ async def admin_change_password( # 根据是否生成了随机密码来构造响应 if request.new_password: api_logger.info(f"管理员密码修改成功: 用户 {request.user_id}") - return success(msg="密码修改成功") + return success(msg=t("auth.password.change_success")) else: 
api_logger.info(f"管理员密码重置成功: 用户 {request.user_id}, 随机密码已生成") - return success(data=generated_password, msg="密码重置成功") + return success(data=generated_password, msg=t("auth.password.reset_success")) @router.post("/verify_pwd", response_model=ApiResponse) def verify_pwd( request: VerifyPasswordRequest, current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) ): """验证当前用户密码""" api_logger.info(f"用户验证密码请求: {current_user.username}") @@ -203,8 +214,8 @@ def verify_pwd( is_valid = verify_password(request.password, current_user.hashed_password) api_logger.info(f"用户密码验证结果: {current_user.username}, valid={is_valid}") if not is_valid: - raise BusinessException("密码验证失败", code=BizCode.VALIDATION_FAILED) - return success(data={"valid": is_valid}, msg="验证完成") + raise BusinessException(t("users.errors.password_verification_failed"), code=BizCode.VALIDATION_FAILED) + return success(data={"valid": is_valid}, msg=t("common.success.retrieved")) @router.post("/send-email-code", response_model=ApiResponse) @@ -212,6 +223,7 @@ async def send_email_code( request: SendEmailCodeRequest, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) ): """发送邮箱验证码""" api_logger.info(f"用户请求发送邮箱验证码: {current_user.username}, email={request.email}") @@ -219,7 +231,7 @@ async def send_email_code( await user_service.send_email_code_method(db=db, email=request.email, user_id=current_user.id) api_logger.info(f"邮箱验证码已发送: {current_user.username}") - return success(msg="验证码已发送到您的邮箱,请查收") + return success(msg=t("users.email.code_sent")) @router.put("/change-email", response_model=ApiResponse) @@ -227,6 +239,7 @@ async def change_email( request: VerifyEmailCodeRequest, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) ): """验证验证码并修改邮箱""" api_logger.info(f"用户修改邮箱: {current_user.username}, new_email={request.new_email}") @@ -239,4 +252,51 @@ async def 
change_email( ) api_logger.info(f"用户邮箱修改成功: {current_user.username}") - return success(msg="邮箱修改成功") + return success(msg=t("users.email.change_success")) + + + +@router.get("/me/language", response_model=ApiResponse) +def get_current_user_language( + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) +): + """获取当前用户的语言偏好""" + api_logger.info(f"获取用户语言偏好: {current_user.username}") + + language = user_service.get_user_language_preference( + db=db, + user_id=current_user.id, + current_user=current_user + ) + + api_logger.info(f"用户语言偏好获取成功: {current_user.username}, language={language}") + return success( + data=user_schema.LanguagePreferenceResponse(language=language), + msg=t("users.language.get_success") + ) + + +@router.put("/me/language", response_model=ApiResponse) +def update_current_user_language( + request: user_schema.LanguagePreferenceRequest, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), + t: Callable = Depends(get_translator) +): + """设置当前用户的语言偏好""" + api_logger.info(f"更新用户语言偏好: {current_user.username}, language={request.language}") + + updated_user = user_service.update_user_language_preference( + db=db, + user_id=current_user.id, + language=request.language, + current_user=current_user + ) + + api_logger.info(f"用户语言偏好更新成功: {current_user.username}, language={request.language}") + return success( + data=user_schema.LanguagePreferenceResponse(language=updated_user.preferred_language), + msg=t("users.language.update_success") + ) diff --git a/api/app/controllers/user_memory_controllers.py b/api/app/controllers/user_memory_controllers.py index d3fe7d83..be796ff9 100644 --- a/api/app/controllers/user_memory_controllers.py +++ b/api/app/controllers/user_memory_controllers.py @@ -17,6 +17,7 @@ from app.services.user_memory_service import ( UserMemoryService, analytics_memory_types, analytics_graph_data, + analytics_community_graph_data, ) from 
app.services.memory_entity_relationship_service import MemoryEntityService,MemoryEmotion,MemoryInteraction from app.schemas.response_schema import ApiResponse @@ -295,6 +296,42 @@ async def get_graph_data_api( return fail(BizCode.INTERNAL_ERROR, "图数据查询失败", str(e)) +@router.get("/analytics/community_graph", response_model=ApiResponse) +async def get_community_graph_data_api( + end_user_id: str, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> dict: + workspace_id = current_user.current_workspace_id + + if workspace_id is None: + api_logger.warning(f"用户 {current_user.username} 尝试查询社区图谱但未选择工作空间") + return fail(BizCode.INVALID_PARAMETER, "请先切换到一个工作空间", "current_workspace_id is None") + + api_logger.info( + f"社区图谱查询请求: end_user_id={end_user_id}, user={current_user.username}, " + f"workspace={workspace_id}" + ) + + try: + result = await analytics_community_graph_data(db=db, end_user_id=end_user_id) + + if "message" in result and result["statistics"]["total_nodes"] == 0: + api_logger.warning(f"社区图谱查询返回空结果: {result.get('message')}") + return success(data=result, msg=result.get("message", "查询成功")) + + api_logger.info( + f"成功获取社区图谱: end_user_id={end_user_id}, " + f"nodes={result['statistics']['total_nodes']}, " + f"edges={result['statistics']['total_edges']}" + ) + return success(data=result, msg="查询成功") + + except Exception as e: + api_logger.error(f"社区图谱查询失败: end_user_id={end_user_id}, error={str(e)}") + return fail(BizCode.INTERNAL_ERROR, "社区图谱查询失败", str(e)) + + @router.get("/read_end_user/profile", response_model=ApiResponse) async def get_end_user_profile( end_user_id: str, diff --git a/api/app/controllers/workspace_controller.py b/api/app/controllers/workspace_controller.py index 9bcd8571..6f4a4fa8 100644 --- a/api/app/controllers/workspace_controller.py +++ b/api/app/controllers/workspace_controller.py @@ -14,6 +14,12 @@ from app.dependencies import ( get_current_user, workspace_access_guard, ) +from app.i18n.dependencies import 
get_current_language, get_translator +from app.i18n.serializers import ( + WorkspaceSerializer, + WorkspaceMemberSerializer, + WorkspaceInviteSerializer +) from app.models.tenant_model import Tenants from app.models.user_model import User from app.models.workspace_model import InviteStatus @@ -65,7 +71,9 @@ def get_workspaces( include_current: bool = Query(True, description="是否包含当前工作空间"), db: Session = Depends(get_db), current_user: User = Depends(get_current_user), - current_tenant: Tenants = Depends(get_current_tenant) + current_tenant: Tenants = Depends(get_current_tenant), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """获取当前租户下用户参与的所有工作空间 @@ -88,8 +96,13 @@ def get_workspaces( ) api_logger.info(f"成功获取 {len(workspaces)} 个工作空间") - workspaces_schema = [WorkspaceResponse.model_validate(w) for w in workspaces] - return success(data=workspaces_schema, msg="工作空间列表获取成功") + + # 使用序列化器添加国际化字段 + serializer = WorkspaceSerializer() + workspaces_data = [WorkspaceResponse.model_validate(w).model_dump() for w in workspaces] + workspaces_i18n = serializer.serialize_list(workspaces_data, language) + + return success(data=workspaces_i18n, msg=t("workspace.list_retrieved")) @router.post("", response_model=ApiResponse) @@ -98,6 +111,8 @@ def create_workspace( language_type: str = Header(default="zh", alias="X-Language-Type"), db: Session = Depends(get_db), current_user: User = Depends(get_current_superuser), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """创建新的工作空间""" from app.core.language_utils import get_language_from_header @@ -118,8 +133,13 @@ def create_workspace( f"工作空间创建成功 - 名称: {workspace.name}, ID: {result.id}, " f"创建者: {current_user.username}, language={language}" ) - result_schema = WorkspaceResponse.model_validate(result) - return success(data=result_schema, msg="工作空间创建成功") + + # 使用序列化器添加国际化字段 + serializer = WorkspaceSerializer() + result_data = 
WorkspaceResponse.model_validate(result).model_dump() + result_i18n = serializer.serialize(result_data, language) + + return success(data=result_i18n, msg=t("workspace.created")) @router.put("", response_model=ApiResponse) @cur_workspace_access_guard() @@ -127,6 +147,8 @@ def update_workspace( workspace: WorkspaceUpdate, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """更新工作空间""" workspace_id = current_user.current_workspace_id @@ -139,14 +161,21 @@ def update_workspace( user=current_user, ) api_logger.info(f"工作空间更新成功 - ID: {workspace_id}, 用户: {current_user.username}") - result_schema = WorkspaceResponse.model_validate(result) - return success(data=result_schema, msg="工作空间更新成功") + + # 使用序列化器添加国际化字段 + serializer = WorkspaceSerializer() + result_data = WorkspaceResponse.model_validate(result).model_dump() + result_i18n = serializer.serialize(result_data, language) + + return success(data=result_i18n, msg=t("workspace.updated")) @router.get("/members", response_model=ApiResponse) @cur_workspace_access_guard() def get_cur_workspace_members( db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """获取工作空间成员列表(关系序列化)""" api_logger.info(f"用户 {current_user.username} 请求获取工作空间 {current_user.current_workspace_id} 的成员列表") @@ -157,8 +186,14 @@ def get_cur_workspace_members( user=current_user, ) api_logger.info(f"工作空间成员列表获取成功 - ID: {current_user.current_workspace_id}, 数量: {len(members)}") + + # 转换为表格项并使用序列化器添加国际化字段 table_items = _convert_members_to_table_items(members) - return success(data=table_items, msg="工作空间成员列表获取成功") + serializer = WorkspaceMemberSerializer() + members_data = [item.model_dump() for item in table_items] + members_i18n = serializer.serialize_list(members_data, language) + + return success(data=members_i18n, 
msg=t("workspace.members.list_retrieved")) @router.put("/members", response_model=ApiResponse) @@ -168,6 +203,7 @@ def update_workspace_members( updates: List[WorkspaceMemberUpdate], db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: callable = Depends(get_translator) ): workspace_id = current_user.current_workspace_id api_logger.info(f"用户 {current_user.username} 请求更新工作空间 {workspace_id} 的成员角色") @@ -178,7 +214,7 @@ def update_workspace_members( user=current_user, ) api_logger.info(f"工作空间成员角色更新成功 - ID: {workspace_id}, 数量: {len(members)}") - return success(msg="成员角色更新成功") + return success(msg=t("workspace.members.role_updated")) @router.delete("/members/{member_id}", response_model=ApiResponse) @@ -187,6 +223,7 @@ def delete_workspace_member( member_id: uuid.UUID, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: callable = Depends(get_translator) ): workspace_id = current_user.current_workspace_id api_logger.info(f"用户 {current_user.username} 请求删除工作空间 {workspace_id} 的成员 {member_id}") @@ -198,7 +235,7 @@ def delete_workspace_member( user=current_user, ) api_logger.info(f"工作空间成员删除成功 - ID: {workspace_id}, 成员: {member_id}") - return success(msg="成员删除成功") + return success(msg=t("workspace.members.deleted")) # 创建空间协作邀请 @@ -208,6 +245,8 @@ def create_workspace_invite( invite_data: WorkspaceInviteCreate, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """创建工作空间邀请""" workspace_id = current_user.current_workspace_id @@ -220,7 +259,12 @@ def create_workspace_invite( user=current_user ) api_logger.info(f"工作空间邀请创建成功 - 工作空间: {workspace_id}, 邮箱: {invite_data.email}") - return success(data=result, msg="邀请创建成功") + + # 使用序列化器添加国际化字段 + serializer = WorkspaceInviteSerializer() + result_i18n = serializer.serialize(result, language) + + return success(data=result_i18n, 
msg=t("workspace.invites.created")) @router.get("/invites", response_model=ApiResponse) @@ -232,6 +276,8 @@ def get_workspace_invites( offset: int = Query(0, ge=0), db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """获取工作空间邀请列表""" workspace_id = current_user.current_workspace_id @@ -246,18 +292,30 @@ def get_workspace_invites( offset=offset ) api_logger.info(f"成功获取 {len(invites)} 个邀请记录") - return success(data=invites, msg="邀请列表获取成功") + + # 使用序列化器添加国际化字段 + serializer = WorkspaceInviteSerializer() + invites_i18n = serializer.serialize_list(invites, language) + + return success(data=invites_i18n, msg=t("workspace.invites.list_retrieved")) @public_router.get("/invites/validate/{token}", response_model=ApiResponse) def get_workspace_invite_info( token: str, db: Session = Depends(get_db), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """获取工作空间邀请用户信息(无需认证)""" result = workspace_service.validate_invite_token(db=db, token=token) api_logger.info(f"工作空间邀请验证成功 - 邀请: {token}") - return success(data=result, msg="邀请验证成功") + + # 使用序列化器添加国际化字段 + serializer = WorkspaceInviteSerializer() + result_i18n = serializer.serialize(result, language) + + return success(data=result_i18n, msg=t("workspace.invites.validated")) @router.delete("/invites/{invite_id}", response_model=ApiResponse) @@ -267,6 +325,8 @@ def revoke_workspace_invite( invite_id: uuid.UUID, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """撤销工作空间邀请""" workspace_id = current_user.current_workspace_id @@ -279,7 +339,12 @@ def revoke_workspace_invite( user=current_user ) api_logger.info(f"工作空间邀请撤销成功 - 邀请: {invite_id}") - return success(data=result, msg="邀请撤销成功") + + # 使用序列化器添加国际化字段 + serializer = WorkspaceInviteSerializer() + result_i18n = 
serializer.serialize(result, language) + + return success(data=result_i18n, msg=t("workspace.invites.revoked")) # ==================== 公开邀请接口(无需认证) ==================== @@ -302,6 +367,7 @@ def switch_workspace( workspace_id: uuid.UUID, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: callable = Depends(get_translator) ): """切换工作空间""" api_logger.info(f"用户 {current_user.username} 请求切换工作空间为 {workspace_id}") @@ -312,7 +378,7 @@ def switch_workspace( user=current_user, ) api_logger.info(f"成功切换工作空间为 {workspace_id}") - return success(msg="工作空间切换成功") + return success(msg=t("workspace.switched")) @router.get("/storage", response_model=ApiResponse) @@ -320,6 +386,7 @@ def switch_workspace( def get_workspace_storage_type( db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: callable = Depends(get_translator) ): """获取当前工作空间的存储类型""" workspace_id = current_user.current_workspace_id @@ -331,7 +398,7 @@ def get_workspace_storage_type( user=current_user ) api_logger.info(f"成功获取工作空间 {workspace_id} 的存储类型: {storage_type}") - return success(data={"storage_type": storage_type}, msg="存储类型获取成功") + return success(data={"storage_type": storage_type}, msg=t("workspace.storage.type_retrieved")) @router.get("/workspace_models", response_model=ApiResponse) @@ -339,6 +406,8 @@ def get_workspace_storage_type( def workspace_models_configs( db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + language: str = Depends(get_current_language), + t: callable = Depends(get_translator) ): """获取当前工作空间的模型配置(llm, embedding, rerank)""" workspace_id = current_user.current_workspace_id @@ -354,14 +423,14 @@ def workspace_models_configs( api_logger.warning(f"工作空间 {workspace_id} 不存在或无权访问") raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail="工作空间不存在或无权访问" + detail=t("workspace.not_found") ) api_logger.info( f"成功获取工作空间 {workspace_id} 的模型配置: " f"llm={configs.get('llm')}, 
embedding={configs.get('embedding')}, rerank={configs.get('rerank')}" ) - return success(data=WorkspaceModelsConfig.model_validate(configs), msg="模型配置获取成功") + return success(data=WorkspaceModelsConfig.model_validate(configs), msg=t("workspace.models.config_retrieved")) @router.put("/workspace_models", response_model=ApiResponse) @@ -370,6 +439,7 @@ def update_workspace_models_configs( models_update: WorkspaceModelsUpdate, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), + t: callable = Depends(get_translator) ): """更新当前工作空间的模型配置(llm, embedding, rerank)""" workspace_id = current_user.current_workspace_id @@ -386,5 +456,5 @@ def update_workspace_models_configs( f"成功更新工作空间 {workspace_id} 的模型配置: " f"llm={updated_workspace.llm}, embedding={updated_workspace.embedding}, rerank={updated_workspace.rerank}" ) - return success(data=WorkspaceModelsConfig.model_validate(updated_workspace), msg="模型配置更新成功") + return success(data=WorkspaceModelsConfig.model_validate(updated_workspace), msg=t("workspace.models.config_updated")) diff --git a/api/app/core/config.py b/api/app/core/config.py index bbe327b6..cdaa13cc 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -1,7 +1,6 @@ -import json import os from pathlib import Path -from typing import Annotated, Any, Dict, Optional +from typing import Annotated, Optional from dotenv import load_dotenv from pydantic import Field, TypeAdapter @@ -115,6 +114,7 @@ class Settings: S3_ACCESS_KEY_ID: str = os.getenv("S3_ACCESS_KEY_ID", "") S3_SECRET_ACCESS_KEY: str = os.getenv("S3_SECRET_ACCESS_KEY", "") S3_BUCKET_NAME: str = os.getenv("S3_BUCKET_NAME", "") + S3_ENDPOINT_URL: str = os.getenv("S3_ENDPOINT_URL", "") # VOLC ASR settings VOLC_APP_KEY: str = os.getenv("VOLC_APP_KEY", "") @@ -162,6 +162,44 @@ class Settings: # This controls the language used for memory summary titles and other generated content DEFAULT_LANGUAGE: str = os.getenv("DEFAULT_LANGUAGE", "zh") + # 
======================================================================== + # Internationalization (i18n) Configuration + # ======================================================================== + # Default language for API responses + I18N_DEFAULT_LANGUAGE: str = os.getenv("I18N_DEFAULT_LANGUAGE", "zh") + + # Supported languages (comma-separated) + I18N_SUPPORTED_LANGUAGES: list[str] = [ + lang.strip() + for lang in os.getenv("I18N_SUPPORTED_LANGUAGES", "zh,en").split(",") + if lang.strip() + ] + + # Core locales directory (community edition) + # Use absolute path to work from any working directory + I18N_CORE_LOCALES_DIR: str = os.getenv( + "I18N_CORE_LOCALES_DIR", + os.path.join(os.path.dirname(os.path.dirname(__file__)), "locales") + ) + + # Premium locales directory (enterprise edition, optional) + I18N_PREMIUM_LOCALES_DIR: Optional[str] = os.getenv("I18N_PREMIUM_LOCALES_DIR", None) + + # Enable translation cache + I18N_ENABLE_TRANSLATION_CACHE: bool = os.getenv("I18N_ENABLE_TRANSLATION_CACHE", "true").lower() == "true" + + # LRU cache size for hot translations + I18N_LRU_CACHE_SIZE: int = int(os.getenv("I18N_LRU_CACHE_SIZE", "1000")) + + # Enable hot reload of translation files + I18N_ENABLE_HOT_RELOAD: bool = os.getenv("I18N_ENABLE_HOT_RELOAD", "false").lower() == "true" + + # Fallback language when translation is missing + I18N_FALLBACK_LANGUAGE: str = os.getenv("I18N_FALLBACK_LANGUAGE", "zh") + + # Log missing translations + I18N_LOG_MISSING_TRANSLATIONS: bool = os.getenv("I18N_LOG_MISSING_TRANSLATIONS", "true").lower() == "true" + # Logging settings LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO") LOG_FORMAT: str = os.getenv("LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s") diff --git a/api/app/core/memory/agent/langgraph_graph/nodes/data_nodes.py b/api/app/core/memory/agent/langgraph_graph/nodes/data_nodes.py index 6595a2ce..5241ac89 100644 --- a/api/app/core/memory/agent/langgraph_graph/nodes/data_nodes.py +++ 
b/api/app/core/memory/agent/langgraph_graph/nodes/data_nodes.py @@ -1,16 +1,45 @@ from app.core.memory.agent.utils.llm_tools import ReadState, WriteState +from app.schemas.memory_agent_schema import AgentMemoryDataset def content_input_node(state: ReadState) -> ReadState: - """开始节点 - 提取内容并保持状态信息""" + """ + Start node - Extract content and maintain state information + + Extracts the content from the first message in the state and returns it + as the data field while preserving all other state information. + + Args: + state: ReadState containing messages and other state data + + Returns: + ReadState: Updated state with extracted content in data field + """ content = state['messages'][0].content if state.get('messages') else '' - # 返回内容并保持所有状态信息 + # Return content and maintain all state information + for pronoun in AgentMemoryDataset.PRONOUN: + content = content.replace(pronoun, AgentMemoryDataset.NAME) + return {"data": content} + def content_input_write(state: WriteState) -> WriteState: - """开始节点 - 提取内容并保持状态信息""" + """ + Start node - Extract content and maintain state information for write operations + + Extracts the content from the first message in the state for write operations. 
+ + Args: + state: WriteState containing messages and other state data + + Returns: + WriteState: Updated state with extracted content in data field + """ content = state['messages'][0].content if state.get('messages') else '' - # 返回内容并保持所有状态信息 - return {"data": content} \ No newline at end of file + # Return content and maintain all state information + for pronoun in AgentMemoryDataset.PRONOUN: + content = content.replace(pronoun, AgentMemoryDataset.NAME) + + return {"data": content} diff --git a/api/app/core/memory/agent/langgraph_graph/nodes/problem_nodes.py b/api/app/core/memory/agent/langgraph_graph/nodes/problem_nodes.py index 784e5802..3030669c 100644 --- a/api/app/core/memory/agent/langgraph_graph/nodes/problem_nodes.py +++ b/api/app/core/memory/agent/langgraph_graph/nodes/problem_nodes.py @@ -19,19 +19,39 @@ logger = get_agent_logger(__name__) class ProblemNodeService(LLMServiceMixin): - """问题处理节点服务类""" + """ + Problem processing node service class + + Handles problem decomposition and extension operations using LLM services. + Inherits from LLMServiceMixin to provide structured LLM calling capabilities. + + Attributes: + template_service: Service for rendering Jinja2 templates + """ def __init__(self): super().__init__() self.template_service = TemplateService(template_root) -# 创建全局服务实例 +# Create global service instance problem_service = ProblemNodeService() async def Split_The_Problem(state: ReadState) -> ReadState: - """问题分解节点""" + """ + Problem decomposition node + + Breaks down complex user queries into smaller, more manageable sub-problems. + Uses LLM to analyze the input and generate structured problem decomposition + with question types and reasoning. 
+ + Args: + state: ReadState containing user input and configuration + + Returns: + ReadState: Updated state with problem decomposition results + """ # 从状态中获取数据 content = state.get('data', '') end_user_id = state.get('end_user_id', '') @@ -64,7 +84,7 @@ async def Split_The_Problem(state: ReadState) -> ReadState: # 添加更详细的日志记录 logger.info(f"Split_The_Problem: 开始处理问题分解,内容长度: {len(content)}") - # 验证结构化响应 + # Validate structured response if not structured or not hasattr(structured, 'root'): logger.warning("Split_The_Problem: 结构化响应为空或格式不正确") split_result = json.dumps([], ensure_ascii=False) @@ -106,7 +126,7 @@ async def Split_The_Problem(state: ReadState) -> ReadState: exc_info=True ) - # 提供更详细的错误信息 + # Provide more detailed error information error_details = { "error_type": type(e).__name__, "error_message": str(e), @@ -116,7 +136,7 @@ async def Split_The_Problem(state: ReadState) -> ReadState: logger.error(f"Split_The_Problem error details: {error_details}") - # 创建默认的空结果 + # Create default empty result result = { "context": json.dumps([], ensure_ascii=False), "original": content, @@ -130,13 +150,25 @@ async def Split_The_Problem(state: ReadState) -> ReadState: } } - # 返回更新后的状态,包含spit_context字段 + # Return the decomposition result under the spit_data key return {"spit_data": result} async def Problem_Extension(state: ReadState) -> ReadState: - """问题扩展节点""" - # 获取原始数据和分解结果 + """ + Problem extension node + + Extends the decomposed problems from Split_The_Problem node by generating + additional related questions and organizing them by original question. + Uses LLM to create comprehensive question extensions for better memory retrieval.
+ + Args: + state: ReadState containing decomposed problems and configuration + + Returns: + ReadState: Updated state with extended problem results + """ + # Get original data and decomposition results start = time.time() content = state.get('data', '') data = state.get('spit_data', '')['context'] @@ -182,7 +214,7 @@ async def Problem_Extension(state: ReadState) -> ReadState: logger.info(f"Problem_Extension: 开始处理问题扩展,问题数量: {len(databasets)}") - # 验证结构化响应 + # Validate structured response if not response_content or not hasattr(response_content, 'root'): logger.warning("Problem_Extension: 结构化响应为空或格式不正确") aggregated_dict = {} @@ -216,7 +248,7 @@ async def Problem_Extension(state: ReadState) -> ReadState: exc_info=True ) - # 提供更详细的错误信息 + # Provide more detailed error information error_details = { "error_type": type(e).__name__, "error_message": str(e), diff --git a/api/app/core/memory/agent/langgraph_graph/nodes/retrieve_nodes.py b/api/app/core/memory/agent/langgraph_graph/nodes/retrieve_nodes.py index 06539ad1..68260f26 100644 --- a/api/app/core/memory/agent/langgraph_graph/nodes/retrieve_nodes.py +++ b/api/app/core/memory/agent/langgraph_graph/nodes/retrieve_nodes.py @@ -29,6 +29,18 @@ logger = get_agent_logger(__name__) async def rag_config(state): + """ + Configure RAG (Retrieval-Augmented Generation) settings + + Creates configuration for knowledge base retrieval including similarity thresholds, + weights, and reranker settings. + + Args: + state: Current state containing user_rag_memory_id + + Returns: + dict: RAG configuration dictionary + """ user_rag_memory_id = state.get('user_rag_memory_id', '') kb_config = { "knowledge_bases": [ @@ -48,6 +60,19 @@ async def rag_config(state): async def rag_knowledge(state, question): + """ + Retrieve knowledge using RAG approach + + Performs knowledge retrieval from configured knowledge bases using the + provided question and returns formatted results. 
+ + Args: + state: Current state containing configuration + question: Question to search for + + Returns: + tuple: (retrieval_knowledge, clean_content, cleaned_query, raw_results) + """ kb_config = await rag_config(state) end_user_id = state.get('end_user_id', '') user_rag_memory_id = state.get("user_rag_memory_id", '') @@ -68,12 +93,24 @@ async def rag_knowledge(state, question): async def llm_infomation(state: ReadState) -> ReadState: + """ + Get LLM configuration information from state + + Retrieves model configuration details including model ID and tenant ID + from the memory configuration in the current state. + + Args: + state: ReadState containing memory configuration + + Returns: + ReadState: Model configuration as Pydantic model + """ memory_config = state.get('memory_config', None) model_id = memory_config.llm_model_id tenant_id = memory_config.tenant_id - # 使用现有的 memory_config 而不是重新查询数据库 - # 或者使用线程安全的数据库访问 + # Use existing memory_config instead of re-querying database + # or use thread-safe database access with get_db_context() as db: result_orm = ModelConfigService.get_model_by_id(db=db, model_id=model_id, tenant_id=tenant_id) result_pydantic = model_schema.ModelConfig.model_validate(result_orm) @@ -82,16 +119,20 @@ async def llm_infomation(state: ReadState) -> ReadState: async def clean_databases(data) -> str: """ - 简化的数据库搜索结果清理函数 + Simplified database search result cleaning function + + Processes and cleans search results from various sources including + reranked results and time-based search results. Extracts text content + from structured data and returns as formatted string. 
Args: - data: 搜索结果数据 + data: Search result data (can be string, dict, or other types) Returns: - 清理后的内容字符串 + str: Cleaned content string """ try: - # 解析JSON字符串 + # Parse JSON string if isinstance(data, str): try: data = json.loads(data) @@ -101,24 +142,24 @@ async def clean_databases(data) -> str: if not isinstance(data, dict): return str(data) - # 获取结果数据 + # Get result data # with open("搜索结果.json","w",encoding='utf-8') as f: # f.write(json.dumps(data, indent=4, ensure_ascii=False)) results = data.get('results', data) if not isinstance(results, dict): return str(results) - # 收集所有内容 + # Collect all content content_list = [] - # 处理重排序结果 + # Process reranked results reranked = results.get('reranked_results', {}) if reranked: - for category in ['summaries', 'statements', 'chunks', 'entities']: + for category in ['summaries', 'communities', 'statements', 'chunks', 'entities']: items = reranked.get(category, []) if isinstance(items, list): content_list.extend(items) - # 处理时间搜索结果 + # Process time search results time_search = results.get('time_search', {}) if time_search: if isinstance(time_search, dict): @@ -128,11 +169,18 @@ async def clean_databases(data) -> str: elif isinstance(time_search, list): content_list.extend(time_search) - # 提取文本内容 + # Extract text content,对 community 按 name 去重(多次 tool 调用会产生重复) text_parts = [] + seen_community_names = set() for item in content_list: if isinstance(item, dict): - text = item.get('statement') or item.get('content', '') + # community 节点用 name 去重 + if 'member_count' in item or 'core_entities' in item: + community_name = item.get('name') or item.get('id', '') + if community_name in seen_community_names: + continue + seen_community_names.add(community_name) + text = item.get('statement') or item.get('content') or item.get('summary', '') if text: text_parts.append(text) elif isinstance(item, str): @@ -146,10 +194,19 @@ async def clean_databases(data) -> str: async def retrieve_nodes(state: ReadState) -> ReadState: - ''' - - 模型信息 - ''' 
+ """ + Retrieve information using simplified search approach + + Processes extended problems from previous nodes and performs retrieval + using either RAG or hybrid search based on storage type. Handles concurrent + processing of multiple questions and deduplicates results. + + Args: + state: ReadState containing problem extensions and configuration + + Returns: + ReadState: Updated state with retrieval results and intermediate outputs + """ problem_extension = state.get('problem_extension', '')['context'] storage_type = state.get('storage_type', '') @@ -163,7 +220,7 @@ async def retrieve_nodes(state: ReadState) -> ReadState: problem_list.append(data) logger.info(f"Retrieve: storage_type={storage_type}, user_rag_memory_id={user_rag_memory_id}") - # 创建异步任务处理单个问题 + # Create async task to process individual questions async def process_question_nodes(idx, question): try: # Prepare search parameters based on storage type @@ -209,7 +266,7 @@ async def retrieve_nodes(state: ReadState) -> ReadState: } } - # 并发处理所有问题 + # Process all questions concurrently tasks = [process_question_nodes(idx, question) for idx, question in enumerate(problem_list)] databases_anser = await asyncio.gather(*tasks) databases_data = { @@ -257,7 +314,20 @@ async def retrieve_nodes(state: ReadState) -> ReadState: async def retrieve(state: ReadState) -> ReadState: - # 从state中获取end_user_id + """ + Advanced retrieve function using LangChain agents and tools + + Uses LangChain agents with specialized retrieval tools (time-based and hybrid) + to perform sophisticated information retrieval. Supports both RAG and traditional + memory storage approaches with concurrent processing and result deduplication. 
+ + Args: + state: ReadState containing problem extensions and configuration + + Returns: + ReadState: Updated state with retrieval results and intermediate outputs + """ + # Get end_user_id from state import time start = time.time() problem_extension = state.get('problem_extension', '')['context'] @@ -291,7 +361,11 @@ async def retrieve(state: ReadState) -> ReadState: ) time_retrieval_tool = create_time_retrieval_tool(end_user_id) - search_params = {"end_user_id": end_user_id, "return_raw_results": True} + search_params = { + "end_user_id": end_user_id, + "return_raw_results": True, + "include": ["summaries", "statements", "chunks", "entities", "communities"], + } hybrid_retrieval = create_hybrid_retrieval_tool_sync(memory_config, **search_params) agent = create_agent( llm, @@ -299,21 +373,21 @@ async def retrieve(state: ReadState) -> ReadState: system_prompt=f"我是检索专家,可以根据适合的工具进行检索。当前使用的end_user_id是: {end_user_id}" ) - # 创建异步任务处理单个问题 + # Create async task to process individual questions import asyncio - # 在模块级别定义信号量,限制最大并发数 - SEMAPHORE = asyncio.Semaphore(5) # 限制最多5个并发数据库操作 + # Define semaphore at module level to limit maximum concurrency + SEMAPHORE = asyncio.Semaphore(5) # Limit to maximum 5 concurrent database operations async def process_question(idx, question): - async with SEMAPHORE: # 限制并发 + async with SEMAPHORE: # Limit concurrency try: if storage_type == "rag" and user_rag_memory_id: retrieval_knowledge, clean_content, cleaned_query, raw_results = await rag_knowledge(state, question) else: cleaned_query = question - # 使用 asyncio 在线程池中运行同步的 agent.invoke + # Use asyncio to run synchronous agent.invoke in thread pool import asyncio response = await asyncio.get_event_loop().run_in_executor( None, @@ -327,8 +401,32 @@ async def retrieve(state: ReadState) -> ReadState: raw_results = tool_results['content'] clean_content = await clean_databases(raw_results) + # 社区展开:从 tool 返回结果中提取命中的 community, + # 沿 BELONGS_TO_COMMUNITY 关系拉取关联 Statement 追加到 clean_content + 
_expanded_stmts_to_write = [] + try: + results_dict = raw_results.get('results', {}) if isinstance(raw_results, dict) else {} + reranked = results_dict.get('reranked_results', {}) + community_hits = reranked.get('communities', []) + if not community_hits: + community_hits = results_dict.get('communities', []) + if community_hits: + from app.core.memory.agent.services.search_service import expand_communities_to_statements + _expanded_stmts_to_write, new_texts = await expand_communities_to_statements( + community_results=community_hits, + end_user_id=end_user_id, + existing_content=clean_content, + ) + if new_texts: + clean_content = clean_content + '\n' + '\n'.join(new_texts) + except Exception as parse_err: + logger.warning(f"[Retrieve] 解析社区命中结果失败,跳过展开: {parse_err}") + try: raw_results = raw_results['results'] + # 写回展开结果,接口返回中可见(已在 helper 中清洗过字段) + if _expanded_stmts_to_write and isinstance(raw_results, dict): + raw_results.setdefault('reranked_results', {})['expanded_statements'] = _expanded_stmts_to_write except Exception: raw_results = [] @@ -362,7 +460,7 @@ async def retrieve(state: ReadState) -> ReadState: } } - # 并发处理所有问题 + # Process all questions concurrently import asyncio tasks = [process_question(idx, question) for idx, question in enumerate(problem_list)] databases_anser = await asyncio.gather(*tasks) diff --git a/api/app/core/memory/agent/langgraph_graph/nodes/summary_nodes.py b/api/app/core/memory/agent/langgraph_graph/nodes/summary_nodes.py index 87606bf8..d967a285 100644 --- a/api/app/core/memory/agent/langgraph_graph/nodes/summary_nodes.py +++ b/api/app/core/memory/agent/langgraph_graph/nodes/summary_nodes.py @@ -23,18 +23,39 @@ logger = get_agent_logger(__name__) class SummaryNodeService(LLMServiceMixin): - """总结节点服务类""" + """ + Summary node service class + + Handles summary generation operations using LLM services. Inherits from + LLMServiceMixin to provide structured LLM calling capabilities for + generating summaries from retrieved information. 
+ + Attributes: + template_service: Service for rendering Jinja2 templates + """ def __init__(self): super().__init__() self.template_service = TemplateService(template_root) -# 创建全局服务实例 +# Create global service instance summary_service = SummaryNodeService() async def rag_config(state): + """ + Configure RAG (Retrieval-Augmented Generation) settings for summary operations + + Creates configuration for knowledge base retrieval including similarity thresholds, + weights, and reranker settings specifically for summary generation. + + Args: + state: Current state containing user_rag_memory_id + + Returns: + dict: RAG configuration dictionary with knowledge base settings + """ user_rag_memory_id = state.get('user_rag_memory_id', '') kb_config = { "knowledge_bases": [ @@ -54,6 +75,23 @@ async def rag_config(state): async def rag_knowledge(state, question): + """ + Retrieve knowledge using RAG approach for summary generation + + Performs knowledge retrieval from configured knowledge bases using the + provided question and returns formatted results for summary processing. + + Args: + state: Current state containing configuration + question: Question to search for in knowledge base + + Returns: + tuple: (retrieval_knowledge, clean_content, cleaned_query, raw_results) + - retrieval_knowledge: List of retrieved knowledge chunks + - clean_content: Formatted content string + - cleaned_query: Processed query string + - raw_results: Raw retrieval results + """ kb_config = await rag_config(state) end_user_id = state.get('end_user_id', '') user_rag_memory_id = state.get("user_rag_memory_id", '') @@ -74,6 +112,18 @@ async def rag_knowledge(state, question): async def summary_history(state: ReadState) -> ReadState: + """ + Retrieve conversation history for summary context + + Gets the conversation history for the current user to provide context + for summary generation operations. 
+ + Args: + state: ReadState containing end_user_id + + Returns: + ReadState: Conversation history data + """ end_user_id = state.get("end_user_id", '') history = await SessionService(store).get_history(end_user_id, end_user_id, end_user_id) return history @@ -82,11 +132,26 @@ async def summary_history(state: ReadState) -> ReadState: async def summary_llm(state: ReadState, history, retrieve_info, template_name, operation_name, response_model, search_mode) -> str: """ - 增强的summary_llm函数,包含更好的错误处理和数据验证 + Enhanced summary_llm function with better error handling and data validation + + Generates summaries using LLM with structured output. Includes fallback mechanisms + for handling LLM failures and provides robust error recovery. + + Args: + state: ReadState containing current context + history: Conversation history for context + retrieve_info: Retrieved information to summarize + template_name: Jinja2 template name for prompt generation + operation_name: Type of operation (summary, input_summary, retrieve_summary) + response_model: Pydantic model for structured output + search_mode: Search mode flag ("0" for simple, "1" for complex) + + Returns: + str: Generated summary text or fallback message """ data = state.get("data", '') - # 构建系统提示词 + # Build system prompt if str(search_mode) == "0": system_prompt = await summary_service.template_service.render_template( template_name=template_name, @@ -103,7 +168,7 @@ async def summary_llm(state: ReadState, history, retrieve_info, template_name, o retrieve_info=retrieve_info ) try: - # 使用优化的LLM服务进行结构化输出 + # Use optimized LLM service for structured output with get_db_context() as db_session: structured = await summary_service.call_llm_structured( state=state, @@ -112,23 +177,23 @@ async def summary_llm(state: ReadState, history, retrieve_info, template_name, o response_model=response_model, fallback_value=None ) - # 验证结构化响应 + # Validate structured response if structured is None: logger.warning("LLM返回None,使用默认回答") return 
"信息不足,无法回答" - # 根据操作类型提取答案 + # Extract answer based on operation type if operation_name == "summary": aimessages = getattr(structured, 'query_answer', None) or "信息不足,无法回答" else: - # 处理RetrieveSummaryResponse + # Handle RetrieveSummaryResponse if hasattr(structured, 'data') and structured.data: aimessages = getattr(structured.data, 'query_answer', None) or "信息不足,无法回答" else: logger.warning("结构化响应缺少data字段") aimessages = "信息不足,无法回答" - # 验证答案不为空 + # Validate answer is not empty if not aimessages or aimessages.strip() == "": aimessages = "信息不足,无法回答" @@ -137,7 +202,7 @@ async def summary_llm(state: ReadState, history, retrieve_info, template_name, o except Exception as e: logger.error(f"结构化输出失败: {e}", exc_info=True) - # 尝试非结构化输出作为fallback + # Try unstructured output as fallback try: logger.info("尝试非结构化输出作为fallback") response = await summary_service.call_llm_simple( @@ -148,9 +213,9 @@ async def summary_llm(state: ReadState, history, retrieve_info, template_name, o ) if response and response.strip(): - # 简单清理响应 + # Simple response cleaning cleaned_response = response.strip() - # 移除可能的JSON标记 + # Remove possible JSON markers if cleaned_response.startswith('```'): lines = cleaned_response.split('\n') cleaned_response = '\n'.join(lines[1:-1]) @@ -165,6 +230,19 @@ async def summary_llm(state: ReadState, history, retrieve_info, template_name, o async def summary_redis_save(state: ReadState, aimessages) -> ReadState: + """ + Save summary results to Redis session storage + + Stores the generated summary and user query in Redis for session management + and conversation history tracking. 
+ + Args: + state: ReadState containing user and query information + aimessages: Generated summary message to save + + Returns: + ReadState: Updated state after saving to Redis + """ data = state.get("data", '') end_user_id = state.get("end_user_id", '') await SessionService(store).save_session( @@ -179,6 +257,20 @@ async def summary_redis_save(state: ReadState, aimessages) -> ReadState: async def summary_prompt(state: ReadState, aimessages, raw_results) -> ReadState: + """ + Format summary results for different output types + + Creates structured output formats for both input summary and retrieval summary + operations, including metadata and intermediate results for frontend display. + + Args: + state: ReadState containing storage and user information + aimessages: Generated summary message + raw_results: Raw search/retrieval results + + Returns: + tuple: (input_summary, retrieve_summary) formatted result dictionaries + """ storage_type = state.get("storage_type", '') user_rag_memory_id = state.get("user_rag_memory_id", '') data = state.get("data", '') @@ -217,6 +309,19 @@ async def summary_prompt(state: ReadState, aimessages, raw_results) -> ReadState async def Input_Summary(state: ReadState) -> ReadState: + """ + Generate quick input summary from retrieved information + + Performs fast retrieval and generates a quick summary response for user queries. + This function prioritizes speed by only searching summary nodes and provides + immediate feedback to users. 
+ + Args: + state: ReadState containing user query, storage configuration, and context + + Returns: + ReadState: Dictionary containing summary results with status and metadata + """ start = time.time() storage_type = state.get("storage_type", '') memory_config = state.get('memory_config', None) @@ -229,13 +334,22 @@ async def Input_Summary(state: ReadState) -> ReadState: "end_user_id": end_user_id, "question": data, "return_raw_results": True, - "include": ["summaries"] # Only search summary nodes for faster performance + "include": ["summaries", "communities"] # MemorySummary 和 Community 同为高维度概括节点 } try: if storage_type != "rag": - retrieve_info, question, raw_results = await SearchService().execute_hybrid_search(**search_params, - memory_config=memory_config) + retrieve_info, question, raw_results = await SearchService().execute_hybrid_search( + **search_params, + memory_config=memory_config, + expand_communities=False, # 路径 "2" 只需要 community 的 summary 文本,不展开到 Statement + ) + # 调试:打印 community 检索结果数量 + if raw_results and isinstance(raw_results, dict): + reranked = raw_results.get('reranked_results', {}) + community_hits = reranked.get('communities', []) + logger.debug(f"[Input_Summary] community 命中数: {len(community_hits)}, " + f"summary 命中数: {len(reranked.get('summaries', []))}") else: retrieval_knowledge, retrieve_info, question, raw_results = await rag_knowledge(state, data) except Exception as e: @@ -266,6 +380,19 @@ async def Input_Summary(state: ReadState) -> ReadState: async def Retrieve_Summary(state: ReadState) -> ReadState: + """ + Generate comprehensive summary from retrieved expansion issues + + Processes retrieved expansion issues and generates a detailed summary using LLM. + This function handles complex retrieval results and provides comprehensive answers + based on expanded query results. 
+ + Args: + state: ReadState containing retrieve data with expansion issues + + Returns: + ReadState: Dictionary containing comprehensive summary results + """ retrieve = state.get("retrieve", '') history = await summary_history(state) import json @@ -299,13 +426,26 @@ async def Retrieve_Summary(state: ReadState) -> ReadState: duration = 0.0 log_time('Retrieval summary', duration) - # 修复协程调用 - 先await,然后访问返回值 + # Fixed coroutine call - await first, then access return value summary_result = await summary_prompt(state, aimessages, retrieve_info_str) summary = summary_result[1] return {"summary": summary} async def Summary(state: ReadState) -> ReadState: + """ + Generate final comprehensive summary from verified data + + Creates the final summary using verified expansion issues and conversation history. + This function processes verified data to generate the most comprehensive and + accurate response to user queries. + + Args: + state: ReadState containing verified data and query information + + Returns: + ReadState: Dictionary containing final summary results + """ start = time.time() query = state.get("data", '') verify = state.get("verify", '') @@ -336,13 +476,26 @@ async def Summary(state: ReadState) -> ReadState: duration = 0.0 log_time('Retrieval summary', duration) - # 修复协程调用 - 先await,然后访问返回值 + # Fixed coroutine call - await first, then access return value summary_result = await summary_prompt(state, aimessages, retrieve_info_str) summary = summary_result[1] return {"summary": summary} async def Summary_fails(state: ReadState) -> ReadState: + """ + Generate fallback summary when normal summary process fails + + Provides a fallback summary generation mechanism when the standard summary + process encounters errors or fails to produce satisfactory results. Uses + a specialized failure template to handle edge cases. 
+ + Args: + state: ReadState containing verified data and failure context + + Returns: + ReadState: Dictionary containing fallback summary results + """ storage_type = state.get("storage_type", '') user_rag_memory_id = state.get("user_rag_memory_id", '') history = await summary_history(state) diff --git a/api/app/core/memory/agent/langgraph_graph/nodes/verification_nodes.py b/api/app/core/memory/agent/langgraph_graph/nodes/verification_nodes.py index 3f7b491e..3a04b411 100644 --- a/api/app/core/memory/agent/langgraph_graph/nodes/verification_nodes.py +++ b/api/app/core/memory/agent/langgraph_graph/nodes/verification_nodes.py @@ -18,24 +18,46 @@ logger = get_agent_logger(__name__) class VerificationNodeService(LLMServiceMixin): - """验证节点服务类""" + """ + Verification node service class + + Handles data verification operations using LLM services. Inherits from + LLMServiceMixin to provide structured LLM calling capabilities for + verifying and validating retrieved information. + + Attributes: + template_service: Service for rendering Jinja2 templates + """ def __init__(self): super().__init__() self.template_service = TemplateService(template_root) -# 创建全局服务实例 +# Create global service instance verification_service = VerificationNodeService() async def Verify_prompt(state: ReadState, messages_deal: VerificationResult): - """处理验证结果并生成输出格式""" + """ + Process verification results and generate output format + + Transforms VerificationResult objects into structured output format suitable + for frontend consumption. Handles conversion of VerificationItem objects to + dictionary format and adds metadata for tracking. 
+ + Args: + state: ReadState containing storage and user configuration + messages_deal: VerificationResult containing verification outcomes + + Returns: + dict: Formatted verification result with status and metadata + """ storage_type = state.get('storage_type', '') user_rag_memory_id = state.get('user_rag_memory_id', '') data = state.get('data', '') - # 将 VerificationItem 对象转换为字典列表 + # Convert VerificationItem objects to dictionary list verified_data = [] if messages_deal.expansion_issue: for item in messages_deal.expansion_issue: @@ -89,7 +111,7 @@ async def Verify(state: ReadState): logger.info("Verify: 开始渲染模板") - # 生成 JSON schema 以指导 LLM 输出正确格式 + # Generate JSON schema to guide LLM output format json_schema = VerificationResult.model_json_schema() system_prompt = await verification_service.template_service.render_template( @@ -104,8 +126,8 @@ async def Verify(state: ReadState): # 使用优化的LLM服务,添加超时保护 logger.info("Verify: 开始调用 LLM") try: - # 添加 asyncio.wait_for 超时包裹,防止无限等待 - # 超时时间设置为 150 秒(比 LLM 配置的 120 秒稍长) + # Add asyncio.wait_for timeout wrapper to prevent infinite waiting + # Timeout set to 150 seconds (slightly longer than LLM config's 120 seconds) with get_db_context() as db_session: structured = await asyncio.wait_for( @@ -122,7 +144,7 @@ async def Verify(state: ReadState): "reason": "验证失败或超时" } ), - timeout=150.0 # 150秒超时 + timeout=150.0 # 150 second timeout ) logger.info(f"Verify: LLM 调用完成,result={structured}") except asyncio.TimeoutError: diff --git a/api/app/core/memory/agent/langgraph_graph/read_graph.py b/api/app/core/memory/agent/langgraph_graph/read_graph.py index cba1b230..bddae618 100644 --- a/api/app/core/memory/agent/langgraph_graph/read_graph.py +++ b/api/app/core/memory/agent/langgraph_graph/read_graph.py @@ -33,7 +33,19 @@ from app.core.memory.agent.langgraph_graph.routing.routers import ( @asynccontextmanager async def make_read_graph(): - """创建并返回 LangGraph 工作流""" + """ + Create and return a LangGraph workflow for memory reading operations 
+ + Builds a state graph workflow that handles memory retrieval, problem analysis, + verification, and summarization. The workflow includes nodes for content input, + problem splitting, retrieval, verification, and various summary operations. + + Yields: + StateGraph: Compiled LangGraph workflow for memory reading + + Raises: + Exception: If workflow creation fails + """ try: # Build workflow graph workflow = StateGraph(ReadState) @@ -48,7 +60,7 @@ async def make_read_graph(): workflow.add_node("Summary", Summary) workflow.add_node("Summary_fails", Summary_fails) - # 添加边 + # Add edges to define workflow flow workflow.add_edge(START, "content_input") workflow.add_conditional_edges("content_input", Split_continue) workflow.add_edge("Input_Summary", END) @@ -63,7 +75,7 @@ async def make_read_graph(): '''-----''' # workflow.add_edge("Retrieve", END) - # 编译工作流 + # Compile workflow graph = workflow.compile() yield graph @@ -72,108 +84,3 @@ async def make_read_graph(): raise finally: print("工作流创建完成") - - -async def main(): - """主函数 - 运行工作流""" - message = "昨天有什么好看的电影" - end_user_id = '88a459f5_text09' # 组ID - storage_type = 'neo4j' # 存储类型 - search_switch = '1' # 搜索开关 - user_rag_memory_id = 'wwwwwwww' # 用户RAG记忆ID - - # 获取数据库会话 - db_session = next(get_db()) - config_service = MemoryConfigService(db_session) - memory_config = config_service.load_memory_config( - config_id=17, # 改为整数 - service_name="MemoryAgentService" - ) - import time - start = time.time() - try: - async with make_read_graph() as graph: - config = {"configurable": {"thread_id": end_user_id}} - # 初始状态 - 包含所有必要字段 - initial_state = {"messages": [HumanMessage(content=message)], "search_switch": search_switch, - "end_user_id": end_user_id - , "storage_type": storage_type, "user_rag_memory_id": user_rag_memory_id, - "memory_config": memory_config} - # 获取节点更新信息 - _intermediate_outputs = [] - summary = '' - - async for update_event in graph.astream( - initial_state, - stream_mode="updates", - config=config - ): - 
for node_name, node_data in update_event.items(): - print(f"处理节点: {node_name}") - - # 处理不同Summary节点的返回结构 - if 'Summary' in node_name: - if 'InputSummary' in node_data and 'summary_result' in node_data['InputSummary']: - summary = node_data['InputSummary']['summary_result'] - elif 'RetrieveSummary' in node_data and 'summary_result' in node_data['RetrieveSummary']: - summary = node_data['RetrieveSummary']['summary_result'] - elif 'summary' in node_data and 'summary_result' in node_data['summary']: - summary = node_data['summary']['summary_result'] - elif 'SummaryFails' in node_data and 'summary_result' in node_data['SummaryFails']: - summary = node_data['SummaryFails']['summary_result'] - - spit_data = node_data.get('spit_data', {}).get('_intermediate', None) - if spit_data and spit_data != [] and spit_data != {}: - _intermediate_outputs.append(spit_data) - - # Problem_Extension 节点 - problem_extension = node_data.get('problem_extension', {}).get('_intermediate', None) - if problem_extension and problem_extension != [] and problem_extension != {}: - _intermediate_outputs.append(problem_extension) - - # Retrieve 节点 - retrieve_node = node_data.get('retrieve', {}).get('_intermediate_outputs', None) - if retrieve_node and retrieve_node != [] and retrieve_node != {}: - _intermediate_outputs.extend(retrieve_node) - - # Verify 节点 - verify_n = node_data.get('verify', {}).get('_intermediate', None) - if verify_n and verify_n != [] and verify_n != {}: - _intermediate_outputs.append(verify_n) - - # Summary 节点 - summary_n = node_data.get('summary', {}).get('_intermediate', None) - if summary_n and summary_n != [] and summary_n != {}: - _intermediate_outputs.append(summary_n) - - # # 过滤掉空值 - # _intermediate_outputs = [item for item in _intermediate_outputs if item and item != [] and item != {}] - # - # # 优化搜索结果 - # print("=== 开始优化搜索结果 ===") - # optimized_outputs = merge_multiple_search_results(_intermediate_outputs) - # result=reorder_output_results(optimized_outputs) - # # 
保存优化后的结果到文件 - # with open('_intermediate_outputs_optimized.json', 'w', encoding='utf-8') as f: - # import json - # f.write(json.dumps(result, indent=4, ensure_ascii=False)) - # - print(f"=== 最终摘要 ===") - print(summary) - - except Exception as e: - import traceback - traceback.print_exc() - finally: - db_session.close() - - end = time.time() - print(100 * 'y') - print(f"总耗时: {end - start}s") - print(100 * 'y') - - -if __name__ == "__main__": - import asyncio - - asyncio.run(main()) diff --git a/api/app/core/memory/agent/langgraph_graph/routing/routers.py b/api/app/core/memory/agent/langgraph_graph/routing/routers.py index 004e03b3..d6ca3333 100644 --- a/api/app/core/memory/agent/langgraph_graph/routing/routers.py +++ b/api/app/core/memory/agent/langgraph_graph/routing/routers.py @@ -1,13 +1,13 @@ - from typing import Literal from app.core.logging_config import get_agent_logger from app.core.memory.agent.utils.llm_tools import ReadState, COUNTState - logger = get_agent_logger(__name__) counter = COUNTState(limit=3) -def Split_continue(state:ReadState) -> Literal["Split_The_Problem", "Input_Summary"]: + + +def Split_continue(state: ReadState) -> Literal["Split_The_Problem", "Input_Summary"]: """ Determine routing based on search_switch value. @@ -25,6 +25,7 @@ def Split_continue(state:ReadState) -> Literal["Split_The_Problem", "Input_Summa return 'Input_Summary' return 'Split_The_Problem' # 默认情况 + def Retrieve_continue(state) -> Literal["Verify", "Retrieve_Summary"]: """ Determine routing based on search_switch value. 
@@ -43,8 +44,10 @@ def Retrieve_continue(state) -> Literal["Verify", "Retrieve_Summary"]: elif search_switch == '1': return 'Retrieve_Summary' return 'Retrieve_Summary' # Default based on business logic + + def Verify_continue(state: ReadState) -> Literal["Summary", "Summary_fails", "content_input"]: - status=state.get('verify', '')['status'] + status = state.get('verify', '')['status'] # loop_count = counter.get_total() if "success" in status: # counter.reset() @@ -53,7 +56,7 @@ def Verify_continue(state: ReadState) -> Literal["Summary", "Summary_fails", "co # if loop_count < 2: # Maximum loop count is 3 # return "content_input" # else: - # counter.reset() + # counter.reset() return "Summary_fails" else: # Add default return value to avoid returning None diff --git a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py index 895f61ac..6176caf5 100644 --- a/api/app/core/memory/agent/langgraph_graph/routing/write_router.py +++ b/api/app/core/memory/agent/langgraph_graph/routing/write_router.py @@ -2,77 +2,104 @@ import json import os from app.core.logging_config import get_agent_logger -from app.core.memory.agent.langgraph_graph.tools.write_tool import format_parsing, messages_parse -from app.core.memory.agent.langgraph_graph.write_graph import make_write_graph, long_term_storage - +from app.core.memory.agent.langgraph_graph.tools.write_tool import format_parsing, messages_parse from app.core.memory.agent.models.write_aggregate_model import WriteAggregateModel from app.core.memory.agent.utils.llm_tools import PROJECT_ROOT_ -from app.core.memory.agent.utils.redis_tool import write_store from app.core.memory.agent.utils.redis_tool import count_store +from app.core.memory.agent.utils.redis_tool import write_store from app.core.memory.agent.utils.template_tools import TemplateService from app.core.memory.utils.llm.llm_utils import MemoryClientFactory -from app.db import get_db_context, 
get_db +from app.db import get_db_context from app.repositories.memory_short_repository import LongTermMemoryRepository from app.schemas.memory_agent_schema import AgentMemory_Long_Term from app.services.memory_konwledges_server import write_rag from app.services.task_service import get_task_memory_write_result from app.tasks import write_message_task from app.utils.config_utils import resolve_config_id + logger = get_agent_logger(__name__) template_root = os.path.join(PROJECT_ROOT_, 'memory', 'agent', 'utils', 'prompt') + async def write_rag_agent(end_user_id, user_message, ai_message, user_rag_memory_id): - # RAG 模式:组合消息为字符串格式(保持原有逻辑) + """ + Write messages to RAG storage system + + Combines user and AI messages into a single string format and stores them + in the RAG (Retrieval-Augmented Generation) knowledge base for future retrieval. + + Args: + end_user_id: User identifier for the conversation + user_message: User's input message content + ai_message: AI's response message content + user_rag_memory_id: RAG memory identifier for storage location + """ + # RAG mode: combine messages into string format (maintain original logic) combined_message = f"user: {user_message}\nassistant: {ai_message}" await write_rag(end_user_id, combined_message, user_rag_memory_id) logger.info(f'RAG_Agent:{end_user_id};{user_rag_memory_id}') -async def write(storage_type, end_user_id, user_message, ai_message, user_rag_memory_id, actual_end_user_id, - actual_config_id, long_term_messages=[]): + + +async def write( + storage_type, + end_user_id, + user_message, + ai_message, + user_rag_memory_id, + actual_end_user_id, + actual_config_id, + long_term_messages=None +): """ - 写入记忆(支持结构化消息) + Write memory with structured message support + + Handles memory writing operations for different storage types (Neo4j/RAG). + Supports both individual message pairs and batch long-term message processing. 
Args: - storage_type: 存储类型 (neo4j/rag) - end_user_id: 终端用户ID - user_message: 用户消息内容 - ai_message: AI 回复内容 - user_rag_memory_id: RAG 记忆ID - actual_end_user_id: 实际用户ID - actual_config_id: 配置ID + storage_type: Storage type identifier ("neo4j" or "rag") + end_user_id: Terminal user identifier + user_message: User message content + ai_message: AI response content + user_rag_memory_id: RAG memory identifier + actual_end_user_id: Actual user identifier for storage + actual_config_id: Configuration identifier + long_term_messages: Optional list of structured messages for batch processing - 逻辑说明: - - RAG 模式:组合 user_message 和 ai_message 为字符串格式,保持原有逻辑不变 - - Neo4j 模式:使用结构化消息列表 - 1. 如果 user_message 和 ai_message 都不为空:创建配对消息 [user, assistant] - 2. 如果只有 user_message:创建单条用户消息 [user](用于历史记忆场景) - 3. 每条消息会被转换为独立的 Chunk,保留 speaker 字段 + Logic explanation: + - RAG mode: Combines user_message and ai_message into string format, maintains original logic + - Neo4j mode: Uses structured message lists + 1. If both user_message and ai_message are not empty: Creates paired messages [user, assistant] + 2. If only user_message exists: Creates single user message [user] (for historical memory scenarios) + 3. 
Each message is converted to independent Chunk, preserving speaker field """ - db = next(get_db()) - try: + if long_term_messages is None: + long_term_messages = [] + with get_db_context() as db: actual_config_id = resolve_config_id(actual_config_id, db) - # Neo4j 模式:使用结构化消息列表 + # Neo4j mode: Use structured message lists structured_messages = [] - # 始终添加用户消息(如果不为空) + # Always add user message (if not empty) if isinstance(user_message, str) and user_message.strip() != "": structured_messages.append({"role": "user", "content": user_message}) - # 只有当 AI 回复不为空时才添加 assistant 消息 + # Only add assistant message when AI reply is not empty if isinstance(ai_message, str) and ai_message.strip() != "": structured_messages.append({"role": "assistant", "content": ai_message}) - # 如果提供了 long_term_messages,使用它替代 structured_messages + # If long_term_messages provided, use it to replace structured_messages if long_term_messages and isinstance(long_term_messages, list): structured_messages = long_term_messages elif long_term_messages and isinstance(long_term_messages, str): - # 如果是 JSON 字符串,先解析 + # If it's a JSON string, parse it first try: structured_messages = json.loads(long_term_messages) except json.JSONDecodeError: logger.error(f"Failed to parse long_term_messages as JSON: {long_term_messages}") - # 如果没有消息,直接返回 + # If no messages, return directly if not structured_messages: logger.warning(f"No messages to write for user {actual_end_user_id}") return @@ -80,29 +107,41 @@ async def write(storage_type, end_user_id, user_message, ai_message, user_rag_me logger.info( f"[WRITE] Submitting Celery task - user={actual_end_user_id}, messages={len(structured_messages)}, config={actual_config_id}") write_id = write_message_task.delay( - actual_end_user_id, # end_user_id: 用户ID - structured_messages, # message: JSON 字符串格式的消息列表 - str(actual_config_id), # config_id: 配置ID字符串 + actual_end_user_id, # end_user_id: User ID + structured_messages, # message: JSON string format message list + 
str(actual_config_id), # config_id: Configuration ID string storage_type, # storage_type: "neo4j" - user_rag_memory_id or "" # user_rag_memory_id: RAG记忆ID(Neo4j模式下不使用) + user_rag_memory_id or "" # user_rag_memory_id: RAG memory ID (not used in Neo4j mode) ) logger.info(f"[WRITE] Celery task submitted - task_id={write_id}") write_status = get_task_memory_write_result(str(write_id)) logger.info(f'[WRITE] Task result - user={actual_end_user_id}, status={write_status}') - finally: - db.close() -async def term_memory_save(long_term_messages,actual_config_id,end_user_id,type,scope): + +async def term_memory_save(long_term_messages, actual_config_id, end_user_id, type, scope): + """ + Save long-term memory data to database + + Handles the storage of long-term memory data based on different strategies + (chunk-based or aggregate-based) and manages the transition from short-term + to long-term memory storage. + + Args: + long_term_messages: Long-term message data to be saved + actual_config_id: Configuration identifier for memory settings + end_user_id: User identifier for memory association + type: Memory storage strategy type (STRATEGY_CHUNK or STRATEGY_AGGREGATE) + scope: Scope/window size for memory processing + """ with get_db_context() as db_session: repo = LongTermMemoryRepository(db_session) - from app.core.memory.agent.utils.redis_tool import write_store result = write_store.get_session_by_userid(end_user_id) - if type==AgentMemory_Long_Term.STRATEGY_CHUNK or AgentMemory_Long_Term.STRATEGY_AGGREGATE: + if type == AgentMemory_Long_Term.STRATEGY_CHUNK or AgentMemory_Long_Term.STRATEGY_AGGREGATE: data = await format_parsing(result, "dict") chunk_data = data[:scope] - if len(chunk_data)==scope: + if len(chunk_data) == scope: repo.upsert(end_user_id, chunk_data) logger.info(f'---------写入短长期-----------') else: @@ -112,18 +151,23 @@ async def term_memory_save(long_term_messages,actual_config_id,end_user_id,type, logger.info(f'写入短长期:') +"""Window-based dialogue 
processing""" -'''根据窗口''' -async def window_dialogue(end_user_id,langchain_messages,memory_config,scope): - ''' - 根据窗口获取redis数据,写入neo4j: - Args: - end_user_id: 终端用户ID - memory_config: 内存配置对象 - langchain_messages:原始数据LIST - scope:窗口大小 - ''' - scope=scope + +async def window_dialogue(end_user_id, langchain_messages, memory_config, scope): + """ + Process dialogue based on window size and write to Neo4j + + Manages conversation data based on a sliding window approach. When the window + reaches the specified scope size, it triggers long-term memory storage to Neo4j. + + Args: + end_user_id: Terminal user identifier + memory_config: Memory configuration object containing settings + langchain_messages: Original message data list + scope: Window size determining when to trigger long-term storage + """ + scope = scope is_end_user_id = count_store.get_sessions_count(end_user_id) if is_end_user_id is not False: is_end_user_id = count_store.get_sessions_count(end_user_id)[0] @@ -135,50 +179,72 @@ async def window_dialogue(end_user_id,langchain_messages,memory_config,scope): elif int(is_end_user_id) == int(scope): logger.info('写入长期记忆NEO4J') formatted_messages = (redis_messages) - # 获取 config_id(如果 memory_config 是对象,提取 config_id;否则直接使用) + # Get config_id (if memory_config is an object, extract config_id; otherwise use directly) if hasattr(memory_config, 'config_id'): config_id = memory_config.config_id else: config_id = memory_config - - await write(AgentMemory_Long_Term.STORAGE_NEO4J, end_user_id, "", "", None, end_user_id, - config_id, formatted_messages) + + await write( + AgentMemory_Long_Term.STORAGE_NEO4J, + end_user_id, + "", + "", + None, + end_user_id, + config_id, + formatted_messages + ) count_store.update_sessions_count(end_user_id, 1, langchain_messages) else: count_store.save_sessions_count(end_user_id, 1, langchain_messages) -"""根据时间""" -async def memory_long_term_storage(end_user_id,memory_config,time): - ''' - 根据时间获取redis数据,写入neo4j: - Args: - end_user_id: 
终端用户ID - memory_config: 内存配置对象 - ''' +"""Time-based memory processing""" + + +async def memory_long_term_storage(end_user_id, memory_config, time): + """ + Process memory storage based on time intervals and write to Neo4j + + Retrieves Redis data based on time intervals and writes it to Neo4j for + long-term storage. This function handles time-based memory consolidation. + + Args: + end_user_id: Terminal user identifier + memory_config: Memory configuration object containing settings + time: Time interval for data retrieval + """ long_time_data = write_store.find_user_recent_sessions(end_user_id, time) - format_messages = (long_time_data) - messages=[] - memory_config=memory_config.config_id + format_messages = long_time_data + messages = [] + memory_config = memory_config.config_id for i in format_messages: - message=json.loads(i['Query']) - messages+= message - if format_messages!=[]: + message = json.loads(i['Query']) + messages += message + if format_messages: await write(AgentMemory_Long_Term.STORAGE_NEO4J, end_user_id, "", "", None, end_user_id, memory_config, messages) -'''聚合判断''' + + async def aggregate_judgment(end_user_id: str, ori_messages: list, memory_config) -> dict: """ - 聚合判断函数:判断输入句子和历史消息是否描述同一事件 + Aggregation judgment function: determine if input sentence and historical messages describe the same event + + Uses LLM-based analysis to determine whether new messages should be aggregated with existing + historical data or stored as separate events. This helps optimize memory storage and retrieval. 
Args: - end_user_id: 终端用户ID - ori_messages: 原始消息列表,格式如 [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}] - memory_config: 内存配置对象 - """ + end_user_id: Terminal user identifier + ori_messages: Original message list, format like [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}] + memory_config: Memory configuration object containing LLM settings + Returns: + dict: Aggregation judgment result containing is_same_event flag and processed output + """ + history = None try: - # 1. 获取历史会话数据(使用新方法) + # 1. Get historical session data (using new method) result = write_store.get_all_sessions_by_end_user_id(end_user_id) history = await format_parsing(result) if not result: @@ -210,7 +276,7 @@ async def aggregate_judgment(end_user_id: str, ori_messages: list, memory_config output_value = structured.output if isinstance(output_value, list): output_value = [ - {"role": msg.role, "content": msg.content} + {"role": msg.role, "content": msg.content} for msg in output_value ] @@ -223,16 +289,16 @@ async def aggregate_judgment(end_user_id: str, ori_messages: list, memory_config await write("neo4j", end_user_id, "", "", None, end_user_id, memory_config.config_id, output_value) return result_dict - + except Exception as e: print(f"[aggregate_judgment] 发生错误: {e}") import traceback traceback.print_exc() - + return { "is_same_event": False, "output": ori_messages, "messages": ori_messages, "history": history if 'history' in locals() else [], "error": str(e) - } \ No newline at end of file + } diff --git a/api/app/core/memory/agent/langgraph_graph/tools/tool.py b/api/app/core/memory/agent/langgraph_graph/tools/tool.py index fcbb18e3..ae2c5772 100644 --- a/api/app/core/memory/agent/langgraph_graph/tools/tool.py +++ b/api/app/core/memory/agent/langgraph_graph/tools/tool.py @@ -2,41 +2,53 @@ import asyncio import json from datetime import datetime, timedelta - from langchain.tools import tool from pydantic import BaseModel, Field - from 
app.core.memory.src.search import ( search_by_temporal, search_by_keyword_temporal, ) + def extract_tool_message_content(response): - """从agent响应中提取ToolMessage内容和工具名称""" + """ + Extract ToolMessage content and tool names from agent response + + Parses agent response messages to extract tool execution results and metadata. + Handles JSON parsing and provides structured access to tool output data. + + Args: + response: Agent response dictionary containing messages + + Returns: + dict: Dictionary containing tool_name and parsed content, or None if no tool message found + - tool_name: Name of the executed tool + - content: Parsed tool execution result (JSON or raw text) + """ messages = response.get('messages', []) for message in messages: if hasattr(message, 'tool_call_id') and hasattr(message, 'content'): - # 这是一个ToolMessage + # This is a ToolMessage tool_content = message.content tool_name = None - # 尝试获取工具名称 + # Try to get tool name if hasattr(message, 'name'): tool_name = message.name elif hasattr(message, 'tool_name'): tool_name = message.tool_name try: - # 解析JSON内容 + # Parse JSON content parsed_content = json.loads(tool_content) return { 'tool_name': tool_name, 'content': parsed_content } except json.JSONDecodeError: - # 如果不是JSON格式,直接返回内容 + # If not JSON format, return content directly return { 'tool_name': tool_name, 'content': tool_content @@ -46,38 +58,61 @@ def extract_tool_message_content(response): class TimeRetrievalInput(BaseModel): - """时间检索工具的输入模式""" + """ + Input schema for time retrieval tool + + Defines the expected input parameters for time-based retrieval operations. + Used for validation and documentation of tool parameters. 
+ + Attributes: + context: User input query content for search + end_user_id: Group ID for filtering search results, defaults to test user + """ context: str = Field(description="用户输入的查询内容") end_user_id: str = Field(default="88a459f5_text09", description="组ID,用于过滤搜索结果") + def create_time_retrieval_tool(end_user_id: str): """ - 创建一个带有特定end_user_id的TimeRetrieval工具(同步版本),用于按时间范围搜索语句(Statements) + Create a TimeRetrieval tool with specific end_user_id (synchronous version) for searching statements by time range + + Creates a specialized time-based retrieval tool that searches for statements within + specified time ranges. Includes field cleaning functionality to remove unnecessary + metadata from search results. + + Args: + end_user_id: User identifier for scoping search results + + Returns: + function: Configured TimeRetrievalWithGroupId tool function """ - + def clean_temporal_result_fields(data): """ - 清理时间搜索结果中不需要的字段,并修改结构 + Clean unnecessary fields from temporal search results and modify structure + + Removes metadata fields that are not needed for end-user consumption and + restructures the response format for better usability. 
Args: - data: 要清理的数据 + data: Data to be cleaned (dict, list, or other types) Returns: - 清理后的数据 + Cleaned data with unnecessary fields removed """ - # 需要过滤的字段列表 + # List of fields to filter out fields_to_remove = { - 'id', 'apply_id', 'user_id', 'chunk_id', 'created_at', + 'id', 'apply_id', 'user_id', 'chunk_id', 'created_at', 'valid_at', 'invalid_at', 'statement_ids' } - + if isinstance(data, dict): cleaned = {} for key, value in data.items(): if key == 'statements' and isinstance(value, dict) and 'statements' in value: - # 将 statements: {"statements": [...]} 改为 time_search: {"statements": [...]} + # Change statements: {"statements": [...]} to time_search: {"statements": [...]} cleaned_value = clean_temporal_result_fields(value) - # 进一步将内部的 statements 改为 time_search + # Further change internal statements to time_search if 'statements' in cleaned_value: cleaned['results'] = { 'time_search': cleaned_value['statements'] @@ -91,26 +126,35 @@ def create_time_retrieval_tool(end_user_id: str): return [clean_temporal_result_fields(item) for item in data] else: return data - + @tool - def TimeRetrievalWithGroupId(context: str, start_date: str = None, end_date: str = None, end_user_id_param: str = None, clean_output: bool = True) -> str: + def TimeRetrievalWithGroupId(context: str, start_date: str = None, end_date: str = None, + end_user_id_param: str = None, clean_output: bool = True) -> str: """ - 优化的时间检索工具,只结合时间范围搜索(同步版本),自动过滤不需要的元数据字段 - 显式接收参数: - - context: 查询上下文内容 - - start_date: 开始时间(可选,格式:YYYY-MM-DD) - - end_date: 结束时间(可选,格式:YYYY-MM-DD) - - end_user_id_param: 组ID(可选,用于覆盖默认组ID) - - clean_output: 是否清理输出中的元数据字段 - -end_date 需要根据用户的描述获取结束的时间,输出格式用strftime("%Y-%m-%d") + Optimized time retrieval tool, combines time range search only (synchronous version), automatically filters unnecessary metadata fields + + Performs time-based search operations with automatic metadata filtering. Supports + flexible date range specification and provides clean, user-friendly output. 
+ + Explicit parameters: + - context: Query context content + - start_date: Start time (optional, format: YYYY-MM-DD) + - end_date: End time (optional, format: YYYY-MM-DD) + - end_user_id_param: Group ID (optional, overrides default group ID) + - clean_output: Whether to clean metadata fields from output + - end_date needs to be obtained based on user description, output format uses strftime("%Y-%m-%d") + + Returns: + str: JSON formatted search results with temporal data """ + async def _async_search(): - # 使用传入的参数或默认值 + # Use passed parameters or default values actual_end_user_id = end_user_id_param or end_user_id actual_end_date = end_date or datetime.now().strftime("%Y-%m-%d") actual_start_date = start_date or (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") - # 基本时间搜索 + # Basic time search results = await search_by_temporal( end_user_id=actual_end_user_id, start_date=actual_start_date, @@ -118,33 +162,43 @@ def create_time_retrieval_tool(end_user_id: str): limit=10 ) - # 清理结果中不需要的字段 + # Clean unnecessary fields from results if clean_output: cleaned_results = clean_temporal_result_fields(results) else: cleaned_results = results return json.dumps(cleaned_results, ensure_ascii=False, indent=2) - + return asyncio.run(_async_search()) @tool - def KeywordTimeRetrieval(context: str, days_back: int = 7, start_date: str = None, end_date: str = None, clean_output: bool = True) -> str: + def KeywordTimeRetrieval(context: str, days_back: int = 7, start_date: str = None, end_date: str = None, + clean_output: bool = True) -> str: """ - 优化的关键词时间检索工具,结合关键词和时间范围搜索(同步版本),自动过滤不需要的元数据字段 - 显式接收参数: - - context: 查询内容 - - days_back: 向前搜索的天数,默认7天 - - start_date: 开始时间(可选,格式:YYYY-MM-DD) - - end_date: 结束时间(可选,格式:YYYY-MM-DD) - - clean_output: 是否清理输出中的元数据字段 - - end_date 需要根据用户的描述获取结束的时间,输出格式用strftime("%Y-%m-%d") + Optimized keyword time retrieval tool, combines keyword and time range search (synchronous version), automatically filters unnecessary metadata fields + + Performs 
combined keyword and temporal search operations with automatic metadata + filtering. Provides more targeted search results by combining content relevance + with time-based filtering. + + Explicit parameters: + - context: Query content for keyword matching + - days_back: Number of days to search backwards, default 7 days + - start_date: Start time (optional, format: YYYY-MM-DD) + - end_date: End time (optional, format: YYYY-MM-DD) + - clean_output: Whether to clean metadata fields from output + - end_date needs to be obtained based on user description, output format uses strftime("%Y-%m-%d") + + Returns: + str: JSON formatted search results combining keyword and temporal data """ + async def _async_search(): actual_end_date = end_date or datetime.now().strftime("%Y-%m-%d") actual_start_date = start_date or (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d") - # 关键词时间搜索 + # Keyword time search results = await search_by_keyword_temporal( query_text=context, end_user_id=end_user_id, @@ -153,7 +207,7 @@ def create_time_retrieval_tool(end_user_id: str): limit=15 ) - # 清理结果中不需要的字段 + # Clean unnecessary fields from results if clean_output: cleaned_results = clean_temporal_result_fields(results) else: @@ -162,51 +216,61 @@ def create_time_retrieval_tool(end_user_id: str): return json.dumps(cleaned_results, ensure_ascii=False, indent=2) return asyncio.run(_async_search()) - + return TimeRetrievalWithGroupId def create_hybrid_retrieval_tool_async(memory_config, **search_params): """ - 创建混合检索工具,使用run_hybrid_search进行混合检索,优化输出格式并过滤不需要的字段 + Create hybrid retrieval tool using run_hybrid_search for hybrid retrieval, optimize output format and filter unnecessary fields + + Creates an advanced hybrid search tool that combines multiple search strategies + (keyword, vector, hybrid) with automatic result cleaning and formatting. 
Args: - memory_config: 内存配置对象 - **search_params: 搜索参数,包含end_user_id, limit, include等 + memory_config: Memory configuration object containing LLM and search settings + **search_params: Search parameters including end_user_id, limit, include, etc. + + Returns: + function: Configured HybridSearch tool function with async capabilities """ - + def clean_result_fields(data): """ - 递归清理结果中不需要的字段 + Recursively clean unnecessary fields from results + + Removes metadata fields that are not needed for end-user consumption, + improving readability and reducing response size. Args: - data: 要清理的数据(可能是字典、列表或其他类型) + data: Data to be cleaned (can be dict, list, or other types) Returns: - 清理后的数据 + Cleaned data with unnecessary fields removed """ - # 需要过滤的字段列表 - # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # List of fields to filter out + # TODO: fact_summary functionality temporarily disabled, will be enabled after future development fields_to_remove = { - 'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids', - 'expired_at', 'created_at', 'chunk_id', 'id', 'apply_id', - 'user_id', 'statement_ids', 'updated_at',"chunk_ids" ,"fact_summary" + 'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids', + 'expired_at', 'created_at', 'chunk_id', 'apply_id', + 'user_id', 'statement_ids', 'updated_at', "chunk_ids", "fact_summary" } - + # 注意:'id' 字段保留,community 展开时需要用 community id 查询成员 statements + if isinstance(data, dict): - # 对字典进行清理 + # Clean dictionary cleaned = {} for key, value in data.items(): if key not in fields_to_remove: - cleaned[key] = clean_result_fields(value) # 递归清理嵌套数据 + cleaned[key] = clean_result_fields(value) # Recursively clean nested data return cleaned elif isinstance(data, list): - # 对列表中的每个元素进行清理 + # Clean each element in list return [clean_result_fields(item) for item in data] else: - # 其他类型直接返回 + # Return other types directly return data - + @tool async def HybridSearch( context: str, @@ -216,57 +280,63 @@ def create_hybrid_retrieval_tool_async(memory_config, 
**search_params): rerank_alpha: float = 0.6, use_forgetting_rerank: bool = False, use_llm_rerank: bool = False, - clean_output: bool = True # 新增:是否清理输出字段 + clean_output: bool = True # New: whether to clean output fields ) -> str: """ - 优化的混合检索工具,支持关键词、向量和混合搜索,自动过滤不需要的元数据字段 + Optimized hybrid retrieval tool, supports keyword, vector and hybrid search, automatically filters unnecessary metadata fields + + Provides comprehensive search capabilities combining multiple search strategies + with intelligent result ranking and automatic metadata filtering for clean output. Args: - context: 查询内容 - search_type: 搜索类型 ('keyword', 'embedding', 'hybrid') - limit: 结果数量限制 - end_user_id: 组ID,用于过滤搜索结果 - rerank_alpha: 重排序权重参数 - use_forgetting_rerank: 是否使用遗忘重排序 - use_llm_rerank: 是否使用LLM重排序 - clean_output: 是否清理输出中的元数据字段 + context: Query content for search + search_type: Search type ('keyword', 'embedding', 'hybrid') + limit: Result quantity limit + end_user_id: Group ID for filtering search results + rerank_alpha: Reranking weight parameter for result scoring + use_forgetting_rerank: Whether to use forgetting-based reranking + use_llm_rerank: Whether to use LLM-based reranking + clean_output: Whether to clean metadata fields from output + + Returns: + str: JSON formatted comprehensive search results """ try: - # 导入run_hybrid_search函数 + # Import run_hybrid_search function from app.core.memory.src.search import run_hybrid_search - # 合并参数,优先使用传入的参数 + # Merge parameters, prioritize passed parameters final_params = { "query_text": context, "search_type": search_type, "end_user_id": end_user_id or search_params.get("end_user_id"), "limit": limit or search_params.get("limit", 10), - "include": search_params.get("include", ["summaries", "statements", "chunks", "entities"]), - "output_path": None, # 不保存到文件 + "include": search_params.get("include", ["summaries", "statements", "chunks", "entities", "communities"]), + "output_path": None, # Don't save to file "memory_config": memory_config, 
"rerank_alpha": rerank_alpha, "use_forgetting_rerank": use_forgetting_rerank, "use_llm_rerank": use_llm_rerank } - # 执行混合检索 + # Execute hybrid retrieval raw_results = await run_hybrid_search(**final_params) - # 清理结果中不需要的字段 + # Clean unnecessary fields from results if clean_output: cleaned_results = clean_result_fields(raw_results) else: cleaned_results = raw_results - # 格式化返回结果 + # Format return results formatted_results = { "search_query": context, "search_type": search_type, "results": cleaned_results } - + return json.dumps(formatted_results, ensure_ascii=False, indent=2, default=str) - + except Exception as e: error_result = { "error": f"混合检索失败: {str(e)}", @@ -275,38 +345,52 @@ def create_hybrid_retrieval_tool_async(memory_config, **search_params): "timestamp": datetime.now().isoformat() } return json.dumps(error_result, ensure_ascii=False, indent=2) - + return HybridSearch def create_hybrid_retrieval_tool_sync(memory_config, **search_params): """ - 创建同步版本的混合检索工具,优化输出格式并过滤不需要的字段 + Create synchronous version of hybrid retrieval tool, optimize output format and filter unnecessary fields + + Creates a synchronous wrapper around the async hybrid search functionality, + making it compatible with synchronous tool execution environments. 
Args: - memory_config: 内存配置对象 - **search_params: 搜索参数 + memory_config: Memory configuration object containing search settings + **search_params: Search parameters for configuration + + Returns: + function: Configured HybridSearchSync tool function """ + @tool def HybridSearchSync( - context: str, - search_type: str = "hybrid", - limit: int = 10, - end_user_id: str = None, - clean_output: bool = True + context: str, + search_type: str = "hybrid", + limit: int = 10, + end_user_id: str = None, + clean_output: bool = True ) -> str: """ - 优化的混合检索工具(同步版本),自动过滤不需要的元数据字段 + Optimized hybrid retrieval tool (synchronous version), automatically filters unnecessary metadata fields + + Provides the same hybrid search capabilities as the async version but in a + synchronous execution context. Automatically handles async-to-sync conversion. Args: - context: 查询内容 - search_type: 搜索类型 ('keyword', 'embedding', 'hybrid') - limit: 结果数量限制 - end_user_id: 组ID,用于过滤搜索结果 - clean_output: 是否清理输出中的元数据字段 + context: Query content for search + search_type: Search type ('keyword', 'embedding', 'hybrid') + limit: Result quantity limit + end_user_id: Group ID for filtering search results + clean_output: Whether to clean metadata fields from output + + Returns: + str: JSON formatted search results """ + async def _async_search(): - # 创建异步工具并执行 + # Create async tool and execute async_tool = create_hybrid_retrieval_tool_async(memory_config, **search_params) return await async_tool.ainvoke({ "context": context, @@ -315,7 +399,7 @@ def create_hybrid_retrieval_tool_sync(memory_config, **search_params): "end_user_id": end_user_id, "clean_output": clean_output }) - + return asyncio.run(_async_search()) - - return HybridSearchSync \ No newline at end of file + + return HybridSearchSync diff --git a/api/app/core/memory/agent/langgraph_graph/tools/write_tool.py b/api/app/core/memory/agent/langgraph_graph/tools/write_tool.py index 9ce581ee..e11a2085 100644 --- 
a/api/app/core/memory/agent/langgraph_graph/tools/write_tool.py +++ b/api/app/core/memory/agent/langgraph_graph/tools/write_tool.py @@ -1,20 +1,28 @@ import json from langchain_core.messages import HumanMessage, AIMessage -async def format_parsing(messages: list,type:str='string'): + + +async def format_parsing(messages: list, type: str = 'string'): """ - 格式化解析消息列表 + Format and parse message lists into different output types + + Processes message lists from storage and converts them into either string format + or dictionary format based on the specified type parameter. Handles JSON parsing + and role-based message organization. Args: - messages: 消息列表 - type: 返回类型 ('string' 或 'dict') + messages: List of message objects from storage containing message data + type: Return type specification ('string' for text format, 'dict' for key-value pairs) Returns: - 格式化后的消息列表 + list: Formatted message list in the specified format + - 'string': List of formatted text messages with role prefixes + - 'dict': List of dictionaries mapping user messages to AI responses """ result = [] - user=[] - ai=[] + user = [] + ai = [] for message in messages: hstory_messages = message['messages'] @@ -24,25 +32,38 @@ async def format_parsing(messages: list,type:str='string'): role = content['role'] content = content['content'] if type == "string": - if role == 'human' or role=="user": + if role == 'human' or role == "user": content = '用户:' + content else: content = 'AI:' + content result.append(content) - if type == "dict" : - if role == 'human' or role=="user": - user.append( content) + if type == "dict": + if role == 'human' or role == "user": + user.append(content) else: ai.append(content) if type == "dict": - for key,values in zip(user,ai): - result.append({key:values}) + for key, values in zip(user, ai): + result.append({key: values}) return result + async def messages_parse(messages: list | dict): - user=[] - ai=[] - database=[] + """ + Parse messages from storage format into user-AI 
conversation pairs + + Extracts and organizes conversation data from stored message format, + separating user and AI messages and pairing them for database storage. + + Args: + messages: List or dictionary containing stored message data with Query fields + + Returns: + list: List of dictionaries containing user-AI message pairs for database storage + """ + user = [] + ai = [] + database = [] for message in messages: Query = message['Query'] Query = json.loads(Query) @@ -54,10 +75,23 @@ async def messages_parse(messages: list | dict): ai.append(data['content']) for key, values in zip(user, ai): database.append({key, values}) - return database + return database -async def agent_chat_messages(user_content,ai_content): +async def agent_chat_messages(user_content, ai_content): + """ + Create structured chat message format for agent conversations + + Formats user and AI content into a standardized message structure suitable + for agent processing and storage. Creates role-based message objects. 
+ + Args: + user_content: User's message content string + ai_content: AI's response content string + + Returns: + list: List of structured message dictionaries with role and content fields + """ messages = [ { "role": "user", diff --git a/api/app/core/memory/agent/langgraph_graph/write_graph.py b/api/app/core/memory/agent/langgraph_graph/write_graph.py index 1134acc7..bf3c6597 100644 --- a/api/app/core/memory/agent/langgraph_graph/write_graph.py +++ b/api/app/core/memory/agent/langgraph_graph/write_graph.py @@ -13,7 +13,6 @@ from app.core.memory.agent.langgraph_graph.nodes.write_nodes import write_node from app.schemas.memory_agent_schema import AgentMemory_Long_Term from app.services.memory_config_service import MemoryConfigService - warnings.filterwarnings("ignore", category=RuntimeWarning) logger = get_agent_logger(__name__) @@ -42,10 +41,26 @@ async def make_write_graph(): yield graph -async def long_term_storage(long_term_type:str="chunk",langchain_messages:list=[],memory_config:str='',end_user_id:str='',scope:int=6): - from app.core.memory.agent.langgraph_graph.routing.write_router import memory_long_term_storage, window_dialogue,aggregate_judgment + +async def long_term_storage(long_term_type: str = "chunk", langchain_messages: list = [], memory_config: str = '', + end_user_id: str = '', scope: int = 6): + """ + Handle long-term memory storage with different strategies + + Supports multiple storage strategies including chunk-based, time-based, + and aggregate judgment approaches for long-term memory persistence. 
+ + Args: + long_term_type: Storage strategy type ('chunk', 'time', 'aggregate') + langchain_messages: List of messages to store + memory_config: Memory configuration identifier + end_user_id: User group identifier + scope: Scope parameter for chunk-based storage (default: 6) + """ + from app.core.memory.agent.langgraph_graph.routing.write_router import memory_long_term_storage, window_dialogue, \ + aggregate_judgment from app.core.memory.agent.utils.redis_tool import write_store - write_store.save_session_write(end_user_id, (langchain_messages)) + write_store.save_session_write(end_user_id, langchain_messages) # 获取数据库会话 with get_db_context() as db_session: config_service = MemoryConfigService(db_session) @@ -53,26 +68,39 @@ async def long_term_storage(long_term_type:str="chunk",langchain_messages:list=[ config_id=memory_config, # 改为整数 service_name="MemoryAgentService" ) - if long_term_type=='chunk': - '''方案一:对话窗口6轮对话''' - await window_dialogue(end_user_id,langchain_messages,memory_config,scope) - if long_term_type=='time': - """时间""" - await memory_long_term_storage(end_user_id, memory_config,5) - if long_term_type=='aggregate': - """方案三:聚合判断""" + if long_term_type == AgentMemory_Long_Term.STRATEGY_CHUNK: + '''Strategy 1: Dialogue window with 6 rounds of conversation''' + await window_dialogue(end_user_id, langchain_messages, memory_config, scope) + if long_term_type == AgentMemory_Long_Term.STRATEGY_TIME: + """Time-based strategy""" + await memory_long_term_storage(end_user_id, memory_config, AgentMemory_Long_Term.TIME_SCOPE) + if long_term_type == AgentMemory_Long_Term.STRATEGY_AGGREGATE: + """Strategy 3: Aggregate judgment""" await aggregate_judgment(end_user_id, langchain_messages, memory_config) +async def write_long_term(storage_type, end_user_id, message_chat, aimessages, user_rag_memory_id, actual_config_id): + """ + Write long-term memory with different storage types -async def 
write_long_term(storage_type,end_user_id,message_chat,aimessages,user_rag_memory_id,actual_config_id): + Handles both RAG-based storage and traditional memory storage approaches. + For traditional storage, uses chunk-based strategy with paired user-AI messages. + + Args: + storage_type: Type of storage (RAG or traditional) + end_user_id: User group identifier + message_chat: User message content + aimessages: AI response messages + user_rag_memory_id: RAG memory identifier + actual_config_id: Actual configuration ID + """ from app.core.memory.agent.langgraph_graph.routing.write_router import write_rag_agent from app.core.memory.agent.langgraph_graph.routing.write_router import term_memory_save - from app.core.memory.agent.langgraph_graph.tools.write_tool import agent_chat_messages + from app.core.memory.agent.langgraph_graph.tools.write_tool import agent_chat_messages if storage_type == AgentMemory_Long_Term.STORAGE_RAG: await write_rag_agent(end_user_id, message_chat, aimessages, user_rag_memory_id) else: - # AI 回复写入(用户消息和 AI 回复配对,一次性写入完整对话) + # AI reply writing (user messages and AI replies paired, written as complete dialogue at once) CHUNK = AgentMemory_Long_Term.STRATEGY_CHUNK SCOPE = AgentMemory_Long_Term.DEFAULT_SCOPE long_term_messages = await agent_chat_messages(message_chat, aimessages) @@ -101,4 +129,4 @@ async def write_long_term(storage_type,end_user_id,message_chat,aimessages,user_ # # if __name__ == "__main__": # import asyncio -# asyncio.run(main()) \ No newline at end of file +# asyncio.run(main()) diff --git a/api/app/core/memory/agent/services/search_service.py b/api/app/core/memory/agent/services/search_service.py index 4fc4256e..90b1c088 100644 --- a/api/app/core/memory/agent/services/search_service.py +++ b/api/app/core/memory/agent/services/search_service.py @@ -13,6 +13,72 @@ from app.core.memory.utils.data.text_utils import escape_lucene_query logger = get_agent_logger(__name__) +# 需要从展开结果中过滤的字段(含 Neo4j DateTime,不可 JSON 序列化) 
+_EXPAND_FIELDS_TO_REMOVE = { + 'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids', + 'expired_at', 'created_at', 'chunk_id', 'apply_id', + 'user_id', 'statement_ids', 'updated_at', 'chunk_ids', 'fact_summary' +} + + +def _clean_expand_fields(obj): + """递归过滤展开结果中不可序列化的字段(DateTime 等)。""" + if isinstance(obj, dict): + return {k: _clean_expand_fields(v) for k, v in obj.items() if k not in _EXPAND_FIELDS_TO_REMOVE} + if isinstance(obj, list): + return [_clean_expand_fields(i) for i in obj] + return obj + + +async def expand_communities_to_statements( + community_results: List[dict], + end_user_id: str, + existing_content: str = "", + limit: int = 10, +) -> Tuple[List[dict], List[str]]: + """ + 社区展开 helper:给定命中的 community 列表,拉取关联 Statement。 + + - 对展开结果去重(过滤已在 existing_content 中出现的文本) + - 过滤不可序列化字段 + - 返回 (cleaned_expanded_stmts, new_texts) + - cleaned_expanded_stmts: 可直接写回 raw_results 的列表 + - new_texts: 去重后新增的 statement 文本列表,用于追加到 clean_content + """ + community_ids = [r.get("id") for r in community_results if r.get("id")] + if not community_ids or not end_user_id: + return [], [] + + from app.repositories.neo4j.graph_search import search_graph_community_expand + from app.repositories.neo4j.neo4j_connector import Neo4jConnector + + connector = Neo4jConnector() + try: + result = await search_graph_community_expand( + connector=connector, + community_ids=community_ids, + end_user_id=end_user_id, + limit=limit, + ) + except Exception as e: + logger.warning(f"[expand_communities] 社区展开检索失败,跳过: {e}") + return [], [] + finally: + await connector.close() + + expanded_stmts = result.get("expanded_statements", []) + if not expanded_stmts: + return [], [] + + existing_lines = set(existing_content.splitlines()) + new_texts = [ + s["statement"] for s in expanded_stmts + if s.get("statement") and s["statement"] not in existing_lines + ] + cleaned = _clean_expand_fields(expanded_stmts) + logger.info(f"[expand_communities] 展开 {len(expanded_stmts)} 条 statements,新增 
{len(new_texts)} 条,community_ids={community_ids}") + return cleaned, new_texts + class SearchService: """Service for executing hybrid search and processing results.""" @@ -21,7 +87,7 @@ class SearchService: """Initialize the search service.""" logger.info("SearchService initialized") - def extract_content_from_result(self, result: dict) -> str: + def extract_content_from_result(self, result: dict, node_type: str = "") -> str: """ Extract only meaningful content from search results, dropping all metadata. @@ -30,9 +96,11 @@ class SearchService: - Entities: extract 'name' and 'fact_summary' fields - Summaries: extract 'content' field - Chunks: extract 'content' field + - Communities: extract 'content' field (c.summary), prefixed with community name Args: result: Search result dictionary + node_type: Hint for node type ("community", "summary", etc.) Returns: Clean content string without metadata @@ -46,8 +114,21 @@ class SearchService: if 'statement' in result and result['statement']: content_parts.append(result['statement']) - # Summaries/Chunks: extract content field - if 'content' in result and result['content']: + # Community 节点:有 member_count 或 core_entities 字段,或 node_type 明确指定 + # 用 "[主题:{name}]" 前缀区分,让 LLM 知道这是主题级摘要 + is_community = ( + node_type == "community" + or 'member_count' in result + or 'core_entities' in result + ) + if is_community: + name = result.get('name', '') + content = result.get('content', '') + if content: + prefix = f"[主题:{name}] " if name else "" + content_parts.append(f"{prefix}{content}") + elif 'content' in result and result['content']: + # Summaries / Chunks content_parts.append(result['content']) # Entities: extract name and fact_summary (commented out in original) @@ -99,7 +180,8 @@ class SearchService: rerank_alpha: float = 0.4, output_path: str = "search_results.json", return_raw_results: bool = False, - memory_config = None + memory_config = None, + expand_communities: bool = True, ) -> Tuple[str, str, Optional[dict]]: """ Execute 
hybrid search and return clean content. @@ -114,13 +196,15 @@ class SearchService: output_path: Path to save search results (default: "search_results.json") return_raw_results: If True, also return the raw search results as third element (default: False) memory_config: Memory configuration object (required) + expand_communities: If True, expand community hits to member statements (default: True). + Set to False for quick-summary paths that only need community-level text. Returns: Tuple of (clean_content, cleaned_query, raw_results) raw_results is None if return_raw_results=False """ if include is None: - include = ["statements", "chunks", "entities", "summaries"] + include = ["statements", "chunks", "entities", "summaries", "communities"] # Clean query cleaned_query = self.clean_query(question) @@ -146,8 +230,8 @@ class SearchService: if search_type == "hybrid": reranked_results = answer.get('reranked_results', {}) - # Priority order: summaries first (most contextual), then statements, chunks, entities - priority_order = ['summaries', 'statements', 'chunks', 'entities'] + # Priority order: summaries first (most contextual), then communities, statements, chunks, entities + priority_order = ['summaries', 'communities', 'statements', 'chunks', 'entities'] for category in priority_order: if category in include and category in reranked_results: @@ -157,19 +241,33 @@ class SearchService: else: # For keyword or embedding search, results are directly in answer dict # Apply same priority order - priority_order = ['summaries', 'statements', 'chunks', 'entities'] + priority_order = ['summaries', 'communities', 'statements', 'chunks', 'entities'] for category in priority_order: if category in include and category in answer: category_results = answer[category] if isinstance(category_results, list): answer_list.extend(category_results) + + # 对命中的 community 节点展开其成员 statements(路径 "0"/"1" 需要,路径 "2" 不需要) + if expand_communities and "communities" in include: + community_results = ( + 
answer.get('reranked_results', {}).get('communities', []) + if search_type == "hybrid" + else answer.get('communities', []) + ) + cleaned_stmts, new_texts = await expand_communities_to_statements( + community_results=community_results, + end_user_id=end_user_id, + ) + answer_list.extend(cleaned_stmts) - # Extract clean content from all results - content_list = [ - self.extract_content_from_result(ans) - for ans in answer_list - ] + # Extract clean content from all results,按类型传入 node_type 区分 community + content_list = [] + for ans in answer_list: + # community 节点有 member_count 或 core_entities 字段 + ntype = "community" if ('member_count' in ans or 'core_entities' in ans) else "" + content_list.append(self.extract_content_from_result(ans, node_type=ntype)) # Filter out empty strings and join with newlines diff --git a/api/app/core/memory/agent/utils/get_dialogs.py b/api/app/core/memory/agent/utils/get_dialogs.py index ea44d0a5..3b06defe 100644 --- a/api/app/core/memory/agent/utils/get_dialogs.py +++ b/api/app/core/memory/agent/utils/get_dialogs.py @@ -84,7 +84,7 @@ async def get_chunked_dialogs( pruning_scene=memory_config.pruning_scene or "education", pruning_threshold=memory_config.pruning_threshold, scene_id=str(memory_config.scene_id) if memory_config.scene_id else None, - ontology_classes=memory_config.ontology_classes, + ontology_class_infos=memory_config.ontology_class_infos, ) logger.info(f"[剪枝] 加载配置: switch={pruning_config.pruning_switch}, scene={pruning_config.pruning_scene}, threshold={pruning_config.pruning_threshold}") diff --git a/api/app/core/memory/agent/utils/llm_tools.py b/api/app/core/memory/agent/utils/llm_tools.py index 1c183422..ea8add48 100644 --- a/api/app/core/memory/agent/utils/llm_tools.py +++ b/api/app/core/memory/agent/utils/llm_tools.py @@ -8,10 +8,11 @@ from langgraph.graph import add_messages PROJECT_ROOT_ = str(Path(__file__).resolve().parents[3]) + class WriteState(TypedDict): - ''' + """ Langgrapg Writing TypedDict - ''' + """ 
messages: Annotated[list[AnyMessage], add_messages] end_user_id: str errors: list[dict] # Track errors: [{"tool": "tool_name", "error": "message"}] @@ -20,6 +21,7 @@ class WriteState(TypedDict): data: str language: str # 语言类型 ("zh" 中文, "en" 英文) + class ReadState(TypedDict): """ LangGraph 工作流状态定义 @@ -43,18 +45,20 @@ class ReadState(TypedDict): config_id: str data: str # 新增字段用于传递内容 spit_data: dict # 新增字段用于传递问题分解结果 - problem_extension:dict + problem_extension: dict storage_type: str user_rag_memory_id: str llm_id: str embedding_id: str memory_config: object # 新增字段用于传递内存配置对象 - retrieve:dict + retrieve: dict RetrieveSummary: dict InputSummary: dict verify: dict SummaryFails: dict summary: dict + + class COUNTState: """ 工作流对话检索内容计数器 @@ -99,6 +103,7 @@ class COUNTState: self.total = 0 print("[COUNTState] 已重置为 0") + def deduplicate_entries(entries): seen = set() deduped = [] @@ -109,6 +114,7 @@ def deduplicate_entries(entries): deduped.append(entry) return deduped + def merge_to_key_value_pairs(data, query_key, result_key): grouped = defaultdict(list) for item in data: @@ -142,4 +148,4 @@ def convert_extended_question_to_question(data): return [convert_extended_question_to_question(item) for item in data] else: # 其他类型直接返回 - return data \ No newline at end of file + return data diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index 22030278..02aa1b44 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -19,7 +19,7 @@ from app.core.memory.utils.log.logging_utils import log_time from app.db import get_db_context from app.repositories.neo4j.add_edges import add_memory_summary_statement_edges from app.repositories.neo4j.add_nodes import add_memory_summary_nodes -from app.repositories.neo4j.graph_saver import save_dialog_and_statements_to_neo4j +from app.repositories.neo4j.graph_saver import save_dialog_and_statements_to_neo4j, 
schedule_clustering_after_write from app.repositories.neo4j.neo4j_connector import Neo4jConnector from app.schemas.memory_config_schema import MemoryConfig @@ -165,10 +165,19 @@ async def write( statement_chunk_edges=all_statement_chunk_edges, statement_entity_edges=all_statement_entity_edges, entity_edges=all_entity_entity_edges, - connector=neo4j_connector + connector=neo4j_connector, + config_id=config_id, + llm_model_id=str(memory_config.llm_model_id) if memory_config.llm_model_id else None, ) if success: logger.info("Successfully saved all data to Neo4j") + # 写入成功后,异步触发聚类(不阻塞写入响应) + schedule_clustering_after_write( + all_entity_nodes, + config_id=config_id, + llm_model_id=str(memory_config.llm_model_id) if memory_config.llm_model_id else None, + embedding_model_id=str(memory_config.embedding_model_id) if memory_config.embedding_model_id else None, + ) break else: logger.warning("Failed to save some data to Neo4j") diff --git a/api/app/core/memory/models/config_models.py b/api/app/core/memory/models/config_models.py index c2d62ac1..5ed50b7f 100644 --- a/api/app/core/memory/models/config_models.py +++ b/api/app/core/memory/models/config_models.py @@ -6,6 +6,7 @@ of the memory system including LLM, chunking, pruning, and search. 
Classes: LLMConfig: Configuration for LLM client ChunkerConfig: Configuration for dialogue chunking + OntologyClassInfo: Single ontology class with name and description PruningConfig: Configuration for semantic pruning TemporalSearchParams: Parameters for temporal search queries """ @@ -50,30 +51,41 @@ class ChunkerConfig(BaseModel): min_characters_per_chunk: Optional[int] = Field(24, ge=0, description="The minimum number of characters in each chunk.") +class OntologyClassInfo(BaseModel): + """本体类型的名称与语义描述,用于剪枝提示词注入。 + + Attributes: + class_name: 本体类型名称(如"患者"、"课程") + class_description: 本体类型语义描述,告知 LLM 该类型在当前场景下的含义 + """ + class_name: str = Field(..., description="本体类型名称") + class_description: str = Field(default="", description="本体类型语义描述") + + class PruningConfig(BaseModel): """Configuration for semantic pruning of dialogue content. Attributes: pruning_switch: Enable or disable semantic pruning - pruning_scene: Scene name for pruning, either a built-in key - ('education', 'online_service', 'outbound') or a custom scene_name - from ontology_scene table + pruning_scene: Scene name for pruning from ontology_scene table pruning_threshold: Pruning ratio (0-0.9, max 0.9 to avoid complete removal) - scene_id: Optional ontology scene UUID, used to load custom ontology classes - ontology_classes: List of class_name strings from ontology_class table, - injected into the prompt when pruning_scene is not a built-in scene + scene_id: Optional ontology scene UUID + ontology_class_infos: Full ontology class info (name + description) from + ontology_class table, injected into the pruning prompt to drive + scene-aware preservation decisions """ pruning_switch: bool = Field(False, description="Enable semantic pruning when True.") pruning_scene: str = Field( "education", - description="Scene for pruning: built-in key or custom scene_name from ontology_scene.", + description="Scene name from ontology_scene table.", ) pruning_threshold: float = Field( 0.5, ge=0.0, le=0.9, 
description="Pruning ratio within 0-0.9 (max 0.9 to avoid termination).") scene_id: Optional[str] = Field(None, description="Ontology scene UUID (optional).") - ontology_classes: Optional[List[str]] = Field( - None, description="Class names from ontology_class table for custom scenes." + ontology_class_infos: List[OntologyClassInfo] = Field( + default_factory=list, + description="Full ontology class info (name + description) injected into pruning prompt." ) diff --git a/api/app/core/memory/src/search.py b/api/app/core/memory/src/search.py index 0e1d8424..e4f0d4d0 100644 --- a/api/app/core/memory/src/search.py +++ b/api/app/core/memory/src/search.py @@ -238,7 +238,7 @@ def rerank_with_activation( reranked: Dict[str, List[Dict[str, Any]]] = {} - for category in ["statements", "chunks", "entities", "summaries"]: + for category in ["statements", "chunks", "entities", "summaries", "communities"]: keyword_items = keyword_results.get(category, []) embedding_items = embedding_results.get(category, []) @@ -281,21 +281,23 @@ def rerank_with_activation( for item in items_list: item_id = item.get("id") or item.get("uuid") or item.get("chunk_id") if item_id and item_id in combined_items: - combined_items[item_id]["normalized_activation_value"] = item.get("normalized_activation_value", 0) + combined_items[item_id]["normalized_activation_value"] = item.get("normalized_activation_value") # 步骤 4: 计算基础分数和最终分数 for item_id, item in combined_items.items(): bm25_norm = float(item.get("bm25_score", 0) or 0) emb_norm = float(item.get("embedding_score", 0) or 0) - act_norm = float(item.get("normalized_activation_value", 0) or 0) + # normalized_activation_value 为 None 表示该节点无激活值,保留 None 语义 + raw_act_norm = item.get("normalized_activation_value") + act_norm = float(raw_act_norm) if raw_act_norm is not None else None # 第一阶段:只考虑内容相关性(BM25 + Embedding) # alpha 控制 BM25 权重,(1-alpha) 控制 Embedding 权重 content_score = alpha * bm25_norm + (1 - alpha) * emb_norm base_score = content_score # 第一阶段用内容分数 - 
# 存储激活度分数供第二阶段使用 - item["activation_score"] = act_norm + # 存储激活度分数供第二阶段使用(None 表示无激活值,不参与激活值排序) + item["activation_score"] = act_norm # 可能为 None item["content_score"] = content_score item["base_score"] = base_score @@ -724,6 +726,8 @@ async def run_hybrid_search( try: keyword_task = None embedding_task = None + keyword_results: Dict[str, List] = {} + embedding_results: Dict[str, List] = {} if search_type in ["keyword", "hybrid"]: # Keyword-based search @@ -746,35 +750,42 @@ async def run_hybrid_search( # 从数据库读取嵌入器配置(按 ID)并构建 RedBearModelConfig config_load_start = time.time() - with get_db_context() as db: - config_service = MemoryConfigService(db) - embedder_config_dict = config_service.get_embedder_config(str(memory_config.embedding_model_id)) - rb_config = RedBearModelConfig( - model_name=embedder_config_dict["model_name"], - provider=embedder_config_dict["provider"], - api_key=embedder_config_dict["api_key"], - base_url=embedder_config_dict["base_url"], - type="llm" - ) - config_load_time = time.time() - config_load_start - logger.info(f"[PERF] Config loading took {config_load_time:.4f}s") - - # Init embedder - embedder_init_start = time.time() - embedder = OpenAIEmbedderClient(model_config=rb_config) - embedder_init_time = time.time() - embedder_init_start - logger.info(f"[PERF] Embedder init took {embedder_init_time:.4f}s") - - embedding_task = asyncio.create_task( - search_graph_by_embedding( - connector=connector, - embedder_client=embedder, - query_text=query_text, - end_user_id=end_user_id, - limit=limit, - include=include, + try: + with get_db_context() as db: + config_service = MemoryConfigService(db) + embedder_config_dict = config_service.get_embedder_config(str(memory_config.embedding_model_id)) + rb_config = RedBearModelConfig( + model_name=embedder_config_dict["model_name"], + provider=embedder_config_dict["provider"], + api_key=embedder_config_dict["api_key"], + base_url=embedder_config_dict["base_url"], + type="llm" ) - ) + config_load_time = 
time.time() - config_load_start + logger.info(f"[PERF] Config loading took {config_load_time:.4f}s") + + # Init embedder + embedder_init_start = time.time() + embedder = OpenAIEmbedderClient(model_config=rb_config) + embedder_init_time = time.time() - embedder_init_start + logger.info(f"[PERF] Embedder init took {embedder_init_time:.4f}s") + + embedding_task = asyncio.create_task( + search_graph_by_embedding( + connector=connector, + embedder_client=embedder, + query_text=query_text, + end_user_id=end_user_id, + limit=limit, + include=include, + ) + ) + except Exception as emb_init_err: + logger.warning( + f"[PERF] Embedding search skipped due to init error " + f"(embedding_model_id={memory_config.embedding_model_id}): {emb_init_err}" + ) + embedding_task = None if keyword_task: keyword_results = await keyword_task diff --git a/api/app/core/memory/storage_services/clustering_engine/__init__.py b/api/app/core/memory/storage_services/clustering_engine/__init__.py new file mode 100644 index 00000000..992d8bff --- /dev/null +++ b/api/app/core/memory/storage_services/clustering_engine/__init__.py @@ -0,0 +1,3 @@ +from app.core.memory.storage_services.clustering_engine.label_propagation import LabelPropagationEngine + +__all__ = ["LabelPropagationEngine"] diff --git a/api/app/core/memory/storage_services/clustering_engine/label_propagation.py b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py new file mode 100644 index 00000000..21257f2e --- /dev/null +++ b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py @@ -0,0 +1,559 @@ +"""标签传播聚类引擎 + +基于 ZEP 论文的动态标签传播算法,对 Neo4j 中的 ExtractedEntity 节点进行社区聚类。 + +支持两种模式: +- 全量初始化(full_clustering):首次运行,对所有实体做完整 LPA 迭代 +- 增量更新(incremental_update):新实体到达时,只处理新实体及其邻居 +""" + +import asyncio +import logging +import uuid +from math import sqrt +from typing import Dict, List, Optional + +from app.repositories.neo4j.community_repository import CommunityRepository +from 
app.repositories.neo4j.neo4j_connector import Neo4jConnector + +logger = logging.getLogger(__name__) + +# 全量迭代最大轮数,防止不收敛 +MAX_ITERATIONS = 10 + +# 社区核心实体取 top-N 数量 +CORE_ENTITY_LIMIT = 10 + + +def _cosine_similarity(v1: Optional[List[float]], v2: Optional[List[float]]) -> float: + """计算两个向量的余弦相似度,任一为空则返回 0。""" + if not v1 or not v2 or len(v1) != len(v2): + return 0.0 + dot = sum(a * b for a, b in zip(v1, v2)) + norm1 = sqrt(sum(a * a for a in v1)) + norm2 = sqrt(sum(b * b for b in v2)) + if norm1 == 0 or norm2 == 0: + return 0.0 + return dot / (norm1 * norm2) + + +def _weighted_vote( + neighbors: List[Dict], + self_embedding: Optional[List[float]], +) -> Optional[str]: + """ + 加权多数投票,选出得票最高的社区。 + + 权重 = 语义相似度(name_embedding 余弦)* activation_value 加成 + 没有 community_id 的邻居不参与投票。 + """ + votes: Dict[str, float] = {} + for nb in neighbors: + cid = nb.get("community_id") + if not cid: + continue + sem = _cosine_similarity(self_embedding, nb.get("name_embedding")) + act = nb.get("activation_value") or 0.5 + # 语义相似度权重 0.6,激活值权重 0.4 + weight = 0.6 * sem + 0.4 * act + votes[cid] = votes.get(cid, 0.0) + weight + + if not votes: + return None + return max(votes, key=votes.__getitem__) + + +class LabelPropagationEngine: + """标签传播聚类引擎""" + + def __init__( + self, + connector: Neo4jConnector, + config_id: Optional[str] = None, + llm_model_id: Optional[str] = None, + embedding_model_id: Optional[str] = None, + ): + self.connector = connector + self.repo = CommunityRepository(connector) + self.config_id = config_id + self.llm_model_id = llm_model_id + self.embedding_model_id = embedding_model_id + + # ────────────────────────────────────────────────────────────────────────── + # 公开接口 + # ────────────────────────────────────────────────────────────────────────── + + async def run( + self, + end_user_id: str, + new_entity_ids: Optional[List[str]] = None, + ) -> None: + """ + 统一入口:自动判断全量还是增量。 + + - 若该用户尚无 Community 节点 → 全量初始化 + - 否则 → 增量更新(仅处理 new_entity_ids) + """ + has_communities = 
await self.repo.has_communities(end_user_id) + if not has_communities: + logger.info(f"[Clustering] 用户 {end_user_id} 首次聚类,执行全量初始化") + await self.full_clustering(end_user_id) + else: + if new_entity_ids: + logger.info( + f"[Clustering] 增量更新,新实体数: {len(new_entity_ids)}" + ) + await self.incremental_update(new_entity_ids, end_user_id) + + async def full_clustering(self, end_user_id: str) -> None: + """ + 全量标签传播初始化(分批处理,控制内存峰值)。 + + 策略: + - 每次只加载 BATCH_SIZE 个实体及其邻居进内存 + - labels 字典跨批次共享(只存 id→community_id,内存极小) + - 每批独立跑 MAX_ITERATIONS 轮 LPA,批次间通过 labels 传递社区信息 + - 所有批次完成后统一 flush 和 merge + """ + BATCH_SIZE = 888 # 每批实体数,可按需调整 + + # 轻量查询:只获取总数和 ID 列表,不加载 embedding 等大字段 + total_count = await self.repo.get_entity_count(end_user_id) + if not total_count: + logger.info(f"[Clustering] 用户 {end_user_id} 无实体,跳过全量聚类") + return + + all_entity_ids = await self.repo.get_all_entity_ids(end_user_id) + logger.info(f"[Clustering] 用户 {end_user_id} 共 {total_count} 个实体," + f"分批大小 {BATCH_SIZE},共 {(total_count + BATCH_SIZE - 1) // BATCH_SIZE} 批") + + # labels 跨批次共享:只存 id→community_id,内存极小 + labels: Dict[str, str] = {eid: eid for eid in all_entity_ids} + del all_entity_ids # 释放 ID 列表,后续按批次加载完整数据 + + for batch_start in range(0, total_count, BATCH_SIZE): + batch_entities = await self.repo.get_entities_page( + end_user_id, skip=batch_start, limit=BATCH_SIZE + ) + if not batch_entities: + break + + batch_ids = [e["id"] for e in batch_entities] + batch_embeddings: Dict[str, Optional[List[float]]] = { + e["id"]: e.get("name_embedding") for e in batch_entities + } + + logger.info( + f"[Clustering] 批次 {batch_start // BATCH_SIZE + 1}:" + f"加载 {len(batch_entities)} 个实体的邻居图..." 
+ ) + neighbors_cache = await self.repo.get_entity_neighbors_for_ids( + batch_ids, end_user_id + ) + logger.info(f"[Clustering] 邻居预加载完成,覆盖实体数: {len(neighbors_cache)}") + + for iteration in range(MAX_ITERATIONS): + changed = 0 + for entity in batch_entities: + eid = entity["id"] + neighbors = neighbors_cache.get(eid, []) + + # 注入跨批次的最新标签(邻居可能在其他批次,labels 里有其最新值) + enriched = [] + for nb in neighbors: + nb_copy = dict(nb) + nb_copy["community_id"] = labels.get(nb["id"], nb.get("community_id")) + enriched.append(nb_copy) + + new_label = _weighted_vote(enriched, batch_embeddings.get(eid)) + if new_label and new_label != labels[eid]: + labels[eid] = new_label + changed += 1 + + logger.info( + f"[Clustering] 批次 {batch_start // BATCH_SIZE + 1} " + f"迭代 {iteration + 1}/{MAX_ITERATIONS},标签变化数: {changed}" + ) + if changed == 0: + logger.info("[Clustering] 标签已收敛,提前结束本批迭代") + break + + # 释放本批次的大对象 + del neighbors_cache, batch_embeddings, batch_entities + + # 所有批次完成,统一写入 Neo4j + await self._flush_labels(labels, end_user_id) + pre_merge_count = len(set(labels.values())) + logger.info( + f"[Clustering] 全量迭代完成,共 {pre_merge_count} 个社区," + f"{len(labels)} 个实体,开始后处理合并" + ) + + all_community_ids = list(set(labels.values())) + await self._evaluate_merge(all_community_ids, end_user_id) + + logger.info( + f"[Clustering] 全量聚类完成,合并前 {pre_merge_count} 个社区," + f"{len(labels)} 个实体" + ) + + # 查询存活社区并生成元数据 + surviving_communities = await self.repo.get_all_entities(end_user_id) + surviving_community_ids = list({ + e.get("community_id") for e in surviving_communities + if e.get("community_id") + }) + logger.info(f"[Clustering] 合并后实际存活社区数: {len(surviving_community_ids)}") + await self._generate_community_metadata(surviving_community_ids, end_user_id) + + async def incremental_update( + self, new_entity_ids: List[str], end_user_id: str + ) -> None: + """ + 增量更新:只处理新实体及其邻居,不重跑全图。 + + 1. 对每个新实体查询邻居 + 2. 加权多数投票决定社区归属 + 3. 若邻居无社区 → 创建新社区 + 4. 
若邻居分属多个社区 → 评估是否合并 + """ + for entity_id in new_entity_ids: + await self._process_single_entity(entity_id, end_user_id) + + # ────────────────────────────────────────────────────────────────────────── + # 内部方法 + # ────────────────────────────────────────────────────────────────────────── + + async def _process_single_entity( + self, entity_id: str, end_user_id: str + ) -> None: + """处理单个新实体的社区分配。""" + neighbors = await self.repo.get_entity_neighbors(entity_id, end_user_id) + + # 查询自身 embedding(从邻居查询结果中无法获取,需单独查) + self_embedding = await self._get_entity_embedding(entity_id, end_user_id) + + if not neighbors: + # 孤立实体:创建单成员社区 + new_cid = self._new_community_id() + await self.repo.upsert_community(new_cid, end_user_id, member_count=1) + await self.repo.assign_entity_to_community(entity_id, new_cid, end_user_id) + logger.debug(f"[Clustering] 孤立实体 {entity_id} → 新社区 {new_cid}") + return + + # 统计邻居社区分布 + community_ids_in_neighbors = set( + nb["community_id"] for nb in neighbors if nb.get("community_id") + ) + + target_cid = _weighted_vote(neighbors, self_embedding) + + if target_cid is None: + # 邻居都没有社区,连同新实体一起创建新社区 + new_cid = self._new_community_id() + await self.repo.upsert_community(new_cid, end_user_id) + await self.repo.assign_entity_to_community(entity_id, new_cid, end_user_id) + for nb in neighbors: + await self.repo.assign_entity_to_community( + nb["id"], new_cid, end_user_id + ) + await self.repo.refresh_member_count(new_cid, end_user_id) + logger.debug( + f"[Clustering] 新实体 {entity_id} 与 {len(neighbors)} 个无社区邻居 → 新社区 {new_cid}" + ) + await self._generate_community_metadata([new_cid], end_user_id) + else: + # 加入得票最多的社区 + await self.repo.assign_entity_to_community(entity_id, target_cid, end_user_id) + await self.repo.refresh_member_count(target_cid, end_user_id) + logger.debug(f"[Clustering] 新实体 {entity_id} → 社区 {target_cid}") + + # 若邻居分属多个社区,评估合并 + if len(community_ids_in_neighbors) > 1: + await self._evaluate_merge( + list(community_ids_in_neighbors), 
async def _evaluate_merge(
    self, community_ids: List[str], end_user_id: str
) -> None:
    """Merge communities whose member-embedding centroids are near-duplicates.

    Steps:
      1. Load the members of every candidate community and compute the mean
         of their ``name_embedding`` vectors; a community with no embedded
         member gets ``None``.
      2. Collect every pair whose cosine similarity is strictly greater than
         ``MERGE_THRESHOLD`` (0.85).
      3. Process the pairs one at a time. Before each merge the similarity is
         re-checked against the *current* centroids of the two root
         communities, so that A~B and B~C does not drag C into A unless C is
         still similar to the merged A+B centroid. The community with more
         members is kept; the other one's members are reassigned to it.

    When more than ``BATCH_THRESHOLD`` communities are passed, members are
    fetched with a single batched repository call instead of one call per
    community.

    Args:
        community_ids: Candidate community ids to compare pairwise.
        end_user_id: Owner of the graph; forwarded to every repository call.
    """
    MERGE_THRESHOLD = 0.85
    BATCH_THRESHOLD = 20  # above this many communities, use the batched member query

    def _centroid(members):
        """Return (mean name_embedding or None, number of members that had one)."""
        valid_embeddings = [
            m["name_embedding"] for m in members if m.get("name_embedding")
        ]
        if not valid_embeddings:
            return None, 0
        dim = len(valid_embeddings[0])
        count = len(valid_embeddings)
        mean = [sum(e[i] for e in valid_embeddings) / count for i in range(dim)]
        return mean, count

    community_embeddings: Dict[str, Optional[List[float]]] = {}
    community_sizes: Dict[str, int] = {}
    # Number of members that actually contributed to each centroid; this is
    # the correct weight when two centroids are combined after a merge.
    embedded_counts: Dict[str, int] = {}

    if len(community_ids) > BATCH_THRESHOLD:
        # Full-clustering path: one round trip for all communities.
        all_members = await self.repo.get_all_community_members_batch(
            community_ids, end_user_id
        )
        members_by_cid = {cid: all_members.get(cid, []) for cid in community_ids}
    else:
        # Incremental path: few communities, query them one by one.
        members_by_cid = {}
        for cid in community_ids:
            members_by_cid[cid] = await self.repo.get_community_members(
                cid, end_user_id
            )

    for cid, members in members_by_cid.items():
        community_sizes[cid] = len(members)
        community_embeddings[cid], embedded_counts[cid] = _centroid(members)

    # Candidate pairs, judged on the initial centroids.
    to_merge: List[tuple] = []
    cids = list(community_ids)
    for i, left in enumerate(cids):
        for right in cids[i + 1:]:
            sim = _cosine_similarity(
                community_embeddings[left],
                community_embeddings[right],
            )
            if sim > MERGE_THRESHOLD:
                to_merge.append((left, right))

    logger.info(f"[Clustering] 发现 {len(to_merge)} 对可合并社区")

    merged_into: Dict[str, str] = {}  # dissolved community -> community it was merged into

    def get_root(x: str) -> str:
        """Follow ``merged_into`` to the surviving community, compressing the path."""
        while x in merged_into:
            merged_into[x] = merged_into.get(merged_into[x], merged_into[x])
            x = merged_into[x]
        return x

    for c1, c2 in to_merge:
        root1, root2 = get_root(c1), get_root(c2)
        if root1 == root2:
            continue

        # Re-validate against the up-to-date centroids to block transitive merges.
        current_sim = _cosine_similarity(
            community_embeddings.get(root1),
            community_embeddings.get(root2),
        )
        if current_sim <= MERGE_THRESHOLD:
            logger.debug(
                f"[Clustering] 跳过合并 {root1} ↔ {root2},"
                f"当前相似度 {current_sim:.3f} ≤ {MERGE_THRESHOLD}"
            )
            continue

        keep = root1 if community_sizes.get(root1, 0) >= community_sizes.get(root2, 0) else root2
        dissolve = root2 if keep == root1 else root1
        merged_into[dissolve] = keep

        # Query again rather than reuse the snapshot: earlier merges in this
        # loop may already have moved members into ``dissolve``.
        members = await self.repo.get_community_members(dissolve, end_user_id)
        for m in members:
            await self.repo.assign_entity_to_community(m["id"], keep, end_user_id)

        # Combine the two centroids, weighting each by the number of members
        # that contributed an embedding (not by total member count, which
        # would skew the mean when some members have no embedding).
        keep_emb = community_embeddings.get(keep)
        dissolve_emb = community_embeddings.get(dissolve)
        keep_n = embedded_counts.get(keep, 0)
        dissolve_n = embedded_counts.get(dissolve, 0)
        total_n = keep_n + dissolve_n
        if keep_emb and dissolve_emb and total_n > 0:
            dim = len(keep_emb)
            community_embeddings[keep] = [
                (keep_emb[i] * keep_n + dissolve_emb[i] * dissolve_n) / total_n
                for i in range(dim)
            ]
        community_embeddings[dissolve] = None
        embedded_counts[keep] = total_n
        embedded_counts[dissolve] = 0

        community_sizes[keep] = community_sizes.get(keep, 0) + community_sizes.get(dissolve, 0)
        community_sizes[dissolve] = 0
        await self.repo.refresh_member_count(keep, end_user_id)
        logger.info(
            f"[Clustering] 社区合并: {dissolve} → {keep},"
            f"相似度={current_sim:.3f},迁移 {len(members)} 个成员"
        )
cid in unique_communities: + await self.repo.upsert_community(cid, end_user_id) + + # 再批量分配实体 + for entity_id, community_id in labels.items(): + await self.repo.assign_entity_to_community( + entity_id, community_id, end_user_id + ) + + # 刷新成员数 + for cid in unique_communities: + await self.repo.refresh_member_count(cid, end_user_id) + + async def _get_entity_embedding( + self, entity_id: str, end_user_id: str + ) -> Optional[List[float]]: + """查询单个实体的 name_embedding。""" + try: + result = await self.connector.execute_query( + "MATCH (e:ExtractedEntity {id: $eid, end_user_id: $uid}) " + "RETURN e.name_embedding AS name_embedding", + eid=entity_id, + uid=end_user_id, + ) + return result[0]["name_embedding"] if result else None + except Exception: + return None + + @staticmethod + def _build_entity_lines(members: List[Dict]) -> List[str]: + """将实体列表格式化为 prompt 行,包含 name、aliases、description。""" + lines = [] + for m in members: + m_name = m.get("name", "") + aliases = m.get("aliases") or [] + description = m.get("description") or "" + aliases_str = f"(别名:{'、'.join(aliases)})" if aliases else "" + desc_str = f":{description}" if description else "" + lines.append(f"- {m_name}{aliases_str}{desc_str}") + return lines + + async def _generate_community_metadata( + self, community_ids: List[str], end_user_id: str + ) -> None: + """ + 为一个或多个社区生成并写入元数据。 + + 流程: + 1. 逐个社区调 LLM 生成 name / summary(串行) + 2. 收集所有 summary,一次性批量 embed + 3. 
单个社区用 update_community_metadata,多个用 batch_update_community_metadata + """ + if not community_ids: + return + + from app.db import get_db_context + from app.core.memory.utils.llm.llm_utils import MemoryClientFactory + + # --- 阶段1:并发调 LLM 生成每个社区的 name / summary --- + async def _build_one(cid: str): + members = await self.repo.get_community_members(cid, end_user_id) + if not members: + return None + + sorted_members = sorted( + members, + key=lambda m: m.get("activation_value") or 0, + reverse=True, + ) + core_entities = [m["name"] for m in sorted_members[:CORE_ENTITY_LIMIT] if m.get("name")] + + entity_list_str = "\n".join(self._build_entity_lines(members)) + prompt = ( + f"以下是一组语义相关的实体:\n{entity_list_str}\n\n" + f"请为这组实体所代表的主题:\n" + f"1. 起一个简洁的中文名称(不超过10个字)\n" + f"2. 写一句话摘要(不超过50个字)\n\n" + f"严格按以下格式输出,不要有其他内容:\n" + f"名称:<名称>\n摘要:<摘要>" + ) + with get_db_context() as db: + llm_client = MemoryClientFactory(db).get_llm_client(self.llm_model_id) + response = await llm_client.chat([{"role": "user", "content": prompt}]) + text = response.content if hasattr(response, "content") else str(response) + + name, summary = "", "" + for line in text.strip().splitlines(): + if line.startswith("名称:"): + name = line[3:].strip() + elif line.startswith("摘要:"): + summary = line[3:].strip() + + return { + "community_id": cid, + "end_user_id": end_user_id, + "name": name, + "summary": summary, + "core_entities": core_entities, + "summary_embedding": None, + } + + results = await asyncio.gather( + *[_build_one(cid) for cid in community_ids], + return_exceptions=True, + ) + metadata_list = [] + for cid, res in zip(community_ids, results): + if isinstance(res, Exception): + logger.error(f"[Clustering] 社区 {cid} 元数据准备失败: {res}", exc_info=res) + elif res is not None: + metadata_list.append(res) + + if not metadata_list: + return + + # --- 阶段2:批量生成 summary_embedding --- + summaries = [m["summary"] for m in metadata_list] + with get_db_context() as db: + embedder = 
MemoryClientFactory(db).get_embedder_client(self.embedding_model_id) + embeddings = await embedder.response(summaries) + for i, meta in enumerate(metadata_list): + meta["summary_embedding"] = embeddings[i] if i < len(embeddings) else None + + # --- 阶段3:写入(单个 or 批量)--- + if len(metadata_list) == 1: + m = metadata_list[0] + result = await self.repo.update_community_metadata( + community_id=m["community_id"], + end_user_id=m["end_user_id"], + name=m["name"], + summary=m["summary"], + core_entities=m["core_entities"], + summary_embedding=m["summary_embedding"], + ) + if result: + logger.info(f"[Clustering] 社区 {m['community_id']} 元数据写入成功: name={m['name']}, summary={m['summary'][:30]}...") + else: + logger.warning(f"[Clustering] 社区 {m['community_id']} 元数据写入返回 False") + else: + ok = await self.repo.batch_update_community_metadata(metadata_list) + if ok: + logger.info(f"[Clustering] 批量写入 {len(metadata_list)} 个社区元数据成功") + else: + logger.warning(f"[Clustering] 批量写入社区元数据失败") + + @staticmethod + def _new_community_id() -> str: + return str(uuid.uuid4()) diff --git a/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py b/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py index 904b238f..248067e7 100644 --- a/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py +++ b/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py @@ -20,7 +20,6 @@ from pydantic import BaseModel, Field from app.core.memory.models.message_models import DialogData, ConversationMessage, ConversationContext from app.core.memory.models.config_models import PruningConfig -from app.core.memory.utils.config.config_utils import get_pruning_config from app.core.memory.utils.prompt.prompt_utils import prompt_env, log_prompt_rendering, log_template_rendering from app.core.memory.storage_services.extraction_engine.data_preprocessing.scene_config import ( 
SceneConfigRegistry, @@ -33,6 +32,9 @@ class DialogExtractionResponse(BaseModel): - is_related:对话与场景的相关性判定。 - times / ids / amounts / contacts / addresses / keywords:重要信息片段,用来在不相关对话中保留关键消息。 + - preserve_keywords:情绪/兴趣/爱好/个人观点相关词,包含这些词的消息必须强制保留。 + - scene_unrelated_snippets:与当前场景无关且无语义关联的消息片段(原文截取), + 用于高阈值阶段精准删除跨场景内容。 """ is_related: bool = Field(...) times: List[str] = Field(default_factory=list) @@ -41,6 +43,8 @@ class DialogExtractionResponse(BaseModel): contacts: List[str] = Field(default_factory=list) addresses: List[str] = Field(default_factory=list) keywords: List[str] = Field(default_factory=list) + preserve_keywords: List[str] = Field(default_factory=list, description="情绪/兴趣/爱好/个人观点相关词,包含这些词的消息强制保留") + scene_unrelated_snippets: List[str] = Field(default_factory=list,description="与当前场景无关且无语义关联的消息原文片段,高阈值阶段用于精准删除跨场景内容") class MessageImportanceResponse(BaseModel): @@ -86,26 +90,19 @@ class SemanticPruner: self._detailed_prune_logging = True # 是否启用详细日志 self._max_debug_msgs_per_dialog = 20 # 每个对话最多记录前N条消息的详细日志 - # 加载场景特定配置(内置场景走专门规则,自定义场景 fallback 到通用规则) - self.scene_config: ScenePatterns = SceneConfigRegistry.get_config( - self.config.pruning_scene, - fallback_to_generic=True - ) + # 加载统一填充词库 + self.scene_config: ScenePatterns = SceneConfigRegistry.get_config(self.config.pruning_scene) - # 判断是否为内置专门场景 - self._is_builtin_scene = SceneConfigRegistry.is_scene_supported(self.config.pruning_scene) + # 本体类型列表:直接使用 ontology_class_infos(name + description) + self._ontology_class_infos = getattr(self.config, "ontology_class_infos", None) or [] + # _ontology_classes 仅用于日志统计 + self._ontology_classes = [info.class_name for info in self._ontology_class_infos] - # 自定义场景的本体类型列表(用于注入提示词) - self._ontology_classes = getattr(self.config, "ontology_classes", None) or [] - - if self._is_builtin_scene: - self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 使用内置专门配置") + self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene}") + if self._ontology_class_infos: + self._log(f"[剪枝-初始化] 
注入本体类型({len(self._ontology_class_infos)}个): {self._ontology_classes}") else: - self._log(f"[剪枝-初始化] 场景={self.config.pruning_scene} 为自定义场景,使用通用规则 + 本体类型提示词注入") - if self._ontology_classes: - self._log(f"[剪枝-初始化] 注入本体类型: {self._ontology_classes}") - else: - self._log(f"[剪枝-初始化] 未找到本体类型,将使用通用提示词") + self._log(f"[剪枝-初始化] 未找到本体类型,将使用通用提示词") # Load Jinja2 template self.template = prompt_env.get_template("extracat_Pruning.jinja2") @@ -117,107 +114,28 @@ class SemanticPruner: # 运行日志:收集关键终端输出,便于写入 JSON self.run_logs: List[str] = [] - def _is_important_message(self, message: ConversationMessage) -> bool: - """基于启发式规则识别重要信息消息,优先保留。 - - 改进版:使用场景特定的模式进行识别 - - 根据 pruning_scene 动态加载对应的识别规则 - - 支持教育、在线服务、外呼三个场景的特定模式 - """ - text = message.msg.strip() - if not text: - return False - - # 使用场景特定的模式 - all_patterns = ( - self.scene_config.high_priority_patterns + - self.scene_config.medium_priority_patterns + - self.scene_config.low_priority_patterns - ) - - for pattern, _ in all_patterns: - if re.search(pattern, text, flags=re.IGNORECASE): - return True - - # 检查是否为问句(以问号结尾或包含疑问词) - if text.endswith("?") or text.endswith("?"): - return True - - # 检查是否包含问句关键词 - if any(keyword in text for keyword in self.scene_config.question_keywords): - return True - - # 检查是否包含决策性关键词 - if any(keyword in text for keyword in self.scene_config.decision_keywords): - return True - - return False - - def _importance_score(self, message: ConversationMessage) -> int: - """为重要消息打分,用于在保留比例内优先保留更关键的内容。 - - 改进版:使用场景特定的权重体系(0-10分) - - 根据场景动态调整不同信息类型的权重 - - 高优先级模式:4-6分 - - 中优先级模式:2-3分 - - 低优先级模式:1分 - """ - text = message.msg.strip() - score = 0 - - # 使用场景特定的权重 - for pattern, weight in self.scene_config.high_priority_patterns: - if re.search(pattern, text, flags=re.IGNORECASE): - score += weight - - for pattern, weight in self.scene_config.medium_priority_patterns: - if re.search(pattern, text, flags=re.IGNORECASE): - score += weight - - for pattern, weight in self.scene_config.low_priority_patterns: - if 
re.search(pattern, text, flags=re.IGNORECASE): - score += weight - - # 问句加分 - if text.endswith("?") or text.endswith("?"): - score += 2 - - # 包含问句关键词加分 - if any(keyword in text for keyword in self.scene_config.question_keywords): - score += 1 - - # 包含决策性关键词加分 - if any(keyword in text for keyword in self.scene_config.decision_keywords): - score += 2 - - # 长度加分(较长的消息通常包含更多信息) - if len(text) > 50: - score += 1 - if len(text) > 100: - score += 1 - - return min(score, 10) # 最高10分 + # _is_important_message 和 _importance_score 已移除: + # 重要性判断完全由 extracat_Pruning.jinja2 提示词 + LLM 的 preserve_tokens 机制承担。 + # LLM 根据注入的本体工程类型语义识别需要保护的内容,无需硬编码正则规则。 def _is_filler_message(self, message: ConversationMessage) -> bool: """检测典型寒暄/口头禅/确认类短消息。 - 改进版:更严格的填充消息判断,避免误删场景相关内容 - 满足以下之一视为填充消息: - - 纯标点或空白 - - 在场景特定填充词库中(精确匹配) - - 纯表情符号 - - 常见寒暄(精确匹配短语) - - 注意:不再使用长度判断,避免误删短但重要的消息 + 判断顺序: + 1. 空消息 + 2. 场景特定填充词库精确匹配 + 3. 常见寒暄精确匹配 + 4. 组合寒暄模式(前缀+后缀组合,如"好的谢谢"、"同学你好"、"明白了") + 5. 纯表情/标点 """ t = message.msg.strip() if not t: return True - + # 检查是否在场景特定填充词库中(精确匹配) if t in self.scene_config.filler_phrases: return True - + # 常见寒暄和问候(精确匹配,避免误删) common_greetings = { "在吗", "在不在", "在呢", "在的", @@ -229,25 +147,60 @@ class SemanticPruner: } if t in common_greetings: return True - + + # 组合寒暄模式:短消息(≤15字)且完全由寒暄成分构成 + # 策略:将消息拆分后,每个片段都能在填充词库或常见寒暄中找到,则整体为填充 + if len(t) <= 15: + # 确认+称呼/感谢组合,如"好的谢谢"、"明白了"、"知道了谢谢" + _confirm_prefixes = {"好的", "好", "嗯", "嗯嗯", "哦", "明白", "明白了", "知道了", "了解", "收到", "没问题"} + _thanks_suffixes = {"谢谢", "谢谢你", "谢谢您", "多谢", "感谢", "谢了"} + _greeting_suffixes = {"你好", "您好", "老师好", "同学好", "大家好"} + _greeting_prefixes = {"同学", "老师", "您好", "你好"} + _close_patterns = { + "没有了", "没事了", "没问题了", "好了", "行了", "可以了", + "不用了", "不需要了", "就这样", "就这样吧", "那就这样", + } + _polite_responses = { + "不客气", "不用谢", "没关系", "没事", "应该的", "这是我应该做的", + } + + # 规则1:确认词 + 感谢词(如"好的谢谢"、"嗯谢谢") + for cp in _confirm_prefixes: + for ts in _thanks_suffixes: + if t == cp + ts or t == cp + "," + ts or t == cp + "," + ts: + return True + 
+ # 规则2:称呼前缀 + 问候(如"同学你好"、"老师好") + for gp in _greeting_prefixes: + for gs in _greeting_suffixes: + if t == gp + gs or t.startswith(gp) and t.endswith("好"): + return True + + # 规则3:结束语 + 感谢(如"没有了,谢谢老师"、"没有了谢谢") + for cp in _close_patterns: + if t.startswith(cp): + remainder = t[len(cp):].lstrip(",,、 ") + if not remainder or any(remainder.startswith(ts) for ts in _thanks_suffixes): + return True + + # 规则4:礼貌回应(如"不客气,祝你考试顺利"——前缀是礼貌词,后半是祝福套话) + for pr in _polite_responses: + if t.startswith(pr): + remainder = t[len(pr):].lstrip(",,、 ") + # 后半是祝福/套话(不含实质信息) + if not remainder or re.match(r"^(祝|希望|期待|加油|顺利|好好|保重)", remainder): + return True + + # 规则5:纯确认词加"了"后缀(如"明白了"、"知道了"、"好了") + _confirm_base = {"明白", "知道", "了解", "收到", "好", "行", "可以", "没问题"} + for cb in _confirm_base: + if t == cb + "了" or t == cb + "了。" or t == cb + "了!": + return True + # 检查是否为纯表情符号(方括号包裹) if re.fullmatch(r"(\[[^\]]+\])+", t): return True - # 检查是否为纯emoji(Unicode表情) - emoji_pattern = re.compile( - "[" - "\U0001F600-\U0001F64F" # 表情符号 - "\U0001F300-\U0001F5FF" # 符号和象形文字 - "\U0001F680-\U0001F6FF" # 交通和地图符号 - "\U0001F1E0-\U0001F1FF" # 旗帜 - "\U00002702-\U000027B0" - "\U000024C2-\U0001F251" - "]+", flags=re.UNICODE - ) - if emoji_pattern.fullmatch(t): - return True - # 纯标点符号 if re.fullmatch(r"[。!?,.!?…·\s]+", t): return True @@ -432,15 +385,13 @@ class SemanticPruner: rendered = self.template.render( pruning_scene=self.config.pruning_scene, - is_builtin_scene=self._is_builtin_scene, - ontology_classes=self._ontology_classes, + ontology_class_infos=self._ontology_class_infos, dialog_text=dialog_text, language=self.language ) log_template_rendering("extracat_Pruning.jinja2", { "pruning_scene": self.config.pruning_scene, - "is_builtin_scene": self._is_builtin_scene, - "ontology_classes_count": len(self._ontology_classes), + "ontology_class_infos_count": len(self._ontology_class_infos), "language": self.language }) log_prompt_rendering("pruning-extract", rendered) @@ -480,6 +431,183 @@ class SemanticPruner: ) 
return fallback_response + def _get_pruning_mode(self) -> str: + """根据 pruning_threshold 返回当前剪枝阶段。 + + - 低阈值 [0.0, 0.3):conservative 只删填充,保留所有实质内容 + - 中阈值 [0.3, 0.6):semantic 保留场景相关 + 有语义关联的内容,删除无关联内容 + - 高阈值 [0.6, 0.9]:strict 只保留场景相关内容,跨场景内容可被删除 + """ + t = float(self.config.pruning_threshold) + if t < 0.3: + return "conservative" + elif t < 0.6: + return "semantic" + else: + return "strict" + + def _apply_related_dialog_pruning( + self, + msgs: List[ConversationMessage], + extraction: "DialogExtractionResponse", + dialog_label: str, + pruning_mode: str, + ) -> List[ConversationMessage]: + """相关对话统一剪枝入口,消除 prune_dialog / prune_dataset 中的重复逻辑。 + + - conservative:只删填充 + - semantic / strict:场景感知剪枝 + """ + if pruning_mode == "conservative": + preserve_tokens = self._build_preserve_tokens(extraction) + return self._prune_fillers_only(msgs, preserve_tokens, dialog_label) + else: + return self._prune_with_scene_filter(msgs, extraction, dialog_label, pruning_mode) + + def _prune_fillers_only( + self, + msgs: List[ConversationMessage], + preserve_tokens: List[str], + dialog_label: str, + ) -> List[ConversationMessage]: + """相关对话专用:只删填充消息,LLM 保护消息和实质内容一律保留。 + + 不受 pruning_threshold 约束,删多少算多少(填充有多少删多少)。 + 至少保留 1 条消息。 + 注意:填充检测优先于 preserve_tokens 保护——填充消息本身无信息价值, + 即使 LLM 误将其关键词放入 preserve_tokens 也应删除。 + """ + to_delete_ids: set = set() + for m in msgs: + # 填充检测优先:先判断是否为填充,再看 LLM 保护 + if self._is_filler_message(m): + to_delete_ids.add(id(m)) + self._log(f" [填充] '{m.msg[:40]}' → 删除") + continue + if self._msg_matches_tokens(m, preserve_tokens): + self._log(f" [保护] '{m.msg[:40]}' → LLM保护,跳过") + + kept = [m for m in msgs if id(m) not in to_delete_ids] + if not kept and msgs: + kept = [msgs[0]] + + deleted = len(msgs) - len(kept) + self._log( + f"[剪枝-相关] {dialog_label} 总消息={len(msgs)} " + f"填充删除={deleted} 保留={len(kept)}" + ) + return kept + + def _prune_with_scene_filter( + self, + msgs: List[ConversationMessage], + extraction: "DialogExtractionResponse", + dialog_label: str, + 
mode: str, + ) -> List[ConversationMessage]: + """场景感知剪枝,供 semantic / strict 两个阈值档位调用。 + + 本函数体现剪枝系统的三层递进逻辑: + + 第一层(conservative,阈值 < 0.3): + 不进入本函数,由 _prune_fillers_only 处理。 + 保留标准:只问"有没有信息量",填充消息(嗯/好的/哈哈等)删除,其余一律保留。 + + 第二层(semantic,阈值 [0.3, 0.6)): + 保留标准:内容价值优先,场景相关性是参考而非唯一标准。 + - 填充消息 → 删除(最高优先级) + - 场景相关消息 → 保留 + - 场景无关消息 → 有两次豁免机会: + 1. 命中 scene_preserve_tokens(LLM 标记的关键词/时间/金额等)→ 保留 + 2. 含情感词(感觉/压力/开心等)→ 保留(情感内容有记忆价值) + 3. 两次豁免均未命中 → 删除 + + 第三层(strict,阈值 [0.6, 0.9]): + 保留标准:场景相关性优先,无任何豁免。 + - 填充消息 → 删除(最高优先级) + - 场景相关消息 → 保留 + - 场景无关消息 → 直接删除,preserve_keywords 和情感词在此模式下均不生效 + + 至少保留 1 条消息(兜底取第一条)。 + """ + # strict 模式收窄保护范围:只保护结构化关键信息(时间/编号/金额/联系方式/地址), + # 不保护 keywords / preserve_keywords,让场景过滤能删掉更多内容。 + # semantic 模式完整保护:包含 LLM 抽取的所有重要片段(含 keywords 和 preserve_keywords)。 + if mode == "strict": + scene_preserve_tokens = ( + extraction.times + extraction.ids + extraction.amounts + + extraction.contacts + extraction.addresses + ) + else: + scene_preserve_tokens = self._build_preserve_tokens(extraction) + + unrelated_snippets = extraction.scene_unrelated_snippets or [] + + to_delete_ids: set = set() + for m in msgs: + msg_text = m.msg.strip() + + # 第一优先级:填充消息无论模式直接删除,不参与后续场景判断 + if self._is_filler_message(m): + to_delete_ids.add(id(m)) + self._log(f" [填充] '{msg_text[:40]}' → 删除") + continue + + # 双向包含匹配:处理 LLM 返回片段与原始消息文本长度不完全一致的情况 + is_scene_unrelated = any( + snip and (snip in msg_text or msg_text in snip) + for snip in unrelated_snippets + ) + + if is_scene_unrelated: + if mode == "strict": + # strict:场景无关直接删除,不做任何豁免 + # 场景相关性是唯一裁决标准,preserve_keywords 在此模式下不生效 + to_delete_ids.add(id(m)) + self._log(f" [场景无关-严格] '{msg_text[:40]}' → 删除") + elif mode == "semantic": + # semantic:场景无关但有内容价值 → 保留 + # 豁免第一层:命中 scene_preserve_tokens(关键词/结构化信息保护) + if self._msg_matches_tokens(m, scene_preserve_tokens): + self._log(f" [保护] '{msg_text[:40]}' → 场景关键词保护,保留") + else: + # 豁免第二层:含情感词,认为有情境记忆价值,即使场景无关也保留 + has_contextual_emotion = any( + word in msg_text + for word in ["感觉", 
"觉得", "心情", "开心", "难过", "高兴", "沮丧", + "喜欢", "讨厌", "爱", "恨", "担心", "害怕", "兴奋", + "压力", "累", "疲惫", "烦", "焦虑", "委屈", "感动"] + ) + if not has_contextual_emotion: + to_delete_ids.add(id(m)) + self._log(f" [场景无关-语义] '{msg_text[:40]}' → 删除(无情感关联)") + else: + self._log(f" [场景关联-保留] '{msg_text[:40]}' → 有情感关联,保留") + else: + # 不在 scene_unrelated_snippets 中 → 场景相关,直接保留 + if self._msg_matches_tokens(m, scene_preserve_tokens): + self._log(f" [保护] '{msg_text[:40]}' → LLM保护,跳过") + # else: 普通场景相关消息,保留,不输出日志 + + kept = [m for m in msgs if id(m) not in to_delete_ids] + if not kept and msgs: + kept = [msgs[0]] + + deleted = len(msgs) - len(kept) + self._log( + f"[剪枝-{mode}] {dialog_label} 总消息={len(msgs)} " + f"删除={deleted} 保留={len(kept)}" + ) + return kept + + def _build_preserve_tokens(self, extraction: "DialogExtractionResponse") -> List[str]: + """统一构建 preserve_tokens,合并 LLM 抽取的所有重要片段。""" + return ( + extraction.times + extraction.ids + extraction.amounts + + extraction.contacts + extraction.addresses + extraction.keywords + + extraction.preserve_keywords + ) + def _msg_matches_tokens(self, message: ConversationMessage, tokens: List[str]) -> bool: """判断消息是否包含任意抽取到的重要片段。""" if not tokens: @@ -500,66 +628,62 @@ class SemanticPruner: proportion = float(self.config.pruning_threshold) extraction = await self._extract_dialog_important(dialog.content) + pruning_mode = self._get_pruning_mode() + self._log(f"[剪枝-模式] 阈值={proportion} → 模式={pruning_mode}") + if extraction.is_related: - # 相关对话不剪枝 + kept = self._apply_related_dialog_pruning( + dialog.context.msgs, extraction, f"对话ID={dialog.id}", pruning_mode + ) + dialog.context = ConversationContext(msgs=kept) return dialog - # 在不相关对话中,识别重要/不重要消息 - tokens = extraction.times + extraction.ids + extraction.amounts + extraction.contacts + extraction.addresses + extraction.keywords + # 在不相关对话中,LLM 已通过 preserve_tokens 标记需要保护的内容 + preserve_tokens = self._build_preserve_tokens(extraction) msgs = dialog.context.msgs - imp_unrel_msgs: 
List[ConversationMessage] = [] - unimp_unrel_msgs: List[ConversationMessage] = [] + + # 分类:填充 / 其他可删(LLM保护消息通过不加入任何桶来隐式保护) + filler_ids: set = set() + deletable: List[ConversationMessage] = [] + for m in msgs: - if self._msg_matches_tokens(m, tokens) or self._is_important_message(m): - imp_unrel_msgs.append(m) + if self._msg_matches_tokens(m, preserve_tokens): + pass # 保护消息:不加入任何桶,不会被删除 + elif self._is_filler_message(m): + filler_ids.add(id(m)) else: - unimp_unrel_msgs.append(m) - # 计算总删除目标数量 + deletable.append(m) + + # 计算删除目标 total_unrel = len(msgs) delete_target = int(total_unrel * proportion) if proportion > 0 and total_unrel > 0 and delete_target == 0: delete_target = 1 - imp_del_cap = min(int(len(imp_unrel_msgs) * proportion), len(imp_unrel_msgs)) - unimp_del_cap = len(unimp_unrel_msgs) - max_capacity = max(0, len(msgs) - 1) - max_deletable = min(imp_del_cap + unimp_del_cap, max_capacity) + max_deletable = min(len(filler_ids) + len(deletable), max(0, total_unrel - 1)) delete_target = min(delete_target, max_deletable) - # 删除配额分配 - del_unimp = min(delete_target, unimp_del_cap) - rem = delete_target - del_unimp - del_imp = min(rem, imp_del_cap) - # 选取删除集合 - unimp_delete_ids = [] - imp_delete_ids = [] - if del_unimp > 0: - # 按出现顺序选取前 del_unimp 条不重要消息进行删除(确定性、可复现) - unimp_delete_ids = [id(m) for m in unimp_unrel_msgs[:del_unimp]] - if del_imp > 0: - imp_sorted = sorted(imp_unrel_msgs, key=lambda m: self._importance_score(m)) - imp_delete_ids = [id(m) for m in imp_sorted[:del_imp]] - - # 统计实际删除数量(重要/不重要) - actual_unimp_deleted = 0 - actual_imp_deleted = 0 - kept_msgs = [] - delete_targets = set(unimp_delete_ids) | set(imp_delete_ids) + # 优先删填充,再删其他可删消息(按出现顺序) + to_delete_ids: set = set() for m in msgs: - mid = id(m) - if mid in delete_targets: - if mid in set(unimp_delete_ids) and actual_unimp_deleted < del_unimp: - actual_unimp_deleted += 1 - continue - if mid in set(imp_delete_ids) and actual_imp_deleted < del_imp: - actual_imp_deleted += 1 - continue - 
kept_msgs.append(m) + if len(to_delete_ids) >= delete_target: + break + if id(m) in filler_ids: + to_delete_ids.add(id(m)) + for m in deletable: + if len(to_delete_ids) >= delete_target: + break + to_delete_ids.add(id(m)) + + kept_msgs = [m for m in msgs if id(m) not in to_delete_ids] if not kept_msgs and msgs: kept_msgs = [msgs[0]] - deleted_total = actual_unimp_deleted + actual_imp_deleted + deleted_total = len(msgs) - len(kept_msgs) + protected_count = len(msgs) - len(filler_ids) - len(deletable) self._log( - f"[剪枝-对话] 对话ID={dialog.id} 总消息={len(msgs)} 删除目标={delete_target} 实删={deleted_total} 保留={len(kept_msgs)}" + f"[剪枝-对话] 对话ID={dialog.id} 总消息={len(msgs)} " + f"(保护={protected_count} 填充={len(filler_ids)} 可删={len(deletable)}) " + f"删除目标={delete_target} 实删={deleted_total} 保留={len(kept_msgs)}" ) dialog.context = ConversationContext(msgs=kept_msgs) @@ -590,140 +714,192 @@ class SemanticPruner: self._log( f"[剪枝-数据集] 对话总数={len(dialogs)} 场景={self.config.pruning_scene} 删除比例={proportion} 开关={self.config.pruning_switch} 模式=消息级独立判断" ) - + + pruning_mode = self._get_pruning_mode() + self._log(f"[剪枝-数据集] 阈值={proportion} → 剪枝阶段={pruning_mode}") + result: List[DialogData] = [] total_original_msgs = 0 total_deleted_msgs = 0 - - for d_idx, dd in enumerate(dialogs): + + # 统计对象:直接收集结构化数据,无需事后正则解析 + stats = { + "scene": self.config.pruning_scene, + "dialog_total": len(dialogs), + "deletion_ratio": proportion, + "enabled": self.config.pruning_switch, + "pruning_mode": pruning_mode, + "related_count": 0, + "unrelated_count": 0, + "related_indices": [], + "unrelated_indices": [], + "total_deleted_messages": 0, + "remaining_dialogs": 0, + "dialogs": [], + } + + # 并发执行所有对话的 LLM 抽取(获取 preserve_keywords 等保护信息) + semaphore = asyncio.Semaphore(self.max_concurrent) + + async def extract_with_semaphore(dd: DialogData) -> DialogExtractionResponse: + async with semaphore: + try: + return await self._extract_dialog_important(dd.content) + except Exception as e: + self._log(f"[剪枝-LLM] 
对话抽取失败,使用降级策略: {str(e)[:100]}") + return DialogExtractionResponse(is_related=True) + + extraction_tasks = [extract_with_semaphore(dd) for dd in dialogs] + extraction_results: List[DialogExtractionResponse] = await asyncio.gather(*extraction_tasks) + + for d_idx, (dd, extraction) in enumerate(zip(dialogs, extraction_results)): msgs = dd.context.msgs original_count = len(msgs) total_original_msgs += original_count - - # ========== 问答对保护(已注释,暂不启用,留作观察) ========== - # qa_pairs = self._identify_qa_pairs(msgs) - # protected_indices = self._get_protected_indices(msgs, qa_pairs, window_size=0) - # ======================================================== - - # 消息级分类:每条消息独立判断 - important_msgs = [] # 重要消息(保留) - unimportant_msgs = [] # 不重要消息(可删除) - filler_msgs = [] # 填充消息(优先删除) - - # 判断是否需要详细日志(仅对前N条消息记录) + + # 相关对话:根据阶段决定处理力度 + if extraction.is_related: + stats["related_count"] += 1 + stats["related_indices"].append(d_idx + 1) + kept = self._apply_related_dialog_pruning( + msgs, extraction, f"对话 {d_idx+1}", pruning_mode + ) + deleted_count = original_count - len(kept) + total_deleted_msgs += deleted_count + dd.context.msgs = kept + result.append(dd) + stats["dialogs"].append({ + "index": d_idx + 1, + "is_related": True, + "total_messages": original_count, + "deleted": deleted_count, + "kept": len(kept), + }) + continue + + stats["unrelated_count"] += 1 + stats["unrelated_indices"].append(d_idx + 1) + + # 从 LLM 抽取结果中获取所有需要保留的 token + preserve_tokens = self._build_preserve_tokens(extraction) + + # 判断是否需要详细日志 should_log_details = self._detailed_prune_logging and original_count <= self._max_debug_msgs_per_dialog if self._detailed_prune_logging and original_count > self._max_debug_msgs_per_dialog: self._log(f" 对话[{d_idx}]消息数={original_count},仅采样前{self._max_debug_msgs_per_dialog}条进行详细日志") - + + if extraction.preserve_keywords: + self._log(f" 对话[{d_idx}] LLM抽取到情绪/兴趣保护词: {extraction.preserve_keywords}") + + # 消息级分类:LLM保护 / 填充 / 其他可删 + llm_protected_msgs = [] # LLM 
保护消息(preserve_tokens 命中):绝对不可删除 + filler_msgs = [] # 填充消息(优先删除) + deletable_msgs = [] # 其余消息(按比例删除) + for idx, m in enumerate(msgs): msg_text = m.msg.strip() - - # ========== 问答对保护判断(已注释) ========== - # if idx in protected_indices: - # important_msgs.append((idx, m)) - # self._log(f" [{idx}] '{msg_text[:30]}...' → 重要(问答对保护)") - # ========================================== - - # 填充消息(寒暄、表情等) - if self._is_filler_message(m): + + if self._msg_matches_tokens(m, preserve_tokens): + llm_protected_msgs.append((idx, m)) + if should_log_details or idx < self._max_debug_msgs_per_dialog: + self._log(f" [{idx}] '{msg_text[:30]}...' → 保护(LLM,不可删)") + elif self._is_filler_message(m): filler_msgs.append((idx, m)) if should_log_details or idx < self._max_debug_msgs_per_dialog: self._log(f" [{idx}] '{msg_text[:30]}...' → 填充") - # 重要信息(学号、成绩、时间、金额等) - elif self._is_important_message(m): - important_msgs.append((idx, m)) - if should_log_details or idx < self._max_debug_msgs_per_dialog: - self._log(f" [{idx}] '{msg_text[:30]}...' → 重要(场景规则)") - # 其他消息 else: - unimportant_msgs.append((idx, m)) + deletable_msgs.append((idx, m)) if should_log_details or idx < self._max_debug_msgs_per_dialog: - self._log(f" [{idx}] '{msg_text[:30]}...' → 不重要") - + self._log(f" [{idx}] '{msg_text[:30]}...' 
→ 可删") + + # important_msgs 仅用于日志统计 + important_msgs = llm_protected_msgs + # 计算删除配额 delete_target = int(original_count * proportion) if proportion > 0 and original_count > 0 and delete_target == 0: delete_target = 1 - + # 确保至少保留1条消息 max_deletable = max(0, original_count - 1) delete_target = min(delete_target, max_deletable) - - # 删除策略:优先删除填充消息,再删除不重要消息 + + # 删除策略:优先删填充消息,再按出现顺序删其余可删消息 to_delete_indices = set() - deleted_details = [] # 记录删除的消息详情 - + deleted_details = [] + # 第一步:删除填充消息 - filler_to_delete = min(len(filler_msgs), delete_target) - for i in range(filler_to_delete): - idx, msg = filler_msgs[i] + for idx, msg in filler_msgs: + if len(to_delete_indices) >= delete_target: + break to_delete_indices.add(idx) deleted_details.append(f"[{idx}] 填充: '{msg.msg[:50]}'") - - # 第二步:如果还需要删除,删除不重要消息 - remaining_quota = delete_target - len(to_delete_indices) - if remaining_quota > 0: - unimp_to_delete = min(len(unimportant_msgs), remaining_quota) - for i in range(unimp_to_delete): - idx, msg = unimportant_msgs[i] - to_delete_indices.add(idx) - deleted_details.append(f"[{idx}] 不重要: '{msg.msg[:50]}'") - - # 第三步:如果还需要删除,按重要性分数删除重要消息 - remaining_quota = delete_target - len(to_delete_indices) - if remaining_quota > 0 and important_msgs: - # 按重要性分数排序(分数低的优先删除) - imp_sorted = sorted(important_msgs, key=lambda x: self._importance_score(x[1])) - imp_to_delete = min(len(imp_sorted), remaining_quota) - for i in range(imp_to_delete): - idx, msg = imp_sorted[i] - to_delete_indices.add(idx) - score = self._importance_score(msg) - deleted_details.append(f"[{idx}] 重要(分数{score}): '{msg.msg[:50]}'") - + + # 第二步:如果还需要删除,按出现顺序删可删消息 + for idx, msg in deletable_msgs: + if len(to_delete_indices) >= delete_target: + break + to_delete_indices.add(idx) + deleted_details.append(f"[{idx}] 可删: '{msg.msg[:50]}'") + # 执行删除 kept_msgs = [] for idx, m in enumerate(msgs): if idx not in to_delete_indices: kept_msgs.append(m) - + # 确保至少保留1条 if not kept_msgs and msgs: kept_msgs = [msgs[0]] - + 
dd.context.msgs = kept_msgs deleted_count = original_count - len(kept_msgs) total_deleted_msgs += deleted_count - + # 输出删除详情 if deleted_details: self._log(f"[剪枝-删除详情] 对话 {d_idx+1} 删除了以下消息:") for detail in deleted_details: self._log(f" {detail}") - + # ========== 问答对统计(已注释) ========== # qa_info = f",问答对={len(qa_pairs)}" if qa_pairs else "" # ======================================== - + self._log( f"[剪枝-对话] 对话 {d_idx+1} 总消息={original_count} " - f"(重要={len(important_msgs)} 不重要={len(unimportant_msgs)} 填充={len(filler_msgs)}) " + f"(保护={len(important_msgs)} 填充={len(filler_msgs)} 可删={len(deletable_msgs)}) " f"删除={deleted_count} 保留={len(kept_msgs)}" ) - - result.append(dd) - - self._log(f"[剪枝-数据集] 剩余对话数={len(result)}") - # 保存日志 + stats["dialogs"].append({ + "index": d_idx + 1, + "is_related": False, + "total_messages": original_count, + "protected": len(important_msgs), + "fillers": len(filler_msgs), + "deletable": len(deletable_msgs), + "deleted": deleted_count, + "kept": len(kept_msgs), + }) + + result.append(dd) + + # 补全统计对象 + stats["total_deleted_messages"] = total_deleted_msgs + stats["remaining_dialogs"] = len(result) + + self._log(f"[剪枝-数据集] 剩余对话数={len(result)}") + self._log(f"[剪枝-数据集] 相关对话数={stats['related_count']} 不相关对话数={stats['unrelated_count']}") + self._log(f"[剪枝-数据集] 总删除 {total_deleted_msgs} 条") + + # 直接序列化统计对象,无需正则解析 try: from app.core.config import settings settings.ensure_memory_output_dir() log_output_path = settings.get_memory_output_path("pruned_terminal.json") - sanitized_logs = [self._sanitize_log_line(l) for l in self.run_logs] - payload = self._parse_logs_to_structured(sanitized_logs) with open(log_output_path, "w", encoding="utf-8") as f: - json.dump(payload, f, ensure_ascii=False, indent=2) + json.dump(stats, f, ensure_ascii=False, indent=2) except Exception as e: self._log(f"[剪枝-数据集] 保存终端输出日志失败:{e}") @@ -731,7 +907,7 @@ class SemanticPruner: if not result: print("警告: 语义剪枝后数据集为空,已回退为未剪枝数据以避免流程中断") return dialogs - + return result def _log(self, 
msg: str) -> None: @@ -743,114 +919,4 @@ class SemanticPruner: pass print(msg) - def _sanitize_log_line(self, line: str) -> str: - """移除行首的方括号标签前缀,例如 [剪枝-数据集] 或 [剪枝-对话]。""" - try: - return re.sub(r"^\[[^\]]+\]\s*", "", line) - except Exception: - return line - def _parse_logs_to_structured(self, logs: List[str]) -> dict: - """将已去前缀的日志列表解析为结构化 JSON,便于数据对接。""" - summary = { - "scene": self.config.pruning_scene, - "dialog_total": None, - "deletion_ratio": None, - "enabled": None, - "related_count": None, - "unrelated_count": None, - "related_indices": [], - "unrelated_indices": [], - "total_deleted_messages": None, - "remaining_dialogs": None, - } - dialogs = [] - - # 解析函数 - def parse_int(value: str) -> Optional[int]: - try: - return int(value) - except Exception: - return None - - def parse_float(value: str) -> Optional[float]: - try: - return float(value) - except Exception: - return None - - def parse_indices(s: str) -> List[int]: - s = s.strip() - if not s: - return [] - parts = [p.strip() for p in s.split(",") if p.strip()] - out: List[int] = [] - for p in parts: - try: - out.append(int(p)) - except Exception: - pass - return out - - # 正则 - re_header = re.compile(r"对话总数=(\d+)\s+场景=([^\s]+)\s+删除比例=([0-9.]+)\s+开关=(True|False)") - re_counts = re.compile(r"相关对话数=(\d+)\s+不相关对话数=(\d+)") - re_indices = re.compile(r"相关对话:第\[(.*?)\]段;不相关对话:第\[(.*?)\]段") - re_dialog = re.compile(r"对话\s+(\d+)\s+总消息=(\d+)\s+分配删除=(\d+)\s+实删=(\d+)\s+保留=(\d+)") - re_total_del = re.compile(r"总删除\s+(\d+)\s+条") - re_remaining = re.compile(r"剩余对话数=(\d+)") - - for line in logs: - # 第一行:总览 - m = re_header.search(line) - if m: - summary["dialog_total"] = parse_int(m.group(1)) - # 顶层 scene 依配置,这里不覆盖,但也可校验 m.group(2) - summary["deletion_ratio"] = parse_float(m.group(3)) - summary["enabled"] = True if m.group(4) == "True" else False - continue - - # 第二行:相关/不相关数量 - m = re_counts.search(line) - if m: - summary["related_count"] = parse_int(m.group(1)) - summary["unrelated_count"] = parse_int(m.group(2)) - 
continue - - # 第三行:相关/不相关索引 - m = re_indices.search(line) - if m: - summary["related_indices"] = parse_indices(m.group(1)) - summary["unrelated_indices"] = parse_indices(m.group(2)) - continue - - # 对话级统计 - m = re_dialog.search(line) - if m: - dialogs.append({ - "index": parse_int(m.group(1)), - "total_messages": parse_int(m.group(2)), - "quota_delete": parse_int(m.group(3)), - "actual_deleted": parse_int(m.group(4)), - "kept": parse_int(m.group(5)), - }) - continue - - # 全局删除总数 - m = re_total_del.search(line) - if m: - summary["total_deleted_messages"] = parse_int(m.group(1)) - continue - - # 剩余对话数 - m = re_remaining.search(line) - if m: - summary["remaining_dialogs"] = parse_int(m.group(1)) - continue - - return { - "scene": summary["scene"], - "timestamp": datetime.now().isoformat(), - "summary": {k: v for k, v in summary.items() if k != "scene"}, - "dialogs": dialogs, - } diff --git a/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/scene_config.py b/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/scene_config.py index ed9592af..8e97163e 100644 --- a/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/scene_config.py +++ b/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/scene_config.py @@ -1,66 +1,25 @@ """ -场景特定配置 - 为不同场景提供定制化的剪枝规则 +场景特定配置 - 统一填充词库 -功能: -- 场景特定的重要信息识别模式 -- 场景特定的重要性评分权重 -- 场景特定的填充词库 -- 场景特定的问答对识别规则 +重要性判断已完全交由 extracat_Pruning.jinja2 提示词 + LLM preserve_tokens 机制承担。 +本模块仅保留统一填充词库(filler_phrases),用于识别无意义寒暄/表情/口头禅。 +所有场景共用同一份词库,场景差异由 LLM 语义判断处理。 """ -from typing import Dict, List, Set, Tuple +from typing import List, Set from dataclasses import dataclass, field @dataclass class ScenePatterns: - """场景特定的识别模式""" - - # 重要信息的正则模式(优先级从高到低) - high_priority_patterns: List[Tuple[str, int]] = field(default_factory=list) # (pattern, weight) - medium_priority_patterns: List[Tuple[str, int]] = field(default_factory=list) - low_priority_patterns: List[Tuple[str, 
int]] = field(default_factory=list) - - # 填充词库(无意义对话) + """场景特定的识别模式(仅保留填充词库)""" filler_phrases: Set[str] = field(default_factory=set) - - # 问句关键词(用于识别问答对) - question_keywords: Set[str] = field(default_factory=set) - - # 决策性/承诺性关键词 - decision_keywords: Set[str] = field(default_factory=set) class SceneConfigRegistry: - """场景配置注册表 - 管理所有场景的特定配置""" - - # 基础通用模式(所有场景共享) - BASE_HIGH_PRIORITY = [ - (r"订单号|工单|申请号|编号|ID|账号|账户", 5), - (r"金额|费用|价格|¥|¥|\d+元", 5), - (r"\d{11}", 4), # 手机号 - (r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", 4), # 邮箱 - ] - - BASE_MEDIUM_PRIORITY = [ - (r"\d{4}-\d{1,2}-\d{1,2}", 3), # 日期 - (r"\d{4}年\d{1,2}月\d{1,2}日", 3), - (r"电话|手机号|微信|QQ|联系方式", 3), - (r"地址|地点|位置", 2), - (r"时间|日期|有效期|截止", 2), - (r"今天|明天|后天|昨天|前天", 3), # 相对时间(提高权重) - (r"下周|下月|下年|上周|上月|上年|本周|本月|本年", 3), - (r"今年|去年|明年", 3), - ] - - BASE_LOW_PRIORITY = [ - (r"\d{1,2}:\d{2}", 2), # 时间点 HH:MM - (r"\d{1,2}点\d{0,2}分?", 2), # 时间点 X点Y分 或 X点 - (r"上午|下午|中午|晚上|早上|傍晚|凌晨", 2), # 时段(提高权重并扩充) - (r"AM|PM|am|pm", 1), - ] - - BASE_FILLERS = { + """场景配置注册表 - 所有场景共用统一填充词库""" + + BASE_FILLERS: Set[str] = { # 基础寒暄 "你好", "您好", "在吗", "在的", "在呢", "嗯", "嗯嗯", "哦", "哦哦", "好的", "好", "行", "可以", "不可以", "谢谢", "多谢", "感谢", @@ -69,7 +28,26 @@ class SceneConfigRegistry: "哈哈", "呵呵", "哈哈哈", "嘿嘿", "嘻嘻", "hiahia", "额", "呃", "啊", "诶", "唉", "哎", "嗯哼", # 确认词 - "是的", "对", "对的", "没错", "嗯嗯", "好嘞", "收到", "明白", "了解", "知道了", + "是的", "对", "对的", "没错", "好嘞", "收到", "明白", "了解", "知道了", + # 服务类套话 + "请问", "请稍等", "稍等", "马上", "立即", + "正在查询", "正在处理", "正在为您", "帮您查一下", + "还有其他问题吗", "还需要什么帮助", "很高兴为您服务", + "感谢您的耐心等待", "抱歉让您久等了", + "已记录", "已反馈", "已转接", "已升级", + "祝您生活愉快", "欢迎下次咨询", + # 外呼套话 + "喂", "hello", "打扰了", "不好意思", + "方便接电话吗", "现在方便吗", "占用您一点时间", + "我是", "我们是", "我们公司", "我们这边", + "了解一下", "介绍一下", "简单说一下", + "考虑考虑", "想一想", "再说", "再看看", + "不需要", "不感兴趣", "没兴趣", "不用了", + "没问题", "那就这样", "再联系", "回头聊", "有需要再说", + # 教育场景套话 + "老师好", "同学们好", "上课", "下课", "起立", "坐下", + "举手", "请坐", "很好", "不错", "继续", + "下一个", "下一题", "下一位", "还有吗", "还有问题吗", # 标点和符号 "。。。", 
"...", "???", "???", "!!!", "!!!", # 表情符号 @@ -81,246 +59,8 @@ class SceneConfigRegistry: "hhh", "hhhh", "2333", "666", "gg", "ok", "OK", "okok", "emmm", "emm", "em", "mmp", "wtf", "omg", } - - BASE_QUESTION_KEYWORDS = { - "什么", "为什么", "怎么", "如何", "哪里", "哪个", "谁", "多少", "几点", "何时", "吗" - } - - BASE_DECISION_KEYWORDS = { - "必须", "一定", "务必", "需要", "要求", "规定", "应该", - "承诺", "保证", "确保", "负责", "同意", "答应" - } - + @classmethod - def get_education_config(cls) -> ScenePatterns: - """教育场景配置""" - return ScenePatterns( - high_priority_patterns=cls.BASE_HIGH_PRIORITY + [ - # 成绩相关(最高优先级) - (r"成绩|分数|得分|满分|及格|不及格", 6), - (r"GPA|绩点|学分|平均分", 6), - (r"\d+分|\d+\.?\d*分", 5), # 具体分数 - (r"排名|名次|第.{1,3}名", 5), # 支持"第三名"、"第1名"等 - - # 学籍信息 - (r"学号|学生证|教师工号|工号", 5), - (r"班级|年级|专业|院系", 4), - - # 课程相关 - (r"课程|科目|学科|必修|选修", 4), - (r"教材|课本|教科书|参考书", 4), - (r"章节|第.{1,3}章|第.{1,3}节", 3), # 支持"第三章"、"第1章"等 - - # 学科内容(新增) - (r"微积分|导数|积分|函数|极限|微分", 4), - (r"代数|几何|三角|概率|统计", 4), - (r"物理|化学|生物|历史|地理", 4), - (r"英语|语文|数学|政治|哲学", 4), - (r"定义|定理|公式|概念|原理|法则", 3), - (r"例题|解题|证明|推导|计算", 3), - ], - medium_priority_patterns=cls.BASE_MEDIUM_PRIORITY + [ - # 教学活动 - (r"作业|练习|习题|题目", 3), - (r"考试|测验|测试|考核|期中|期末", 3), - (r"上课|下课|课堂|讲课", 2), - (r"提问|回答|发言|讨论", 2), - (r"问一下|请教|咨询|询问", 2), # 新增:问询相关 - (r"理解|明白|懂|掌握|学会", 2), # 新增:学习状态 - - # 时间安排 - (r"课表|课程表|时间表", 3), - (r"第.{1,3}节课|第.{1,3}周", 2), # 支持"第三节课"、"第1周"等 - ], - low_priority_patterns=cls.BASE_LOW_PRIORITY + [ - (r"老师|教师|同学|学生", 1), - (r"教室|实验室|图书馆", 1), - ], - filler_phrases=cls.BASE_FILLERS | { - # 教育场景特有填充词(移除了"明白了"、"懂了"、"不懂"等,这些在教育场景中有意义) - "老师好", "同学们好", "上课", "下课", "起立", "坐下", - "举手", "请坐", "很好", "不错", "继续", - "下一个", "下一题", "下一位", "还有吗", "还有问题吗", - }, - question_keywords=cls.BASE_QUESTION_KEYWORDS | { - "为啥", "咋", "咋办", "怎样", "如何做", - "能不能", "可不可以", "行不行", "对不对", "是不是", - }, - decision_keywords=cls.BASE_DECISION_KEYWORDS | { - "必考", "重点", "考点", "难点", "关键", - "记住", "背诵", "掌握", "理解", "复习", - } - ) - - @classmethod - def get_online_service_config(cls) -> 
ScenePatterns: - """在线服务场景配置""" - return ScenePatterns( - high_priority_patterns=cls.BASE_HIGH_PRIORITY + [ - # 工单相关(最高优先级) - (r"工单号|工单编号|ticket|TK\d+", 6), - (r"工单状态|处理中|已解决|已关闭|待处理", 5), - (r"优先级|紧急|高优先级|P0|P1|P2", 5), - - # 产品信息 - (r"产品型号|型号|SKU|产品编号", 5), - (r"序列号|SN|设备号", 5), - (r"版本号|软件版本|固件版本", 4), - - # 问题描述 - (r"故障|错误|异常|bug|问题", 4), - (r"错误代码|故障代码|error code", 5), - (r"无法|不能|失败|报错", 3), - ], - medium_priority_patterns=cls.BASE_MEDIUM_PRIORITY + [ - # 服务相关 - (r"退款|退货|换货|补发", 4), - (r"发票|收据|凭证", 3), - (r"物流|快递|运单号", 3), - (r"保修|质保|售后", 3), - - # 时效相关 - (r"SLA|响应时间|处理时长", 4), - (r"超时|延迟|等待", 2), - ], - low_priority_patterns=cls.BASE_LOW_PRIORITY + [ - (r"客服|工程师|技术支持", 1), - (r"用户|客户|会员", 1), - ], - filler_phrases=cls.BASE_FILLERS | { - # 在线服务特有填充词 - "您好", "请问", "请稍等", "稍等", "马上", "立即", - "正在查询", "正在处理", "正在为您", "帮您查一下", - "还有其他问题吗", "还需要什么帮助", "很高兴为您服务", - "感谢您的耐心等待", "抱歉让您久等了", - "已记录", "已反馈", "已转接", "已升级", - "祝您生活愉快", "再见", "欢迎下次咨询", - }, - question_keywords=cls.BASE_QUESTION_KEYWORDS | { - "能否", "可否", "是否", "有没有", "能不能", - "怎么办", "如何处理", "怎么解决", - }, - decision_keywords=cls.BASE_DECISION_KEYWORDS | { - "立即处理", "马上解决", "尽快", "优先", - "升级", "转接", "派单", "跟进", - "补偿", "赔偿", "退款", "换货", - } - ) - - @classmethod - def get_outbound_config(cls) -> ScenePatterns: - """外呼场景配置""" - return ScenePatterns( - high_priority_patterns=cls.BASE_HIGH_PRIORITY + [ - # 意向相关(最高优先级) - (r"意向|意愿|兴趣|感兴趣", 6), - (r"A类|B类|C类|D类|高意向|低意向", 6), - (r"成交|签约|下单|购买|确认", 6), - - # 联系信息(外呼场景中更重要) - (r"预约|约定|安排|确定时间", 5), - (r"下次联系|回访|跟进", 5), - (r"方便|有空|可以|时间", 4), - - # 通话状态 - (r"接通|未接通|占线|关机|停机", 4), - (r"通话时长|通话时间", 3), - ], - medium_priority_patterns=cls.BASE_MEDIUM_PRIORITY + [ - # 客户信息 - (r"姓名|称呼|先生|女士", 3), - (r"公司|单位|职位|职务", 3), - (r"需求|要求|期望", 3), - - # 跟进状态 - (r"跟进状态|进展|进度", 3), - (r"已联系|待联系|联系中", 2), - (r"拒绝|不感兴趣|考虑|再说", 3), - ], - low_priority_patterns=cls.BASE_LOW_PRIORITY + [ - (r"销售|客户经理|业务员", 1), - (r"产品|服务|方案", 1), - ], - filler_phrases=cls.BASE_FILLERS | { - # 外呼场景特有填充词 - 
"您好", "喂", "hello", "打扰了", "不好意思", - "方便接电话吗", "现在方便吗", "占用您一点时间", - "我是", "我们是", "我们公司", "我们这边", - "了解一下", "介绍一下", "简单说一下", - "考虑考虑", "想一想", "再说", "再看看", - "不需要", "不感兴趣", "没兴趣", "不用了", - "好的", "行", "可以", "没问题", "那就这样", - "再联系", "回头聊", "有需要再说", - }, - question_keywords=cls.BASE_QUESTION_KEYWORDS | { - "有没有", "需不需要", "要不要", "考虑不考虑", - "了解吗", "知道吗", "听说过吗", - "方便吗", "有空吗", "在吗", - }, - decision_keywords=cls.BASE_DECISION_KEYWORDS | { - "确定", "决定", "选择", "购买", "下单", - "预约", "安排", "约定", "确认", - "跟进", "回访", "联系", "沟通", - } - ) - - @classmethod - def get_config(cls, scene: str, fallback_to_generic: bool = True) -> ScenePatterns: - """根据场景名称获取配置 - - Args: - scene: 场景名称 ('education', 'online_service', 'outbound' 或其他) - fallback_to_generic: 如果场景不存在,是否降级到通用配置 - - Returns: - 对应场景的配置,如果场景不存在: - - fallback_to_generic=True: 返回通用配置(仅基础规则) - - fallback_to_generic=False: 抛出异常 - """ - scene_map = { - 'education': cls.get_education_config, - 'online_service': cls.get_online_service_config, - 'outbound': cls.get_outbound_config, - } - - if scene in scene_map: - return scene_map[scene]() - - if fallback_to_generic: - # 返回通用配置(仅包含基础规则,不包含场景特定规则) - return cls.get_generic_config() - else: - raise ValueError(f"不支持的场景: {scene},支持的场景: {list(scene_map.keys())}") - - @classmethod - def get_generic_config(cls) -> ScenePatterns: - """通用场景配置 - 仅包含基础规则,适用于未定义的场景 - - 这是一个保守的配置,只使用最通用的规则,避免误删重要信息 - """ - return ScenePatterns( - high_priority_patterns=cls.BASE_HIGH_PRIORITY, - medium_priority_patterns=cls.BASE_MEDIUM_PRIORITY, - low_priority_patterns=cls.BASE_LOW_PRIORITY, - filler_phrases=cls.BASE_FILLERS, - question_keywords=cls.BASE_QUESTION_KEYWORDS, - decision_keywords=cls.BASE_DECISION_KEYWORDS - ) - - @classmethod - def get_all_scenes(cls) -> List[str]: - """获取所有预定义场景的列表""" - return ['education', 'online_service', 'outbound'] - - @classmethod - def is_scene_supported(cls, scene: str) -> bool: - """检查场景是否有专门的配置支持 - - Args: - scene: 场景名称 - - Returns: - True: 有专门配置 - False: 将使用通用配置 - """ - return 
scene in cls.get_all_scenes() + def get_config(cls, scene: str = "") -> ScenePatterns: + """所有场景统一返回同一份填充词库""" + return ScenePatterns(filler_phrases=cls.BASE_FILLERS) diff --git a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py index 1242e4e6..00d06f72 100644 --- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py +++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py @@ -384,6 +384,14 @@ class ExtractionOrchestrator: logger.info(f"陈述句提取完成,共提取 {len(all_statements)} 条陈述句") + # 试运行模式下,所有分块提取完成后发送完成事件 + if self.progress_callback and self.is_pilot_run: + await self.progress_callback( + "knowledge_extraction_complete", + f"陈述句提取完成,共提取 {len(all_statements)} 条", + {"total_statements": len(all_statements), "total_chunks": total_chunks} + ) + return dialog_data_list async def _extract_triplets( diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py index 024e320a..147ed777 100644 --- a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/triplet_extraction.py @@ -5,7 +5,7 @@ from typing import List, Dict, Optional from app.core.logging_config import get_memory_logger from app.core.memory.llm_tools.openai_client import OpenAIClient from app.core.memory.utils.prompt.prompt_utils import render_triplet_extraction_prompt -from app.core.memory.utils.data.ontology import PREDICATE_DEFINITIONS, Predicate # 引入枚举 Predicate 白名单过滤 +from app.core.memory.utils.data.ontology import PREDICATE_DEFINITIONS, Predicate # 引入枚举 Predicate 白名单过滤 from app.core.memory.models.triplet_models import TripletExtractionResponse from 
app.core.memory.models.message_models import DialogData, Statement from app.core.memory.models.ontology_extraction_models import OntologyTypeList @@ -14,15 +14,15 @@ from app.core.memory.utils.log.logging_utils import prompt_logger logger = get_memory_logger(__name__) - class TripletExtractor: """Extracts knowledge triplets and entities from statements using LLM""" def __init__( - self, - llm_client: OpenAIClient, - ontology_types: Optional[OntologyTypeList] = None, - language: str = "zh"): + self, + llm_client: OpenAIClient, + ontology_types: Optional[OntologyTypeList] = None, + language: str = "zh" + ): """Initialize the TripletExtractor with an LLM client Args: @@ -65,7 +65,8 @@ class TripletExtractor: # Create messages for LLM messages = [ - {"role": "system", "content": "You are an expert at extracting knowledge triplets and entities from text. Follow the provided instructions carefully and return valid JSON."}, + {"role": "system", + "content": "You are an expert at extracting knowledge triplets and entities from text. 
Follow the provided instructions carefully and return valid JSON."}, {"role": "user", "content": prompt_content} ] @@ -116,7 +117,8 @@ class TripletExtractor: logger.error(f"Error processing statement: {e}", exc_info=True) return TripletExtractionResponse(triplets=[], entities=[]) - async def extract_triplets_from_statements(self, dialog_data: DialogData, limit_chunks: int = None) -> Dict[str, TripletExtractionResponse]: + async def extract_triplets_from_statements(self, dialog_data: DialogData, limit_chunks: int = None) -> Dict[ + str, TripletExtractionResponse]: """Extract triplets and entities from statements Args: diff --git a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py index 09c7ef3d..b2a594c6 100644 --- a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py +++ b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py @@ -1,11 +1,11 @@ """ -自我反思引擎实现 +Self-Reflection Engine Implementation -该模块实现了记忆系统的自我反思功能,包括: -1. 基于时间的反思 - 根据时间周期触发反思 -2. 基于事实的反思 - 检测记忆冲突并解决 -3. 综合反思 - 整合多种反思策略 -4. 反思结果应用 - 更新记忆库 +This module implements the self-reflection functionality of the memory system, including: +1. Time-based reflection - Triggers reflection based on time cycles +2. Fact-based reflection - Detects and resolves memory conflicts +3. Comprehensive reflection - Integrates multiple reflection strategies +4. 
Reflection result application - Updates memory database """ import asyncio @@ -38,7 +38,7 @@ from app.schemas.memory_storage_schema import ( ) from pydantic import BaseModel -# 配置日志 +# Configure logging _root_logger = logging.getLogger() if not _root_logger.handlers: logging.basicConfig( @@ -49,35 +49,62 @@ else: _root_logger.setLevel(logging.INFO) class TranslationResponse(BaseModel): - """翻译响应模型""" + """Translation response model for language conversion""" data: str + class ReflectionRange(str, Enum): - """反思范围枚举""" - PARTIAL = "partial" # 从检索结果中反思 - ALL = "all" # 从整个数据库中反思 + """ + Reflection range enumeration + + Defines the scope of data to be included in reflection operations. + """ + PARTIAL = "partial" # Reflect from retrieval results + ALL = "all" # Reflect from entire database class ReflectionBaseline(str, Enum): - """反思基线枚举""" - TIME = "TIME" # 基于时间的反思 - FACT = "FACT" # 基于事实的反思 - HYBRID = "HYBRID" # 混合反思 + """ + Reflection baseline enumeration + + Defines the strategy or approach used for reflection operations. + """ + TIME = "TIME" # Time-based reflection + FACT = "FACT" # Fact-based reflection + HYBRID = "HYBRID" # Hybrid reflection combining multiple strategies class ReflectionConfig(BaseModel): - """反思引擎配置""" + """ + Reflection engine configuration + + Defines all configuration parameters for the reflection engine including + operation modes, model settings, and evaluation criteria. 
+ + Attributes: + enabled: Whether reflection engine is enabled + iteration_period: Reflection cycle period (e.g., "3" hours) + reflexion_range: Scope of reflection (PARTIAL or ALL) + baseline: Reflection strategy (TIME, FACT, or HYBRID) + model_id: LLM model identifier for reflection operations + end_user_id: User identifier for scoped operations + output_example: Example output format for guidance + memory_verify: Enable memory verification checks + quality_assessment: Enable quality assessment evaluation + violation_handling_strategy: Strategy for handling violations + language_type: Language type for output ("zh" or "en") + """ enabled: bool = False - iteration_period: str = "3" # 反思周期 + iteration_period: str = "3" # Reflection cycle period reflexion_range: ReflectionRange = ReflectionRange.PARTIAL baseline: ReflectionBaseline = ReflectionBaseline.TIME - model_id: Optional[str] = None # 模型ID + model_id: Optional[str] = None # Model ID end_user_id: Optional[str] = None - output_example: Optional[str] = None # 输出示例 + output_example: Optional[str] = None # Output example - # 评估相关字段 - memory_verify: bool = True # 记忆验证 - quality_assessment: bool = True # 质量评估 - violation_handling_strategy: str = "warn" # 违规处理策略 + # Evaluation related fields + memory_verify: bool = True # Memory verification + quality_assessment: bool = True # Quality assessment + violation_handling_strategy: str = "warn" # Violation handling strategy language_type: str = "zh" class Config: @@ -85,7 +112,21 @@ class ReflectionConfig(BaseModel): class ReflectionResult(BaseModel): - """反思结果""" + """ + Reflection operation result + + Contains comprehensive information about the outcome of a reflection operation + including success status, metrics, and execution details. 
+ + Attributes: + success: Whether the reflection operation succeeded + message: Descriptive message about the operation result + conflicts_found: Number of conflicts detected during reflection + conflicts_resolved: Number of conflicts successfully resolved + memories_updated: Number of memory entries updated in database + execution_time: Total time taken for the reflection operation + details: Additional details about the operation (optional) + """ success: bool message: str conflicts_found: int = 0 @@ -97,9 +138,22 @@ class ReflectionResult(BaseModel): class ReflectionEngine: """ - 自我反思引擎 - - 负责执行记忆系统的自我反思,包括冲突检测、冲突解决和记忆更新。 + Self-Reflection Engine + + Responsible for executing memory system self-reflection operations including + conflict detection, conflict resolution, and memory updates. Supports multiple + reflection strategies and provides comprehensive result tracking. + + The engine can operate in different modes: + - Time-based: Reflects on memories within specific time periods + - Fact-based: Detects and resolves factual conflicts in memories + - Hybrid: Combines multiple reflection strategies + + Attributes: + config: Reflection engine configuration + neo4j_connector: Neo4j database connector + llm_client: Language model client for analysis + Various function handlers for data processing and prompt rendering """ def __init__( @@ -115,18 +169,21 @@ class ReflectionEngine: update_query: Optional[str] = None ): """ - 初始化反思引擎 + Initialize reflection engine + + Sets up the reflection engine with configuration and optional dependencies. + Uses lazy initialization to avoid circular imports and optimize startup time. 
Args: - config: 反思引擎配置 - neo4j_connector: Neo4j 连接器(可选) - llm_client: LLM 客户端(可选) - get_data_func: 获取数据的函数(可选) - render_evaluate_prompt_func: 渲染评估提示词的函数(可选) - render_reflexion_prompt_func: 渲染反思提示词的函数(可选) - conflict_schema: 冲突结果 Schema(可选) - reflexion_schema: 反思结果 Schema(可选) - update_query: 更新查询语句(可选) + config: Reflection engine configuration object + neo4j_connector: Neo4j connector instance (optional, will be created if not provided) + llm_client: LLM client instance (optional, will be created if not provided) + get_data_func: Function for retrieving data (optional, uses default if not provided) + render_evaluate_prompt_func: Function for rendering evaluation prompts (optional) + render_reflexion_prompt_func: Function for rendering reflection prompts (optional) + conflict_schema: Schema for conflict result validation (optional) + reflexion_schema: Schema for reflection result validation (optional) + update_query: Query string for database updates (optional) """ self.config = config self.neo4j_connector = neo4j_connector @@ -137,14 +194,20 @@ class ReflectionEngine: self.conflict_schema = conflict_schema self.reflexion_schema = reflexion_schema self.update_query = update_query - self._semaphore = asyncio.Semaphore(5) # 默认并发数为5 + self._semaphore = asyncio.Semaphore(5) # Default concurrency limit of 5 - # 延迟导入以避免循环依赖 + # Lazy import to avoid circular dependencies self._lazy_init_done = False def _lazy_init(self): - """延迟初始化,避免循环导入""" + """ + Lazy initialization to avoid circular imports + + Initializes dependencies only when needed, preventing circular import issues + and optimizing startup performance. Sets up default implementations for + any components not provided during construction. 
+ """ if self._lazy_init_done: return @@ -158,7 +221,7 @@ class ReflectionEngine: factory = MemoryClientFactory(db) self.llm_client = factory.get_llm_client(self.config.model_id) elif isinstance(self.llm_client, str): - # 如果 llm_client 是字符串(model_id),则用它初始化客户端 + # If llm_client is a string (model_id), use it to initialize the client from app.core.memory.utils.llm.llm_utils import MemoryClientFactory from app.db import get_db_context from app.services.memory_config_service import MemoryConfigService @@ -172,10 +235,10 @@ class ReflectionEngine: model_config = config_service.get_model_config(model_id) extra_params={ - "temperature": 0.2, # 降低温度提高响应速度和一致性 - "max_tokens": 600, # 限制最大token数 - "top_p": 0.8, # 优化采样参数 - "stream": False, # 确保非流式输出以获得最快响应 + "temperature": 0.2, # Lower temperature for faster response and consistency + "max_tokens": 600, # Limit maximum token count + "top_p": 0.8, # Optimize sampling parameters + "stream": False, # Ensure non-streaming output for fastest response } self.llm_client = OpenAIClient(RedBearModelConfig( @@ -191,7 +254,7 @@ class ReflectionEngine: if self.get_data_func is None: self.get_data_func = get_data - # 导入get_data_statement函数 + # Import get_data_statement function if not hasattr(self, 'get_data_statement'): self.get_data_statement = get_data_statement @@ -223,13 +286,20 @@ class ReflectionEngine: async def execute_reflection(self, host_id) -> ReflectionResult: """ - 执行完整的反思流程 + Execute complete reflection workflow + + Performs the full reflection process including data retrieval, conflict detection, + conflict resolution, and memory updates. This is the main entry point for + reflection operations. 
+ Args: - host_id: 主机ID + host_id: Host identifier for scoping reflection operations + Returns: - ReflectionResult: 反思结果 + ReflectionResult: Comprehensive result of the reflection operation including + success status, conflict metrics, and execution time """ - # 延迟初始化 + # Lazy initialization self._lazy_init() if not self.config.enabled: @@ -243,7 +313,7 @@ class ReflectionEngine: print(self.config.baseline, self.config.memory_verify, self.config.quality_assessment) try: - # 1. 获取反思数据 + # 1. Get reflection data reflexion_data, statement_databasets = await self._get_reflexion_data(host_id) if not reflexion_data: return ReflectionResult( @@ -252,7 +322,7 @@ class ReflectionEngine: execution_time=asyncio.get_event_loop().time() - start_time ) - # 2. 检测冲突(基于事实的反思) + # 2. Detect conflicts (fact-based reflection) conflict_data = await self._detect_conflicts(reflexion_data, statement_databasets) conflict_list=[] for i in conflict_data: @@ -261,7 +331,7 @@ class ReflectionEngine: conflicts_found=0 - # 3. 解决冲突 + # 3. Resolve conflicts solved_data = await self._resolve_conflicts(conflict_list, statement_databasets) if not solved_data: @@ -276,7 +346,7 @@ class ReflectionEngine: logging.info(f"解决了 {conflicts_resolved} 个冲突") - # 4. 应用反思结果(更新记忆库) + # 4. Apply reflection results (update memory database) memories_updated=await self._apply_reflection_results(solved_data) execution_time = asyncio.get_event_loop().time() - start_time @@ -302,7 +372,19 @@ class ReflectionEngine: ) async def Translate(self, text): - # 翻译中文为英文 + """ + Translate Chinese text to English + + Uses the configured LLM to translate Chinese text to English with structured output. + Provides consistent translation format for reflection results. 
+ + Args: + text: Chinese text to be translated + + Returns: + str: Translated English text + """ + # Translate Chinese to English translation_messages = [ { "role": "user", @@ -316,6 +398,19 @@ class ReflectionEngine: ) return response.data async def extract_translation(self,data): + """ + Extract and translate reflection data to English + + Processes reflection data structure and translates all Chinese content to English. + Handles nested data structures including memory verifications, quality assessments, + and reflection data while preserving the original structure. + + Args: + data: Dictionary containing reflection data with Chinese content + + Returns: + dict: Translated data structure with English content + """ end_datas={} end_datas['source_data']=await self.Translate(data['source_data']) quality_assessments = [] @@ -350,6 +445,18 @@ class ReflectionEngine: return end_datas async def reflection_run(self): + """ + Execute reflection workflow with comprehensive data processing + + Performs a complete reflection operation including conflict detection, resolution, + and result formatting. Supports both Chinese and English output based on + configuration settings. + + Returns: + dict: Comprehensive reflection results including source data, memory verifications, + quality assessments, and reflection data. Results are translated to English + if language_type is set to 'en'. + """ self._lazy_init() start_time = time.time() memory_verifies_flag = self.config.memory_verify @@ -367,7 +474,7 @@ class ReflectionEngine: result_data['source_data'] = "我是 2023 年春天去北京工作的,后来基本一直都在北京上班,也没怎么换过城市。不过后来公司调整,2024 年上半年我被调到上海待了差不多半年,那段时间每天都是在上海办公室打卡。当时入职资料用的还是我之前的身份信息,身份证号是 11010119950308123X,银行卡是 6222023847595898,这些一直没变。对了,其实我 从 2023 年开始就一直在北京生活,从来没有长期离开过北京,上海那段更多算是远程配合" # 2. 
检测冲突(基于事实的反思) conflict_data = await self._detect_conflicts(databasets, source_data) - # 遍历数据提取字段 + # Traverse data to extract fields quality_assessments = [] memory_verifies = [] for item in conflict_data: @@ -375,9 +482,9 @@ class ReflectionEngine: memory_verifies.append(item['memory_verify']) result_data['memory_verifies'] = memory_verifies result_data['quality_assessments'] = quality_assessments - conflicts_found = 0 # 初始化为整数0而不是空字符串 + conflicts_found = 0 # Initialize as integer 0 instead of empty string REMOVE_KEYS = {"created_at", "expired_at","relationship","predicate","statement_id","id","statement_id","relationship_statement_id"} - # Clearn conflict_data,And memory_verify和quality_assessment + # Clean conflict_data, and memory_verify and quality_assessment cleaned_conflict_data = [] for item in conflict_data: cleaned_item = { @@ -389,7 +496,7 @@ class ReflectionEngine: for item in conflict_data: cleaned_data = [] for row in item.get("data", []): - # 删除 created_at / expired_at + # Remove created_at / expired_at cleaned_row = { k: v for k, v in row.items() @@ -402,7 +509,7 @@ class ReflectionEngine: } cleaned_conflict_data_.append(cleaned_item) print(cleaned_conflict_data_) - # 3. 解决冲突 + # 3. Resolve conflicts solved_data = await self._resolve_conflicts(cleaned_conflict_data_, source_data) if not solved_data: return ReflectionResult( @@ -413,7 +520,7 @@ class ReflectionEngine: ) reflexion_data = [] - # 遍历数据提取reflexion字段 + # Traverse data to extract reflexion fields for item in solved_data: if 'results' in item: for result in item['results']: @@ -431,15 +538,24 @@ class ReflectionEngine: async def extract_fields_from_json(self): - """从example.json中提取source_data和databasets字段""" + """ + Extract source_data and databasets fields from example.json + + Reads reflection example data from the example.json file and extracts + the source data and database statements for testing and demonstration purposes. 
+ + Returns: + tuple: (source_data, databasets) extracted from the example file + Returns empty lists if file reading fails + """ prompt_dir = os.path.join(os.path.dirname(__file__), "example") try: - # 读取JSON文件 + # Read JSON file with open(prompt_dir + '/example.json', 'r', encoding='utf-8') as f: data = json.loads(f.read()) - # 提取memory_verify下的字段 + # Extract fields under memory_verify memory_verify = data.get("memory_verify", {}) source_data = memory_verify.get("source_data", []) databasets = memory_verify.get("databasets", []) @@ -451,15 +567,17 @@ class ReflectionEngine: async def _get_reflexion_data(self, host_id: uuid.UUID) -> List[Any]: """ - 获取反思数据 - - 根据配置的反思范围获取需要反思的记忆数据。 + Get reflection data from database + + Retrieves memory data for reflection based on the configured reflection range. + Supports both partial (from retrieval results) and full (entire database) modes. Args: - host_id: 主机ID + host_id: Host UUID identifier for scoping data retrieval Returns: - List[Any]: 反思数据列表 + tuple: (reflexion_data, statement_data) containing memory data for reflection + Returns empty lists if query fails """ print("=== 获取反思数据 ===") @@ -484,26 +602,29 @@ class ReflectionEngine: async def _detect_conflicts(self, data: List[Any], statement_databasets: List[Any]) -> List[Any]: """ - 检测冲突(基于事实的反思) - - 使用 LLM 分析记忆数据,检测其中的冲突。 + Detect conflicts (fact-based reflection) + + Uses LLM to analyze memory data and detect conflicts within the memories. + Performs comprehensive conflict detection including memory verification and + quality assessment based on configuration settings. 
Args: - data: 待检测的记忆数据 + data: Memory data to be analyzed for conflicts + statement_databasets: Statement database records for context Returns: - List[Any]: 冲突记忆列表 + List[Any]: List of detected conflicts with detailed analysis """ if not data: return [] - # 数据预处理:如果数据量太少,直接返回无冲突 + # Data preprocessing: if data is too small, return no conflicts directly if len(data) < 2: logging.info("数据量不足,无需检测冲突") return [] - # 使用转换后的数据 - # print("转换后的数据:", data[:2] if len(data) > 2 else data) # 只打印前2条避免日志过长 + # Use converted data + # print("Converted data:", data[:2] if len(data) > 2 else data) # Only print first 2 to avoid long logs memory_verify = self.config.memory_verify logging.info("====== 冲突检测开始 ======") @@ -512,7 +633,7 @@ class ReflectionEngine: language_type=self.config.language_type try: - # 渲染冲突检测提示词 + # Render conflict detection prompt rendered_prompt = await self.render_evaluate_prompt_func( data, self.conflict_schema, @@ -526,7 +647,7 @@ class ReflectionEngine: messages = [{"role": "user", "content": rendered_prompt}] logging.info(f"提示词长度: {len(rendered_prompt)}") - # 调用 LLM 进行冲突检测 + # Call LLM for conflict detection response = await self.llm_client.response_structured( messages, self.conflict_schema @@ -539,7 +660,7 @@ class ReflectionEngine: logging.error("LLM 冲突检测输出解析失败") return [] - # 标准化返回格式 + # Standardize return format if isinstance(response, BaseModel): return [response.model_dump()] elif hasattr(response, 'dict'): @@ -553,15 +674,17 @@ class ReflectionEngine: async def _resolve_conflicts(self, conflicts: List[Any], statement_databasets: List[Any]) -> List[Any]: """ - 解决冲突 - - 使用 LLM 对检测到的冲突进行反思和解决。 + Resolve detected conflicts + + Uses LLM to perform reflection and resolution on detected conflicts. + Processes conflicts in parallel for efficiency while respecting concurrency limits. 
Args: - conflicts: 冲突列表 + conflicts: List of conflicts to be resolved + statement_databasets: Statement database records for context Returns: - List[Any]: 解决方案列表 + List[Any]: List of resolution solutions with reflection results """ if not conflicts: return [] @@ -570,12 +693,12 @@ class ReflectionEngine: baseline = self.config.baseline memory_verify = self.config.memory_verify - # 并行处理每个冲突 + # Process each conflict in parallel async def _resolve_one(conflict: Any) -> Optional[Dict[str, Any]]: - """解决单个冲突""" + """Resolve a single conflict""" async with self._semaphore: try: - # 渲染反思提示词 + # Render reflection prompt rendered_prompt = await self.render_reflexion_prompt_func( [conflict], self.reflexion_schema, @@ -587,7 +710,7 @@ class ReflectionEngine: messages = [{"role": "user", "content": rendered_prompt}] - # 调用 LLM 进行反思 + # Call LLM for reflection response = await self.llm_client.response_structured( messages, self.reflexion_schema @@ -596,7 +719,7 @@ class ReflectionEngine: if not response: return None - # 标准化返回格式 + # Standardize return format if isinstance(response, BaseModel): return response.model_dump() elif hasattr(response, 'dict'): @@ -610,11 +733,11 @@ class ReflectionEngine: logging.warning(f"解决单个冲突失败: {e}") return None - # 并发执行所有冲突解决任务 + # Execute all conflict resolution tasks concurrently tasks = [_resolve_one(conflict) for conflict in conflicts] results = await asyncio.gather(*tasks, return_exceptions=False) - # 过滤掉失败的结果 + # Filter out failed results solved = [r for r in results if r is not None] logging.info(f"成功解决 {len(solved)}/{len(conflicts)} 个冲突") @@ -626,15 +749,16 @@ class ReflectionEngine: solved_data: List[Dict[str, Any]] ) -> int: """ - 应用反思结果(更新记忆库) - - 将解决冲突后的记忆更新到 Neo4j 数据库中。 + Apply reflection results (update memory database) + + Updates the Neo4j database with resolved conflicts and reflection results. + Processes the solved data and applies changes to the memory storage system. 
Args: - solved_data: 解决方案列表 + solved_data: List of resolved conflict solutions with reflection data Returns: - int: 成功更新的记忆数量 + int: Number of successfully updated memory entries """ changes = extract_and_process_changes(solved_data) success_count = await neo4j_data(changes) @@ -642,80 +766,86 @@ class ReflectionEngine: - # 基于时间的反思方法 + # Time-based reflection methods async def time_based_reflection( self, host_id: uuid.UUID, time_period: Optional[str] = None ) -> ReflectionResult: """ - 基于时间的反思 - - 根据时间周期触发反思,检查在指定时间段内的记忆。 + Time-based reflection + + Triggers reflection based on time cycles, checking memories within + specified time periods. Uses the configured iteration period if + no specific time period is provided. Args: - host_id: 主机ID - time_period: 时间周期(如"三小时"),如果不提供则使用配置中的值 + host_id: Host UUID identifier for scoping reflection + time_period: Time period (e.g., "three hours"), uses config value if not provided Returns: - ReflectionResult: 反思结果 + ReflectionResult: Comprehensive reflection operation result """ period = time_period or self.config.iteration_period logging.info(f"执行基于时间的反思,周期: {period}") - # 使用标准反思流程 + # Use standard reflection workflow return await self.execute_reflection(host_id) - # 基于事实的反思方法 + # Fact-based reflection methods async def fact_based_reflection( self, host_id: uuid.UUID ) -> ReflectionResult: """ - 基于事实的反思 - - 检测记忆中的事实冲突并解决。 + Fact-based reflection + + Detects and resolves factual conflicts within memories. Analyzes + memory data for inconsistencies and contradictions that need resolution. 
Args: - host_id: 主机ID + host_id: Host UUID identifier for scoping reflection Returns: - ReflectionResult: 反思结果 + ReflectionResult: Comprehensive reflection operation result """ logging.info("执行基于事实的反思") - # 使用标准反思流程 + # Use standard reflection workflow return await self.execute_reflection(host_id) - # 综合反思方法 + # Comprehensive reflection methods async def comprehensive_reflection( self, host_id: uuid.UUID ) -> ReflectionResult: """ - 综合反思 - - 整合基于时间和基于事实的反思策略。 + Comprehensive reflection + + Integrates time-based and fact-based reflection strategies based on + the configured baseline. Supports hybrid approaches that combine + multiple reflection methodologies. Args: - host_id: 主机ID + host_id: Host UUID identifier for scoping reflection Returns: - ReflectionResult: 反思结果 + ReflectionResult: Comprehensive reflection operation result combining + multiple strategies if using hybrid baseline """ logging.info("执行综合反思") - # 根据配置的基线选择反思策略 + # Choose reflection strategy based on configured baseline if self.config.baseline == ReflectionBaseline.TIME: return await self.time_based_reflection(host_id) elif self.config.baseline == ReflectionBaseline.FACT: return await self.fact_based_reflection(host_id) elif self.config.baseline == ReflectionBaseline.HYBRID: - # 混合策略:先执行基于时间的反思,再执行基于事实的反思 + # Hybrid strategy: execute time-based reflection first, then fact-based reflection time_result = await self.time_based_reflection(host_id) fact_result = await self.fact_based_reflection(host_id) - # 合并结果 + # Merge results return ReflectionResult( success=time_result.success and fact_result.success, message=f"时间反思: {time_result.message}; 事实反思: {fact_result.message}", diff --git a/api/app/core/memory/utils/data/text_utils.py b/api/app/core/memory/utils/data/text_utils.py index 133990f7..d0b10f97 100644 --- a/api/app/core/memory/utils/data/text_utils.py +++ b/api/app/core/memory/utils/data/text_utils.py @@ -2,9 +2,17 @@ import json def escape_lucene_query(query: str) -> str: - """Escape Lucene 
special characters in a free-text query. - - This prevents ParseException when using Neo4j full-text procedures. + """ + Escape special characters in Lucene queries + + Prevents ParseException when using Neo4j full-text search procedures. + Escapes all Lucene reserved special characters and operators. + + Args: + query: Original query string + + Returns: + str: Escaped query string safe for Lucene search """ if query is None: return "" @@ -22,11 +30,21 @@ def escape_lucene_query(query: str) -> str: return s def extract_plain_query(query_input: str) -> str: - """Extract clean, plain-text query from various input forms. - + """ + Extract clean plain-text query from various input forms + + Handles the following cases: - Strips surrounding quotes and whitespace - If input looks like JSON, prefers the 'original' field - - Fallbacks to the raw string when parsing fails + - Falls back to raw string when parsing fails + - Handles dictionary-type input + - Best-effort unescape common escape characters + + Args: + query_input: Query input in various forms (string, dict, etc.) + + Returns: + str: Extracted plain-text query string """ if query_input is None: return "" diff --git a/api/app/core/memory/utils/data/time_utils.py b/api/app/core/memory/utils/data/time_utils.py index c6791dfc..763c642c 100644 --- a/api/app/core/memory/utils/data/time_utils.py +++ b/api/app/core/memory/utils/data/time_utils.py @@ -4,7 +4,13 @@ from datetime import datetime def validate_date_format(date_str: str) -> bool: """ - Validate if the date string is in the format YYYY-MM-DD. 
+ Validate that a date string matches YYYY-MM-DD (one- or two-digit month/day accepted; format check only) + + Args: + date_str: Date string to validate + + Returns: + bool: True if format is correct, False otherwise """ pattern = r"^\d{4}-\d{1,2}-\d{1,2}$" return bool(re.match(pattern, date_str)) @@ -41,7 +47,20 @@ def normalize_date(date_str: str) -> str: def preprocess_date_string(date_str: str) -> str: - """预处理日期字符串,处理特殊格式""" + """ + Preprocess a date string and handle special formats + + Handles the following special formats: + - Year immediately followed by the month with no separator (e.g. "20259/28") + - Pure-digit formats with no separators (e.g. "20251028", "251028") + - Mixed separators, unified to "-" + + Args: + date_str: Original date string + + Returns: + str: Preprocessed date string in "YYYY-MM-DD" or "YYYY-MM" format + """ # 处理类似 "20259/28" 的格式(年份后直接跟月份没有分隔) match = re.match(r'^(\d{4,5})[/\.\-_]?(\d{1,2})[/\.\-_]?(\d{1,2})$', date_str) @@ -78,7 +97,23 @@ def preprocess_date_string(date_str: str) -> str: def fallback_parse(date_str: str) -> str: - """备选解析方案""" + """ + Fallback date parsing + + When smart parsing fails, tries to parse with a set of predefined date formats. + Supports many common date formats, including: + - YYYY-MM-DD, YYYY/MM/DD, YYYY.MM.DD + - YYYYMMDD, YYMMDD + - MM-DD-YYYY, MM/DD/YYYY, MM.DD.YYYY + - DD-MM-YYYY, DD/MM/YYYY, DD.MM.YYYY + - YYYY-MM, YYYY/MM, YYYY.MM + + Args: + date_str: Date string to parse + + Returns: + str: Normalized date string (YYYY-MM-DD format); the original string is returned if parsing fails + """ # 尝试常见的日期格式[citation:4][citation:5] formats_to_try = [ diff --git a/api/app/core/memory/utils/prompt/prompts/extracat_Pruning.jinja2 b/api/app/core/memory/utils/prompt/prompts/extracat_Pruning.jinja2 index 6b620df9..3061e663 100644 --- a/api/app/core/memory/utils/prompt/prompts/extracat_Pruning.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/extracat_Pruning.jinja2 @@ -1,6 +1,7 @@ {# 对话级抽取与相关性判定模板(用于剪枝加速) - 输入:pruning_scene, is_builtin_scene, ontology_classes, dialog_text, language + 输入:pruning_scene, ontology_class_infos, dialog_text, language + - ontology_class_infos: List[{class_name: str, class_description: str}] 输出:严格 JSON(不要包含任何多余文本),字段: - is_related: bool,是否与所选场景相关 - times: [string],从对话中抽取的时间相关文本(日期、时间、时间段、有效期等) @@ -9,64 +10,103 @@ - contacts: [string],联系方式(电话/手机号/邮箱/微信/QQ等) - addresses: 
[string],地址/地点相关文本 - keywords: [string],其它有助于保留的重要关键词(与场景强相关的术语) + - preserve_keywords: [string],必须保留的情绪/兴趣/爱好/个人偏好相关词或短语片段 要求: - 必须只输出上述 JSON,且键名一致;不得输出解释、前后缀;不得包含注释。 - - times/ids/amounts/contacts/addresses/keywords 仅抽取原文片段或规范化后的简单字符串。 + - times/ids/amounts/contacts/addresses/keywords/preserve_keywords 仅抽取原文片段或规范化后的简单字符串。 - 仅输出上述键;避免多余解释或字段。 #} -{# ── 内置场景的固定说明 ── #} -{% set builtin_scene_instructions = { - 'education': { - 'zh': '教育场景:教学、课程、考试、作业、老师/学生互动、学习资源、学校管理等。', - 'en': 'Education Scenario: Teaching, courses, exams, homework, teacher/student interaction, learning resources, school management, etc.' - }, - 'online_service': { - 'zh': '在线客服场景:客户咨询、问题排查、服务工单、售后支持、订单/退款、工单升级等。', - 'en': 'Online Service Scenario: Customer inquiries, troubleshooting, service tickets, after-sales support, orders/refunds, ticket escalation, etc.' - }, - 'outbound': { - 'zh': '外呼场景:电话外呼、邀约、调研问卷、线索跟进、对话脚本、回访记录等。', - 'en': 'Outbound Scenario: Outbound calls, invitations, survey questionnaires, lead follow-up, call scripts, follow-up records, etc.' - } -} %} - -{# ── 确定最终使用的场景说明 ── #} -{% if is_builtin_scene %} - {# 内置专门场景:使用固定说明 #} - {% set scene_key = pruning_scene %} - {% if scene_key not in builtin_scene_instructions %}{% set scene_key = 'education' %}{% endif %} - {% set instruction = builtin_scene_instructions[scene_key][language] if language in ['zh', 'en'] else builtin_scene_instructions[scene_key]['zh'] %} - {% set custom_types_str = '' %} -{% else %} - {# 自定义场景:使用场景名称 + 本体类型列表构建说明 #} - {% if ontology_classes and ontology_classes | length > 0 %} - {% if language == 'en' %} - {% set custom_types_str = ontology_classes | join(', ') %} - {% set instruction = 'Custom scene "' ~ pruning_scene ~ '": The dialogue is related to this scene if it involves any of the following entity types: ' ~ custom_types_str ~ '.' 
%} - {% else %} - {% set custom_types_str = ontology_classes | join('、') %} - {% set instruction = '自定义场景「' ~ pruning_scene ~ '」:对话涉及以下任意实体类型时视为相关:' ~ custom_types_str ~ '。' %} - {% endif %} +{# ── 确定场景说明 ── #} +{% if ontology_class_infos and ontology_class_infos | length > 0 %} + {% if language == 'en' %} + {% set instruction = 'Scene "' ~ pruning_scene ~ '": The dialogue is relevant if it involves any of the following entity types.' %} {% else %} - {# 无本体类型时退化为通用说明 #} - {% if language == 'en' %} - {% set instruction = 'Custom scene "' ~ pruning_scene ~ '": Determine whether the dialogue content is relevant to this scene based on overall context.' %} - {% else %} - {% set instruction = '自定义场景「' ~ pruning_scene ~ '」:根据对话整体内容判断是否与该场景相关。' %} - {% endif %} - {% set custom_types_str = '' %} + {% set instruction = '场景「' ~ pruning_scene ~ '」:对话涉及以下任意实体类型时视为相关。' %} + {% endif %} +{% else %} + {% if language == 'en' %} + {% set instruction = 'Scene "' ~ pruning_scene ~ '": Determine whether the dialogue content is relevant to this scene based on overall context.' %} + {% else %} + {% set instruction = '场景「' ~ pruning_scene ~ '」:根据对话整体内容判断是否与该场景相关。' %} {% endif %} {% endif %} {% if language == "zh" %} -请在下方对话全文基础上,按该场景进行一次性抽取并判定相关性: +你是一个对话内容分析助手。请对下方对话全文进行一次性分析,完成两项任务: +1. 判断对话是否与指定场景相关; +2. 
从对话中抽取所有需要保留的重要信息片段。 + 场景说明:{{ instruction }} -{% if not is_builtin_scene and custom_types_str %} -重要提示:只要对话中出现与上述实体类型({{ custom_types_str }})相关的内容,即判定为相关(is_related=true)。 + +{% if ontology_class_infos and ontology_class_infos | length > 0 %} +【本场景实体类型定义】 +以下实体类型定义了本场景中哪些内容是重要的。 +凡是与以下任意类型相关的内容,都必须保留,并将关键词/短语提取到 keywords 字段: + +{% for info in ontology_class_infos %} +- {{ info.class_name }}:{{ info.class_description }} +{% endfor %} + +重要提示:只要对话中出现与上述任意实体类型相关的内容,即判定为相关(is_related=true)。 {% endif %} +--- +【必须保留的内容(不可删除)】 +以下类型的内容无论是否与场景直接相关,都必须保留,请将其关键词/短语抽取到对应字段: +- 时间信息:日期、时间点、时间段、有效期 → times 字段 +- 编号信息:学号、工号、订单号、申请号、账号、ID → ids 字段 +- 金额信息:价格、费用、金额(含货币符号或单位,如"100元"、"¥200")→ amounts 字段(注意:考试分数、成绩分数不属于金额,不要放入此字段) +- 联系方式:电话、手机号、邮箱、微信、QQ → contacts 字段 +- 地址信息:地点、地址、位置 → addresses 字段 +- 场景关键词:与**当前场景**强相关的专业术语、事件名称 → keywords 字段(注意:只放与当前场景直接相关的词,跨场景的内容不要放入此字段) +- **情绪与情感**:喜悦、悲伤、愤怒、焦虑、开心、难过、委屈、兴奋、害怕、担心、压力、感动等情绪表达 → preserve_keywords 字段 +- **兴趣与爱好**:喜欢、热爱、爱好、擅长、享受、沉迷、着迷、讨厌某事物等个人偏好表达 → preserve_keywords 字段 +- **个人情感态度**:对人际关系、情感状态的明确表达(如"我跟室友闹矛盾了"、"我都快抑郁了")→ preserve_keywords 字段 +- 注意:学业目标(如"我想考研")、成绩(如"87分")、学科偏好(如"喜欢数学")属于学业信息,不属于情绪/情感,不要放入 preserve_keywords 字段 + +【场景无关内容标记】 +请从对话中识别出与当前场景({{ pruning_scene }})**既不相关、也无语义关联**的消息片段,将其原文(或关键片段)提取到 scene_unrelated_snippets 字段。 +判断标准: +- 与场景实体类型完全无关 +- 与场景话题没有因果/时间/情境上的关联(例如:不是"因为上课所以累"这种关联) +- 纯粹是另一个话题的内容(如在教育场景中讨论购物、娱乐等) +注意:有情绪/感受表达的消息即使话题不同,也可能有语义关联,请谨慎标记。 + +**重要:scene_unrelated_snippets 必须认真填写,不能为空数组。** +如果对话中存在与场景无关的内容,必须将其原文片段提取出来。 + +示例(场景=在线教育): +- "我最近心情很差,跟室友闹矛盾了" → 与教育场景无关,加入 scene_unrelated_snippets +- "她总是很晚回来吵到我睡觉" → 与教育场景无关,加入 scene_unrelated_snippets +- "对,我都快抑郁了" → 与教育场景无关,加入 scene_unrelated_snippets +- "期末考试12月25日" → 与教育场景相关,不加入 scene_unrelated_snippets +- "我上次高数作业87分" → 与教育场景相关,不加入 scene_unrelated_snippets +- "我的目标是考研" → 与教育场景相关,不加入 scene_unrelated_snippets + +示例(场景=情感陪伴): +- "我最近心情很差,跟室友闹矛盾了" → 与情感陪伴场景相关(情绪+关系),不加入 scene_unrelated_snippets +- "对,我都快抑郁了" → 与情感陪伴场景相关(情绪),不加入 scene_unrelated_snippets +- 
"期末考试12月25日,3号教学楼201室" → 与情感陪伴场景无关(教育信息),加入 scene_unrelated_snippets +- "我上次高数作业87分,这次能考好吗" → 与情感陪伴场景无关(学业信息),加入 scene_unrelated_snippets +- "我的目标是考研,想读应用数学" → 与情感陪伴场景无关(学业目标),加入 scene_unrelated_snippets + +【可以删除的内容】 +以下类型的内容属于低价值信息,可以在剪枝时删除: +- 纯寒暄问候:如"你好"、"在吗"、"拜拜"、"嗯"、"好的"、"哦"等无实质内容的短语 +- 纯表情/符号:如"[微笑]"、"😊"、"哈哈"等 +- 重复确认:如"对对对"、"是的是的"、"嗯嗯嗯"等无新增信息的重复 +- 无意义填充:如"啊"、"呢"、"嘛"等语气词单独成句 + +**注意:即使消息很短,只要包含情绪、兴趣、爱好、个人观点等有价值信息,就必须保留,不得删除。** +例如: +- "我好开心呀" → 包含情绪(开心),必须保留,preserve_keywords 中加入"开心" +- "好喜欢打羽毛球呀" → 包含兴趣爱好(喜欢打羽毛球),必须保留,preserve_keywords 中加入"喜欢打羽毛球" +- "我好难过" → 包含情绪(难过),必须保留,preserve_keywords 中加入"难过" +- "太好啦!看到你开心,我也跟着心情亮起来" → 包含情绪,必须保留,preserve_keywords 中加入"开心" + +--- 对话全文: """ {{ dialog_text }} @@ -80,15 +120,65 @@ "amounts": [...], "contacts": [...], "addresses": [...], - "keywords": [...] + "keywords": [...], + "preserve_keywords": [...], + "scene_unrelated_snippets": [...] } {% else %} -Based on the full dialogue below, perform one-time extraction and relevance determination according to this scenario: +You are a dialogue content analysis assistant. Please analyze the full dialogue below in one pass and complete two tasks: +1. Determine whether the dialogue is relevant to the specified scene; +2. Extract all important information fragments that must be preserved. + Scenario Description: {{ instruction }} -{% if not is_builtin_scene and custom_types_str %} -Important: If the dialogue contains content related to any of the entity types above ({{ custom_types_str }}), mark it as relevant (is_related=true). + +{% if ontology_class_infos and ontology_class_infos | length > 0 %} +[Scene Entity Type Definitions] +The following entity types define what content is important in this scene. 
+Content related to ANY of these types must be preserved and extracted into the keywords field: + +{% for info in ontology_class_infos %} +- {{ info.class_name }}: {{ info.class_description }} +{% endfor %} + +Important: If the dialogue contains content related to any of the entity types above, mark it as relevant (is_related=true). {% endif %} +--- +[MUST PRESERVE (cannot be deleted)] +The following types of content must always be preserved regardless of scene relevance. Extract their keywords/phrases into the corresponding fields: +- Time information: dates, time points, durations, expiry dates → times field +- ID information: student IDs, employee IDs, order numbers, application numbers, account IDs → ids field +- Amount information: prices, fees, amounts (with currency symbols or units, e.g., "$100", "¥200") → amounts field (Note: exam scores and grades are NOT amounts, do not put them here) +- Contact information: phone numbers, emails, WeChat, QQ → contacts field +- Address information: locations, addresses, places → addresses field +- Scene keywords: professional terms and event names strongly related to **the current scene** → keywords field (Note: only put terms directly related to the current scene; cross-scene content should not be placed here) +- **Emotions and feelings**: joy, sadness, anger, anxiety, happiness, feeling upset, feeling wronged, excitement, fear, worry, stress, being moved, etc. 
→ preserve_keywords field +- **Interests and hobbies**: likes, loves, hobbies, good at, enjoys, obsessed with, hates something, personal preferences → preserve_keywords field +- **Personal emotional attitudes**: clear expressions about interpersonal relationships or emotional states (e.g., "I had a fight with my roommate", "I'm almost depressed") → preserve_keywords field +- Note: Academic goals (e.g., "I want to pursue a master's degree"), grades (e.g., "87 points"), and subject preferences (e.g., "I like math") are academic information, NOT emotions/feelings — do not put them in preserve_keywords + +[Scene-Unrelated Content Marking] +Please identify message snippets in the dialogue that are **neither relevant to nor semantically associated with** the current scene ({{ pruning_scene }}), and extract their original text (or key fragments) into the scene_unrelated_snippets field. +Criteria: +- Completely unrelated to the scene's entity types +- No causal/temporal/contextual association with the scene topic (e.g., "feeling tired because of class" IS associated) +- Purely belongs to a different topic (e.g., discussing shopping or entertainment in an education scene) +Note: Messages with emotional/feeling expressions may still have semantic association even if the topic differs — mark carefully. + +[CAN BE DELETED] +The following types of content are low-value and can be removed during pruning: +- Pure greetings: e.g., "hello", "are you there", "bye", "ok", "yeah" — short phrases with no substantive content +- Pure emojis/symbols: e.g., "[smile]", "😊", "haha" +- Repetitive confirmations: e.g., "yes yes yes", "right right", "uh huh" — repetitions with no new information +- Meaningless fillers: standalone interjections like "ah", "well", "hmm" + +**Note: Even if a message is short, if it contains emotions, interests, hobbies, or personal opinions, it MUST be preserved.** +Examples: +- "I'm so happy!" 
→ contains emotion (happy), must preserve; add "happy" to preserve_keywords +- "I love playing badminton!" → contains interest (love playing badminton), must preserve; add "love playing badminton" to preserve_keywords +- "I feel so sad" → contains emotion (sad), must preserve; add "sad" to preserve_keywords + +--- Full Dialogue: """ {{ dialog_text }} @@ -102,6 +192,8 @@ Output strict JSON only (fixed keys, order doesn't matter): "amounts": [...], "contacts": [...], "addresses": [...], - "keywords": [...] + "keywords": [...], + "preserve_keywords": [...], + "scene_unrelated_snippets": [...] } {% endif %} diff --git a/api/app/core/memory/utils/prompt/template_render.py b/api/app/core/memory/utils/prompt/template_render.py index 68e0ffe4..4df8d55b 100644 --- a/api/app/core/memory/utils/prompt/template_render.py +++ b/api/app/core/memory/utils/prompt/template_render.py @@ -2,15 +2,15 @@ import os from jinja2 import Environment, FileSystemLoader from typing import List, Dict, Any - # Setup Jinja2 environment prompt_dir = os.path.join(os.path.dirname(__file__), "prompts") prompt_env = Environment(loader=FileSystemLoader(prompt_dir)) + async def render_evaluate_prompt(evaluate_data: List[Any], schema: Any, baseline: str = "TIME", - memory_verify: bool = False,quality_assessment:bool = False, - statement_databasets: List[str] = [],language_type:str = "zh") -> str: + memory_verify: bool = False, quality_assessment: bool = False, + statement_databasets=None, language_type: str = "zh") -> str: """ Renders the evaluate prompt using the evaluate_optimized.jinja2 template. 
@@ -23,6 +23,8 @@ async def render_evaluate_prompt(evaluate_data: List[Any], schema: Any, Returns: Rendered prompt content as string """ + if statement_databasets is None: + statement_databasets = [] template = prompt_env.get_template("evaluate.jinja2") # Convert Pydantic model to JSON schema if needed @@ -46,7 +48,7 @@ async def render_evaluate_prompt(evaluate_data: List[Any], schema: Any, async def render_reflexion_prompt(data: Dict[str, Any], schema: Any, baseline: str, memory_verify: bool = False, - statement_databasets: List[str] = [],language_type:str = "zh") -> str: + statement_databasets=None, language_type: str = "zh") -> str: """ Renders the reflexion prompt using the reflexion_optimized.jinja2 template. @@ -58,6 +60,8 @@ async def render_reflexion_prompt(data: Dict[str, Any], schema: Any, baseline: s Returns: Rendered prompt content as a string. """ + if statement_databasets is None: + statement_databasets = [] template = prompt_env.get_template("reflexion.jinja2") # Convert Pydantic model to JSON schema if needed @@ -69,7 +73,7 @@ async def render_reflexion_prompt(data: Dict[str, Any], schema: Any, baseline: s json_schema = schema rendered_prompt = template.render(data=data, json_schema=json_schema, - baseline=baseline,memory_verify=memory_verify, - statement_databasets=statement_databasets,language_type=language_type) + baseline=baseline, memory_verify=memory_verify, + statement_databasets=statement_databasets, language_type=language_type) return rendered_prompt diff --git a/api/app/core/models/base.py b/api/app/core/models/base.py index dba6717d..4a453c6b 100644 --- a/api/app/core/models/base.py +++ b/api/app/core/models/base.py @@ -1,23 +1,19 @@ from __future__ import annotations -import asyncio import os -import time -from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, List, Optional, TypeVar +from typing import Any, Dict, Optional, TypeVar + +from langchain_aws import ChatBedrock +from langchain_community.chat_models import 
ChatTongyi +from langchain_core.embeddings import Embeddings +from langchain_core.language_models import BaseLLM +from langchain_ollama import OllamaLLM +from langchain_openai import ChatOpenAI, OpenAI +from pydantic import BaseModel, Field -import httpx from app.core.error_codes import BizCode from app.core.exceptions import BusinessException from app.models.models_model import ModelProvider, ModelType -from langchain_community.document_compressors import JinaRerank -from langchain_core.callbacks import CallbackManagerForLLMRun -from langchain_core.embeddings import Embeddings -from langchain_core.language_models import BaseLanguageModel, BaseLLM -from langchain_core.outputs import Generation, LLMResult -from langchain_core.retrievers import BaseRetriever -from langchain_core.runnables import RunnableSerializable -from pydantic import BaseModel, Field T = TypeVar("T") @@ -163,25 +159,17 @@ def get_provider_llm_class(config: RedBearModelConfig, type: ModelType = ModelTy # dashscope 的 omni 模型使用 OpenAI 兼容模式 if provider == ModelProvider.DASHSCOPE and config.is_omni: - from langchain_openai import ChatOpenAI return ChatOpenAI - - if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK] : + if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK]: if type == ModelType.LLM: - from langchain_openai import OpenAI return OpenAI elif type == ModelType.CHAT: - from langchain_openai import ChatOpenAI return ChatOpenAI elif provider == ModelProvider.DASHSCOPE: - from langchain_community.chat_models import ChatTongyi return ChatTongyi elif provider == ModelProvider.OLLAMA: - from langchain_ollama import OllamaLLM return OllamaLLM elif provider == ModelProvider.BEDROCK: - from langchain_aws import ChatBedrock, ChatBedrockConverse - return ChatBedrock else: raise BusinessException(f"不支持的模型提供商: {provider}", code=BizCode.PROVIDER_NOT_SUPPORTED) diff --git a/api/app/core/rag/nlp/search.py b/api/app/core/rag/nlp/search.py 
index 65fbd9cb..db93bc48 100644 --- a/api/app/core/rag/nlp/search.py +++ b/api/app/core/rag/nlp/search.py @@ -94,72 +94,16 @@ def knowledge_retrieval( db_knowledge = knowledge_repository.get_knowledge_by_id(db, knowledge_id=kb_id) if db_knowledge and db_knowledge.chunk_num > 0 and db_knowledge.status == 1: # Process shared knowledge base - if db_knowledge.permission_id.lower() == knowledge_model.PermissionType.Share: - knowledgeshare = knowledgeshare_repository.get_knowledgeshare_by_id(db=db, - knowledgeshare_id=db_knowledge.id) - if knowledgeshare: - db_knowledge = knowledge_repository.get_knowledge_by_id(db, - knowledge_id=knowledgeshare.source_kb_id) - if not (db_knowledge and db_knowledge.chunk_num > 0 and db_knowledge.status == 1): - continue - else: - continue - - if str(db_knowledge.id) not in kb_ids: - kb_ids.append(str(db_knowledge.id)) - if str(db_knowledge.workspace_id) not in workspace_ids: - workspace_ids.append(str(db_knowledge.workspace_id)) - if not chat_model: - chat_model = Base( - key=db_knowledge.llm.api_keys[0].api_key, - model_name=db_knowledge.llm.api_keys[0].model_name, - base_url=db_knowledge.llm.api_keys[0].api_base - ) - if not embedding_model: - embedding_model = OpenAIEmbed( - key=db_knowledge.embedding.api_keys[0].api_key, - model_name=db_knowledge.embedding.api_keys[0].model_name, - base_url=db_knowledge.embedding.api_keys[0].api_base - ) - vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge) - # Retrieve according to the configured retrieval type - match kb_config["retrieve_type"]: - case "participle": - rs = vector_service.search_by_full_text( - query=query, - top_k=kb_config["top_k"], - score_threshold=kb_config["similarity_threshold"], - file_names_filter=file_names_filter - ) - case "semantic": - rs = vector_service.search_by_vector( - query=query, - top_k=kb_config["top_k"], - score_threshold=kb_config["vector_similarity_weight"], - file_names_filter=file_names_filter - ) - case _: # hybrid - rs1 = 
vector_service.search_by_vector( - query=query, - top_k=kb_config["top_k"], - score_threshold=kb_config["vector_similarity_weight"], - file_names_filter=file_names_filter - ) - rs2 = vector_service.search_by_full_text( - query=query, - top_k=kb_config["top_k"], - score_threshold=kb_config["similarity_threshold"], - file_names_filter=file_names_filter - ) - - # Deduplication of merge results - seen_ids = set() - unique_rs = [] - for doc in rs1 + rs2: - if doc.metadata["doc_id"] not in seen_ids: - seen_ids.add(doc.metadata["doc_id"]) - unique_rs.append(doc) - rs = unique_rs + rs, chat_model, embedding_model = _retrieve_for_knowledge( + db=db, + db_knowledge=db_knowledge, + kb_config={**kb_config, "query": query}, # 或改为单独参数 + file_names_filter=file_names_filter, + chat_model=chat_model, + embedding_model=embedding_model, + kb_ids=kb_ids, + workspace_ids=workspace_ids, + ) all_results.extend(rs) except Exception as e: @@ -199,6 +143,115 @@ def knowledge_retrieval( finally: db.close() +def _retrieve_for_knowledge( + db: Session, + db_knowledge, + kb_config: Dict[str, Any], + file_names_filter: list[str], + chat_model: Base | None, + embedding_model: OpenAIEmbed | None, + kb_ids: list[str], + workspace_ids: list[str], +) -> tuple[list[DocumentChunk], Base | None, OpenAIEmbed | None]: + """ + 对单个知识库进行检索。 + - 处理共享知识库 + - 如果是 Folder,则递归检索其子知识库 + - 返回本知识库(含子库)的检索结果和可能更新后的 chat_model/embedding_model + """ + results: list[DocumentChunk] = [] + + # 处理共享知识库 + if db_knowledge.permission_id.lower() == knowledge_model.PermissionType.Share: + knowledgeshare = knowledgeshare_repository.get_knowledgeshare_by_id(db=db, knowledgeshare_id=db_knowledge.id) + if not knowledgeshare: + return results, chat_model, embedding_model + + db_knowledge = knowledge_repository.get_knowledge_by_id(db, knowledge_id=knowledgeshare.source_kb_id) + if not (db_knowledge and db_knowledge.chunk_num > 0 and db_knowledge.status == 1): + return results, chat_model, embedding_model + + # Folder 类型:递归处理子知识库 + if 
db_knowledge.type == knowledge_model.KnowledgeType.FOLDER: + children = knowledge_repository.get_knowledges_by_parent_id(db=db, parent_id=db_knowledge.id) + for child in children: + if not (child and child.chunk_num > 0 and child.status == 1): + continue + # 递归处理子知识库(子库如果还是 Folder,会继续往下) + child_results, chat_model, embedding_model = _retrieve_for_knowledge( + db=db, + db_knowledge=child, + kb_config=kb_config, + file_names_filter=file_names_filter, + chat_model=chat_model, + embedding_model=embedding_model, + kb_ids=kb_ids, + workspace_ids=workspace_ids, + ) + results.extend(child_results) + return results, chat_model, embedding_model + + # 普通知识库,执行一次检索 + if str(db_knowledge.id) not in kb_ids: + kb_ids.append(str(db_knowledge.id)) + if str(db_knowledge.workspace_id) not in workspace_ids: + workspace_ids.append(str(db_knowledge.workspace_id)) + + if not chat_model: + chat_model = Base( + key=db_knowledge.llm.api_keys[0].api_key, + model_name=db_knowledge.llm.api_keys[0].model_name, + base_url=db_knowledge.llm.api_keys[0].api_base, + ) + if not embedding_model: + embedding_model = OpenAIEmbed( + key=db_knowledge.embedding.api_keys[0].api_key, + model_name=db_knowledge.embedding.api_keys[0].model_name, + base_url=db_knowledge.embedding.api_keys[0].api_base, + ) + + vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge) + + match kb_config["retrieve_type"]: + case "participle": + rs = vector_service.search_by_full_text( + query=kb_config["query"], # 或者直接把 query 作为额外参数传进来 + top_k=kb_config["top_k"], + score_threshold=kb_config["similarity_threshold"], + file_names_filter=file_names_filter, + ) + case "semantic": + rs = vector_service.search_by_vector( + query=kb_config["query"], + top_k=kb_config["top_k"], + score_threshold=kb_config["vector_similarity_weight"], + file_names_filter=file_names_filter, + ) + case _: + rs1 = vector_service.search_by_vector( + query=kb_config["query"], + top_k=kb_config["top_k"], + 
score_threshold=kb_config["vector_similarity_weight"], + file_names_filter=file_names_filter, + ) + rs2 = vector_service.search_by_full_text( + query=kb_config["query"], + top_k=kb_config["top_k"], + score_threshold=kb_config["similarity_threshold"], + file_names_filter=file_names_filter, + ) + # 合并去重 + seen_ids = set() + unique_rs = [] + for doc in rs1 + rs2: + if doc.metadata["doc_id"] not in seen_ids: + seen_ids.add(doc.metadata["doc_id"]) + unique_rs.append(doc) + rs = unique_rs + + results.extend(rs) + return results, chat_model, embedding_model + def rerank(db: Session, reranker_id: uuid, query: str, docs: list[DocumentChunk], top_k: int) -> list[DocumentChunk]: """ diff --git a/api/app/core/rag_utils/__init__.py b/api/app/core/rag_utils/__init__.py index d5a8ce1c..0efe1938 100644 --- a/api/app/core/rag_utils/__init__.py +++ b/api/app/core/rag_utils/__init__.py @@ -4,11 +4,12 @@ RAG chunk analysis utilities. from .chunk_summary import generate_chunk_summary from .chunk_tags import extract_chunk_tags, extract_chunk_persona -from .chunk_insight import generate_chunk_insight +from .chunk_insight import generate_chunk_insight, generate_chunk_insight_sections __all__ = [ "generate_chunk_summary", "extract_chunk_tags", "extract_chunk_persona", "generate_chunk_insight", + "generate_chunk_insight_sections", ] diff --git a/api/app/core/rag_utils/chunk_insight.py b/api/app/core/rag_utils/chunk_insight.py index e904e53d..935b1449 100644 --- a/api/app/core/rag_utils/chunk_insight.py +++ b/api/app/core/rag_utils/chunk_insight.py @@ -1,213 +1,207 @@ """ -Generate insights from RAG chunks. +Generate memory insight report for RAG chunks using memory_insight.jinja2 prompt template. -This module provides functionality to analyze chunk content and generate insights using LLM. 
+The memory_insight.jinja2 template produces a four-section report: + 【总体概述】 → memory_insight + 【行为模式】 → behavior_pattern + 【关键发现】 → key_findings + 【成长轨迹】 → growth_trajectory + +generate_chunk_insight() returns the full raw text (stored in end_user.memory_insight). +generate_chunk_insight_sections() returns a dict with all four fields for richer storage. """ import asyncio +import os +import re from collections import Counter -from typing import Any, Dict, List +from typing import Dict, List, Optional from app.core.logging_config import get_business_logger from app.core.memory.utils.llm.llm_utils import MemoryClientFactory from app.db import get_db_context -from pydantic import BaseModel, Field business_logger = get_business_logger() +DEFAULT_LLM_ID = os.getenv("SELECTED_LLM_ID", "openai/qwen-plus") -def _get_llm_client(): - """Get LLM client using db context.""" + +# ── LLM client helper ──────────────────────────────────────────────────────── + +def _get_llm_client(end_user_id: Optional[str] = None): + """Get LLM client, preferring user-connected config with fallback to default.""" with get_db_context() as db: + try: + if end_user_id: + from app.services.memory_agent_service import get_end_user_connected_config + from app.services.memory_config_service import MemoryConfigService + connected_config = get_end_user_connected_config(end_user_id, db) + config_id = connected_config.get("memory_config_id") + workspace_id = connected_config.get("workspace_id") + if config_id or workspace_id: + config_service = MemoryConfigService(db) + memory_config = config_service.load_memory_config( + config_id=config_id, + workspace_id=workspace_id + ) + factory = MemoryClientFactory(db) + return factory.get_llm_client(memory_config.llm_model_id) + except Exception as e: + business_logger.warning(f"Failed to get user connected config, using default LLM: {e}") factory = MemoryClientFactory(db) - return factory.get_llm_client(None) # Uses default LLM + return 
factory.get_llm_client(DEFAULT_LLM_ID) -class ChunkInsight(BaseModel): - """Pydantic model for chunk insight.""" - insight: str = Field(..., description="对chunk内容的深度洞察分析") +# ── Domain analysis helpers (kept for building prompt inputs) ───────────────── +async def _classify_domain(chunk: str, llm_client) -> str: + """Classify a single chunk into a domain category.""" + from pydantic import BaseModel, Field -class DomainClassification(BaseModel): - """Pydantic model for domain classification.""" - domain: str = Field( - ..., - description="内容所属的领域分类", - examples=["技术", "商业", "教育", "生活", "娱乐", "健康", "其他"] - ) + class _Domain(BaseModel): + domain: str = Field(..., description="领域分类") - -async def classify_chunk_domain(chunk: str) -> str: - """ - Classify a chunk into a specific domain. - - Args: - chunk: Chunk content string - - Returns: - Domain name - """ try: - llm_client = _get_llm_client() - - prompt = f"""请将以下文本内容归类到最合适的领域中。 - -可选领域及其关键词: -- 技术:编程、软件、硬件、算法、数据、网络、系统、开发、工程等 -- 商业:市场、销售、管理、财务、投资、创业、营销、战略等 -- 教育:学习、课程、培训、教学、知识、技能、考试、研究等 -- 生活:日常、家庭、饮食、购物、旅行、休闲、娱乐等 -- 娱乐:游戏、电影、音乐、体育、艺术、文化等 -- 健康:医疗、养生、运动、心理、保健、疾病等 -- 其他:无法归入以上类别的内容 - -文本内容: {chunk[:500]}... 
- -请直接返回最合适的领域名称。""" - - messages = [ - {"role": "system", "content": "你是一个专业的文本分类助手。请仔细分析文本内容,选择最合适的领域分类。"}, - {"role": "user", "content": prompt} - ] - - classification = await llm_client.response_structured( - messages=messages, - response_model=DomainClassification + prompt = ( + "请将以下文本归类到最合适的领域(技术/商业/教育/生活/娱乐/健康/其他)。\n\n" + f"文本: {chunk[:500]}\n\n直接返回领域名称。" ) - - return classification.domain if classification else "其他" - - except Exception as e: - business_logger.error(f"分类chunk领域失败: {str(e)}") + result = await llm_client.response_structured( + messages=[{"role": "user", "content": prompt}], + response_model=_Domain, + ) + return result.domain if result else "其他" + except Exception: return "其他" -async def analyze_domain_distribution(chunks: List[str], max_chunks: int = 20) -> Dict[str, float]: +async def _build_insight_inputs( + chunks: List[str], + max_chunks: int, + end_user_id: Optional[str], +) -> Dict[str, Optional[str]]: """ - Analyze the domain distribution of chunks. - - Args: - chunks: List of chunk content strings - max_chunks: Maximum number of chunks to analyze - - Returns: - Dictionary of domain -> percentage + Derive domain_distribution, active_periods, social_connections strings + to feed into the memory_insight.jinja2 template. 
""" - if not chunks: - return {} - - try: - # 限制分析的chunk数量 - chunks_to_analyze = chunks[:max_chunks] - - # 为每个chunk分类 - domain_counts = Counter() - for chunk in chunks_to_analyze: - domain = await classify_chunk_domain(chunk) - domain_counts[domain] += 1 - - # 计算百分比 - total = sum(domain_counts.values()) - domain_distribution = { - domain: count / total - for domain, count in domain_counts.items() - } - - # 按百分比降序排序 - return dict(sorted(domain_distribution.items(), key=lambda x: x[1], reverse=True)) - - except Exception as e: - business_logger.error(f"分析领域分布失败: {str(e)}") - return {} + llm_client = _get_llm_client(end_user_id) + chunks_sample = chunks[:max_chunks] + + # Domain distribution + domain_counts: Counter = Counter() + for chunk in chunks_sample: + domain = await _classify_domain(chunk, llm_client) + domain_counts[domain] += 1 + + total = sum(domain_counts.values()) or 1 + domain_distribution = ", ".join( + f"{d}({c / total:.0%})" for d, c in domain_counts.most_common(3) + ) + + return { + "domain_distribution": domain_distribution, + "active_periods": None, # RAG模式暂无时间维度数据 + "social_connections": None, # RAG模式暂无社交关联数据 + } -async def generate_chunk_insight(chunks: List[str], max_chunks: int = 15) -> str: +# ── Section parser ──────────────────────────────────────────────────────────── + +_ZH_SECTIONS = { + "memory_insight": r"【总体概述】(.*?)(?=【|$)", + "behavior_pattern": r"【行为模式】(.*?)(?=【|$)", + "key_findings": r"【关键发现】(.*?)(?=【|$)", + "growth_trajectory": r"【成长轨迹】(.*?)(?=【|$)", +} + +_EN_SECTIONS = { + "memory_insight": r"【Overview】(.*?)(?=【|$)", + "behavior_pattern": r"【Behavior Pattern】(.*?)(?=【|$)", + "key_findings": r"【Key Findings】(.*?)(?=【|$)", + "growth_trajectory": r"【Growth Trajectory】(.*?)(?=【|$)", +} + + +def _parse_sections(text: str, language: str = "zh") -> Dict[str, str]: + """Extract the four sections from the LLM output.""" + patterns = _ZH_SECTIONS if language == "zh" else _EN_SECTIONS + result = {} + for key, pattern in patterns.items(): + 
match = re.search(pattern, text, re.DOTALL) + result[key] = match.group(1).strip() if match else "" + return result + + +# ── Public API ──────────────────────────────────────────────────────────────── + +async def generate_chunk_insight( + chunks: List[str], + max_chunks: int = 15, + end_user_id: Optional[str] = None, + language: str = "zh", +) -> str: """ - Generate insights from the given chunks. - - Args: - chunks: List of chunk content strings - max_chunks: Maximum number of chunks to analyze - - Returns: - A comprehensive insight report + Generate a memory insight report from RAG chunks. + + Returns the full raw report text (suitable for end_user.memory_insight). + Use generate_chunk_insight_sections() when you need all four dimensions. + """ + sections = await generate_chunk_insight_sections( + chunks=chunks, + max_chunks=max_chunks, + end_user_id=end_user_id, + language=language, + ) + return sections.get("memory_insight") or sections.get("_raw", "洞察生成失败") + + +async def generate_chunk_insight_sections( + chunks: List[str], + max_chunks: int = 15, + end_user_id: Optional[str] = None, + language: str = "zh", +) -> Dict[str, str]: + """ + Generate a four-section memory insight report from RAG chunks. + + Returns a dict with keys: + memory_insight, behavior_pattern, key_findings, growth_trajectory + (plus '_raw' containing the full LLM output for debugging) """ if not chunks: business_logger.warning("没有提供chunk内容用于生成洞察") - return "暂无足够数据生成洞察报告" - + empty = {k: "" for k in ("memory_insight", "behavior_pattern", "key_findings", "growth_trajectory")} + empty["_raw"] = "暂无足够数据生成洞察报告" + return empty + try: - # 1. 分析领域分布 - domain_dist = await analyze_domain_distribution(chunks, max_chunks=max_chunks) - - # 2. 统计基本信息 - total_chunks = len(chunks) - avg_length = sum(len(chunk) for chunk in chunks) / total_chunks if total_chunks > 0 else 0 - - # 3. 
构建洞察prompt - prompt_parts = [] - - if domain_dist: - top_domains = ", ".join([f"{k}({v:.0%})" for k, v in list(domain_dist.items())[:3]]) - prompt_parts.append(f"- 内容领域分布: {top_domains}") - - prompt_parts.append(f"- 内容规模: 共{total_chunks}个知识片段,平均长度{avg_length:.0f}字") - - # 添加部分chunk内容作为参考 - sample_chunks = chunks[:5] - sample_content = "\n".join([f"示例{i+1}: {chunk[:200]}..." for i, chunk in enumerate(sample_chunks)]) - prompt_parts.append(f"\n内容示例:\n{sample_content}") - - system_prompt = """你是一位专业的知识内容分析师。你的任务是根据提供的信息,生成一段简洁、有洞察力的分析报告。 + from app.core.memory.utils.prompt.prompt_utils import render_memory_insight_prompt -重要规则: -1. 报告需要将所有要点流畅地串联成一个段落 -2. 语言风格要专业、客观,同时易于理解 -3. 不要添加任何额外的解释或标题,直接输出报告内容 -4. 基于提供的数据和示例内容进行分析,不要编造信息 -5. 重点关注内容的主题、特点和价值 -6. 报告长度控制在150-200字 + # Build template inputs from chunk analysis + inputs = await _build_insight_inputs(chunks, max_chunks, end_user_id) -例如,如果输入是: -- 内容领域分布: 技术(60%), 商业(25%), 教育(15%) -- 内容规模: 共50个知识片段,平均长度320字 -内容示例: [示例内容...] + rendered_prompt = await render_memory_insight_prompt( + domain_distribution=inputs["domain_distribution"], + active_periods=inputs["active_periods"], + social_connections=inputs["social_connections"], + language=language, + ) -你的输出应该类似: -"该知识库主要聚焦于技术领域(60%),涵盖商业(25%)和教育(15%)相关内容。共包含50个知识片段,平均每个片段约320字,内容详实。从示例来看,内容涉及[具体主题],体现了[特点],对[目标用户]具有较高的参考价值。" -""" - - user_prompt = "\n".join(prompt_parts) - - messages = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt} - ] - - # 调用LLM生成洞察 - llm_client = _get_llm_client() + messages = [{"role": "user", "content": rendered_prompt}] + llm_client = _get_llm_client(end_user_id) response = await llm_client.chat(messages=messages) - - insight = response.content.strip() - business_logger.info(f"成功生成chunk洞察,分析了 {min(len(chunks), max_chunks)} 个片段") - - return insight - + raw_text = response.content.strip() if response and response.content else "" + + sections = _parse_sections(raw_text, language=language) + sections["_raw"] 
= raw_text + + business_logger.info( + f"成功生成chunk洞察四维度,分析了 {min(len(chunks), max_chunks)} 个片段" + ) + return sections + except Exception as e: business_logger.error(f"生成chunk洞察失败: {str(e)}") - return "洞察生成失败" - - -if __name__ == "__main__": - # 测试代码 - test_chunks = [ - "Python是一种高级编程语言,以其简洁的语法和强大的功能而闻名。它广泛应用于Web开发、数据分析、人工智能等领域。", - "机器学习算法可以从数据中自动学习模式,无需显式编程。常见的算法包括决策树、随机森林、神经网络等。", - "深度学习是机器学习的一个分支,使用多层神经网络来学习数据的层次化表示。它在图像识别、语音识别等任务中表现出色。", - "自然语言处理技术使计算机能够理解和生成人类语言。应用包括机器翻译、情感分析、文本摘要等。", - "数据科学结合了统计学、计算机科学和领域知识,用于从数据中提取有价值的洞察。" - ] - - print("开始生成chunk洞察...") - insight = asyncio.run(generate_chunk_insight(test_chunks)) - print(f"\n生成的洞察:\n{insight}") + empty = {k: "" for k in ("memory_insight", "behavior_pattern", "key_findings", "growth_trajectory")} + empty["_raw"] = "洞察生成失败" + return empty diff --git a/api/app/core/rag_utils/chunk_summary.py b/api/app/core/rag_utils/chunk_summary.py index 7f69af88..1b0f4395 100644 --- a/api/app/core/rag_utils/chunk_summary.py +++ b/api/app/core/rag_utils/chunk_summary.py @@ -1,11 +1,10 @@ """ -Generate summary for RAG chunks. - -This module provides functionality to summarize chunk content using LLM. +Generate summary for RAG chunks using memory_summary.jinja2 prompt template. 
""" import asyncio -from typing import Any, Dict, List +import os +from typing import List, Optional from app.core.logging_config import get_business_logger from app.core.memory.utils.llm.llm_utils import MemoryClientFactory @@ -14,94 +13,135 @@ from pydantic import BaseModel, Field business_logger = get_business_logger() - -def _get_llm_client(): - """Get LLM client using db context.""" - with get_db_context() as db: - factory = MemoryClientFactory(db) - return factory.get_llm_client(None) # Uses default LLM +DEFAULT_LLM_ID = os.getenv("SELECTED_LLM_ID", "openai/qwen-plus") -class ChunkSummary(BaseModel): - """Pydantic model for chunk summary.""" - summary: str = Field(..., description="简洁的chunk内容摘要") +# ── Schema ────────────────────────────────────────────────────────────────── + +class MemorySummaryStatement(BaseModel): + """Single labelled statement extracted by memory_summary.jinja2.""" + statement: str = Field(..., description="提取的陈述内容") + label: Optional[str] = Field(None, description="陈述标签") -async def generate_chunk_summary(chunks: List[str], max_chunks: int = 10) -> str: +class MemorySummaryResponse(BaseModel): """ - Generate a summary for the given chunks. - + Structured output expected from memory_summary.jinja2. + The template asks for a JSON array of labelled statements; + we wrap it in an object so response_structured can parse it. 
+ """ + statements: List[MemorySummaryStatement] = Field( + default_factory=list, + description="从chunk中提取的陈述列表" + ) + summary: Optional[str] = Field(None, description="整体摘要文本(可选)") + + +# ── LLM client helper ──────────────────────────────────────────────────────── + +def _get_llm_client(end_user_id: Optional[str] = None): + """Get LLM client, preferring user-connected config with fallback to default.""" + with get_db_context() as db: + try: + if end_user_id: + from app.services.memory_agent_service import get_end_user_connected_config + from app.services.memory_config_service import MemoryConfigService + connected_config = get_end_user_connected_config(end_user_id, db) + config_id = connected_config.get("memory_config_id") + workspace_id = connected_config.get("workspace_id") + if config_id or workspace_id: + config_service = MemoryConfigService(db) + memory_config = config_service.load_memory_config( + config_id=config_id, + workspace_id=workspace_id + ) + factory = MemoryClientFactory(db) + return factory.get_llm_client(memory_config.llm_model_id) + except Exception as e: + business_logger.warning(f"Failed to get user connected config, using default LLM: {e}") + factory = MemoryClientFactory(db) + return factory.get_llm_client(DEFAULT_LLM_ID) + + +# ── Core function ───────────────────────────────────────────────────────────── + +async def generate_chunk_summary( + chunks: List[str], + max_chunks: int = 10, + end_user_id: Optional[str] = None, + language: str = "zh", +) -> str: + """ + Generate a user summary from RAG chunks using the memory_summary.jinja2 template. + + The template extracts labelled statements from the chunks; we then join them + into a coherent summary string that can be stored in end_user.user_summary. 
+ Args: chunks: List of chunk content strings - max_chunks: Maximum number of chunks to process (default: 10) - + max_chunks: Maximum number of chunks to process + end_user_id: Optional end-user ID for model selection + language: Output language ("zh" or "en") + Returns: - A concise summary of the chunks + Summary string (joined statements or fallback text) """ if not chunks: business_logger.warning("没有提供chunk内容用于生成摘要") return "暂无内容" - + try: - # 限制处理的chunk数量,避免token过多 + from app.core.memory.utils.prompt.prompt_utils import render_memory_summary_prompt + chunks_to_process = chunks[:max_chunks] - - # 合并chunk内容 - combined_content = "\n\n".join([f"片段{i+1}: {chunk}" for i, chunk in enumerate(chunks_to_process)]) - - # 构建prompt - system_prompt = ( - "你是一位专业的文本摘要助手。请基于提供的文本片段,生成简洁的摘要。要求:\n" - "- 摘要长度控制在100-150字;\n" - "- 提取核心信息和关键要点;\n" - "- 使用客观、清晰的语言;\n" - "- 避免冗余和重复;\n" - "- 如果内容涉及多个主题,按重要性排序呈现。" + chunk_texts = "\n\n".join( + [f"片段{i + 1}: {chunk}" for i, chunk in enumerate(chunks_to_process)] + ) + + json_schema = MemorySummaryResponse.model_json_schema() + + rendered_prompt = await render_memory_summary_prompt( + chunk_texts=chunk_texts, + json_schema=json_schema, + max_words=200, + language=language, + ) + + messages = [{"role": "user", "content": rendered_prompt}] + + llm_client = _get_llm_client(end_user_id) + + # Try structured output; fall back to plain chat only for LLMClientException + # (indicates the model/provider doesn't support structured output). + # All other exceptions are re-raised so config/schema errors stay visible. 
+ try: + response: MemorySummaryResponse = await llm_client.response_structured( + messages=messages, + response_model=MemorySummaryResponse, + ) + if response.summary: + summary = response.summary.strip() + elif response.statements: + summary = ";".join(s.statement for s in response.statements) + else: + summary = "暂无内容" + except Exception as e: + from app.core.memory.llm_tools.llm_client import LLMClientException + if isinstance(e, LLMClientException): + business_logger.warning( + f"结构化输出不可用,降级为普通对话: end_user_id={end_user_id}, reason={e}" + ) + raw = await llm_client.chat(messages=messages) + summary = raw.content.strip() if raw and raw.content else "暂无内容" + else: + business_logger.error(f"生成摘要时发生非预期异常: {e}") + raise + + business_logger.info( + f"成功生成chunk摘要,处理了 {len(chunks_to_process)} 个片段" ) - - user_prompt = f"请为以下文本片段生成摘要:\n\n{combined_content}" - - messages = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ] - - # 调用LLM生成摘要 - llm_client = _get_llm_client() - response = await llm_client.chat(messages=messages) - - summary = response.content.strip() - business_logger.info(f"成功生成chunk摘要,处理了 {len(chunks_to_process)} 个片段") - return summary - + except Exception as e: business_logger.error(f"生成chunk摘要失败: {str(e)}") return "摘要生成失败" - - -async def generate_chunk_summary_batch(chunks_list: List[List[str]]) -> List[str]: - """ - Generate summaries for multiple chunk lists in batch. 
- - Args: - chunks_list: List of chunk lists - - Returns: - List of summaries - """ - tasks = [generate_chunk_summary(chunks) for chunks in chunks_list] - return await asyncio.gather(*tasks) - - -if __name__ == "__main__": - # 测试代码 - test_chunks = [ - "这是第一段测试内容,讲述了关于机器学习的基础知识。", - "第二段内容介绍了深度学习的应用场景和发展历史。", - "第三段讨论了自然语言处理技术的最新进展。" - ] - - print("开始生成chunk摘要...") - summary = asyncio.run(generate_chunk_summary(test_chunks)) - print(f"\n生成的摘要:\n{summary}") diff --git a/api/app/core/rag_utils/chunk_tags.py b/api/app/core/rag_utils/chunk_tags.py index 2057f8ac..98ab4a33 100644 --- a/api/app/core/rag_utils/chunk_tags.py +++ b/api/app/core/rag_utils/chunk_tags.py @@ -5,8 +5,9 @@ This module provides functionality to extract meaningful tags from chunk content """ import asyncio +import os from collections import Counter -from typing import List, Tuple +from typing import List, Optional, Tuple from app.core.logging_config import get_business_logger from app.core.memory.utils.llm.llm_utils import MemoryClientFactory @@ -15,12 +16,31 @@ from pydantic import BaseModel, Field business_logger = get_business_logger() +DEFAULT_LLM_ID = os.getenv("SELECTED_LLM_ID", "openai/qwen-plus") -def _get_llm_client(): - """Get LLM client using db context.""" + +def _get_llm_client(end_user_id: Optional[str] = None): + """Get LLM client, preferring user-connected config with fallback to default.""" with get_db_context() as db: + try: + if end_user_id: + from app.services.memory_agent_service import get_end_user_connected_config + from app.services.memory_config_service import MemoryConfigService + connected_config = get_end_user_connected_config(end_user_id, db) + config_id = connected_config.get("memory_config_id") + workspace_id = connected_config.get("workspace_id") + if config_id or workspace_id: + config_service = MemoryConfigService(db) + memory_config = config_service.load_memory_config( + config_id=config_id, + workspace_id=workspace_id + ) + factory = MemoryClientFactory(db) + 
return factory.get_llm_client(memory_config.llm_model_id) + except Exception as e: + business_logger.warning(f"Failed to get user connected config, using default LLM: {e}") factory = MemoryClientFactory(db) - return factory.get_llm_client(None) # Uses default LLM + return factory.get_llm_client(DEFAULT_LLM_ID) class ExtractedTags(BaseModel): @@ -33,7 +53,7 @@ class ExtractedPersona(BaseModel): personas: List[str] = Field(..., description="从文本中提取的人物形象列表,如'产品设计师'、'旅行爱好者'等") -async def extract_chunk_tags(chunks: List[str], max_tags: int = 10, max_chunks: int = 10) -> List[Tuple[str, int]]: +async def extract_chunk_tags(chunks: List[str], max_tags: int = 10, max_chunks: int = 10, end_user_id: Optional[str] = None) -> List[Tuple[str, int]]: """ Extract meaningful tags from the given chunks. @@ -64,7 +84,7 @@ async def extract_chunk_tags(chunks: List[str], max_tags: int = 10, max_chunks: "标签应该是名词或名词短语,能够准确概括文本的核心内容。" ) - llm_client = _get_llm_client() + llm_client = _get_llm_client(end_user_id) # 为每个chunk单独提取标签,然后统计频率 all_tags = [] @@ -116,7 +136,7 @@ async def extract_chunk_tags_with_frequency(chunks: List[str], max_tags: int = 1 return await extract_chunk_tags(chunks, max_tags=max_tags, max_chunks=len(chunks)) -async def extract_chunk_persona(chunks: List[str], max_personas: int = 5, max_chunks: int = 20) -> List[str]: +async def extract_chunk_persona(chunks: List[str], max_personas: int = 5, max_chunks: int = 20, end_user_id: Optional[str] = None) -> List[str]: """ Extract persona (人物形象) from the given chunks. 
@@ -159,7 +179,7 @@ async def extract_chunk_persona(chunks: List[str], max_personas: int = 5, max_ch ] # 调用LLM提取人物形象 - llm_client = _get_llm_client() + llm_client = _get_llm_client(end_user_id) structured_response = await llm_client.response_structured( messages=messages, response_model=ExtractedPersona diff --git a/api/app/core/storage/base.py b/api/app/core/storage/base.py index 64be6fda..6653d04a 100644 --- a/api/app/core/storage/base.py +++ b/api/app/core/storage/base.py @@ -7,7 +7,7 @@ file operations across different storage backends. """ from abc import ABC, abstractmethod -from typing import Optional +from typing import AsyncIterator, Optional class StorageBackend(ABC): @@ -42,6 +42,26 @@ class StorageBackend(ABC): """ pass + @abstractmethod + async def upload_stream( + self, + file_key: str, + stream: AsyncIterator[bytes], + content_type: Optional[str] = None, + ) -> int: + """ + Upload a file from an async byte stream. + + Args: + file_key: Unique identifier for the file. + stream: Async iterator yielding bytes chunks. + content_type: Optional MIME type of the file. + + Returns: + Total bytes written. 
+ """ + pass + @abstractmethod async def download(self, file_key: str) -> bytes: """ diff --git a/api/app/core/storage/factory.py b/api/app/core/storage/factory.py index 52749e3c..93fa847b 100644 --- a/api/app/core/storage/factory.py +++ b/api/app/core/storage/factory.py @@ -85,6 +85,7 @@ class StorageFactory: access_key_id=settings.S3_ACCESS_KEY_ID, secret_access_key=settings.S3_SECRET_ACCESS_KEY, bucket_name=settings.S3_BUCKET_NAME, + endpoint_url=settings.S3_ENDPOINT_URL, ) else: diff --git a/api/app/core/storage/local.py b/api/app/core/storage/local.py index e0931b16..4b8ae829 100644 --- a/api/app/core/storage/local.py +++ b/api/app/core/storage/local.py @@ -11,6 +11,7 @@ from typing import Optional import aiofiles import aiofiles.os +from typing import AsyncIterator from app.core.storage.base import StorageBackend from app.core.storage_exceptions import ( @@ -179,6 +180,36 @@ class LocalStorage(StorageBackend): full_path = self._get_full_path(file_key) return full_path.exists() + async def upload_stream( + self, + file_key: str, + stream: AsyncIterator[bytes], + content_type: Optional[str] = None, + ) -> int: + """ + Upload a file from an async byte stream to the local file system. + + Returns: + Total bytes written. + """ + full_path = self._get_full_path(file_key) + try: + full_path.parent.mkdir(parents=True, exist_ok=True) + total = 0 + async with aiofiles.open(full_path, "wb") as f: + async for chunk in stream: + await f.write(chunk) + total += len(chunk) + logger.info(f"File stream uploaded successfully: {file_key}") + return total + except Exception as e: + logger.error(f"Failed to stream upload file {file_key}: {e}") + raise StorageUploadError( + message=f"Failed to stream upload file: {e}", + file_key=file_key, + cause=e, + ) + async def get_url(self, file_key: str, expires: int = 3600) -> str: """ Get an access URL for the file. 
diff --git a/api/app/core/storage/oss.py b/api/app/core/storage/oss.py index 6f04bbbd..81bedce1 100644 --- a/api/app/core/storage/oss.py +++ b/api/app/core/storage/oss.py @@ -5,8 +5,9 @@ This module provides a storage backend that stores files on Aliyun Object Storage Service (OSS) using the oss2 SDK. """ +import io import logging -from typing import Optional +from typing import AsyncIterator, Optional import oss2 from oss2.exceptions import NoSuchKey, OssError @@ -125,10 +126,39 @@ class OSSStorage(StorageBackend): cause=e, ) + async def upload_stream( + self, + file_key: str, + stream: AsyncIterator[bytes], + content_type: Optional[str] = None, + ) -> int: + """Upload from async stream to OSS. Returns total bytes written.""" + buf = io.BytesIO() + try: + async for chunk in stream: + buf.write(chunk) + content = buf.getvalue() + headers = {"Content-Type": content_type} if content_type else None + self.bucket.put_object(file_key, content, headers=headers) + logger.info(f"File stream uploaded to OSS successfully: {file_key}") + return len(content) + except OssError as e: + logger.error(f"OSS error stream uploading file {file_key}: {e}") + raise StorageUploadError( + message=f"Failed to stream upload file to OSS: {e.message}", + file_key=file_key, + cause=e, + ) + except Exception as e: + logger.error(f"Failed to stream upload file to OSS {file_key}: {e}") + raise StorageUploadError( + message=f"Failed to stream upload file to OSS: {e}", + file_key=file_key, + cause=e, + ) + async def download(self, file_key: str) -> bytes: """ - Download a file from OSS. - Args: file_key: Unique identifier for the file in the storage system. diff --git a/api/app/core/storage/s3.py b/api/app/core/storage/s3.py index e59da8d5..37ad4184 100644 --- a/api/app/core/storage/s3.py +++ b/api/app/core/storage/s3.py @@ -5,8 +5,9 @@ This module provides a storage backend that stores files on AWS S3 using the boto3 SDK. 
""" +import io import logging -from typing import Optional +from typing import AsyncIterator, Optional import boto3 from botocore.exceptions import ClientError, NoCredentialsError, BotoCoreError @@ -35,6 +36,19 @@ class S3Storage(StorageBackend): bucket_name: The name of the S3 bucket. region: The AWS region. """ + AMAZON_S3_ENDPOINT_MAP = { + "us-east-1": "https://s3.us-east-1.amazonaws.com", # 特殊:无地域后缀 + "us-east-2": "https://s3.us-east-2.amazonaws.com", + "us-west-1": "https://s3.us-west-1.amazonaws.com", + "us-west-2": "https://s3.us-west-2.amazonaws.com", + "ap-east-1": "https://s3.ap-east-1.amazonaws.com", # 香港 + "ap-southeast-1": "https://s3.ap-southeast-1.amazonaws.com", # 新加坡 + "ap-southeast-2": "https://s3.ap-southeast-2.amazonaws.com", # 悉尼 + "ap-northeast-1": "https://s3.ap-northeast-1.amazonaws.com", # 东京 + "eu-central-1": "https://s3.eu-central-1.amazonaws.com", # 法兰克福 + "eu-west-1": "https://s3.eu-west-1.amazonaws.com", # 爱尔兰 + # 可根据需要扩展其他地域 + } def __init__( self, @@ -42,6 +56,7 @@ class S3Storage(StorageBackend): access_key_id: str, secret_access_key: str, bucket_name: str, + endpoint_url: Optional[str] = None ): """ Initialize the S3Storage backend. @@ -51,6 +66,7 @@ class S3Storage(StorageBackend): access_key_id: The AWS access key ID. secret_access_key: The AWS secret access key. bucket_name: The name of the S3 bucket. + endpoint_url: The complete URL to use for the constructed client. Raises: StorageConfigError: If any required configuration is missing. 
async def upload_stream(
    self,
    file_key: str,
    stream: AsyncIterator[bytes],
    content_type: Optional[str] = None,
) -> int:
    """
    Upload an async byte stream to S3 using a multipart upload.

    Chunks are accumulated until at least 5 MiB is buffered, then sent as
    one part; the final part may be smaller. An empty stream cannot be
    completed as a multipart upload (S3 requires at least one part), so in
    that case the multipart upload is aborted and an empty object is
    written with ``put_object`` instead.

    Args:
        file_key: Object key inside the configured bucket.
        stream: Async iterator yielding the payload as byte chunks.
        content_type: Optional MIME type stored as the object's ContentType.

    Returns:
        Total number of bytes uploaded.

    Raises:
        StorageUploadError: If starting the upload, reading the stream or
            any S3 request fails. The multipart upload is aborted on a
            best-effort basis before the error is raised.
    """
    extra_args = {"ContentType": content_type} if content_type else {}
    min_part_size = 5 * 1024 * 1024  # S3 minimum size for every part but the last
    upload_id = None
    try:
        # Created inside the try so a failure here is wrapped like any other.
        mpu = self.client.create_multipart_upload(
            Bucket=self.bucket_name, Key=file_key, **extra_args
        )
        upload_id = mpu["UploadId"]
        parts = []
        pending = bytearray()
        total = 0

        def upload_pending() -> None:
            """Send the buffered bytes as the next part and reset the buffer."""
            part_number = len(parts) + 1
            resp = self.client.upload_part(
                Bucket=self.bucket_name, Key=file_key,
                UploadId=upload_id, PartNumber=part_number, Body=bytes(pending),
            )
            parts.append({"PartNumber": part_number, "ETag": resp["ETag"]})
            pending.clear()

        async for chunk in stream:
            pending += chunk
            total += len(chunk)
            if len(pending) >= min_part_size:
                upload_pending()
        # Whatever is left becomes the last part (allowed to be < 5 MiB).
        if pending:
            upload_pending()

        if parts:
            self.client.complete_multipart_upload(
                Bucket=self.bucket_name, Key=file_key,
                UploadId=upload_id,
                MultipartUpload={"Parts": parts},
            )
        else:
            # Empty stream: completing with zero parts is rejected by S3,
            # so drop the multipart upload and store an empty object.
            self.client.abort_multipart_upload(
                Bucket=self.bucket_name, Key=file_key, UploadId=upload_id
            )
            upload_id = None  # already aborted; the handler must not retry it
            self.client.put_object(
                Bucket=self.bucket_name, Key=file_key, Body=b"", **extra_args
            )
        logger.info(f"File stream uploaded to S3 successfully: {file_key}")
        return total
    except Exception as e:
        if upload_id is not None:
            # Best-effort cleanup: an abort failure must not hide the real error.
            try:
                self.client.abort_multipart_upload(
                    Bucket=self.bucket_name, Key=file_key, UploadId=upload_id
                )
            except Exception as abort_error:
                logger.warning(
                    f"Failed to abort multipart upload for {file_key}: {abort_error}"
                )
        logger.error(f"Failed to stream upload file to S3 {file_key}: {e}")
        raise StorageUploadError(
            message=f"Failed to stream upload file to S3: {e}",
            file_key=file_key,
            cause=e,
        ) from e
UnsupportVariableType, UnknowModelWarning, ExceptionDefineition, \ +from app.core.workflow.adapters.errors import ( + UnsupportVariableType, + UnknowModelWarning, + ExceptionDefineition, ExceptionType -from app.core.workflow.nodes.assigner import AssignerNodeConfig +) from app.core.workflow.nodes.assigner.config import AssignmentItem from app.core.workflow.nodes.base_config import VariableDefinition, BaseNodeConfig -from app.core.workflow.nodes.code import CodeNodeConfig from app.core.workflow.nodes.code.config import InputVariable, OutputVariable -from app.core.workflow.nodes.configs import StartNodeConfig, LLMNodeConfig -from app.core.workflow.nodes.cycle_graph import LoopNodeConfig, IterationNodeConfig -from app.core.workflow.nodes.cycle_graph.config import ConditionDetail as LoopConditionDetail, ConditionsConfig, \ +from app.core.workflow.nodes.configs import ( + StartNodeConfig, + LLMNodeConfig, + AssignerNodeConfig, + CodeNodeConfig, + LoopNodeConfig, + IterationNodeConfig, + EndNodeConfig, + HttpRequestNodeConfig, + IfElseNodeConfig, + JinjaRenderNodeConfig, + KnowledgeRetrievalNodeConfig, + NoteNodeConfig, + ParameterExtractorNodeConfig, + QuestionClassifierNodeConfig, + VariableAggregatorNodeConfig +) +from app.core.workflow.nodes.cycle_graph.config import ( + ConditionDetail as LoopConditionDetail, + ConditionsConfig, CycleVariable -from app.core.workflow.nodes.end import EndNodeConfig -from app.core.workflow.nodes.enums import ValueInputType, ComparisonOperator, AssignmentOperator, HttpAuthType, \ - HttpContentType, HttpErrorHandle -from app.core.workflow.nodes.http_request import HttpRequestNodeConfig -from app.core.workflow.nodes.http_request.config import HttpAuthConfig, HttpContentTypeConfig, HttpFormData, \ - HttpTimeOutConfig, HttpRetryConfig, HttpErrorDefaultTamplete, HttpErrorHandleConfig -from app.core.workflow.nodes.if_else import IfElseNodeConfig +) +from app.core.workflow.nodes.enums import ( + ValueInputType, + ComparisonOperator, + 
AssignmentOperator, + HttpAuthType, + HttpContentType, + HttpErrorHandle, + NodeType +) +from app.core.workflow.nodes.http_request.config import ( + HttpAuthConfig, + HttpContentTypeConfig, + HttpFormData, + HttpTimeOutConfig, + HttpRetryConfig, + HttpErrorDefaultTamplete, + HttpErrorHandleConfig +) from app.core.workflow.nodes.if_else.config import ConditionDetail, ConditionBranchConfig -from app.core.workflow.nodes.jinja_render import JinjaRenderNodeConfig from app.core.workflow.nodes.jinja_render.config import VariablesMappingConfig -from app.core.workflow.nodes.knowledge import KnowledgeRetrievalNodeConfig from app.core.workflow.nodes.llm.config import MemoryWindowSetting, MessageConfig -from app.core.workflow.nodes.parameter_extractor import ParameterExtractorNodeConfig from app.core.workflow.nodes.parameter_extractor.config import ParamsConfig -from app.core.workflow.nodes.question_classifier import QuestionClassifierNodeConfig from app.core.workflow.nodes.question_classifier.config import ClassifierConfig -from app.core.workflow.nodes.variable_aggregator import VariableAggregatorNodeConfig from app.core.workflow.variable.base_variable import VariableType, DEFAULT_VALUE @@ -48,24 +74,24 @@ class DifyConverter(BaseConverter): def __init__(self): self.CONFIG_CONVERT_MAP = { - "start": self.convert_start_node_config, - "llm": self.convert_llm_node_config, - "answer": self.convert_end_node_config, - "if-else": self.convert_if_else_node_config, - "loop": self.convert_loop_node_config, - "iteration": self.convert_iteration_node_config, - "assigner": self.convert_assigner_node_config, - "code": self.convert_code_node_config, - "http-request": self.convert_http_node_config, - "template-transform": self.convert_jinja_render_node_config, - "knowledge-retrieval": self.convert_knowledge_node_config, - "parameter-extractor": self.convert_parameter_extractor_node_config, - "question-classifier": self.convert_question_classifier_node_config, - "variable-aggregator": 
self.convert_variable_aggregator_node_config, - "tool": self.convert_tool_node_config, - "loop-start": lambda x: {}, - "iteration-start": lambda x: {}, - "loop-end": lambda x: {}, + NodeType.START: self.convert_start_node_config, + NodeType.LLM: self.convert_llm_node_config, + NodeType.END: self.convert_end_node_config, + NodeType.IF_ELSE: self.convert_if_else_node_config, + NodeType.LOOP: self.convert_loop_node_config, + NodeType.ITERATION: self.convert_iteration_node_config, + NodeType.ASSIGNER: self.convert_assigner_node_config, + NodeType.CODE: self.convert_code_node_config, + NodeType.HTTP_REQUEST: self.convert_http_node_config, + NodeType.JINJARENDER: self.convert_jinja_render_node_config, + NodeType.KNOWLEDGE_RETRIEVAL: self.convert_knowledge_node_config, + NodeType.PARAMETER_EXTRACTOR: self.convert_parameter_extractor_node_config, + NodeType.QUESTION_CLASSIFIER: self.convert_question_classifier_node_config, + NodeType.VAR_AGGREGATOR: self.convert_variable_aggregator_node_config, + NodeType.TOOL: self.convert_tool_node_config, + NodeType.NOTES: self.convert_notes_config, + NodeType.CYCLE_START: lambda x: {}, + NodeType.BREAK: lambda x: {}, } def get_node_convert(self, node_type): @@ -185,6 +211,9 @@ class DifyConverter(BaseConverter): "not empty": ComparisonOperator.NOT_EMPTY, "start with": ComparisonOperator.START_WITH, "end with": ComparisonOperator.END_WITH, + "not contains": ComparisonOperator.NOT_CONTAINS, + "exists": ComparisonOperator.NOT_EMPTY, + "not exists": ComparisonOperator.EMPTY } return operator_map.get(operator, operator) @@ -364,7 +393,7 @@ class DifyConverter(BaseConverter): node_data = node["data"] cases = [] for case in node_data["cases"]: - case_id = case["id"] + case_id = case.get("id") or case.get("case_id") logical_operator = case["logical_operator"] conditions = [] for condition in case["conditions"]: @@ -540,7 +569,8 @@ class DifyConverter(BaseConverter): ] = self.trans_variable_format(content["value"]) else: if 
@staticmethod
def convert_notes_config(node: dict):
    """
    Build the config dict for a note node from a Dify node definition.

    Reads the note attributes from ``node["data"]``, falling back to a
    default for each missing key, and returns the dumped ``NoteNodeConfig``.
    The model is built with ``model_construct``, i.e. without validation.

    Args:
        node: Dify node dict; must contain a ``"data"`` mapping.

    Returns:
        The note configuration as a plain dict.
    """
    data = node["data"]
    note_fields = {
        "author": data.get("author", ""),
        "text": data.get("text", ""),
        "width": data.get("width", 80),
        "height": data.get("height", 80),
        "theme": data.get("theme", "blue"),
        # Dify uses camelCase for this key.
        "show_author": data.get("showAuthor", True),
    }
    note_config = NoteNodeConfig.model_construct(**note_fields)
    return note_config.model_dump()
'kind', 'version', 'workflow'}) if not all(field in self.config for field in require_fields): return False - if self.config.get("app",{}).get("mode") == "workflow": + if self.config.get("app", {}).get("mode") == "workflow": self.errors.append(ExceptionDefineition( type=ExceptionType.PLATFORM, detail="workflow mode is not supported" @@ -162,13 +163,14 @@ class DifyAdapter(BasePlatformAdapter, DifyConverter): def _convert_node(self, node: dict[str, Any]) -> NodeDefinition | None: node_data = node["data"] try: + node_type = self.map_node_type(node_data["type"]) return NodeDefinition( id=node["id"], - type=self.map_node_type(node_data["type"]), - name=node_data.get("title"), + type=node_type, + name=node_data.get("title") or "notes", cycle=node.get("parentId"), description=None, - config=self._convert_node_config(node), + config=self._convert_node_config(node_type, node), position={ "x": node["position"]["x"], "y": node["position"]["y"] @@ -182,17 +184,16 @@ class DifyAdapter(BasePlatformAdapter, DifyConverter): except Exception as e: logger.debug(f"convert node error - {e}", exc_info=True) - def _convert_node_config(self, node: dict): - node_data = node["data"] - node_type = node_data["type"] + def _convert_node_config(self, node_type: NodeType, node: dict): try: + node_data = node["data"] converter = self.get_node_convert(node_type) - if node_type not in self.CONFIG_CONVERT_MAP: + if node_type == NodeType.UNKNOWN: self.errors.append(ExceptionDefineition( type=ExceptionType.NODE, node_id=node["id"], node_name=node["data"]["title"], - detail=f"node type {node_type if node_type else 'notes'} is unsupported", + detail=f"node type {node_data.get('type')} is unsupported", )) return converter(node) except Exception as e: @@ -209,16 +210,15 @@ class DifyAdapter(BasePlatformAdapter, DifyConverter): source = edge["source"] target = edge["target"] - edge_id = edge["id"] label = None if source in self.branch_node_cache: - case_id = "-".join(edge_id.split("-")[1:-2]) + case_id = 
edge["sourceHandle"] if case_id == "false": - label = f'CASE{len(self.branch_node_cache[source])+1}' + label = f'CASE{len(self.branch_node_cache[source]) + 1}' else: label = f'CASE{self.branch_node_cache[source].index(case_id) + 1}' if source in self.error_branch_node_cache: - case_id = "-".join(edge_id.split("-")[1:-2]) + case_id = edge["sourceHandle"] if case_id == "source": label = "SUCCESS" else: @@ -243,6 +243,7 @@ class DifyAdapter(BasePlatformAdapter, DifyConverter): name=variable["name"], default=variable["value"], type=self.variable_type_map(variable["value_type"]), + description=variable.get("description") ) except Exception as e: self.errors.append(ExceptionDefineition( @@ -256,5 +257,3 @@ class DifyAdapter(BasePlatformAdapter, DifyConverter): def _convert_execution(self, execution: dict[str, Any]) -> ExecutionConfig: return ExecutionConfig() - - diff --git a/api/app/core/workflow/adapters/memory_bear/memory_bear_adapter.py b/api/app/core/workflow/adapters/memory_bear/memory_bear_adapter.py index 0e3f459f..3516cb58 100644 --- a/api/app/core/workflow/adapters/memory_bear/memory_bear_adapter.py +++ b/api/app/core/workflow/adapters/memory_bear/memory_bear_adapter.py @@ -4,65 +4,145 @@ # @Time : 2026/2/25 14:11 from typing import Any +from app.core.logging_config import get_logger from app.core.workflow.adapters.base_adapter import ( PlatformMetadata, PlatformType, BasePlatformAdapter, WorkflowParserResult ) -from app.schemas.workflow_schema import ExecutionConfig +from app.core.workflow.adapters.errors import ExceptionDefineition, ExceptionType, UnsupportNodeType +from app.core.workflow.adapters.memory_bear.memory_bear_converter import MemoryBearConverter +from app.core.workflow.nodes.enums import NodeType +from app.schemas.workflow_schema import ExecutionConfig, NodeDefinition, EdgeDefinition, VariableDefinition + +logger = get_logger() + +VALID_NODE_TYPES = frozenset(t.value for t in NodeType if t != NodeType.UNKNOWN) -class 
def _convert_node(self, node: dict[str, Any]) -> NodeDefinition | None:
    """
    Convert one raw MemoryBear node dict into a ``NodeDefinition``.

    An unmapped node type is recorded as an ``UnsupportNodeType`` error and
    the node is dropped. Otherwise the node's config is passed through the
    type-specific validator (which records its own errors) and the node is
    built from the raw dict.

    Args:
        node: Raw node dict from the workflow definition.

    Returns:
        The converted node, or ``None`` if the type is unsupported or the
        conversion raised.
    """
    node_id, node_name = node.get("id"), node.get("name")
    try:
        raw_type = node["type"]
        node_type = self.map_node_type(raw_type)
        if node_type == NodeType.UNKNOWN:
            self.errors.append(UnsupportNodeType(
                node_id=node_id,
                node_type=raw_type
            ))
            return None

        # The validator appends to self.errors when the config is invalid;
        # its return value is not used here.
        validate = self.get_node_convert(node_type)
        validate(node_id, node_name, node.get("config") or {})

        return NodeDefinition(**node)
    except Exception as exc:
        self.errors.append(ExceptionDefineition(
            type=ExceptionType.NODE,
            node_id=node_id,
            node_name=node_name,
            detail=f"convert node error - {exc}"
        ))
        logger.debug(f"MemoryBear convert node error - {exc}", exc_info=True)
        return None
class MemoryBearConverter(BaseConverter):
    """
    Config validator for native MemoryBear workflow nodes.

    MemoryBear configs are already in the target format, so "conversion"
    here means validating each node config against its config class and
    recording failures in ``self.errors``; the config itself is returned
    unchanged.
    """

    # Expected to be provided by the class this converter is combined with.
    errors: list
    warnings: list

    # Node type -> config class used to validate that node's config.
    CONFIG_CLASS_MAP: dict[NodeType, type[BaseNodeConfig]] = {
        NodeType.START: StartNodeConfig,
        NodeType.END: EndNodeConfig,
        NodeType.ANSWER: EndNodeConfig,
        NodeType.LLM: LLMNodeConfig,
        NodeType.AGENT: AgentNodeConfig,
        NodeType.IF_ELSE: IfElseNodeConfig,
        NodeType.KNOWLEDGE_RETRIEVAL: KnowledgeRetrievalNodeConfig,
        NodeType.ASSIGNER: AssignerNodeConfig,
        NodeType.CODE: CodeNodeConfig,
        NodeType.HTTP_REQUEST: HttpRequestNodeConfig,
        NodeType.JINJARENDER: JinjaRenderNodeConfig,
        NodeType.VAR_AGGREGATOR: VariableAggregatorNodeConfig,
        NodeType.PARAMETER_EXTRACTOR: ParameterExtractorNodeConfig,
        NodeType.LOOP: LoopNodeConfig,
        NodeType.ITERATION: IterationNodeConfig,
        NodeType.QUESTION_CLASSIFIER: QuestionClassifierNodeConfig,
        NodeType.TOOL: ToolNodeConfig,
        NodeType.MEMORY_READ: MemoryReadNodeConfig,
        NodeType.MEMORY_WRITE: MemoryWriteNodeConfig,
        NodeType.NOTES: NoteNodeConfig,
    }

    @staticmethod
    def _convert_file(var):
        """Return ``None`` for any file variable."""
        return None

    @staticmethod
    def _convert_array_file(var):
        """Return an empty list for any file-array variable."""
        return []

    def config_validate(self, node_id: str, node_name: str, config_cls: type[BaseNodeConfig], value: dict):
        """
        Validate ``value`` against ``config_cls``.

        Returns:
            The validated model, or ``None`` after appending a CONFIG error
            to ``self.errors`` when validation raises.
        """
        try:
            validated = config_cls.model_validate(value)
        except Exception as exc:
            self.errors.append(ExceptionDefineition(
                type=ExceptionType.CONFIG,
                node_id=node_id,
                node_name=node_name,
                detail=str(exc)
            ))
            return None
        return validated

    def get_node_convert(self, node_type: NodeType):
        """
        Return a callable ``(node_id, node_name, config) -> config``.

        For node types with a registered config class the callable validates
        the config (recording errors) before returning it unchanged; for any
        other type it is a plain pass-through.
        """
        config_cls = self.CONFIG_CLASS_MAP.get(node_type)

        if config_cls:
            def validate(node_id: str, node_name: str, config: dict):
                self.config_validate(node_id, node_name, config_cls, config)
                return config

            return validate

        return lambda node_id, node_name, config: config
index c2885ab0..ddee9adc 100644 --- a/api/app/core/workflow/engine/stream_output_coordinator.py +++ b/api/app/core/workflow/engine/stream_output_coordinator.py @@ -5,7 +5,7 @@ import re from typing import AsyncGenerator -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, PrivateAttr from app.core.logging_config import get_logger from app.core.workflow.engine.variable_pool import VariablePool @@ -52,10 +52,11 @@ class OutputContent(BaseModel): ) ) - _SCOPE: str | None = None + _SCOPE: str | None = PrivateAttr(default=None) - def get_scope(self) -> str: - self._SCOPE = SCOPE_PATTERN.findall(self.literal)[0] + def get_scope(self) -> str | None: + matches = SCOPE_PATTERN.findall(self.literal) + self._SCOPE = matches[0] if matches else None return self._SCOPE def depends_on_scope(self, scope: str) -> bool: @@ -68,6 +69,8 @@ class OutputContent(BaseModel): Returns: bool: True if this segment references the given scope. """ + if not self.is_variable: + return False if self._SCOPE: return self._SCOPE == scope return self.get_scope() == scope @@ -152,7 +155,7 @@ class StreamOutputConfig(BaseModel): """ # Case 1: resolve control branch dependency - if scope in self.control_nodes.keys(): + if scope in self.control_nodes: if status is None: raise RuntimeError("[Stream Output] Control node activation status not provided") if status in self.control_nodes[scope]: diff --git a/api/app/core/workflow/nodes/base_node.py b/api/app/core/workflow/nodes/base_node.py index 496454ba..0e3fecee 100644 --- a/api/app/core/workflow/nodes/base_node.py +++ b/api/app/core/workflow/nodes/base_node.py @@ -1,5 +1,6 @@ import asyncio import logging +import uuid from abc import ABC, abstractmethod from datetime import datetime from functools import cached_property @@ -15,6 +16,7 @@ from app.core.workflow.variable.base_variable import VariableType, FileObject from app.db import get_db_read from app.models import ModelConfig, ModelApiKey, LoadBalanceStrategy from app.schemas 
import FileInput +from app.schemas.model_schema import ModelInfo from app.services.multimodal_service import MultimodalService logger = logging.getLogger(__name__) @@ -619,11 +621,12 @@ class BaseNode(ABC): @staticmethod async def process_message( - provider: str, - is_omni: bool, + api_config: ModelInfo, content: str | dict | FileObject, + end_user_id: str, enable_file=False ) -> list | str | None: + provider = api_config.provider if isinstance(content, dict): content = FileObject( type=content.get("type"), @@ -642,16 +645,20 @@ class BaseNode(ABC): if content.content_cache.get(provider): return content.content_cache[provider] with get_db_read() as db: - multimodel_service = MultimodalService(db, provider, is_omni=is_omni) - message = await multimodel_service.process_files( - [FileInput.model_construct( - type=content.type, - url=content.url, - transfer_method=content.transfer_method, - file_type=content.origin_file_type, - upload_file_id=content.file_id - )] + multimodel_service = MultimodalService(db, api_config=api_config) + file_obj = FileInput( + type=content.type, + url=content.url, + transfer_method=content.transfer_method, + origin_file_type=content.origin_file_type, + upload_file_id=uuid.UUID(content.file_id) if content.file_id else None, ) + file_obj.set_content(content.get_content()) + message = await multimodel_service.process_files( + end_user_id, + [file_obj], + ) + content.set_content(file_obj.get_content()) if message: content.content_cache[provider] = message return message diff --git a/api/app/core/workflow/nodes/configs.py b/api/app/core/workflow/nodes/configs.py index e4e418fe..31dadc38 100644 --- a/api/app/core/workflow/nodes/configs.py +++ b/api/app/core/workflow/nodes/configs.py @@ -23,6 +23,7 @@ from app.core.workflow.nodes.question_classifier.config import QuestionClassifie from app.core.workflow.nodes.start.config import StartNodeConfig from app.core.workflow.nodes.tool.config import ToolNodeConfig from 
app.core.workflow.nodes.variable_aggregator.config import VariableAggregatorNodeConfig +from app.core.workflow.nodes.notes.config import NoteNodeConfig __all__ = [ # 基础类 @@ -47,5 +48,6 @@ __all__ = [ "ToolNodeConfig", "MemoryReadNodeConfig", "MemoryWriteNodeConfig", - "CodeNodeConfig" + "CodeNodeConfig", + "NoteNodeConfig" ] diff --git a/api/app/core/workflow/nodes/enums.py b/api/app/core/workflow/nodes/enums.py index ae9b81ff..43ab593b 100644 --- a/api/app/core/workflow/nodes/enums.py +++ b/api/app/core/workflow/nodes/enums.py @@ -25,6 +25,7 @@ class NodeType(StrEnum): MEMORY_WRITE = "memory-write" UNKNOWN = "unknown" + NOTES = "notes" BRANCH_NODES = [NodeType.IF_ELSE, NodeType.HTTP_REQUEST, NodeType.QUESTION_CLASSIFIER] diff --git a/api/app/core/workflow/nodes/http_request/config.py b/api/app/core/workflow/nodes/http_request/config.py index 9b41d9f2..fe38fafb 100644 --- a/api/app/core/workflow/nodes/http_request/config.py +++ b/api/app/core/workflow/nodes/http_request/config.py @@ -4,6 +4,7 @@ from pydantic import Field, BaseModel, field_validator from app.core.workflow.nodes.base_config import BaseNodeConfig from app.core.workflow.nodes.enums import HttpRequestMethod, HttpAuthType, HttpContentType, HttpErrorHandle +from app.core.workflow.variable.base_variable import FileObject class HttpAuthConfig(BaseModel): @@ -260,6 +261,11 @@ class HttpRequestNodeOutput(BaseModel): description="Http response headers" ) + files: list[FileObject] = Field( + default_factory=list, + description="List of files", + ) + output: str = Field( default="SUCCESS", description="HTTP response body", diff --git a/api/app/core/workflow/nodes/http_request/node.py b/api/app/core/workflow/nodes/http_request/node.py index e6c00eff..23378c83 100644 --- a/api/app/core/workflow/nodes/http_request/node.py +++ b/api/app/core/workflow/nodes/http_request/node.py @@ -1,24 +1,146 @@ import asyncio import json import logging +import mimetypes import uuid +import imghdr +from email.message import Message 
class HttpResponse:
    """
    Wrap an ``httpx.Response`` and classify its payload as text or file.

    The classification (``is_file``) is based on the Content-Disposition
    and Content-Type headers plus a sample of the body. File payloads are
    exposed through ``files`` and rendered in ``body`` as a markdown link
    to the response URL; anything else is returned as text.
    """

    # Substrings that mark an ``application/*`` content type as textual.
    _TEXT_APPLICATION_HINTS = ("json", "xml", "javascript", "x-www-form-urlencoded", "yaml", "graphql")
    # Byte sequences whose presence in the sample suggests source/markup text.
    _TEXT_MARKERS = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ")
    # Number of leading body bytes inspected when sniffing ``application/*``.
    _SAMPLE_SIZE = 1024
    # MIME types reported as documents by ``get_file_type``.
    _DOCUMENT_MIMES = frozenset({
        "application/pdf",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "text/plain",
    })

    def __init__(self, response: httpx.Response):
        self.response = response
        self.headers = dict(response.headers)

        # Lazily computed caches.
        self._is_file: bool | None = None
        self._sniffed: tuple | None = None

    @property
    def content_type(self) -> str:
        """Raw Content-Type header value, or ``""`` when absent."""
        return self.headers.get("content-type", "")

    @property
    def content_disposition(self) -> Message | None:
        """Content-Disposition header parsed into a ``Message``, or ``None`` when absent."""
        content_disposition = self.headers.get("content-disposition", "")
        if content_disposition:
            msg = Message()
            msg["content-disposition"] = content_disposition
            return msg
        return None

    @property
    def is_file(self) -> bool:
        """Whether the payload should be treated as a file (computed once, then cached)."""
        if self._is_file is None:
            self._is_file = self._detect_is_file()
        return self._is_file

    def _detect_is_file(self) -> bool:
        """Classify the payload from headers first, then from a body sample."""
        import codecs  # local import: only needed for the sampling branch

        content_type = self.content_type.split(";")[0].strip().lower()

        parsed_content_disposition = self.content_disposition
        if parsed_content_disposition is not None:
            disp_type = parsed_content_disposition.get_content_disposition()
            filename = parsed_content_disposition.get_filename()
            if disp_type == "attachment" or filename:
                return True

        if content_type.startswith("text/") and "csv" not in content_type:
            return False

        if content_type.startswith("application/"):
            if any(hint in content_type for hint in self._TEXT_APPLICATION_HINTS):
                return False
            content = self.response.content
            content_sample = content[:self._SAMPLE_SIZE]
            try:
                # Decode incrementally so that a multi-byte character cut in
                # half by the sample boundary is not mistaken for binary
                # data. When the sample is the whole body, an incomplete
                # trailing sequence is still an error.
                decoder = codecs.getincrementaldecoder("utf-8")()
                decoder.decode(content_sample, final=len(content) <= self._SAMPLE_SIZE)
            except UnicodeDecodeError:
                return True
            if any(marker in content_sample for marker in self._TEXT_MARKERS):
                return False

        # Round-trip through mimetypes to normalise the declared type.
        main_type, _ = mimetypes.guess_type("dummy" + (mimetypes.guess_extension(content_type) or ""))
        if main_type:
            return main_type.split("/")[0] in ("application", "image", "audio", "video")
        return any(media_type in content_type for media_type in ("image/", "audio/", "video/"))

    def _sniff_payload(self) -> tuple:
        """Return the cached ``get_file_type`` result for the response body."""
        if self._sniffed is None:
            self._sniffed = self.get_file_type(self.response.content)
        return self._sniffed

    @property
    def is_image(self):
        """
        Whether the payload is a file whose sniffed MIME type is an image.

        Uses the same libmagic-based detection as ``files`` instead of
        ``imghdr``, which was removed from the standard library in 3.13.
        """
        if self.is_file:
            return self._sniff_payload()[0] == FileType.IMAGE
        return False

    @property
    def url(self) -> str:
        """URL of the wrapped response as a string."""
        return str(self.response.url)

    @property
    def body(self) -> str:
        """Response text, or a markdown link (image link for images) for file payloads."""
        if self.is_file:
            return f"{'!' if self.is_image else ''}[file]({self.url})"
        return self.response.text

    @staticmethod
    def get_file_type(file_bytes) -> tuple[FileType | None, str | None]:
        """
        Sniff ``file_bytes`` with libmagic and map the MIME type to a ``FileType``.

        Returns:
            ``(file_type, mime)`` for image/video/audio/known document
            types, otherwise ``(None, None)``.
        """
        mime = magic.from_buffer(file_bytes, mime=True)

        if mime.startswith("image"):
            return FileType.IMAGE, mime
        elif mime.startswith("video"):
            return FileType.VIDEO, mime
        elif mime.startswith("audio"):
            return FileType.AUDIO, mime
        elif mime in HttpResponse._DOCUMENT_MIMES:
            return FileType.DOCUMENT, mime
        return None, None

    @property
    def files(self) -> list[FileObject]:
        """
        File objects carried by the response.

        Returns a single-element list when the payload is a file of a
        recognised type, otherwise an empty list. The body is only sniffed
        once the response has been classified as a file.
        """
        if not self.is_file:
            return []
        file_type, mime_type = self._sniff_payload()
        if not file_type or not mime_type:
            return []
        origin_file_type = mime_to_file_type(mime_type)
        if not origin_file_type:
            return []
        file_obj = FileObject(
            type=file_type,
            url=self.url,
            transfer_method=TransferMethod.REMOTE_URL.value,
            origin_file_type=origin_file_type,
            file_id=None,
            is_file=True
        )
        file_obj.set_content(self.response.content)
        return [file_obj]
@@ -44,6 +166,7 @@ class HttpRequestNode(BaseNode): "body": VariableType.STRING, "status_code": VariableType.NUMBER, "headers": VariableType.OBJECT, + "files": VariableType.ARRAY_FILE, "output": VariableType.STRING } @@ -232,10 +355,12 @@ class HttpRequestNode(BaseNode): ) resp.raise_for_status() logger.info(f"Node {self.node_id}: HTTP request succeeded") + response = HttpResponse(resp) return HttpRequestNodeOutput( - body=resp.text, + body=response.body, status_code=resp.status_code, headers=resp.headers, + files=response.files ).model_dump() except (httpx.HTTPStatusError, httpx.RequestError) as e: logger.error(f"HTTP request node exception: {e}") diff --git a/api/app/core/workflow/nodes/if_else/node.py b/api/app/core/workflow/nodes/if_else/node.py index 29f7085b..7e98efab 100644 --- a/api/app/core/workflow/nodes/if_else/node.py +++ b/api/app/core/workflow/nodes/if_else/node.py @@ -5,7 +5,7 @@ from typing import Any from app.core.workflow.engine.state_manager import WorkflowState from app.core.workflow.engine.variable_pool import VariablePool from app.core.workflow.nodes.base_node import BaseNode -from app.core.workflow.nodes.enums import ComparisonOperator, LogicOperator +from app.core.workflow.nodes.enums import ComparisonOperator, LogicOperator, ValueInputType from app.core.workflow.nodes.if_else import IfElseNodeConfig from app.core.workflow.nodes.operators import ConditionExpressionResolver, CompareOperatorInstance from app.core.workflow.variable.base_variable import VariableType @@ -23,6 +23,26 @@ class IfElseNode(BaseNode): "output": VariableType.STRING } + def _extract_input(self, state: WorkflowState, variable_pool: VariablePool) -> dict[str, Any]: + result = [] + for case in self.typed_config.cases: + expressions = [] + for expression in case.expressions: + expressions.append({ + "left": self.get_variable(expression.left, variable_pool, strict=False), + "right": expression.right + if expression.input_type == ValueInputType.CONSTANT + else 
self.get_variable(expression.right, variable_pool, strict=False), + "operator": expression.operator, + }) + result.append({ + "expressions": expressions, + "logical_operator": case.logical_operator, + }) + return { + "cases": result + } + @staticmethod def _evaluate(operator, instance: CompareOperatorInstance) -> Any: match operator: diff --git a/api/app/core/workflow/nodes/knowledge/node.py b/api/app/core/workflow/nodes/knowledge/node.py index 17f55319..14f789a9 100644 --- a/api/app/core/workflow/nodes/knowledge/node.py +++ b/api/app/core/workflow/nodes/knowledge/node.py @@ -30,6 +30,12 @@ class KnowledgeRetrievalNode(BaseNode): "output": VariableType.ARRAY_STRING } + def _extract_input(self, state: WorkflowState, variable_pool: VariablePool) -> dict[str, Any]: + return { + "query": self._render_template(self.typed_config.query, variable_pool), + "knowledge_bases": [kb_config.model_dump(mode="json") for kb_config in self.typed_config.knowledge_bases], + } + @staticmethod def _build_kb_filter(kb_ids: list[uuid.UUID], permission: knowledge_model.PermissionType): """ @@ -180,6 +186,8 @@ class KnowledgeRetrievalNode(BaseNode): RuntimeError: If no valid knowledge base is found or access is denied. 
""" self.typed_config = KnowledgeRetrievalNodeConfig(**self.config) + if not self.typed_config.knowledge_bases: + return [] query = self._render_template(self.typed_config.query, variable_pool) with get_db_read() as db: knowledge_bases = self.typed_config.knowledge_bases diff --git a/api/app/core/workflow/nodes/llm/node.py b/api/app/core/workflow/nodes/llm/node.py index 186c204f..b293d1f4 100644 --- a/api/app/core/workflow/nodes/llm/node.py +++ b/api/app/core/workflow/nodes/llm/node.py @@ -20,6 +20,7 @@ from app.core.workflow.nodes.llm.config import LLMNodeConfig from app.core.workflow.variable.base_variable import VariableType from app.db import get_db_context from app.models import ModelType +from app.schemas.model_schema import ModelInfo from app.services.model_service import ModelConfigService logger = logging.getLogger(__name__) @@ -113,12 +114,15 @@ class LLMNode(BaseNode): # 在 Session 关闭前提取所有需要的数据 api_config = self.model_balance(config) - model_name = api_config.model_name - provider = api_config.provider - api_key = api_config.api_key - api_base = api_config.api_base - is_omni = api_config.is_omni - model_type = config.type + model_info = ModelInfo( + model_name=api_config.model_name, + model_type=ModelType(config.type), + api_key=api_config.api_key, + api_base=api_config.api_base, + provider=api_config.provider, + is_omni=api_config.is_omni, + capability=api_config.capability + ) # 4. 
创建 LLM 实例(使用已提取的数据) # 注意:对于流式输出,需要在模型初始化时设置 streaming=True @@ -126,17 +130,18 @@ class LLMNode(BaseNode): llm = RedBearLLM( RedBearModelConfig( - model_name=model_name, - provider=provider, - api_key=api_key, - base_url=api_base, + model_name=model_info.model_name, + provider=model_info.provider, + api_key=model_info.api_key, + base_url=model_info.api_base, extra_params=extra_params, - is_omni=is_omni + is_omni=model_info.is_omni ), - type=ModelType(model_type) + type=model_info.model_type ) - logger.debug(f"创建 LLM 实例: provider={provider}, model={model_name}, streaming={stream}") + logger.debug( + f"创建 LLM 实例: provider={model_info.provider}, model={model_info.model_name}, streaming={stream}") messages_config = self.typed_config.messages @@ -148,35 +153,40 @@ class LLMNode(BaseNode): content_template = msg_config.content content_template = self._render_context(content_template, variable_pool) content = self._render_template(content_template, variable_pool) - + user_id = self.get_variable("sys.user_id", variable_pool) # 根据角色创建对应的消息对象 if role == "system": messages.append({ "role": "system", - "content": await self.process_message(provider, is_omni, content, self.typed_config.vision) + "content": await self.process_message( + model_info, + content, + user_id, + self.typed_config.vision, + ) }) elif role in ["user", "human"]: messages.append({ "role": "user", - "content": await self.process_message(provider, is_omni, content, self.typed_config.vision) + "content": await self.process_message(model_info, content, user_id, self.typed_config.vision) }) elif role in ["ai", "assistant"]: messages.append({ "role": "assistant", - "content": await self.process_message(provider, is_omni, content, self.typed_config.vision) + "content": await self.process_message(model_info, content, user_id, self.typed_config.vision) }) else: logger.warning(f"未知的消息角色: {role},默认使用 user") messages.append({ "role": "user", - "content": await self.process_message(provider, is_omni, content, 
self.typed_config.vision) + "content": await self.process_message(model_info, content, user_id, self.typed_config.vision) }) if self.typed_config.vision_input and self.typed_config.vision: file_content = [] files = variable_pool.get_instance(self.typed_config.vision_input) for file in files.value: - content = await self.process_message(provider, is_omni, file.value, self.typed_config.vision) + content = await self.process_message(model_info, file.value, user_id, self.typed_config.vision) if content: file_content.extend(content) if messages and messages[-1]["role"] == 'user': @@ -190,14 +200,19 @@ class LLMNode(BaseNode): if isinstance(message["content"], list): file_content = [] for file in message["content"]: - content = await self.process_message(provider, is_omni, file, self.typed_config.vision) + content = await self.process_message(model_info, file, user_id, self.typed_config.vision) if content: file_content.extend(content) history_message.append( {"role": message["role"], "content": file_content} ) else: - message["content"] = await self.process_message(provider, is_omni, message["content"], self.typed_config.vision) + message["content"] = await self.process_message( + model_info, + message["content"], + user_id, + self.typed_config.vision + ) history_message.append(message) messages = messages[:-1] + history_message + messages[-1:] self.messages = messages @@ -293,7 +308,7 @@ class LLMNode(BaseNode): # 调用 LLM(流式,支持字符串或消息列表) last_meta_data = {} - async for chunk in llm.astream(self.messages, stream_usage=True): + async for chunk in llm.astream(self.messages): # 提取内容 if hasattr(chunk, 'content'): content = self.process_model_output(chunk.content) diff --git a/api/app/core/workflow/nodes/notes/__init__.py b/api/app/core/workflow/nodes/notes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/api/app/core/workflow/nodes/notes/config.py b/api/app/core/workflow/nodes/notes/config.py new file mode 100644 index 00000000..42b4a1ab --- /dev/null +++ 
b/api/app/core/workflow/nodes/notes/config.py @@ -0,0 +1,12 @@ +from pydantic import Field + +from app.core.workflow.nodes.base_config import BaseNodeConfig + + +class NoteNodeConfig(BaseNodeConfig): + author: str = Field(default="", description="author") + text: str = Field(default="", description="note content") + width: int = Field(default=80) + height: int = Field(default=80) + theme: str = Field(default="blue") + show_author: bool = Field(default=True) diff --git a/api/app/core/workflow/nodes/parameter_extractor/node.py b/api/app/core/workflow/nodes/parameter_extractor/node.py index 700ed85f..acac09e4 100644 --- a/api/app/core/workflow/nodes/parameter_extractor/node.py +++ b/api/app/core/workflow/nodes/parameter_extractor/node.py @@ -37,6 +37,14 @@ class ParameterExtractorNode(BaseNode): } return None + def _extract_input(self, state: WorkflowState, variable_pool: VariablePool) -> dict[str, Any]: + return { + "text": self._render_template(self.typed_config.text, variable_pool), + "prompt": self._render_template(self.typed_config.prompt, variable_pool), + "params": [param.model_dump(mode="json") for param in self.typed_config.params], + "model_id": str(self.typed_config.model_id), + } + def _output_types(self) -> dict[str, VariableType]: outputs = {} for param in self.typed_config.params: diff --git a/api/app/core/workflow/nodes/tool/node.py b/api/app/core/workflow/nodes/tool/node.py index 096f498f..0e9d3c62 100644 --- a/api/app/core/workflow/nodes/tool/node.py +++ b/api/app/core/workflow/nodes/tool/node.py @@ -27,7 +27,6 @@ class ToolNode(BaseNode): def _output_types(self) -> dict[str, VariableType]: return { "data": VariableType.STRING, - "error_code": VariableType.STRING, "execution_time": VariableType.NUMBER } @@ -48,10 +47,7 @@ class ToolNode(BaseNode): if not tenant_id: logger.error(f"节点 {self.node_id} 缺少租户ID") - return { - "success": False, - "data": "缺少租户ID" - } + raise ValueError("缺少租户ID") # 渲染工具参数 rendered_parameters = {} @@ -83,13 +79,8 @@ class 
ToolNode(BaseNode): logger.info(f"节点 {self.node_id} 工具执行成功") return { "data": result.data if isinstance(result.data, str) else json.dumps(result.data, ensure_ascii=False), - "error_code": "", "execution_time": result.execution_time } else: logger.error(f"节点 {self.node_id} 工具执行失败: {result.error}") - return { - "data": result.error if isinstance(result.error, str) else json.dumps(result.error, ensure_ascii=False), - "error_code": result.error_code, - "execution_time": result.execution_time - } + raise ValueError(f"工具执行失败: {result.error if isinstance(result.error, str) else json.dumps(result.error, ensure_ascii=False)}") diff --git a/api/app/core/workflow/utils/file_processer.py b/api/app/core/workflow/utils/file_processer.py new file mode 100644 index 00000000..ae406ab0 --- /dev/null +++ b/api/app/core/workflow/utils/file_processer.py @@ -0,0 +1,56 @@ +# -*- coding: UTF-8 -*- +# Author: Eternity +# @Email: 1533512157@qq.com +# @Time : 2026/3/10 13:36 +TRANSFORM_FILE_TYPE = { + 'text/plain': 'document/text', + 'text/markdown': 'document/markdown', + 'text/x-markdown': 'document/x-markdown', + + 'application/pdf': 'document/pdf', + + 'application/msword': 'document/doc', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'document/docx', + + 'application/vnd.ms-powerpoint': 'document/ppt', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'document/pptx', +} +ALLOWED_FILE_TYPES = [ + 'text/plain', + 'text/markdown', + 'text/x-markdown', + 'application/pdf', + 'application/msword', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.ms-powerpoint', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'image/jpg', + 'image/jpeg', + 'image/png', + 'image/gif', + 'image/bmp', + 'image/webp', + 'image/svg+xml', + 'video/mp4', + 'video/quicktime', + 'video/x-msvideo', + 'video/x-matroska', + 'video/webm', + 'video/x-flv', + 'video/x-ms-wmv', + 
'audio/mpeg', + 'audio/wav', + 'audio/ogg', + 'audio/aac', + 'audio/flac', + 'audio/mp4', + 'audio/x-ms-wma', + 'audio/x-m4a', +] + + +def mime_to_file_type(mime_type): + if mime_type not in ALLOWED_FILE_TYPES: + return None + + return TRANSFORM_FILE_TYPE.get(mime_type, mime_type) diff --git a/api/app/core/workflow/validator.py b/api/app/core/workflow/validator.py index 47256b75..3b6e9036 100644 --- a/api/app/core/workflow/validator.py +++ b/api/app/core/workflow/validator.py @@ -138,7 +138,7 @@ class WorkflowValidator: errors.append("工作流必须至少有一个 end 节点") # 3. 验证节点 ID 唯一性 - node_ids = [n.get("id") for n in nodes] + node_ids = [n.get("id") for n in nodes if n.get("type") != NodeType.NOTES] if len(node_ids) != len(set(node_ids)): duplicates = [nid for nid in node_ids if node_ids.count(nid) > 1] errors.append(f"节点 ID 必须唯一,重复的 ID: {set(duplicates)}") diff --git a/api/app/core/workflow/variable/base_variable.py b/api/app/core/workflow/variable/base_variable.py index dd821ea7..aea40cf6 100644 --- a/api/app/core/workflow/variable/base_variable.py +++ b/api/app/core/workflow/variable/base_variable.py @@ -114,9 +114,16 @@ class FileObject(BaseModel): file_id: str | None content_cache: dict = Field(default_factory=dict) - is_file: bool + _byte_content: bytes | None = None + + def get_content(self): + return self._byte_content + + def set_content(self, byte_content): + self._byte_content = byte_content + class BaseVariable(ABC): """Abstract base class for all workflow variables. 
diff --git a/api/app/db.py b/api/app/db.py index cdaa6dbd..80ab2756 100644 --- a/api/app/db.py +++ b/api/app/db.py @@ -16,7 +16,7 @@ engine = create_engine( pool_recycle=settings.DB_POOL_RECYCLE, pool_timeout=settings.DB_POOL_TIMEOUT, connect_args={ - "options": "-c timezone=Asia/Shanghai -c statement_timeout=60000" + "options": "-c timezone=UTC -c statement_timeout=60000" }, ) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) diff --git a/api/app/i18n/README.md b/api/app/i18n/README.md new file mode 100644 index 00000000..7374e966 --- /dev/null +++ b/api/app/i18n/README.md @@ -0,0 +1,61 @@ +# Internationalization (i18n) Module + +This module provides internationalization support for the MemoryBear API. + +## Components + +- `service.py` - Translation service and core translation logic +- `middleware.py` - Language detection middleware +- `dependencies.py` - FastAPI dependency injection functions +- `exceptions.py` - Internationalized exception classes + +## Usage + +### Basic Translation + +```python +from app.i18n import t + +# Simple translation +message = t("common.success.created") + +# Parameterized translation +message = t("common.validation.required", field="Name") +``` + +### Enum Translation + +```python +from app.i18n import t_enum + +# Translate enum value +role_display = t_enum("workspace_role", "manager") +``` + +### In FastAPI Endpoints + +```python +from fastapi import Depends +from app.i18n.dependencies import get_translator + +@router.post("/workspaces") +async def create_workspace( + data: WorkspaceCreate, + t: Callable = Depends(get_translator) +): + workspace = await workspace_service.create(data) + return { + "success": True, + "message": t("workspace.created_successfully"), + "data": workspace + } +``` + +## Configuration + +See `app/core/config.py` for i18n configuration options: + +- `I18N_DEFAULT_LANGUAGE` - Default language (default: "zh") +- `I18N_SUPPORTED_LANGUAGES` - Supported languages (default: "zh,en") +- 
`I18N_ENABLE_TRANSLATION_CACHE` - Enable caching (default: true) +- `I18N_LOG_MISSING_TRANSLATIONS` - Log missing translations (default: true) diff --git a/api/app/i18n/__init__.py b/api/app/i18n/__init__.py new file mode 100644 index 00000000..38d2b5bd --- /dev/null +++ b/api/app/i18n/__init__.py @@ -0,0 +1,124 @@ +""" +Internationalization (i18n) module for MemoryBear Enterprise. + +This module provides complete i18n support for the backend API including: +- Translation loading from multiple directories (community + enterprise) +- Translation service with caching and fallback +- Language detection middleware +- Dependency injection for FastAPI +- Convenience functions for easy usage + +Usage: + from app.i18n import t, t_enum + + # Simple translation + message = t("common.success.created") + + # Parameterized translation + error = t("common.validation.required", field="名称") + + # Enum translation + role_display = t_enum("workspace_role", "manager") +""" + +from app.i18n.dependencies import ( + get_current_language, + get_enum_translator, + get_translator, +) +from app.i18n.exceptions import ( + BadRequestError, + ConflictError, + FileNotFoundError, + FileTooLargeError, + ForbiddenError, + I18nException, + InternalServerError, + InvalidCredentialsError, + InvalidFileTypeError, + NotFoundError, + QuotaExceededError, + RateLimitExceededError, + ServiceUnavailableError, + TenantNotFoundError, + TenantSuspendedError, + TokenExpiredError, + TokenInvalidError, + UnauthorizedError, + UserAlreadyExistsError, + UserNotFoundError, + ValidationError, + WorkspaceNotFoundError, + WorkspacePermissionDeniedError, + get_current_locale, + set_current_locale, +) +from app.i18n.loader import TranslationLoader +from app.i18n.logger import ( + TranslationLogger, + get_translation_logger, + log_missing_translation, + log_translation_error, +) +from app.i18n.middleware import LanguageMiddleware +from app.i18n.serializers import ( + I18nResponseMixin, + WorkspaceSerializer, + 
WorkspaceMemberSerializer, + WorkspaceInviteSerializer, +) +from app.i18n.service import ( + TranslationService, + get_translation_service, + t, + t_enum, +) + +__all__ = [ + "TranslationLoader", + "LanguageMiddleware", + "TranslationService", + "get_translation_service", + "t", + "t_enum", + "get_current_language", + "get_translator", + "get_enum_translator", + # Context management + "get_current_locale", + "set_current_locale", + # Logging + "TranslationLogger", + "get_translation_logger", + "log_missing_translation", + "log_translation_error", + # Serializers + "I18nResponseMixin", + "WorkspaceSerializer", + "WorkspaceMemberSerializer", + "WorkspaceInviteSerializer", + # Exception classes + "I18nException", + "BadRequestError", + "UnauthorizedError", + "ForbiddenError", + "NotFoundError", + "ConflictError", + "ValidationError", + "InternalServerError", + "ServiceUnavailableError", + "WorkspaceNotFoundError", + "WorkspacePermissionDeniedError", + "UserNotFoundError", + "UserAlreadyExistsError", + "TenantNotFoundError", + "TenantSuspendedError", + "InvalidCredentialsError", + "TokenExpiredError", + "TokenInvalidError", + "FileNotFoundError", + "FileTooLargeError", + "InvalidFileTypeError", + "RateLimitExceededError", + "QuotaExceededError", +] diff --git a/api/app/i18n/cache.py b/api/app/i18n/cache.py new file mode 100644 index 00000000..5b0837d9 --- /dev/null +++ b/api/app/i18n/cache.py @@ -0,0 +1,291 @@ +""" +Advanced caching system for i18n translations. + +This module provides: +- LRU cache for hot translations +- Lazy loading mechanism +- Memory optimization +- Cache statistics +""" + +import logging +from functools import lru_cache +from typing import Any, Dict, Optional +from collections import OrderedDict +import time + +logger = logging.getLogger(__name__) + + +class TranslationCache: + """ + Advanced translation cache with LRU eviction and lazy loading. 
+ + Features: + - LRU cache for frequently accessed translations + - Lazy loading to reduce startup time + - Memory-efficient storage + - Cache hit/miss statistics + """ + + def __init__(self, max_lru_size: int = 1000, enable_lazy_load: bool = True): + """ + Initialize the translation cache. + + Args: + max_lru_size: Maximum size of LRU cache for hot translations + enable_lazy_load: Enable lazy loading of locales + """ + self.max_lru_size = max_lru_size + self.enable_lazy_load = enable_lazy_load + + # Main cache: {locale: {namespace: {key: value}}} + self._main_cache: Dict[str, Dict[str, Any]] = {} + + # LRU cache for hot translations + self._lru_cache: OrderedDict = OrderedDict() + + # Loaded locales tracker + self._loaded_locales: set = set() + + # Statistics + self._stats = { + "hits": 0, + "misses": 0, + "lru_hits": 0, + "lru_misses": 0, + "lazy_loads": 0 + } + + logger.info( + f"TranslationCache initialized with LRU size: {max_lru_size}, " + f"lazy loading: {enable_lazy_load}" + ) + + def set_locale_data(self, locale: str, data: Dict[str, Any]): + """ + Set translation data for a locale. + + Args: + locale: Locale code + data: Translation data dictionary + """ + self._main_cache[locale] = data + self._loaded_locales.add(locale) + logger.debug(f"Loaded locale '{locale}' into cache") + + def get_translation( + self, + locale: str, + namespace: str, + key_path: list + ) -> Optional[str]: + """ + Get translation from cache with LRU optimization. 
+ + Args: + locale: Locale code + namespace: Translation namespace + key_path: List of nested keys + + Returns: + Translation string or None if not found + """ + # Build cache key for LRU + cache_key = f"{locale}:{namespace}:{'.'.join(key_path)}" + + # Check LRU cache first (hot translations) + if cache_key in self._lru_cache: + self._stats["lru_hits"] += 1 + self._stats["hits"] += 1 + # Move to end (most recently used) + self._lru_cache.move_to_end(cache_key) + return self._lru_cache[cache_key] + + self._stats["lru_misses"] += 1 + + # Check main cache + if locale not in self._main_cache: + self._stats["misses"] += 1 + return None + + if namespace not in self._main_cache[locale]: + self._stats["misses"] += 1 + return None + + # Navigate through nested keys + current = self._main_cache[locale][namespace] + for key in key_path: + if isinstance(current, dict) and key in current: + current = current[key] + else: + self._stats["misses"] += 1 + return None + + # Return only if it's a string value + if not isinstance(current, str): + self._stats["misses"] += 1 + return None + + self._stats["hits"] += 1 + + # Add to LRU cache + self._add_to_lru(cache_key, current) + + return current + + def _add_to_lru(self, key: str, value: str): + """ + Add translation to LRU cache. + + Args: + key: Cache key + value: Translation value + """ + # Remove oldest if cache is full + if len(self._lru_cache) >= self.max_lru_size: + self._lru_cache.popitem(last=False) + + self._lru_cache[key] = value + + def is_locale_loaded(self, locale: str) -> bool: + """ + Check if a locale is loaded. + + Args: + locale: Locale code + + Returns: + True if locale is loaded + """ + return locale in self._loaded_locales + + def get_loaded_locales(self) -> list: + """ + Get list of loaded locales. 
+ + Returns: + List of locale codes + """ + return list(self._loaded_locales) + + def clear_lru(self): + """Clear the LRU cache.""" + self._lru_cache.clear() + logger.info("LRU cache cleared") + + def clear_locale(self, locale: str): + """ + Clear cache for a specific locale. + + Args: + locale: Locale code + """ + if locale in self._main_cache: + del self._main_cache[locale] + self._loaded_locales.discard(locale) + + # Clear related LRU entries + keys_to_remove = [k for k in self._lru_cache if k.startswith(f"{locale}:")] + for key in keys_to_remove: + del self._lru_cache[key] + + logger.info(f"Cleared cache for locale '{locale}'") + + def clear_all(self): + """Clear all caches.""" + self._main_cache.clear() + self._lru_cache.clear() + self._loaded_locales.clear() + logger.info("All caches cleared") + + def get_stats(self) -> Dict[str, Any]: + """ + Get cache statistics. + + Returns: + Dictionary with cache statistics + """ + total_requests = self._stats["hits"] + self._stats["misses"] + hit_rate = ( + self._stats["hits"] / total_requests * 100 + if total_requests > 0 + else 0 + ) + + lru_total = self._stats["lru_hits"] + self._stats["lru_misses"] + lru_hit_rate = ( + self._stats["lru_hits"] / lru_total * 100 + if lru_total > 0 + else 0 + ) + + return { + "total_requests": total_requests, + "hits": self._stats["hits"], + "misses": self._stats["misses"], + "hit_rate": round(hit_rate, 2), + "lru_hits": self._stats["lru_hits"], + "lru_misses": self._stats["lru_misses"], + "lru_hit_rate": round(lru_hit_rate, 2), + "lru_size": len(self._lru_cache), + "lru_max_size": self.max_lru_size, + "loaded_locales": len(self._loaded_locales), + "lazy_loads": self._stats["lazy_loads"] + } + + def reset_stats(self): + """Reset cache statistics.""" + self._stats = { + "hits": 0, + "misses": 0, + "lru_hits": 0, + "lru_misses": 0, + "lazy_loads": 0 + } + logger.info("Cache statistics reset") + + def get_memory_usage(self) -> Dict[str, Any]: + """ + Estimate memory usage of the cache. 
+ + Returns: + Dictionary with memory usage information + """ + import sys + + main_cache_size = sys.getsizeof(self._main_cache) + lru_cache_size = sys.getsizeof(self._lru_cache) + + # Rough estimate of nested data + for locale_data in self._main_cache.values(): + main_cache_size += sys.getsizeof(locale_data) + for namespace_data in locale_data.values(): + main_cache_size += sys.getsizeof(namespace_data) + + return { + "main_cache_bytes": main_cache_size, + "lru_cache_bytes": lru_cache_size, + "total_bytes": main_cache_size + lru_cache_size, + "main_cache_mb": round(main_cache_size / 1024 / 1024, 2), + "lru_cache_mb": round(lru_cache_size / 1024 / 1024, 2), + "total_mb": round((main_cache_size + lru_cache_size) / 1024 / 1024, 2) + } + + +@lru_cache(maxsize=128) +def get_cached_translation_key(locale: str, namespace: str, key: str) -> str: + """ + LRU cached function for building translation cache keys. + + This reduces string concatenation overhead for frequently accessed keys. + + Args: + locale: Locale code + namespace: Translation namespace + key: Translation key + + Returns: + Cache key string + """ + return f"{locale}:{namespace}:{key}" diff --git a/api/app/i18n/dependencies.py b/api/app/i18n/dependencies.py new file mode 100644 index 00000000..4c8e9a11 --- /dev/null +++ b/api/app/i18n/dependencies.py @@ -0,0 +1,158 @@ +""" +FastAPI dependency injection functions for i18n. + +This module provides dependency injection functions that can be used +in FastAPI route handlers to access the current language and translator. +""" + +import logging +from typing import Callable + +from fastapi import Request + +from app.i18n.service import get_translation_service + +logger = logging.getLogger(__name__) + + +async def get_current_language(request: Request) -> str: + """ + Get the current language from the request context. + + This dependency extracts the language that was determined by the + LanguageMiddleware and stored in request.state. 
+ + Args: + request: FastAPI request object + + Returns: + Language code (e.g., "zh", "en") + + Usage: + @router.get("/example") + async def example(language: str = Depends(get_current_language)): + return {"language": language} + """ + # Get language from request state (set by LanguageMiddleware) + language = getattr(request.state, "language", None) + + if language is None: + # Fallback to default language if not set + from app.core.config import settings + language = settings.I18N_DEFAULT_LANGUAGE + logger.warning( + "Language not found in request.state, using default: " + f"{language}" + ) + + return language + + +async def get_translator(request: Request) -> Callable: + """ + Get a translator function bound to the current request's language. + + This dependency returns a translation function that automatically + uses the current request's language, making it easy to translate + strings in route handlers. + + Args: + request: FastAPI request object + + Returns: + Translation function with signature: t(key: str, **params) -> str + + Usage: + @router.post("/workspaces") + async def create_workspace( + data: WorkspaceCreate, + t: Callable = Depends(get_translator) + ): + workspace = await workspace_service.create(data) + return { + "success": True, + "message": t("workspace.created_successfully"), + "data": workspace + } + + # With parameters + @router.get("/items") + async def get_items(t: Callable = Depends(get_translator)): + count = 5 + return { + "message": t("items.found", count=count) + } + """ + # Get current language + language = await get_current_language(request) + + # Get translation service + service = get_translation_service() + + # Return a bound translation function + def translate(key: str, **params) -> str: + """ + Translate a key using the current request's language. 
+ + Args: + key: Translation key (e.g., "common.success.created") + **params: Parameters for parameterized messages + + Returns: + Translated string + """ + return service.translate(key, language, **params) + + return translate + + +async def get_enum_translator(request: Request) -> Callable: + """ + Get an enum translator function bound to the current request's language. + + This dependency returns a function for translating enum values + that automatically uses the current request's language. + + Args: + request: FastAPI request object + + Returns: + Enum translation function with signature: + t_enum(enum_type: str, value: str) -> str + + Usage: + @router.get("/workspace/{id}") + async def get_workspace( + id: str, + t_enum: Callable = Depends(get_enum_translator) + ): + workspace = await workspace_service.get(id) + return { + "id": workspace.id, + "role": workspace.role, + "role_display": t_enum("workspace_role", workspace.role), + "status": workspace.status, + "status_display": t_enum("workspace_status", workspace.status) + } + """ + # Get current language + language = await get_current_language(request) + + # Get translation service + service = get_translation_service() + + # Return a bound enum translation function + def translate_enum(enum_type: str, value: str) -> str: + """ + Translate an enum value using the current request's language. + + Args: + enum_type: Enum type name (e.g., "workspace_role") + value: Enum value (e.g., "manager") + + Returns: + Translated enum display name + """ + return service.translate_enum(enum_type, value, language) + + return translate_enum diff --git a/api/app/i18n/exceptions.py b/api/app/i18n/exceptions.py new file mode 100644 index 00000000..b81369ed --- /dev/null +++ b/api/app/i18n/exceptions.py @@ -0,0 +1,495 @@ +""" +Internationalized exception classes for i18n system. + +This module provides exception classes that automatically translate +error messages based on the current request's language. 
+""" + +import logging +from contextvars import ContextVar +from typing import Any, Dict, Optional + +from fastapi import HTTPException, Request + +from app.i18n.service import get_translation_service + +logger = logging.getLogger(__name__) + +# Context variable to store current locale +_current_locale: ContextVar[Optional[str]] = ContextVar("current_locale", default=None) + + +def set_current_locale(locale: str) -> None: + """ + Set the current locale in the context variable. + + This should be called by the LanguageMiddleware. + + Args: + locale: Locale code (e.g., "zh", "en") + """ + _current_locale.set(locale) + + +def get_current_locale() -> Optional[str]: + """ + Get the current locale from the context variable. + + Returns: + Locale code or None if not set + """ + return _current_locale.get() + + +class I18nException(HTTPException): + """ + Base exception class with automatic i18n support. + + This exception automatically translates error messages based on: + 1. The current request's language (from request.state.language) + 2. The fallback language if request language is not available + 3. The error key itself if no translation is found + + Features: + - Automatic error message translation + - Parameterized error messages support + - Consistent error response format + - Language-aware error handling + + Usage: + # Simple error + raise I18nException( + error_key="errors.workspace.not_found", + status_code=404 + ) + + # Error with parameters + raise I18nException( + error_key="errors.validation.missing_field", + status_code=400, + field="name" + ) + + # Custom error code + raise I18nException( + error_key="errors.workspace.not_found", + error_code="WORKSPACE_NOT_FOUND", + status_code=404, + workspace_id="123" + ) + """ + + def __init__( + self, + error_key: str, + status_code: int = 400, + error_code: Optional[str] = None, + locale: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, + **params + ): + """ + Initialize the i18n exception. 
def _generate_error_code(self, error_key: str) -> str:
    """
    Derive an UPPER_SNAKE_CASE error code from a translation key.

    Example: "errors.workspace.not_found" becomes "WORKSPACE_NOT_FOUND".

    Args:
        error_key: Translation key

    Returns:
        Error code in UPPER_SNAKE_CASE
    """
    prefix = "errors."

    # Drop a leading "errors." namespace, if there is one
    if error_key.startswith(prefix):
        stem = error_key[len(prefix):]
    else:
        stem = error_key

    # Dots separate key segments; the code joins them with underscores
    return stem.replace(".", "_").upper()
class WorkspaceNotFoundError(NotFoundError):
    """Raised when a workspace cannot be found (a NotFoundError)."""

    def __init__(self, workspace_id: Optional[str] = None, **params):
        # Only a truthy workspace_id is forwarded as a message parameter
        extra = {"workspace_id": workspace_id} if workspace_id else {}
        super().__init__(
            error_key="errors.workspace.not_found",
            error_code="WORKSPACE_NOT_FOUND",
            **{**params, **extra},
        )
# NOTE: this class name shadows the built-in FileNotFoundError inside this
# module and in any module that imports it by name.
class FileNotFoundError(NotFoundError):
    """Raised when a file cannot be found (a NotFoundError)."""

    def __init__(self, file_id: Optional[str] = None, **params):
        # Only a truthy file_id is forwarded as a message parameter
        extra = {"file_id": file_id} if file_id else {}
        super().__init__(
            error_key="errors.file.not_found",
            error_code="FILE_NOT_FOUND",
            **{**params, **extra},
        )
class QuotaExceededError(ForbiddenError):
    """Raised when a quota has been exceeded (a ForbiddenError)."""

    def __init__(self, resource: Optional[str] = None, **params):
        # Only a truthy resource is forwarded as a message parameter
        if resource:
            params.update(resource=resource)

        super().__init__(
            error_key="errors.api.quota_exceeded",
            error_code="QUOTA_EXCEEDED",
            **params,
        )
def load_locale(self, locale: str) -> Dict[str, Any]:
    """
    Load all translation files for a specific locale from all directories.

    Every ``<locales_dir>/<locale>/<namespace>.json`` file is read. When the
    same namespace appears in several directories the contents are
    deep-merged, with later directories overriding earlier ones.

    Files that cannot be read or parsed, or whose top-level JSON value is
    not an object, are logged and skipped; they never abort the load.

    Args:
        locale: Locale code (e.g., 'zh', 'en')

    Returns:
        Dictionary of translations organized by namespace
        Format: {namespace: {key: value, ...}, ...}
    """
    translations: Dict[str, Any] = {}

    # Load from each directory in order (later directories override earlier)
    for locales_dir in self.locales_dirs:
        locale_dir = locales_dir / locale
        if not locale_dir.exists():
            logger.debug(f"Locale directory not found: {locale_dir}")
            continue

        # The file stem is the namespace (e.g. "common.json" -> "common")
        for json_file in locale_dir.glob("*.json"):
            namespace = json_file.stem

            # Keep the try body limited to the I/O and parsing that can fail
            try:
                with open(json_file, "r", encoding="utf-8") as f:
                    new_translations = json.load(f)
            except json.JSONDecodeError as e:
                logger.error(
                    f"Failed to parse JSON file {json_file}: {e}"
                )
                continue
            except Exception as e:
                logger.error(
                    f"Failed to load translation file {json_file}: {e}"
                )
                continue

            # json.load may return a list, string, number, ... ; only an
            # object can act as a namespace or be deep-merged later on.
            if not isinstance(new_translations, dict):
                logger.error(
                    f"Failed to load translation file {json_file}: "
                    f"top-level JSON value must be an object, "
                    f"got {type(new_translations).__name__}"
                )
                continue

            if namespace in translations:
                translations[namespace] = self._deep_merge(
                    translations[namespace],
                    new_translations
                )
                logger.debug(
                    f"Merged translations: {locale}/{namespace} from {json_file}"
                )
            else:
                translations[namespace] = new_translations
                logger.debug(
                    f"Loaded translations: {locale}/{namespace} from {json_file}"
                )

    if not translations:
        logger.warning(f"No translations found for locale: {locale}")

    return translations
def _deep_merge(self, base: Dict, override: Dict) -> Dict:
    """
    Recursively merge ``override`` into a copy of ``base``.

    When both sides hold a dict under the same key the two are merged
    recursively; in every other case the value from ``override`` wins.
    ``base`` itself is not modified.

    Args:
        base: Base dictionary
        override: Dictionary with values to override

    Returns:
        Merged dictionary
    """
    merged = base.copy()

    for name, incoming in override.items():
        existing = merged.get(name)
        if isinstance(existing, dict) and isinstance(incoming, dict):
            # Nested mapping on both sides: descend instead of replacing
            merged[name] = self._deep_merge(existing, incoming)
            continue
        merged[name] = incoming

    return merged
+ + Args: + log_file: Optional custom log file path for missing translations + """ + self.log_file = log_file or "logs/i18n/missing_translations.log" + self._missing_translations: Dict[str, Set[str]] = defaultdict(set) + self._missing_with_context: List[Dict] = [] + self._max_context_entries = 10000 # Keep last 10k entries + + # Ensure log directory exists + log_path = Path(self.log_file) + log_path.parent.mkdir(parents=True, exist_ok=True) + + # Create dedicated file handler for missing translations + self._file_handler = logging.FileHandler( + self.log_file, + encoding='utf-8' + ) + self._file_handler.setLevel(logging.WARNING) + + # Create formatter + formatter = logging.Formatter( + fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + self._file_handler.setFormatter(formatter) + + # Create dedicated logger for missing translations + self._logger = logging.getLogger("i18n.missing_translations") + self._logger.setLevel(logging.WARNING) + self._logger.addHandler(self._file_handler) + self._logger.propagate = False # Don't propagate to root logger + + logger.info(f"TranslationLogger initialized with log file: {self.log_file}") + + def log_missing_translation( + self, + key: str, + locale: str, + context: Optional[Dict] = None + ): + """ + Log a missing translation. 
def log_translation_error(
    self,
    error_type: str,
    message: str,
    key: Optional[str] = None,
    locale: Optional[str] = None,
    context: Optional[Dict] = None
):
    """
    Log a translation error to the dedicated missing-translations logger.

    The record is emitted at ERROR level. When ``context`` is given it is
    appended to the message in the same " (context: ...)" form used for
    missing translations; without it the message is unchanged.

    Args:
        error_type: Type of error (e.g., "format_error", "parameter_missing")
        message: Error message
        key: Translation key (optional)
        locale: Locale code (optional)
        context: Optional context information
    """
    # Previously the context was collected into an unused dict and never
    # reached the log; include it so callers' context is not lost.
    context_str = f" (context: {context})" if context else ""

    self._logger.error(
        f"Translation error: {error_type} - {message} "
        f"(key: {key}, locale: {locale}){context_str}"
    )
def get_missing_with_context(
    self,
    locale: Optional[str] = None,
    limit: Optional[int] = None
) -> List[Dict]:
    """
    Get missing translations with context.

    Entries are returned oldest first. The result is always a new list, so
    callers cannot modify the logger's internal buffer through it.

    Args:
        locale: Filter by locale (optional)
        limit: Maximum number of entries to return, keeping the most
            recent ones (optional). None means no limit; zero or a
            negative value yields an empty list.

    Returns:
        List of missing translation entries with context
    """
    entries = self._missing_with_context

    # Filter by locale if specified
    if locale:
        entries = [e for e in entries if e["locale"] == locale]

    if limit is None:
        return list(entries)

    # entries[-0:] would be the whole list, so non-positive limits are
    # handled explicitly instead of being sliced.
    if limit <= 0:
        return []

    return entries[-limit:]
def get_statistics(self) -> Dict:
    """
    Summarize the recorded missing translations.

    The namespace of a key is the text before its first dot; keys that
    contain no dot are counted under 'unknown'.

    Returns:
        Dictionary with statistics
    """
    per_locale = {
        loc: len(keys)
        for loc, keys in self._missing_translations.items()
    }

    # Tally keys by namespace across every locale
    per_namespace = defaultdict(int)
    for keys in self._missing_translations.values():
        for key in keys:
            head, dot, _rest = key.partition('.')
            per_namespace[head if dot else 'unknown'] += 1

    return {
        "total_missing": sum(per_locale.values()),
        "locales_affected": len(self._missing_translations),
        "missing_by_locale": per_locale,
        "missing_by_namespace": dict(per_namespace),
        "total_context_entries": len(self._missing_with_context)
    }
def __del__(self):
    """
    Detach and close the dedicated file handler when the object is collected.

    The handler is removed from the logger before it is closed, so the
    logger never holds a handler that has already been closed. Cleanup is
    best-effort: any failure is suppressed rather than raised from a
    finalizer.
    """
    try:
        handler = getattr(self, '_file_handler', None)
        if handler is None:
            # __init__ did not get far enough to create the handler
            return

        target = getattr(self, '_logger', None)
        if target is not None:
            target.removeHandler(handler)

        handler.close()
    except Exception:
        # Finalizers can run while the interpreter is shutting down
        pass
def log_translation_error(
    error_type: str,
    message: str,
    key: Optional[str] = None,
    locale: Optional[str] = None,
    context: Optional[Dict] = None
):
    """
    Forward a translation error to the global translation logger.

    Convenience wrapper around the logger returned by
    get_translation_logger().

    Args:
        error_type: Type of error
        message: Error message
        key: Translation key (optional)
        locale: Locale code (optional)
        context: Optional context information
    """
    get_translation_logger().log_translation_error(
        error_type,
        message,
        key=key,
        locale=locale,
        context=context,
    )
def record_request(self, locale: str, namespace: str = None):
    """
    Count one translation request.

    Both the per-locale request counter and the locale-usage counter are
    incremented; the namespace counter only when a namespace is given.

    Args:
        locale: Locale code
        namespace: Translation namespace (optional)
    """
    for counter in (self._request_counts, self._locale_usage):
        counter[locale] += 1

    if not namespace:
        return

    self._namespace_usage[namespace] += 1
def get_summary(self) -> Dict[str, Any]:
    """
    Build a snapshot of everything collected so far.

    Returns:
        Dictionary with metrics summary
    """
    missing_counts = {
        locale: len(keys)
        for locale, keys in self._missing_translations.items()
    }
    timing_stats = self._calculate_timing_stats()

    # Time elapsed since the stored start time
    elapsed = datetime.now() - self._start_time

    return {
        "uptime_seconds": round(elapsed.total_seconds(), 2),
        "total_requests": sum(self._request_counts.values()),
        "requests_per_locale": dict(self._request_counts),
        "total_missing_translations": sum(missing_counts.values()),
        "missing_by_locale": missing_counts,
        "timing": timing_stats,
        "locale_usage": dict(self._locale_usage),
        "namespace_usage": dict(self._namespace_usage),
        "error_counts": dict(self._error_counts)
    }
def export_prometheus(self) -> str:
    """
    Export metrics in Prometheus text exposition format.

    Label values (locale codes and error type names) are escaped as the
    exposition format requires, so a value containing a backslash, double
    quote or newline cannot corrupt the output.

    Returns:
        Prometheus-formatted metrics string
    """
    def escape_label(value) -> str:
        # Backslash must be escaped first so later escapes are not doubled
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    lines = []

    # Translation requests counter
    lines.append("# HELP i18n_translation_requests_total Total number of translation requests")
    lines.append("# TYPE i18n_translation_requests_total counter")
    for locale, count in self._request_counts.items():
        lines.append(
            f'i18n_translation_requests_total{{locale="{escape_label(locale)}"}} {count}'
        )

    # Missing translations: number of distinct missing keys per locale
    lines.append("# HELP i18n_missing_translations_total Total number of missing translations")
    lines.append("# TYPE i18n_missing_translations_total counter")
    for locale, keys in self._missing_translations.items():
        lines.append(
            f'i18n_missing_translations_total{{locale="{escape_label(locale)}"}} {len(keys)}'
        )

    # Timing metrics, computed from the retained timing samples
    timing_stats = self._calculate_timing_stats()
    total_duration = sum(d["duration_ms"] for d in self._timing_data)
    lines.append("# HELP i18n_translation_duration_ms Translation operation duration in milliseconds")
    lines.append("# TYPE i18n_translation_duration_ms summary")
    lines.append(f'i18n_translation_duration_ms{{quantile="0.5"}} {timing_stats["p50_ms"]}')
    lines.append(f'i18n_translation_duration_ms{{quantile="0.95"}} {timing_stats["p95_ms"]}')
    lines.append(f'i18n_translation_duration_ms{{quantile="0.99"}} {timing_stats["p99_ms"]}')
    lines.append(f'i18n_translation_duration_ms_sum {total_duration}')
    lines.append(f'i18n_translation_duration_ms_count {timing_stats["count"]}')

    # Error counter
    lines.append("# HELP i18n_errors_total Total number of i18n errors")
    lines.append("# TYPE i18n_errors_total counter")
    for error_type, count in self._error_counts.items():
        lines.append(
            f'i18n_errors_total{{type="{escape_label(error_type)}"}} {count}'
        )

    return "\n".join(lines)
def monitor_performance(operation: str = "translate"):
    """
    Decorator to monitor translation operation performance.

    On success the call duration is recorded together with the locale the
    wrapped function was called with; on failure the exception type is
    recorded and the exception is re-raised unchanged.

    The locale is taken from the ``locale`` keyword argument or, when it
    was passed positionally, from the position of the wrapped function's
    ``locale`` parameter. If no locale string can be found, "unknown" is
    recorded.

    Args:
        operation: Operation name for metrics

    Returns:
        Decorated function

    Example:
        @monitor_performance("translate")
        def translate(key: str, locale: str) -> str:
            ...
    """
    import inspect  # local import: only this decorator needs it

    def decorator(func: Callable) -> Callable:
        # Work out once, at decoration time, where "locale" sits among the
        # positional parameters so the per-call lookup stays cheap.
        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        try:
            parameters = list(inspect.signature(func).parameters.values())
        except (TypeError, ValueError):
            # Signature not introspectable: assume the (key, locale) order
            # shown in the example above.
            locale_index = 1
        else:
            locale_index = None
            for position, parameter in enumerate(parameters):
                if parameter.name == "locale" and parameter.kind in positional_kinds:
                    locale_index = position
                    break

        def resolve_locale(args, kwargs) -> str:
            candidate = kwargs.get("locale")
            if (
                not candidate
                and locale_index is not None
                and len(args) > locale_index
            ):
                candidate = args[locale_index]
            if isinstance(candidate, str) and candidate:
                return candidate
            return "unknown"

        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.perf_counter()

            # Only the wrapped call is guarded, so a failure inside the
            # metrics code is not miscounted as a translation error.
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                get_metrics().record_error(type(e).__name__)
                raise

            duration_ms = (time.perf_counter() - start_time) * 1000
            get_metrics().record_timing(
                duration_ms, resolve_locale(args, kwargs), operation
            )

            return result

        return wrapper
    return decorator
class LanguageMiddleware(BaseHTTPMiddleware):
    """
    Language detection middleware.

    Resolves the language of each request from several sources in a fixed
    priority order (see ``_determine_language``), stores the result on
    ``request.state.language`` and in the i18n context variable, and sets the
    ``Content-Language`` header on the response.
    """

    # ``q=<weight>`` parameter of a single Accept-Language entry.
    _QUALITY_PATTERN = re.compile(r"q=([\d.]+)", re.IGNORECASE)

    async def dispatch(self, request: Request, call_next):
        """
        Process the request and determine the language.

        Args:
            request: The incoming request
            call_next: The next middleware/handler in the chain

        Returns:
            Response with Content-Language header added
        """
        from app.core.config import settings

        # Determine the language for this request
        language = await self._determine_language(request)

        # Defensive validation: every candidate is already checked in
        # _determine_language, but the configured default itself may be
        # missing from the supported list.
        if language not in settings.I18N_SUPPORTED_LANGUAGES:
            logger.warning(
                f"Unsupported language '{language}' requested, "
                f"falling back to default: {settings.I18N_DEFAULT_LANGUAGE}"
            )
            language = settings.I18N_DEFAULT_LANGUAGE

        # Inject language into request state
        request.state.language = language

        # Also set in context variable for exception handling
        from app.i18n.exceptions import set_current_locale
        set_current_locale(language)

        logger.debug(f"Request language set to: {language}")

        # Process the request
        response = await call_next(request)

        # Add Content-Language header to response
        response.headers["Content-Language"] = language

        return response

    async def _determine_language(self, request: Request) -> str:
        """
        Determine the language to use based on priority order.

        Priority:
        1. Query parameter (?lang=en)
        2. Accept-Language HTTP header
        3. User language preference (``request.state.user.preferred_language``)
        4. Tenant default language (``request.state.tenant.default_language``)
        5. System default language

        A source only wins when it yields a *supported* language; an
        unsupported or empty value falls through to the next source instead
        of forcing the system default.

        Args:
            request: The incoming request

        Returns:
            Language code (e.g., "zh", "en")
        """
        from app.core.config import settings

        # 1. Check query parameter (?lang=en)
        raw_query_lang = request.query_params.get("lang")
        if raw_query_lang:
            lang = self._match_supported(raw_query_lang)
            if lang:
                logger.debug(f"Language from query parameter: {lang}")
                return lang
            # %r keeps control characters in untrusted input out of the log.
            logger.debug(
                "Ignoring unsupported language from query parameter: %r",
                raw_query_lang,
            )

        # 2. Check Accept-Language HTTP header
        header = request.headers.get("Accept-Language")
        if header:
            lang = self._parse_accept_language(header)
            if lang:
                logger.debug(f"Language from Accept-Language header: {lang}")
                return lang

        # 3. Check user language preference (requires authentication)
        # NOTE(review): assumes an earlier middleware has already stored the
        # user on request.state - confirm middleware ordering.
        user = getattr(request.state, "user", None)
        if user:
            lang = self._match_supported(getattr(user, "preferred_language", None))
            if lang:
                logger.debug(f"Language from user preference: {lang}")
                return lang

        # 4. Check tenant default language
        # NOTE(review): assumes the tenant is already loaded into
        # request.state - confirm against the tenant-resolution code.
        tenant = getattr(request.state, "tenant", None)
        if tenant:
            lang = self._match_supported(getattr(tenant, "default_language", None))
            if lang:
                logger.debug(f"Language from tenant default: {lang}")
                return lang

        # 5. Fall back to system default language
        logger.debug(
            f"Using system default language: {settings.I18N_DEFAULT_LANGUAGE}"
        )
        return settings.I18N_DEFAULT_LANGUAGE

    @staticmethod
    def _match_supported(tag) -> Optional[str]:
        """
        Map a language tag onto a supported language code.

        Tries, in order: the tag as given, its lower-cased form with ``_``
        normalised to ``-``, and its base language (``zh-CN`` -> ``zh``).

        Args:
            tag: Language tag from any source; non-string values are rejected

        Returns:
            The matching entry of ``settings.I18N_SUPPORTED_LANGUAGES``, or
            None when the tag is empty or unsupported
        """
        from app.core.config import settings

        if not isinstance(tag, str):
            return None

        supported = settings.I18N_SUPPORTED_LANGUAGES
        exact = tag.strip()
        lowered = exact.lower().replace("_", "-")
        base = lowered.split("-", 1)[0]

        for candidate in (exact, lowered, base):
            if candidate and candidate in supported:
                return candidate
        return None

    def _parse_accept_language(self, header: str) -> Optional[str]:
        """
        Parse the Accept-Language HTTP header.

        The Accept-Language header format:
            Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7

        This method:
        1. Parses all language tags and their quality values
        2. Drops entries with ``q=0`` (explicitly not acceptable)
        3. Sorts by quality value (higher first, header order on ties)
        4. Returns the first supported language, reducing a regional tag to
           its base language when needed (zh-CN -> zh)

        Args:
            header: Accept-Language header value

        Returns:
            Language code if found and supported, None otherwise

        Examples:
            _parse_accept_language("zh-CN,zh;q=0.9,en;q=0.8")
            # => "zh" (if zh is supported)

            _parse_accept_language("en-US,en;q=0.9")
            # => "en" (if en is supported)
        """
        if not header:
            return None

        # (language tag, quality) pairs in header order
        weighted = []

        for entry in header.split(","):
            tag, *params = (piece.strip() for piece in entry.split(";"))
            if not tag:
                continue

            # Quality defaults to 1.0 when absent or malformed
            quality = 1.0
            for param in params:
                q_match = self._QUALITY_PATTERN.fullmatch(param)
                if q_match is None:
                    continue
                try:
                    quality = float(q_match.group(1))
                except ValueError:
                    # e.g. "q=1.2.3": keep the default weight
                    quality = 1.0
                break

            # q=0 means the client does not accept this language at all.
            if quality <= 0:
                continue

            # Weights above 1 are invalid; clamp so they cannot outrank
            # properly weighted entries.
            weighted.append((tag, min(quality, 1.0)))

        # Stable sort: equal weights keep their order from the header
        weighted.sort(key=lambda pair: pair[1], reverse=True)

        # Return the first supported language
        for tag, _ in weighted:
            lang = self._match_supported(tag)
            if lang:
                return lang

        return None
class I18nResponseMixin:
    """Mixin that adds internationalized fields to API response data.

    For every configured enum field a sibling ``<field>_display`` entry is
    added that holds the translated display name of the enum value.

    Usage:
        1. Inherit from this class
        2. Override ``_get_enum_fields()`` to declare the enum fields to translate
        3. Call ``serialize_with_i18n()`` to serialize the data

    Example:
        class WorkspaceSerializer(I18nResponseMixin):
            def _get_enum_fields(self) -> Dict[str, str]:
                return {
                    "role": "workspace_role",
                    "status": "workspace_status"
                }

            def serialize(self, workspace: Workspace, locale: str = "zh") -> Dict:
                data = {
                    "id": str(workspace.id),
                    "name": workspace.name,
                    "role": workspace.role,
                    "status": workspace.status
                }
                return self.serialize_with_i18n(data, locale)
    """

    def serialize_with_i18n(
        self,
        data: Any,
        locale: str = "zh"
    ) -> Union[Dict, List[Dict], Any]:
        """Serialize data and add the internationalized fields.

        Args:
            data: Data to serialize (dict, list, or Pydantic model)
            locale: Language code

        Returns:
            The serialized data with ``_display`` fields added. Values that
            are neither a dict, a list nor a Pydantic model are returned
            unchanged.
        """
        # Pydantic models are converted to plain dicts first
        if isinstance(data, BaseModel):
            data = data.model_dump()

        if isinstance(data, dict):
            return self._serialize_dict(data, locale)
        if isinstance(data, list):
            # Recurse so list items that are Pydantic models (or nested
            # lists) also get their _display fields, not only plain dicts.
            return [self.serialize_with_i18n(item, locale) for item in data]
        return data

    def _serialize_dict(self, data: Dict, locale: str) -> Dict:
        """Return a shallow copy of ``data`` with ``_display`` fields added.

        Args:
            data: Dictionary data
            locale: Language code

        Returns:
            Copy of the dictionary with a ``<field>_display`` entry for each
            configured enum field that is present and not None
        """
        from enum import Enum

        from app.i18n.service import get_translation_service

        translation_service = get_translation_service()

        result = data.copy()

        # Mapping of field name -> enum type declared by the subclass
        enum_fields = self._get_enum_fields()

        for field, enum_type in enum_fields.items():
            value = result.get(field)
            if value is None:
                continue

            # str() of an Enum member is "ClassName.member"; the translation
            # key needs the underlying value instead.
            raw_value = value.value if isinstance(value, Enum) else value

            result[f"{field}_display"] = translation_service.translate_enum(
                enum_type=enum_type,
                value=str(raw_value),
                locale=locale
            )

        return result

    def _get_enum_fields(self) -> Dict[str, str]:
        """Return the enum fields that need a translated display value.

        Subclasses should override this method; the default declares no
        fields, so no ``_display`` entries are added.

        Returns:
            Mapping of field name to enum type,
            e.g. {"role": "workspace_role", "status": "workspace_status"}
        """
        return {}
class WorkspaceSerializer(I18nResponseMixin):
    """Serializer for workspace responses.

    Adds translated ``role_display`` and ``status_display`` fields.
    """

    def _get_enum_fields(self) -> Dict[str, str]:
        """Declare the enum fields of a workspace."""
        return dict(role="workspace_role", status="workspace_status")

    def serialize(self, workspace_data: Union[Dict, BaseModel], locale: str = "zh") -> Dict:
        """Serialize a single workspace.

        Args:
            workspace_data: Workspace data (dict or Pydantic model)
            locale: Language code

        Returns:
            Serialized workspace data including the internationalized fields
        """
        serialized = self.serialize_with_i18n(workspace_data, locale)
        return serialized

    def serialize_list(self, workspaces: List[Union[Dict, BaseModel]], locale: str = "zh") -> List[Dict]:
        """Serialize a list of workspaces.

        Args:
            workspaces: Workspaces to serialize
            locale: Language code

        Returns:
            List of serialized workspaces
        """
        serialized_items = []
        for workspace in workspaces:
            serialized_items.append(self.serialize(workspace, locale))
        return serialized_items


class WorkspaceMemberSerializer(I18nResponseMixin):
    """Serializer for workspace member responses.

    Adds a translated ``role_display`` field.
    """

    def _get_enum_fields(self) -> Dict[str, str]:
        """Declare the enum fields of a workspace member."""
        return dict(role="workspace_role")

    def serialize(self, member_data: Union[Dict, BaseModel], locale: str = "zh") -> Dict:
        """Serialize a single workspace member.

        Args:
            member_data: Member data (dict or Pydantic model)
            locale: Language code

        Returns:
            Serialized member data including the internationalized fields
        """
        serialized = self.serialize_with_i18n(member_data, locale)
        return serialized

    def serialize_list(self, members: List[Union[Dict, BaseModel]], locale: str = "zh") -> List[Dict]:
        """Serialize a list of workspace members.

        Args:
            members: Members to serialize
            locale: Language code

        Returns:
            List of serialized members
        """
        serialized_items = []
        for member in members:
            serialized_items.append(self.serialize(member, locale))
        return serialized_items


class WorkspaceInviteSerializer(I18nResponseMixin):
    """Serializer for workspace invite responses.

    Adds translated ``status_display`` and ``role_display`` fields.
    """

    def _get_enum_fields(self) -> Dict[str, str]:
        """Declare the enum fields of a workspace invite."""
        return dict(status="invite_status", role="workspace_role")

    def serialize(self, invite_data: Union[Dict, BaseModel], locale: str = "zh") -> Dict:
        """Serialize a single workspace invite.

        Args:
            invite_data: Invite data (dict or Pydantic model)
            locale: Language code

        Returns:
            Serialized invite data including the internationalized fields
        """
        serialized = self.serialize_with_i18n(invite_data, locale)
        return serialized

    def serialize_list(self, invites: List[Union[Dict, BaseModel]], locale: str = "zh") -> List[Dict]:
        """Serialize a list of workspace invites.

        Args:
            invites: Invites to serialize
            locale: Language code

        Returns:
            List of serialized invites
        """
        serialized_items = []
        for invite in invites:
            serialized_items.append(self.serialize(invite, locale))
        return serialized_items
class TranslationService:
    """
    Translation service that provides:
    - Translation lookup backed by an in-memory TranslationCache
    - Parameterized message support ({param} syntax)
    - Fallback mechanism (requested locale → fallback locale → key)
    - Enum value translation

    Translation files are read through TranslationLoader; all available
    locales are loaded into the cache when the service is constructed.
    """

    def __init__(self, locales_dirs: Optional[list] = None):
        """
        Initialize the translation service.

        Args:
            locales_dirs: List of directories containing translation files.
                         Passed straight to TranslationLoader; None lets the
                         loader pick its own directories.
        """
        from app.core.config import settings

        self.loader = TranslationLoader(locales_dirs)
        self.default_locale = settings.I18N_DEFAULT_LANGUAGE
        self.fallback_locale = settings.I18N_FALLBACK_LANGUAGE
        self.log_missing = settings.I18N_LOG_MISSING_TRANSLATIONS
        # NOTE(review): this flag is stored but not consulted anywhere in
        # this class - confirm whether the cache is expected to honour it.
        self.enable_cache = settings.I18N_ENABLE_TRANSLATION_CACHE

        # Initialize advanced cache with LRU
        lru_cache_size = getattr(settings, 'I18N_LRU_CACHE_SIZE', 1000)
        self.cache = TranslationCache(
            max_lru_size=lru_cache_size,
            enable_lazy_load=False  # Load all at startup for now
        )

        # Load all translations into cache
        self._load_all_locales()

        # Initialize metrics
        self.metrics = get_metrics()

        # Initialize translation logger
        self.translation_logger = get_translation_logger()

        logger.info(
            f"TranslationService initialized with default locale: {self.default_locale}, "
            f"LRU cache size: {lru_cache_size}"
        )

    def _load_all_locales(self):
        """Load all available locales into memory cache."""
        available_locales = self.loader.get_available_locales()
        logger.info(f"Loading translations for locales: {available_locales}")

        for locale in available_locales:
            locale_data = self.loader.load_locale(locale)
            self.cache.set_locale_data(locale, locale_data)

        logger.info(f"Loaded {len(available_locales)} locales into cache")

    @staticmethod
    def _split_key(key: str) -> Optional[tuple]:
        """
        Split a translation key into its namespace and nested key path.

        Args:
            key: Translation key (format: "namespace.key.subkey")

        Returns:
            ``(namespace, key_path)`` where ``key_path`` is a list of nested
            keys, or None when the key contains no "." separator
        """
        namespace, separator, remainder = key.partition(".")
        if not separator:
            return None
        return namespace, remainder.split(".")

    @monitor_performance("translate")
    def translate(
        self,
        key: str,
        locale: Optional[str] = None,
        **params
    ) -> str:
        """
        Translate a key to the target locale.

        Supports:
        - Dot-separated keys (e.g., "common.success.created")
        - Parameterized messages (e.g., "Hello {name}")
        - Fallback to ``self.fallback_locale`` when the target locale has no
          entry for the key

        Args:
            key: Translation key (format: "namespace.key.subkey")
            locale: Target locale (defaults to default locale)
            **params: Parameters for parameterized messages

        Returns:
            Translated string, or the key itself if translation not found.
            If applying ``params`` fails, the error is logged and the
            unformatted translation is returned.

        Examples:
            translate("common.success.created", "zh")
            # => "创建成功"

            translate("common.validation.required", "zh", field="名称")
            # => "名称不能为空"
        """
        if locale is None:
            locale = self.default_locale

        # Parse key (namespace.key.subkey)
        parsed_key = self._split_key(key)
        if parsed_key is None:
            if self.log_missing:
                logger.warning(f"Invalid translation key format: {key}")
            return key

        namespace, key_path = parsed_key

        # Track request
        track_translation_request(locale, namespace)

        # Get translation from cache
        translation = self.cache.get_translation(locale, namespace, key_path)

        # Fall back to the fallback locale if not found
        if translation is None and locale != self.fallback_locale:
            translation = self.cache.get_translation(
                self.fallback_locale, namespace, key_path
            )

        # If still not found, return the key itself
        if translation is None:
            # Missing-key bookkeeping is done only when missing-translation
            # logging is enabled.
            if self.log_missing:
                logger.warning(
                    f"Missing translation: {key} (locale: {locale})"
                )
                track_missing_translation(key, locale)

                # Log to translation logger with context
                self.translation_logger.log_missing_translation(
                    key=key,
                    locale=locale,
                    context={"namespace": namespace}
                )
            return key

        # Apply parameters if provided
        if params:
            try:
                translation = translation.format(**params)
            except KeyError as e:
                # Template references a placeholder the caller did not pass;
                # best effort: log and return the unformatted template.
                error_msg = f"Missing parameter in translation '{key}': {e}"
                logger.error(error_msg)
                self.translation_logger.log_translation_error(
                    error_type="parameter_missing",
                    message=error_msg,
                    key=key,
                    locale=locale,
                    context={"params": list(params.keys())}
                )
            except Exception as e:
                # Deliberately broad: a malformed template must not break the
                # request, so log and return the unformatted translation.
                error_msg = f"Error formatting translation '{key}': {e}"
                logger.error(error_msg)
                self.translation_logger.log_translation_error(
                    error_type="format_error",
                    message=error_msg,
                    key=key,
                    locale=locale
                )

        return translation

    def _get_translation(
        self,
        locale: str,
        namespace: str,
        key_path: list
    ) -> Optional[str]:
        """
        Get translation from cache (deprecated, use cache.get_translation).

        Args:
            locale: Locale code
            namespace: Translation namespace
            key_path: List of nested keys

        Returns:
            Translation string or None if not found
        """
        return self.cache.get_translation(locale, namespace, key_path)

    @monitor_performance("translate_enum")
    def translate_enum(
        self,
        enum_type: str,
        value: str,
        locale: Optional[str] = None
    ) -> str:
        """
        Translate an enum value.

        Looks up the key ``enums.<enum_type>.<value>`` through ``translate``,
        so the same fallback and missing-key handling applies.

        Args:
            enum_type: Enum type name (e.g., "workspace_role")
            value: Enum value (e.g., "manager")
            locale: Target locale

        Returns:
            Translated enum display name

        Examples:
            translate_enum("workspace_role", "manager", "zh")
            # => "管理员"

            translate_enum("invite_status", "pending", "en")
            # => "Pending"
        """
        key = f"enums.{enum_type}.{value}"
        return self.translate(key, locale)

    def has_translation(self, key: str, locale: str) -> bool:
        """
        Check if a translation exists for the given key and locale.

        Only the given locale is checked; the fallback locale is not
        consulted.

        Args:
            key: Translation key
            locale: Locale code

        Returns:
            True if translation exists, False otherwise
        """
        parsed_key = self._split_key(key)
        if parsed_key is None:
            return False

        namespace, key_path = parsed_key
        return self.cache.get_translation(locale, namespace, key_path) is not None

    def reload(self, locale: Optional[str] = None):
        """
        Reload translation files.

        Args:
            locale: Specific locale to reload. If None, reloads all locales.
        """
        logger.info(f"Reloading translations for locale: {locale or 'all'}")

        if locale:
            locale_data = self.loader.load_locale(locale)
            self.cache.set_locale_data(locale, locale_data)
            # Clear LRU cache for this locale
            # NOTE(review): presumably clear_locale only drops cached lookups;
            # if it also drops the locale data set just above, the order of
            # these two calls must be swapped - verify against TranslationCache.
            self.cache.clear_locale(locale)
        else:
            self._load_all_locales()
            # Clear all LRU cache
            self.cache.clear_lru()

        logger.info("Translation reload completed")

    def get_available_locales(self) -> list:
        """
        Get list of all available locales.

        Returns:
            List of locale codes currently loaded in the cache
        """
        return self.cache.get_loaded_locales()

    def get_cache_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            Dictionary with cache statistics
        """
        return self.cache.get_stats()

    def get_metrics_summary(self) -> Dict[str, Any]:
        """
        Get metrics summary.

        Returns:
            Dictionary with metrics summary
        """
        return self.metrics.get_summary()

    def get_memory_usage(self) -> Dict[str, Any]:
        """
        Get memory usage information.

        Returns:
            Dictionary with memory usage information
        """
        return self.cache.get_memory_usage()

    def get_loaded_dirs(self) -> list:
        """
        Get list of loaded translation directories.

        Returns:
            List of directory paths (the loader's own ``locales_dirs``)
        """
        return self.loader.locales_dirs
def t(key: str, locale: Optional[str] = None, **params) -> str:
    """
    Translate a key using the global translation service.

    Shorthand for ``get_translation_service().translate(...)``.

    Args:
        key: Translation key
        locale: Target locale (optional, uses default if not provided)
        **params: Parameters for parameterized messages

    Returns:
        Translated string

    Examples:
        t("common.success.created")
        t("common.validation.required", field="名称")
        t("workspace.member_count", count=5)
    """
    return get_translation_service().translate(key, locale, **params)


def t_enum(enum_type: str, value: str, locale: Optional[str] = None) -> str:
    """
    Translate an enum value using the global translation service.

    Shorthand for ``get_translation_service().translate_enum(...)``.

    Args:
        enum_type: Enum type name
        value: Enum value
        locale: Target locale

    Returns:
        Translated enum display name

    Examples:
        t_enum("workspace_role", "manager")
        t_enum("invite_status", "pending", "en")
    """
    return get_translation_service().translate_enum(enum_type, value, locale)
+ +Example: +```json +{ + "success": { + "created": "Created successfully", + "updated": "Updated successfully" + } +} +``` diff --git a/api/app/locales/en/auth.json b/api/app/locales/en/auth.json new file mode 100644 index 00000000..50ba866b --- /dev/null +++ b/api/app/locales/en/auth.json @@ -0,0 +1,55 @@ +{ + "login": { + "success": "Login successful", + "failed": "Login failed", + "invalid_credentials": "Invalid username or password", + "account_locked": "Account has been locked", + "account_disabled": "Account has been disabled" + }, + "logout": { + "success": "Logout successful", + "failed": "Logout failed" + }, + "token": { + "refresh_success": "Token refreshed successfully", + "invalid": "Invalid token", + "expired": "Token has expired", + "blacklisted": "Token has been invalidated", + "invalid_refresh_token": "Invalid refresh token", + "refresh_token_blacklisted": "Refresh token has been invalidated" + }, + "registration": { + "success": "Registration successful", + "failed": "Registration failed", + "email_exists": "Email already in use", + "username_exists": "Username already taken" + }, + "password": { + "reset_success": "Password reset successful", + "reset_failed": "Password reset failed", + "change_success": "Password changed successfully", + "change_failed": "Password change failed", + "incorrect": "Incorrect password", + "too_weak": "Password is too weak", + "mismatch": "Passwords do not match" + }, + "invite": { + "invalid": "Invalid or expired invite code", + "email_mismatch": "Invite email does not match login email", + "accept_success": "Invite accepted successfully", + "accept_failed": "Failed to accept invite", + "password_verification_failed": "Failed to accept invite, password verification error", + "bind_workspace_success": "Workspace bound successfully", + "bind_workspace_failed": "Failed to bind workspace" + }, + "user": { + "not_found": "User not found", + "already_exists": "User already exists", + "created_with_invite": "User created 
successfully and joined workspace" + }, + "session": { + "expired": "Session expired, please login again", + "invalid": "Invalid session", + "single_session_enabled": "Single sign-on enabled, other device sessions will be logged out" + } +} diff --git a/api/app/locales/en/common.json b/api/app/locales/en/common.json new file mode 100644 index 00000000..505f83e3 --- /dev/null +++ b/api/app/locales/en/common.json @@ -0,0 +1,132 @@ +{ + "success": { + "created": "Created successfully", + "updated": "Updated successfully", + "deleted": "Deleted successfully", + "retrieved": "Retrieved successfully", + "saved": "Saved successfully", + "uploaded": "Uploaded successfully", + "downloaded": "Downloaded successfully", + "sent": "Sent successfully", + "completed": "Completed", + "confirmed": "Confirmed", + "cancelled": "Cancelled", + "archived": "Archived", + "restored": "Restored" + }, + "actions": { + "create": "Create", + "update": "Update", + "delete": "Delete", + "view": "View", + "edit": "Edit", + "save": "Save", + "cancel": "Cancel", + "confirm": "Confirm", + "submit": "Submit", + "upload": "Upload", + "download": "Download", + "send": "Send", + "search": "Search", + "filter": "Filter", + "sort": "Sort", + "export": "Export", + "import": "Import", + "refresh": "Refresh", + "reset": "Reset", + "back": "Back", + "next": "Next", + "previous": "Previous", + "finish": "Finish", + "close": "Close", + "open": "Open", + "archive": "Archive", + "restore": "Restore", + "duplicate": "Duplicate", + "share": "Share", + "invite": "Invite", + "remove": "Remove", + "add": "Add", + "select": "Select", + "clear": "Clear" + }, + "validation": { + "required": "{field} is required", + "invalid_format": "{field} format is invalid", + "too_long": "{field} cannot exceed {max} characters", + "too_short": "{field} must be at least {min} characters", + "invalid_email": "Invalid email format", + "invalid_url": "Invalid URL format", + "invalid_phone": "Invalid phone number format", + 
"invalid_date": "Invalid date format", + "invalid_number": "Must be a valid number", + "out_of_range": "{field} must be between {min} and {max}", + "already_exists": "{field} already exists", + "not_found": "{field} not found", + "invalid_value": "Invalid value for {field}", + "password_mismatch": "Passwords do not match", + "weak_password": "Password is too weak, please use a stronger password", + "invalid_credentials": "Invalid username or password", + "unauthorized": "Unauthorized access", + "forbidden": "Permission denied", + "expired": "{field} has expired", + "invalid_token": "Invalid token", + "file_too_large": "File size cannot exceed {max}", + "invalid_file_type": "Unsupported file type", + "duplicate": "Duplicate {field}" + }, + "status": { + "active": "Active", + "inactive": "Inactive", + "pending": "Pending", + "processing": "Processing", + "completed": "Completed", + "failed": "Failed", + "cancelled": "Cancelled", + "archived": "Archived", + "deleted": "Deleted", + "draft": "Draft", + "published": "Published", + "suspended": "Suspended", + "expired": "Expired" + }, + "messages": { + "loading": "Loading...", + "saving": "Saving...", + "processing": "Processing...", + "uploading": "Uploading...", + "downloading": "Downloading...", + "no_data": "No data available", + "no_results": "No results found", + "confirm_delete": "Are you sure you want to delete? 
This action cannot be undone.", + "confirm_action": "Are you sure you want to perform this action?", + "operation_success": "Operation successful", + "operation_failed": "Operation failed", + "please_wait": "Please wait...", + "try_again": "Please try again", + "contact_support": "If the problem persists, please contact support" + }, + "pagination": { + "page": "Page {page}", + "of": "of {total}", + "items": "{total} items", + "per_page": "{count} per page", + "showing": "Showing {from} to {to} of {total}", + "first": "First", + "last": "Last", + "next": "Next", + "previous": "Previous" + }, + "time": { + "just_now": "Just now", + "minutes_ago": "{count} minutes ago", + "hours_ago": "{count} hours ago", + "days_ago": "{count} days ago", + "weeks_ago": "{count} weeks ago", + "months_ago": "{count} months ago", + "years_ago": "{count} years ago", + "today": "Today", + "yesterday": "Yesterday", + "tomorrow": "Tomorrow" + } +} diff --git a/api/app/locales/en/enums.json b/api/app/locales/en/enums.json new file mode 100644 index 00000000..da7a3ace --- /dev/null +++ b/api/app/locales/en/enums.json @@ -0,0 +1,132 @@ +{ + "workspace_role": { + "owner": "Owner", + "manager": "Manager", + "member": "Member", + "guest": "Guest" + }, + "workspace_status": { + "active": "Active", + "inactive": "Inactive", + "archived": "Archived", + "suspended": "Suspended", + "deleted": "Deleted" + }, + "invite_status": { + "pending": "Pending", + "accepted": "Accepted", + "rejected": "Rejected", + "revoked": "Revoked", + "expired": "Expired" + }, + "user_status": { + "active": "Active", + "inactive": "Inactive", + "suspended": "Suspended", + "deleted": "Deleted", + "pending": "Pending" + }, + "tenant_status": { + "active": "Active", + "inactive": "Inactive", + "suspended": "Suspended", + "expired": "Expired", + "trial": "Trial" + }, + "file_status": { + "uploading": "Uploading", + "processing": "Processing", + "completed": "Completed", + "failed": "Failed", + "deleted": "Deleted" + }, + 
"task_status": { + "pending": "Pending", + "running": "Running", + "completed": "Completed", + "failed": "Failed", + "cancelled": "Cancelled", + "paused": "Paused" + }, + "priority": { + "low": "Low", + "medium": "Medium", + "high": "High", + "urgent": "Urgent" + }, + "visibility": { + "public": "Public", + "private": "Private", + "internal": "Internal", + "shared": "Shared" + }, + "permission": { + "read": "Read", + "write": "Write", + "delete": "Delete", + "admin": "Admin", + "owner": "Owner" + }, + "notification_type": { + "info": "Info", + "warning": "Warning", + "error": "Error", + "success": "Success" + }, + "language": { + "zh": "Chinese (Simplified)", + "en": "English", + "ja": "Japanese", + "ko": "Korean", + "fr": "French", + "de": "German", + "es": "Spanish" + }, + "timezone": { + "utc": "UTC", + "asia_shanghai": "Asia/Shanghai", + "asia_tokyo": "Asia/Tokyo", + "america_new_york": "America/New_York", + "europe_london": "Europe/London" + }, + "date_format": { + "short": "Short", + "medium": "Medium", + "long": "Long", + "full": "Full" + }, + "sort_order": { + "asc": "Ascending", + "desc": "Descending" + }, + "filter_operator": { + "equals": "Equals", + "not_equals": "Not Equals", + "contains": "Contains", + "not_contains": "Not Contains", + "starts_with": "Starts With", + "ends_with": "Ends With", + "greater_than": "Greater Than", + "less_than": "Less Than", + "greater_or_equal": "Greater or Equal", + "less_or_equal": "Less or Equal", + "in": "In", + "not_in": "Not In", + "is_null": "Is Null", + "is_not_null": "Is Not Null" + }, + "log_level": { + "debug": "Debug", + "info": "Info", + "warning": "Warning", + "error": "Error", + "critical": "Critical" + }, + "api_method": { + "get": "GET", + "post": "POST", + "put": "PUT", + "patch": "PATCH", + "delete": "DELETE" + } +} diff --git a/api/app/locales/en/errors.json b/api/app/locales/en/errors.json new file mode 100644 index 00000000..d0276dc9 --- /dev/null +++ b/api/app/locales/en/errors.json @@ -0,0 +1,138 
@@ +{ + "common": { + "internal_error": "Internal server error", + "network_error": "Network connection error", + "timeout": "Request timeout", + "service_unavailable": "Service temporarily unavailable", + "bad_request": "Bad request parameters", + "unauthorized": "Unauthorized access", + "forbidden": "Access forbidden", + "not_found": "Resource not found", + "method_not_allowed": "Method not allowed", + "conflict": "Resource conflict", + "too_many_requests": "Too many requests, please try again later", + "validation_failed": "Validation failed", + "database_error": "Database operation failed", + "file_operation_error": "File operation failed" + }, + "auth": { + "invalid_credentials": "Invalid username or password", + "token_expired": "Session expired, please login again", + "token_invalid": "Invalid authentication token", + "token_missing": "Authentication token missing", + "unauthorized": "Unauthorized access", + "forbidden": "Permission denied", + "account_locked": "Account has been locked", + "account_disabled": "Account has been disabled", + "account_not_verified": "Account not verified", + "password_incorrect": "Incorrect password", + "password_too_weak": "Password is too weak", + "password_expired": "Password expired, please change it", + "email_not_verified": "Email not verified", + "phone_not_verified": "Phone number not verified", + "verification_code_invalid": "Invalid verification code", + "verification_code_expired": "Verification code expired", + "login_failed": "Login failed", + "logout_failed": "Logout failed", + "session_expired": "Session expired", + "already_logged_in": "Already logged in", + "not_logged_in": "Not logged in" + }, + "user": { + "not_found": "User not found", + "already_exists": "User already exists", + "email_already_exists": "Email already in use", + "phone_already_exists": "Phone number already in use", + "username_already_exists": "Username already taken", + "invalid_email": "Invalid email format", + "invalid_phone": "Invalid 
phone number format", + "invalid_username": "Invalid username format", + "create_failed": "Failed to create user", + "update_failed": "Failed to update user", + "delete_failed": "Failed to delete user", + "cannot_delete_self": "Cannot delete yourself", + "cannot_update_self_role": "Cannot update your own role", + "profile_update_failed": "Failed to update profile", + "avatar_upload_failed": "Failed to upload avatar", + "password_change_failed": "Failed to change password", + "old_password_incorrect": "Old password is incorrect" + }, + "workspace": { + "not_found": "Workspace not found", + "already_exists": "Workspace already exists", + "name_required": "Workspace name is required", + "name_too_long": "Workspace name is too long", + "create_failed": "Failed to create workspace", + "update_failed": "Failed to update workspace", + "delete_failed": "Failed to delete workspace", + "permission_denied": "Permission denied to access this workspace", + "not_member": "Not a workspace member", + "already_member": "Already a workspace member", + "member_limit_reached": "Member limit reached", + "cannot_leave_last_manager": "Cannot leave, you are the last manager", + "cannot_remove_last_manager": "Cannot remove the last manager", + "cannot_remove_self": "Cannot remove yourself", + "invite_not_found": "Invite not found", + "invite_expired": "Invite has expired", + "invite_already_accepted": "Invite already accepted", + "invite_already_revoked": "Invite already revoked", + "invite_send_failed": "Failed to send invite", + "archived": "Workspace is archived", + "suspended": "Workspace is suspended" + }, + "tenant": { + "not_found": "Tenant not found", + "already_exists": "Tenant already exists", + "create_failed": "Failed to create tenant", + "update_failed": "Failed to update tenant", + "delete_failed": "Failed to delete tenant", + "suspended": "Tenant is suspended", + "expired": "Tenant has expired", + "license_invalid": "Invalid license", + "license_expired": "License has 
expired", + "quota_exceeded": "Quota exceeded" + }, + "file": { + "not_found": "File not found", + "upload_failed": "File upload failed", + "download_failed": "File download failed", + "delete_failed": "File deletion failed", + "too_large": "File size exceeds limit", + "invalid_type": "Unsupported file type", + "invalid_format": "Invalid file format", + "corrupted": "File is corrupted", + "storage_full": "Storage is full", + "access_denied": "Access denied to this file" + }, + "api": { + "rate_limit_exceeded": "API rate limit exceeded", + "quota_exceeded": "API quota exceeded", + "invalid_api_key": "Invalid API key", + "api_key_expired": "API key has expired", + "api_key_revoked": "API key has been revoked", + "endpoint_not_found": "API endpoint not found", + "method_not_allowed": "Method not allowed", + "invalid_request": "Invalid request", + "missing_parameter": "Missing required parameter: {param}", + "invalid_parameter": "Invalid parameter: {param}" + }, + "database": { + "connection_failed": "Database connection failed", + "query_failed": "Database query failed", + "transaction_failed": "Database transaction failed", + "constraint_violation": "Data constraint violation", + "duplicate_key": "Duplicate data", + "foreign_key_violation": "Foreign key constraint violation", + "deadlock": "Database deadlock" + }, + "validation": { + "invalid_input": "Invalid input data", + "missing_field": "Missing required field: {field}", + "invalid_field": "Invalid field: {field}", + "field_too_long": "Field too long: {field}", + "field_too_short": "Field too short: {field}", + "invalid_format": "Invalid format: {field}", + "invalid_value": "Invalid value: {field}", + "out_of_range": "Value out of range: {field}" + } +} diff --git a/api/app/locales/en/i18n.json b/api/app/locales/en/i18n.json new file mode 100644 index 00000000..1662836d --- /dev/null +++ b/api/app/locales/en/i18n.json @@ -0,0 +1,27 @@ +{ + "language": { + "not_found": "Language {locale} not found", + 
"already_exists": "Language {locale} already exists", + "add_instructions": "Language {locale} validated successfully. Please create translation files in {dir} directory to complete the addition.", + "update_instructions": "Language {locale} update validated successfully. Please update I18N_SUPPORTED_LANGUAGES environment variable to apply configuration changes." + }, + "namespace": { + "not_found": "Namespace {namespace} not found in language {locale}" + }, + "translation": { + "invalid_key_format": "Invalid translation key format: {key}. Should use format: namespace.key.subkey", + "update_instructions": "Translation {locale}/{key} update validated successfully. Please modify the corresponding JSON translation file to apply changes." + }, + "reload": { + "disabled": "Translation hot reload is disabled. Please enable I18N_ENABLE_HOT_RELOAD in configuration.", + "success": "Translations reloaded successfully", + "failed": "Translation reload failed: {error}" + }, + "metrics": { + "reset_success": "Performance metrics reset successfully" + }, + "logs": { + "export_success": "Missing translations exported to: {file}", + "clear_success": "Missing translation logs cleared successfully" + } +} diff --git a/api/app/locales/en/tenant.json b/api/app/locales/en/tenant.json new file mode 100644 index 00000000..8c3b4b02 --- /dev/null +++ b/api/app/locales/en/tenant.json @@ -0,0 +1,63 @@ +{ + "info": { + "get_success": "Tenant information retrieved successfully", + "get_failed": "Failed to retrieve tenant information", + "update_success": "Tenant information updated successfully", + "update_failed": "Failed to update tenant information" + }, + "create": { + "success": "Tenant created successfully", + "failed": "Failed to create tenant" + }, + "delete": { + "success": "Tenant deleted successfully", + "failed": "Failed to delete tenant" + }, + "status": { + "activate_success": "Tenant activated successfully", + "activate_failed": "Failed to activate tenant", + 
"deactivate_success": "Tenant deactivated successfully", + "deactivate_failed": "Failed to deactivate tenant" + }, + "language": { + "get_success": "Tenant language configuration retrieved successfully", + "get_failed": "Failed to retrieve tenant language configuration", + "update_success": "Tenant language configuration updated successfully", + "update_failed": "Failed to update tenant language configuration", + "invalid_language": "Unsupported language code", + "default_not_in_supported": "Default language must be in the supported languages list" + }, + "list": { + "get_success": "Tenant list retrieved successfully", + "get_failed": "Failed to retrieve tenant list" + }, + "users": { + "list_success": "Tenant user list retrieved successfully", + "list_failed": "Failed to retrieve tenant user list", + "assign_success": "User assigned to tenant successfully", + "assign_failed": "Failed to assign user to tenant", + "remove_success": "User removed from tenant successfully", + "remove_failed": "Failed to remove user from tenant" + }, + "statistics": { + "get_success": "Tenant statistics retrieved successfully", + "get_failed": "Failed to retrieve tenant statistics" + }, + "validation": { + "name_required": "Tenant name is required", + "name_invalid": "Invalid tenant name format", + "name_too_long": "Tenant name cannot exceed {max} characters", + "description_too_long": "Tenant description cannot exceed {max} characters", + "language_code_invalid": "Invalid language code format", + "supported_languages_empty": "Supported languages list cannot be empty" + }, + "errors": { + "not_found": "Tenant not found", + "already_exists": "Tenant name already exists", + "permission_denied": "Permission denied to access this tenant", + "has_users": "Cannot delete tenant, associated users exist", + "has_workspaces": "Cannot delete tenant, associated workspaces exist", + "already_active": "Tenant is already active", + "already_inactive": "Tenant is already inactive" + } +} diff --git 
a/api/app/locales/en/users.json b/api/app/locales/en/users.json new file mode 100644 index 00000000..efd5d034 --- /dev/null +++ b/api/app/locales/en/users.json @@ -0,0 +1,72 @@ +{ + "info": { + "get_success": "User information retrieved successfully", + "get_failed": "Failed to retrieve user information", + "update_success": "User information updated successfully", + "update_failed": "Failed to update user information" + }, + "create": { + "success": "User created successfully", + "failed": "Failed to create user", + "superuser_success": "Superuser created successfully", + "superuser_failed": "Failed to create superuser" + }, + "delete": { + "success": "User deleted successfully", + "failed": "Failed to delete user", + "deactivate_success": "User deactivated successfully", + "deactivate_failed": "Failed to deactivate user" + }, + "activate": { + "success": "User activated successfully", + "failed": "Failed to activate user" + }, + "language": { + "get_success": "Language preference retrieved successfully", + "get_failed": "Failed to retrieve language preference", + "update_success": "Language preference updated successfully", + "update_failed": "Failed to update language preference", + "invalid_language": "Unsupported language code", + "current": "Current language preference" + }, + "email": { + "change_success": "Email changed successfully", + "change_failed": "Failed to change email", + "code_sent": "Verification code has been sent to your email", + "code_send_failed": "Failed to send verification code", + "code_invalid": "Invalid or expired verification code", + "already_exists": "Email already in use" + }, + "list": { + "get_success": "User list retrieved successfully", + "get_failed": "Failed to retrieve user list", + "superusers_success": "Tenant superuser list retrieved successfully", + "superusers_failed": "Failed to retrieve tenant superuser list" + }, + "validation": { + "username_required": "Username is required", + "username_invalid": "Invalid username 
format", + "username_too_long": "Username cannot exceed {max} characters", + "email_required": "Email is required", + "email_invalid": "Invalid email format", + "password_required": "Password is required", + "password_too_short": "Password must be at least {min} characters", + "password_too_long": "Password cannot exceed {max} characters", + "old_password_required": "Old password is required", + "new_password_required": "New password is required", + "verification_code_required": "Verification code is required", + "verification_code_invalid": "Invalid verification code format" + }, + "errors": { + "not_found": "User not found", + "already_exists": "User already exists", + "permission_denied": "Permission denied to access this user", + "cannot_delete_self": "Cannot delete yourself", + "cannot_deactivate_self": "Cannot deactivate yourself", + "already_deactivated": "User is already deactivated", + "already_activated": "User is already activated", + "password_verification_failed": "Password verification failed", + "old_password_incorrect": "Old password is incorrect", + "same_as_old_password": "New password cannot be the same as old password" + } +} diff --git a/api/app/locales/en/workspace.json b/api/app/locales/en/workspace.json new file mode 100644 index 00000000..cca29698 --- /dev/null +++ b/api/app/locales/en/workspace.json @@ -0,0 +1,44 @@ +{ + "list_retrieved": "Workspace list retrieved successfully", + "created": "Workspace created successfully", + "updated": "Workspace updated successfully", + "deleted": "Workspace deleted successfully", + "switched": "Workspace switched successfully", + "not_found": "Workspace not found or access denied", + "already_exists": "Workspace already exists", + "permission_denied": "No permission to access this workspace", + "name_required": "Workspace name is required", + "invalid_name": "Invalid workspace name format", + "members": { + "list_retrieved": "Workspace members list retrieved successfully", + "role_updated": "Member 
role updated successfully", + "deleted": "Member deleted successfully", + "not_found": "Member not found", + "cannot_remove_self": "Cannot remove yourself", + "cannot_remove_last_manager": "Cannot remove the last manager", + "already_member": "User is already a workspace member" + }, + "invites": { + "created": "Invite created successfully", + "list_retrieved": "Invite list retrieved successfully", + "validated": "Invite validated successfully", + "revoked": "Invite revoked successfully", + "accepted": "Invite accepted", + "not_found": "Invite not found", + "expired": "Invite has expired", + "already_used": "Invite has already been used", + "invalid_token": "Invalid invite token", + "email_required": "Email address is required", + "invalid_email": "Invalid email address format" + }, + "storage": { + "type_retrieved": "Storage type retrieved successfully", + "type_updated": "Storage type updated successfully", + "invalid_type": "Invalid storage type" + }, + "models": { + "config_retrieved": "Model configuration retrieved successfully", + "config_updated": "Model configuration updated successfully", + "invalid_config": "Invalid model configuration" + } +} diff --git a/api/app/locales/zh/README.md b/api/app/locales/zh/README.md new file mode 100644 index 00000000..edaa0fb4 --- /dev/null +++ b/api/app/locales/zh/README.md @@ -0,0 +1,26 @@ +# 中文翻译文件 + +此目录包含中文(简体)的翻译文件。 + +## 文件结构 + +- `common.json` - 通用翻译(成功消息、操作、验证) +- `auth.json` - 认证模块翻译 +- `workspace.json` - 工作空间模块翻译 +- `tenant.json` - 租户模块翻译 +- `errors.json` - 错误消息翻译 +- `enums.json` - 枚举值翻译 + +## 翻译文件格式 + +所有翻译文件使用 JSON 格式,支持嵌套结构。 + +示例: +```json +{ + "success": { + "created": "创建成功", + "updated": "更新成功" + } +} +``` diff --git a/api/app/locales/zh/auth.json b/api/app/locales/zh/auth.json new file mode 100644 index 00000000..283d2ffb --- /dev/null +++ b/api/app/locales/zh/auth.json @@ -0,0 +1,55 @@ +{ + "login": { + "success": "登录成功", + "failed": "登录失败", + "invalid_credentials": "用户名或密码错误", + "account_locked": 
"账户已被锁定", + "account_disabled": "账户已被禁用" + }, + "logout": { + "success": "登出成功", + "failed": "登出失败" + }, + "token": { + "refresh_success": "token刷新成功", + "invalid": "无效的token", + "expired": "token已过期", + "blacklisted": "token已失效", + "invalid_refresh_token": "无效的refresh token", + "refresh_token_blacklisted": "Refresh token已失效" + }, + "registration": { + "success": "注册成功", + "failed": "注册失败", + "email_exists": "邮箱已被使用", + "username_exists": "用户名已被使用" + }, + "password": { + "reset_success": "密码重置成功", + "reset_failed": "密码重置失败", + "change_success": "密码修改成功", + "change_failed": "密码修改失败", + "incorrect": "密码错误", + "too_weak": "密码强度不够", + "mismatch": "两次输入的密码不一致" + }, + "invite": { + "invalid": "邀请码无效或已过期", + "email_mismatch": "邀请邮箱与登录邮箱不匹配", + "accept_success": "接受邀请成功", + "accept_failed": "接受邀请失败", + "password_verification_failed": "接受邀请失败,密码验证错误", + "bind_workspace_success": "绑定工作空间成功", + "bind_workspace_failed": "绑定工作空间失败" + }, + "user": { + "not_found": "用户不存在", + "already_exists": "用户已存在", + "created_with_invite": "用户创建成功并已加入工作空间" + }, + "session": { + "expired": "会话已过期,请重新登录", + "invalid": "无效的会话", + "single_session_enabled": "单点登录已启用,其他设备的登录将被注销" + } +} diff --git a/api/app/locales/zh/common.json b/api/app/locales/zh/common.json new file mode 100644 index 00000000..b3c62adc --- /dev/null +++ b/api/app/locales/zh/common.json @@ -0,0 +1,132 @@ +{ + "success": { + "created": "创建成功", + "updated": "更新成功", + "deleted": "删除成功", + "retrieved": "获取成功", + "saved": "保存成功", + "uploaded": "上传成功", + "downloaded": "下载成功", + "sent": "发送成功", + "completed": "完成", + "confirmed": "已确认", + "cancelled": "已取消", + "archived": "已归档", + "restored": "已恢复" + }, + "actions": { + "create": "创建", + "update": "更新", + "delete": "删除", + "view": "查看", + "edit": "编辑", + "save": "保存", + "cancel": "取消", + "confirm": "确认", + "submit": "提交", + "upload": "上传", + "download": "下载", + "send": "发送", + "search": "搜索", + "filter": "筛选", + "sort": "排序", + "export": "导出", + "import": "导入", + "refresh": "刷新", + 
"reset": "重置", + "back": "返回", + "next": "下一步", + "previous": "上一步", + "finish": "完成", + "close": "关闭", + "open": "打开", + "archive": "归档", + "restore": "恢复", + "duplicate": "复制", + "share": "分享", + "invite": "邀请", + "remove": "移除", + "add": "添加", + "select": "选择", + "clear": "清除" + }, + "validation": { + "required": "{field}不能为空", + "invalid_format": "{field}格式不正确", + "too_long": "{field}长度不能超过{max}个字符", + "too_short": "{field}长度不能少于{min}个字符", + "invalid_email": "邮箱格式不正确", + "invalid_url": "URL格式不正确", + "invalid_phone": "手机号格式不正确", + "invalid_date": "日期格式不正确", + "invalid_number": "必须是有效的数字", + "out_of_range": "{field}必须在{min}和{max}之间", + "already_exists": "{field}已存在", + "not_found": "{field}不存在", + "invalid_value": "{field}的值无效", + "password_mismatch": "两次输入的密码不一致", + "weak_password": "密码强度不够,请使用更复杂的密码", + "invalid_credentials": "用户名或密码错误", + "unauthorized": "未授权访问", + "forbidden": "没有权限执行此操作", + "expired": "{field}已过期", + "invalid_token": "无效的令牌", + "file_too_large": "文件大小不能超过{max}", + "invalid_file_type": "不支持的文件类型", + "duplicate": "重复的{field}" + }, + "status": { + "active": "活跃", + "inactive": "未激活", + "pending": "待处理", + "processing": "处理中", + "completed": "已完成", + "failed": "失败", + "cancelled": "已取消", + "archived": "已归档", + "deleted": "已删除", + "draft": "草稿", + "published": "已发布", + "suspended": "已暂停", + "expired": "已过期" + }, + "messages": { + "loading": "加载中...", + "saving": "保存中...", + "processing": "处理中...", + "uploading": "上传中...", + "downloading": "下载中...", + "no_data": "暂无数据", + "no_results": "没有找到结果", + "confirm_delete": "确定要删除吗?此操作不可恢复。", + "confirm_action": "确定要执行此操作吗?", + "operation_success": "操作成功", + "operation_failed": "操作失败", + "please_wait": "请稍候...", + "try_again": "请重试", + "contact_support": "如果问题持续,请联系技术支持" + }, + "pagination": { + "page": "第{page}页", + "of": "共{total}页", + "items": "共{total}条", + "per_page": "每页{count}条", + "showing": "显示第{from}到第{to}条,共{total}条", + "first": "首页", + "last": "末页", + "next": "下一页", + "previous": "上一页" + }, + 
"time": { + "just_now": "刚刚", + "minutes_ago": "{count}分钟前", + "hours_ago": "{count}小时前", + "days_ago": "{count}天前", + "weeks_ago": "{count}周前", + "months_ago": "{count}个月前", + "years_ago": "{count}年前", + "today": "今天", + "yesterday": "昨天", + "tomorrow": "明天" + } +} diff --git a/api/app/locales/zh/enums.json b/api/app/locales/zh/enums.json new file mode 100644 index 00000000..9a241817 --- /dev/null +++ b/api/app/locales/zh/enums.json @@ -0,0 +1,132 @@ +{ + "workspace_role": { + "owner": "所有者", + "manager": "管理员", + "member": "成员", + "guest": "访客" + }, + "workspace_status": { + "active": "活跃", + "inactive": "未激活", + "archived": "已归档", + "suspended": "已暂停", + "deleted": "已删除" + }, + "invite_status": { + "pending": "待处理", + "accepted": "已接受", + "rejected": "已拒绝", + "revoked": "已撤销", + "expired": "已过期" + }, + "user_status": { + "active": "活跃", + "inactive": "未激活", + "suspended": "已暂停", + "deleted": "已删除", + "pending": "待激活" + }, + "tenant_status": { + "active": "活跃", + "inactive": "未激活", + "suspended": "已暂停", + "expired": "已过期", + "trial": "试用中" + }, + "file_status": { + "uploading": "上传中", + "processing": "处理中", + "completed": "已完成", + "failed": "失败", + "deleted": "已删除" + }, + "task_status": { + "pending": "待处理", + "running": "运行中", + "completed": "已完成", + "failed": "失败", + "cancelled": "已取消", + "paused": "已暂停" + }, + "priority": { + "low": "低", + "medium": "中", + "high": "高", + "urgent": "紧急" + }, + "visibility": { + "public": "公开", + "private": "私有", + "internal": "内部", + "shared": "共享" + }, + "permission": { + "read": "读取", + "write": "写入", + "delete": "删除", + "admin": "管理", + "owner": "所有者" + }, + "notification_type": { + "info": "信息", + "warning": "警告", + "error": "错误", + "success": "成功" + }, + "language": { + "zh": "中文(简体)", + "en": "English", + "ja": "日本語", + "ko": "한국어", + "fr": "Français", + "de": "Deutsch", + "es": "Español" + }, + "timezone": { + "utc": "UTC", + "asia_shanghai": "亚洲/上海", + "asia_tokyo": "亚洲/东京", + "america_new_york": "美洲/纽约", + 
"europe_london": "欧洲/伦敦" + }, + "date_format": { + "short": "短日期", + "medium": "中等日期", + "long": "长日期", + "full": "完整日期" + }, + "sort_order": { + "asc": "升序", + "desc": "降序" + }, + "filter_operator": { + "equals": "等于", + "not_equals": "不等于", + "contains": "包含", + "not_contains": "不包含", + "starts_with": "开始于", + "ends_with": "结束于", + "greater_than": "大于", + "less_than": "小于", + "greater_or_equal": "大于等于", + "less_or_equal": "小于等于", + "in": "在列表中", + "not_in": "不在列表中", + "is_null": "为空", + "is_not_null": "不为空" + }, + "log_level": { + "debug": "调试", + "info": "信息", + "warning": "警告", + "error": "错误", + "critical": "严重" + }, + "api_method": { + "get": "GET", + "post": "POST", + "put": "PUT", + "patch": "PATCH", + "delete": "DELETE" + } +} diff --git a/api/app/locales/zh/errors.json b/api/app/locales/zh/errors.json new file mode 100644 index 00000000..eafadad4 --- /dev/null +++ b/api/app/locales/zh/errors.json @@ -0,0 +1,138 @@ +{ + "common": { + "internal_error": "服务器内部错误", + "network_error": "网络连接错误", + "timeout": "请求超时", + "service_unavailable": "服务暂时不可用", + "bad_request": "请求参数错误", + "unauthorized": "未授权访问", + "forbidden": "没有权限访问", + "not_found": "请求的资源不存在", + "method_not_allowed": "不支持的请求方法", + "conflict": "资源冲突", + "too_many_requests": "请求过于频繁,请稍后再试", + "validation_failed": "数据验证失败", + "database_error": "数据库操作失败", + "file_operation_error": "文件操作失败" + }, + "auth": { + "invalid_credentials": "用户名或密码错误", + "token_expired": "登录已过期,请重新登录", + "token_invalid": "无效的登录令牌", + "token_missing": "缺少登录令牌", + "unauthorized": "未授权访问", + "forbidden": "没有权限执行此操作", + "account_locked": "账户已被锁定", + "account_disabled": "账户已被禁用", + "account_not_verified": "账户未验证", + "password_incorrect": "密码错误", + "password_too_weak": "密码强度不够", + "password_expired": "密码已过期,请修改密码", + "email_not_verified": "邮箱未验证", + "phone_not_verified": "手机号未验证", + "verification_code_invalid": "验证码无效", + "verification_code_expired": "验证码已过期", + "login_failed": "登录失败", + "logout_failed": "登出失败", + "session_expired": 
"会话已过期", + "already_logged_in": "已经登录", + "not_logged_in": "未登录" + }, + "user": { + "not_found": "用户不存在", + "already_exists": "用户已存在", + "email_already_exists": "邮箱已被使用", + "phone_already_exists": "手机号已被使用", + "username_already_exists": "用户名已被使用", + "invalid_email": "邮箱格式不正确", + "invalid_phone": "手机号格式不正确", + "invalid_username": "用户名格式不正确", + "create_failed": "创建用户失败", + "update_failed": "更新用户失败", + "delete_failed": "删除用户失败", + "cannot_delete_self": "不能删除自己", + "cannot_update_self_role": "不能修改自己的角色", + "profile_update_failed": "更新个人资料失败", + "avatar_upload_failed": "上传头像失败", + "password_change_failed": "修改密码失败", + "old_password_incorrect": "原密码错误" + }, + "workspace": { + "not_found": "工作空间不存在", + "already_exists": "工作空间已存在", + "name_required": "工作空间名称不能为空", + "name_too_long": "工作空间名称过长", + "create_failed": "创建工作空间失败", + "update_failed": "更新工作空间失败", + "delete_failed": "删除工作空间失败", + "permission_denied": "没有权限访问此工作空间", + "not_member": "不是工作空间成员", + "already_member": "已经是工作空间成员", + "member_limit_reached": "成员数量已达上限", + "cannot_leave_last_manager": "不能离开,您是最后一个管理员", + "cannot_remove_last_manager": "不能移除最后一个管理员", + "cannot_remove_self": "不能移除自己", + "invite_not_found": "邀请不存在", + "invite_expired": "邀请已过期", + "invite_already_accepted": "邀请已被接受", + "invite_already_revoked": "邀请已被撤销", + "invite_send_failed": "发送邀请失败", + "archived": "工作空间已归档", + "suspended": "工作空间已暂停" + }, + "tenant": { + "not_found": "租户不存在", + "already_exists": "租户已存在", + "create_failed": "创建租户失败", + "update_failed": "更新租户失败", + "delete_failed": "删除租户失败", + "suspended": "租户已暂停", + "expired": "租户已过期", + "license_invalid": "许可证无效", + "license_expired": "许可证已过期", + "quota_exceeded": "配额已超限" + }, + "file": { + "not_found": "文件不存在", + "upload_failed": "文件上传失败", + "download_failed": "文件下载失败", + "delete_failed": "文件删除失败", + "too_large": "文件大小超过限制", + "invalid_type": "不支持的文件类型", + "invalid_format": "文件格式不正确", + "corrupted": "文件已损坏", + "storage_full": "存储空间已满", + "access_denied": "没有权限访问此文件" + }, + "api": { + 
"rate_limit_exceeded": "API调用频率超限", + "quota_exceeded": "API调用配额已用完", + "invalid_api_key": "无效的API密钥", + "api_key_expired": "API密钥已过期", + "api_key_revoked": "API密钥已被撤销", + "endpoint_not_found": "API端点不存在", + "method_not_allowed": "不支持的请求方法", + "invalid_request": "无效的请求", + "missing_parameter": "缺少必需参数:{param}", + "invalid_parameter": "参数无效:{param}" + }, + "database": { + "connection_failed": "数据库连接失败", + "query_failed": "数据库查询失败", + "transaction_failed": "数据库事务失败", + "constraint_violation": "数据约束冲突", + "duplicate_key": "数据重复", + "foreign_key_violation": "外键约束冲突", + "deadlock": "数据库死锁" + }, + "validation": { + "invalid_input": "输入数据无效", + "missing_field": "缺少必需字段:{field}", + "invalid_field": "字段无效:{field}", + "field_too_long": "字段过长:{field}", + "field_too_short": "字段过短:{field}", + "invalid_format": "格式不正确:{field}", + "invalid_value": "值无效:{field}", + "out_of_range": "值超出范围:{field}" + } +} diff --git a/api/app/locales/zh/i18n.json b/api/app/locales/zh/i18n.json new file mode 100644 index 00000000..a072f332 --- /dev/null +++ b/api/app/locales/zh/i18n.json @@ -0,0 +1,27 @@ +{ + "language": { + "not_found": "语言 {locale} 不存在", + "already_exists": "语言 {locale} 已存在", + "add_instructions": "语言 {locale} 验证成功。请在 {dir} 目录下创建翻译文件以完成添加。", + "update_instructions": "语言 {locale} 更新验证成功。请更新环境变量 I18N_SUPPORTED_LANGUAGES 以应用配置更改。" + }, + "namespace": { + "not_found": "命名空间 {namespace} 在语言 {locale} 中不存在" + }, + "translation": { + "invalid_key_format": "翻译键格式无效: {key}。应使用格式: namespace.key.subkey", + "update_instructions": "翻译 {locale}/{key} 更新验证成功。请修改对应的 JSON 翻译文件以应用更改。" + }, + "reload": { + "disabled": "翻译热重载功能已禁用。请在配置中启用 I18N_ENABLE_HOT_RELOAD。", + "success": "翻译重载成功", + "failed": "翻译重载失败: {error}" + }, + "metrics": { + "reset_success": "性能指标已重置" + }, + "logs": { + "export_success": "缺失翻译已导出到: {file}", + "clear_success": "缺失翻译日志已清除" + } +} diff --git a/api/app/locales/zh/tenant.json b/api/app/locales/zh/tenant.json new file mode 100644 index 00000000..a8bdc124 --- /dev/null +++ 
b/api/app/locales/zh/tenant.json @@ -0,0 +1,63 @@ +{ + "info": { + "get_success": "租户信息获取成功", + "get_failed": "租户信息获取失败", + "update_success": "租户信息更新成功", + "update_failed": "租户信息更新失败" + }, + "create": { + "success": "租户创建成功", + "failed": "租户创建失败" + }, + "delete": { + "success": "租户删除成功", + "failed": "租户删除失败" + }, + "status": { + "activate_success": "租户启用成功", + "activate_failed": "租户启用失败", + "deactivate_success": "租户禁用成功", + "deactivate_failed": "租户禁用失败" + }, + "language": { + "get_success": "租户语言配置获取成功", + "get_failed": "租户语言配置获取失败", + "update_success": "租户语言配置更新成功", + "update_failed": "租户语言配置更新失败", + "invalid_language": "不支持的语言代码", + "default_not_in_supported": "默认语言必须在支持的语言列表中" + }, + "list": { + "get_success": "租户列表获取成功", + "get_failed": "租户列表获取失败" + }, + "users": { + "list_success": "租户用户列表获取成功", + "list_failed": "租户用户列表获取失败", + "assign_success": "用户分配到租户成功", + "assign_failed": "用户分配到租户失败", + "remove_success": "用户从租户移除成功", + "remove_failed": "用户从租户移除失败" + }, + "statistics": { + "get_success": "租户统计信息获取成功", + "get_failed": "租户统计信息获取失败" + }, + "validation": { + "name_required": "租户名称不能为空", + "name_invalid": "租户名称格式不正确", + "name_too_long": "租户名称长度不能超过{max}个字符", + "description_too_long": "租户描述长度不能超过{max}个字符", + "language_code_invalid": "语言代码格式不正确", + "supported_languages_empty": "支持的语言列表不能为空" + }, + "errors": { + "not_found": "租户不存在", + "already_exists": "租户名称已存在", + "permission_denied": "没有权限访问此租户", + "has_users": "无法删除租户,存在关联的用户", + "has_workspaces": "无法删除租户,存在关联的工作空间", + "already_active": "租户已处于激活状态", + "already_inactive": "租户已处于禁用状态" + } +} diff --git a/api/app/locales/zh/users.json b/api/app/locales/zh/users.json new file mode 100644 index 00000000..a446ed8d --- /dev/null +++ b/api/app/locales/zh/users.json @@ -0,0 +1,72 @@ +{ + "info": { + "get_success": "用户信息获取成功", + "get_failed": "用户信息获取失败", + "update_success": "用户信息更新成功", + "update_failed": "用户信息更新失败" + }, + "create": { + "success": "用户创建成功", + "failed": "用户创建失败", + "superuser_success": "超级管理员创建成功", + 
"superuser_failed": "超级管理员创建失败" + }, + "delete": { + "success": "用户删除成功", + "failed": "用户删除失败", + "deactivate_success": "用户停用成功", + "deactivate_failed": "用户停用失败" + }, + "activate": { + "success": "用户激活成功", + "failed": "用户激活失败" + }, + "language": { + "get_success": "语言偏好获取成功", + "get_failed": "语言偏好获取失败", + "update_success": "语言偏好更新成功", + "update_failed": "语言偏好更新失败", + "invalid_language": "不支持的语言代码", + "current": "当前语言偏好" + }, + "email": { + "change_success": "邮箱修改成功", + "change_failed": "邮箱修改失败", + "code_sent": "验证码已发送到您的邮箱,请查收", + "code_send_failed": "验证码发送失败", + "code_invalid": "验证码无效或已过期", + "already_exists": "该邮箱已被使用" + }, + "list": { + "get_success": "用户列表获取成功", + "get_failed": "用户列表获取失败", + "superusers_success": "租户超管列表获取成功", + "superusers_failed": "租户超管列表获取失败" + }, + "validation": { + "username_required": "用户名不能为空", + "username_invalid": "用户名格式不正确", + "username_too_long": "用户名长度不能超过{max}个字符", + "email_required": "邮箱不能为空", + "email_invalid": "邮箱格式不正确", + "password_required": "密码不能为空", + "password_too_short": "密码长度不能少于{min}个字符", + "password_too_long": "密码长度不能超过{max}个字符", + "old_password_required": "旧密码不能为空", + "new_password_required": "新密码不能为空", + "verification_code_required": "验证码不能为空", + "verification_code_invalid": "验证码格式不正确" + }, + "errors": { + "not_found": "用户不存在", + "already_exists": "用户已存在", + "permission_denied": "没有权限访问此用户", + "cannot_delete_self": "不能删除自己", + "cannot_deactivate_self": "不能停用自己", + "already_deactivated": "用户已被停用", + "already_activated": "用户已处于激活状态", + "password_verification_failed": "密码验证失败", + "old_password_incorrect": "旧密码不正确", + "same_as_old_password": "新密码不能与旧密码相同" + } +} diff --git a/api/app/locales/zh/workspace.json b/api/app/locales/zh/workspace.json new file mode 100644 index 00000000..e7dba7dc --- /dev/null +++ b/api/app/locales/zh/workspace.json @@ -0,0 +1,44 @@ +{ + "list_retrieved": "工作空间列表获取成功", + "created": "工作空间创建成功", + "updated": "工作空间更新成功", + "deleted": "工作空间删除成功", + "switched": "工作空间切换成功", + "not_found": 
# Handle i18n (internationalized) exceptions.
@app.exception_handler(I18nException)
async def i18n_exception_handler(request: Request, exc: I18nException):
    """
    Convert an ``I18nException`` into a JSON error response.

    Per the original author's note, the exception is expected to already
    carry a translated message in ``exc.detail``, so no translation is done
    here. The message is passed through ``SensitiveDataFilter`` and returned
    with the exception's status code and headers.

    Args:
        request: Incoming request. ``request.state.language`` is read if
            present (presumably set by the language middleware - confirm);
            otherwise ``settings.I18N_DEFAULT_LANGUAGE`` is used. The value
            is only used for logging.
        exc: The raised exception. Its ``detail``, ``error_key``,
            ``error_code``, ``status_code``, ``params`` and ``headers``
            attributes are read.

    Returns:
        A ``JSONResponse`` whose body is ``{"success": False, ...detail}``.
    """
    # Language resolved for this request; falls back to the configured default.
    language = getattr(request.state, "language", settings.I18N_DEFAULT_LANGUAGE)

    # Exception detail (expected to already contain the translated message).
    detail = exc.detail

    # Filter sensitive information out of the user-visible message.
    if isinstance(detail, dict):
        filtered_message = SensitiveDataFilter.filter_string(detail.get("message", ""))
        filtered_detail = {
            **detail,
            "message": filtered_message
        }
    else:
        # Always keep ``filtered_detail`` a mapping: it is unpacked with ``**``
        # into the response body below, and unpacking a plain string would
        # raise ``TypeError`` inside the error handler itself.
        filtered_detail = {
            "message": SensitiveDataFilter.filter_string(str(detail))
        }

    logger.warning(
        f"I18n exception: {exc.error_key}",
        extra={
            "path": request.url.path,
            "method": request.method,
            "error_code": exc.error_code,
            "error_key": exc.error_key,
            "language": language,
            "status_code": exc.status_code,
            "params": exc.params
        }
    )

    return JSONResponse(
        status_code=exc.status_code,
        content={
            "success": False,
            **filtered_detail
        },
        headers=exc.headers
    )
+ "type": error_type + }) + + # 翻译主错误消息 + main_message = translation_service.translate( + "errors.common.validation_failed", + language + ) + + logger.warning( + f"Pydantic validation error: {len(errors)} errors", + extra={ + "path": request.url.path, + "method": request.method, + "language": language, + "errors": errors + } + ) + + return JSONResponse( + status_code=422, + content={ + "success": False, + "error_code": "VALIDATION_FAILED", + "message": main_message, + "errors": errors + } + ) + + # 处理资源不存在异常 @app.exception_handler(ResourceNotFoundException) async def not_found_exception_handler(request: Request, exc: ResourceNotFoundException): @@ -354,31 +488,66 @@ async def business_exception_handler(request: Request, exc: BusinessException): ) -# 统一异常处理:将HTTPException转换为统一响应结构 +# 统一异常处理:将HTTPException转换为统一响应结构(支持国际化) @app.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException): - """处理HTTP异常""" - # 过滤敏感信息 - filtered_detail = SensitiveDataFilter.filter_string(str(exc.detail)) - + """处理HTTP异常,支持国际化""" + # 获取当前语言 + language = getattr(request.state, "language", settings.I18N_DEFAULT_LANGUAGE) + + # 获取翻译服务 + translation_service = get_translation_service() + + # 尝试翻译标准HTTP错误 + error_key_map = { + 400: "errors.common.bad_request", + 401: "errors.common.unauthorized", + 403: "errors.common.forbidden", + 404: "errors.common.not_found", + 405: "errors.common.method_not_allowed", + 409: "errors.common.conflict", + 422: "errors.common.validation_failed", + 429: "errors.common.too_many_requests", + 500: "errors.common.internal_error", + 503: "errors.common.service_unavailable", + } + + # 如果有对应的翻译键,使用翻译 + if exc.status_code in error_key_map: + translated_message = translation_service.translate( + error_key_map[exc.status_code], + language + ) + else: + # 否则过滤原始消息 + translated_message = SensitiveDataFilter.filter_string(str(exc.detail)) + logger.warning( - f"HTTP exception: {filtered_detail}", + f"HTTP exception: 
{translated_message}", extra={ "path": request.url.path, "method": request.method, - "status_code": exc.status_code + "status_code": exc.status_code, + "language": language } ) + return JSONResponse( status_code=exc.status_code, - content=fail(code=exc.status_code, msg=filtered_detail, error=filtered_detail) + content=fail(code=exc.status_code, msg=translated_message, error=translated_message) ) -# 捕获未处理的异常,返回统一错误结构 +# 捕获未处理的异常,返回统一错误结构(支持国际化) @app.exception_handler(Exception) async def unhandled_exception_handler(request: Request, exc: Exception): - """处理未捕获的异常""" + """处理未捕获的异常,支持国际化""" + # 获取当前语言 + language = getattr(request.state, "language", settings.I18N_DEFAULT_LANGUAGE) + + # 获取翻译服务 + translation_service = get_translation_service() + # 记录完整的堆栈跟踪(日志过滤器会自动过滤敏感信息) logger.error( f"Unhandled exception: {exc}", @@ -386,6 +555,7 @@ async def unhandled_exception_handler(request: Request, exc: Exception): "path": request.url.path, "method": request.method, "exception_type": type(exc).__name__, + "language": language, "traceback": traceback.format_exc() }, exc_info=True @@ -394,7 +564,11 @@ async def unhandled_exception_handler(request: Request, exc: Exception): # 生产环境隐藏详细错误信息 environment = os.getenv("ENVIRONMENT", "development") if environment == "production": - message = "服务器内部错误,请稍后重试" + # 使用翻译的通用错误消息 + message = translation_service.translate( + "errors.common.internal_error", + language + ) else: # 开发环境也要过滤敏感信息 message = SensitiveDataFilter.filter_string(str(exc)) diff --git a/api/app/models/agent_app_config_model.py b/api/app/models/agent_app_config_model.py index cc2e0686..3ece049e 100644 --- a/api/app/models/agent_app_config_model.py +++ b/api/app/models/agent_app_config_model.py @@ -31,6 +31,7 @@ class AgentConfig(Base): variables = Column(JSON, default=list, nullable=True, comment="变量配置") tools = Column(JSON, default=list, nullable=True, comment="工具配置") skills = Column(JSON, default=dict, nullable=True, comment="技能配置") + features = Column(JSON, default=dict, 
nullable=True, comment="功能特性配置") # 多 Agent 相关字段 agent_role = Column(String(20), comment="Agent 角色: master|sub|standalone") diff --git a/api/app/models/appshare_model.py b/api/app/models/appshare_model.py index 57ea59bc..4e9312d6 100644 --- a/api/app/models/appshare_model.py +++ b/api/app/models/appshare_model.py @@ -1,6 +1,6 @@ import datetime import uuid -from sqlalchemy import Column, DateTime, ForeignKey +from sqlalchemy import Boolean, Column, DateTime, ForeignKey, String from sqlalchemy.dialects.postgresql import UUID from app.db import Base from sqlalchemy.orm import relationship @@ -18,6 +18,8 @@ class AppShare(Base): source_workspace_id = Column(UUID(as_uuid=True), ForeignKey('workspaces.id'), nullable=False, comment="源工作空间ID") target_workspace_id = Column(UUID(as_uuid=True), ForeignKey('workspaces.id'), nullable=False, comment="目标工作空间ID") shared_by = Column(UUID(as_uuid=True), ForeignKey('users.id'), nullable=False, comment="分享者用户ID") + permission = Column(String, default="readonly", nullable=False, comment="权限模式: readonly | editable") + is_active = Column(Boolean, default=True, server_default='true', nullable=False, comment="是否有效,False 表示逻辑删除") created_at = Column(DateTime, default=datetime.datetime.now) updated_at = Column(DateTime, default=datetime.datetime.now) diff --git a/api/app/models/end_user_model.py b/api/app/models/end_user_model.py index 30b56fc5..60600fcf 100644 --- a/api/app/models/end_user_model.py +++ b/api/app/models/end_user_model.py @@ -12,7 +12,8 @@ class EndUser(Base): __tablename__ = "end_users" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False, index=True) - app_id = Column(UUID(as_uuid=True), ForeignKey("apps.id"), nullable=False) + app_id = Column(UUID(as_uuid=True), ForeignKey("apps.id"), nullable=True) + workspace_id = Column(UUID(as_uuid=True), ForeignKey("workspaces.id"), nullable=False) # end_user_id = Column(String, nullable=False, index=True) other_id = Column(String, nullable=True) # 
Store original user_id other_name = Column(String, default="", nullable=False) @@ -51,8 +52,17 @@ class EndUser(Base): growth_trajectory = Column(Text, nullable=True, comment="成长轨迹") memory_insight_updated_at = Column(DateTime, nullable=True, comment="洞察报告最后更新时间") + # RAG存储模式专用字段 - RAG Storage Mode Fields + # storage_type = Column(String, nullable=True, default="neo4j", comment="存储模式类型: neo4j / rag") + rag_tags = Column(Text, nullable=True, comment="RAG模式下提取的标签列表(JSON格式)") + rag_personas = Column(Text, nullable=True, comment="RAG模式下提取的人物形象列表(JSON格式)") + rag_summary_updated_at = Column(DateTime, nullable=True, comment="RAG摘要/标签/人物形象最后更新时间") + # 与 App 的反向关系 app = relationship( "App", back_populates="end_users" - ) \ No newline at end of file + ) + + # 与 WorkSpace 的反向关系 + workspace = relationship("Workspace", back_populates="end_users") \ No newline at end of file diff --git a/api/app/models/memory_perceptual_model.py b/api/app/models/memory_perceptual_model.py index cafb18d4..ae8cc1bd 100644 --- a/api/app/models/memory_perceptual_model.py +++ b/api/app/models/memory_perceptual_model.py @@ -7,7 +7,7 @@ from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.dialects.postgresql import JSONB from app.db import Base - +from app.schemas import FileType class PerceptualType(IntEnum): VISION = 1 @@ -15,6 +15,16 @@ class PerceptualType(IntEnum): TEXT = 3 CONVERSATION = 4 + @staticmethod + def trans_from_file_type(file_type: FileType | str): + type_map = { + FileType.IMAGE: PerceptualType.VISION, + FileType.AUDIO: PerceptualType.AUDIO, + FileType.VIDEO: PerceptualType.VISION, + FileType.DOCUMENT: PerceptualType.TEXT + } + return type_map.get(file_type, PerceptualType.TEXT) + class FileStorageService(IntEnum): LOCAL = 1 diff --git a/api/app/models/tenant_model.py b/api/app/models/tenant_model.py index 54a3e347..044857d2 100644 --- a/api/app/models/tenant_model.py +++ b/api/app/models/tenant_model.py @@ -1,7 +1,7 @@ import datetime import uuid -from sqlalchemy import 
Column, String, DateTime, Boolean -from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy import Column, String, DateTime, Boolean, text +from sqlalchemy.dialects.postgresql import UUID, ARRAY from sqlalchemy.orm import relationship from app.db import Base @@ -20,6 +20,10 @@ class Tenants(Base): external_id = Column(String(100), nullable=True, index=True) # 外部企业ID external_source = Column(String(50), nullable=True) # 来源系统 + # 国际化语言配置字段 + default_language = Column(String(10), nullable=False, default='zh', server_default='zh', index=True) # 租户默认语言 + supported_languages = Column(ARRAY(String(10)), nullable=False, default=lambda: ['zh', 'en'], server_default=text("'{zh,en}'")) # 租户支持的语言列表 + # Relationship to users - one tenant has many users users = relationship("User", back_populates="tenant") diff --git a/api/app/models/tool_model.py b/api/app/models/tool_model.py index 98448bc5..e8d9c528 100644 --- a/api/app/models/tool_model.py +++ b/api/app/models/tool_model.py @@ -110,7 +110,10 @@ class ToolConfig(Base): # 元数据 version = Column(String(50), default="1.0.0") tags = Column(JSON, default=list) # 标签列表 - + + # 逻辑删除标志 + is_active = Column(Boolean, default=True, server_default='true', nullable=False, index=True, comment="是否可用,False表示已删除") + # 时间戳 created_at = Column(DateTime, default=datetime.now, nullable=False) updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now, nullable=False) diff --git a/api/app/models/user_model.py b/api/app/models/user_model.py index 663bfc71..b6de28ec 100644 --- a/api/app/models/user_model.py +++ b/api/app/models/user_model.py @@ -1,6 +1,6 @@ import datetime import uuid -from sqlalchemy import Column, String, Boolean, DateTime, ForeignKey +from sqlalchemy import Column, String, Boolean, DateTime, ForeignKey, text from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import relationship from app.db import Base @@ -22,6 +22,9 @@ class User(Base): external_id = Column(String(100), nullable=True) # 外部用户ID 
external_source = Column(String(50), nullable=True) # 来源系统 + # 用户语言偏好 + preferred_language = Column(String(10), server_default=text("'zh'"), default='zh', nullable=False, index=True) # 用户偏好语言,默认中文 + current_workspace_id = Column(UUID(as_uuid=True), ForeignKey("workspaces.id"), nullable=True) # 当前工作空间ID,可为空 # Foreign key to tenant - each user belongs to exactly one tenant diff --git a/api/app/models/workflow_model.py b/api/app/models/workflow_model.py index 4f9ffe68..29fe5369 100644 --- a/api/app/models/workflow_model.py +++ b/api/app/models/workflow_model.py @@ -35,6 +35,7 @@ class WorkflowConfig(Base): # 执行配置 execution_config = Column(JSONB, nullable=False, default=dict) + features = Column(JSONB, nullable=True, default=dict) # 触发器配置(可选) triggers = Column(JSONB, default=list) diff --git a/api/app/models/workspace_model.py b/api/app/models/workspace_model.py index 4d42ed32..2db5e3df 100644 --- a/api/app/models/workspace_model.py +++ b/api/app/models/workspace_model.py @@ -38,6 +38,7 @@ class Workspace(Base): members = relationship("WorkspaceMember", back_populates="workspace") # users collaborate through membership api_keys = relationship("ApiKey", back_populates="workspace", cascade="all, delete-orphan") # API Keys memory_increments = relationship("MemoryIncrement", back_populates="workspace") + end_users = relationship("EndUser", back_populates="workspace", cascade="all, delete-orphan") class WorkspaceMember(Base): __tablename__ = "workspace_members" diff --git a/api/app/repositories/app_repository.py b/api/app/repositories/app_repository.py index 0c7ba6a4..75a91fd6 100644 --- a/api/app/repositories/app_repository.py +++ b/api/app/repositories/app_repository.py @@ -1,10 +1,11 @@ -from sqlalchemy.orm import Session -from typing import List, Optional import uuid +from typing import List -from app.models.app_model import App +from sqlalchemy import select +from sqlalchemy.orm import Session from app.core.logging_config import get_db_logger +from app.models.app_model 
import App # 获取数据库专用日志器 db_logger = get_db_logger() @@ -35,11 +36,27 @@ class AppRepository: except Exception as e: raise + def get_apps_by_name(self, app_name: str, app_type: str, workspace_id: uuid.UUID) -> List[App]: + try: + stmt = select(App).where( + App.name == app_name, + App.workspace_id == workspace_id, + App.type == app_type, + App.is_active.is_(True), + ) + apps = self.db.execute(stmt).scalars().all() + return list(apps) + except Exception as e: + db_logger.error(f"查询名称 {app_name} 应用异常: {str(e)}") + raise + + def get_apps_by_workspace_id(db: Session, workspace_id: uuid.UUID) -> List[App]: """根据工作空间ID查询应用""" repo = AppRepository(db) return repo.get_apps_by_workspace_id(workspace_id) + def get_apps_by_id(db: Session, app_id: uuid.UUID) -> App: """根据工作空间ID查询应用""" repo = AppRepository(db) diff --git a/api/app/repositories/end_user_repository.py b/api/app/repositories/end_user_repository.py index 48c9c4ec..71c93634 100644 --- a/api/app/repositories/end_user_repository.py +++ b/api/app/repositories/end_user_repository.py @@ -32,6 +32,21 @@ class EndUserRepository: db_logger.error(f"查询应用 {app_id} 下宿主时出错: {str(e)}") raise + def get_end_users_by_workspace(self, workspace_id: uuid.UUID) -> List[EndUser]: + """获取指定 workspace 下的所有 end_user""" + try: + end_users = ( + self.db.query(EndUser) + .filter(EndUser.workspace_id == workspace_id) + .all() + ) + db_logger.info(f"成功查询工作空间 {workspace_id} 下的 {len(end_users)} 个终端用户") + return end_users + except Exception as e: + self.db.rollback() + db_logger.error(f"查询工作空间 {workspace_id} 下终端用户时出错: {str(e)}") + raise + def get_end_user_by_id(self, end_user_id: uuid.UUID) -> Optional[EndUser]: """根据 end_user_id 查询宿主""" try: @@ -51,8 +66,9 @@ class EndUserRepository: raise def get_or_create_end_user( - self, - app_id: uuid.UUID, + self, + app_id: uuid.UUID, + workspace_id: uuid.UUID, other_id: str, original_user_id: Optional[str] = None ) -> EndUser: @@ -60,6 +76,7 @@ class EndUserRepository: Args: app_id: 应用ID + workspace_id: 
工作空间ID other_id: 第三方ID original_user_id: 原始用户ID (存储到 other_id) """ @@ -68,26 +85,31 @@ class EndUserRepository: end_user = ( self.db.query(EndUser) .filter( - EndUser.app_id == app_id, + EndUser.workspace_id == workspace_id, EndUser.other_id == other_id ) + .order_by(EndUser.created_at.asc()) .first() ) if end_user: - db_logger.debug(f"找到现有终端用户: 应用ID {app_id}、第三方ID {other_id}") + db_logger.debug(f"找到现有终端用户: 应用ID {workspace_id}、第三方ID {other_id}") + end_user.app_id=app_id + self.db.commit() + self.db.refresh(end_user) return end_user # 创建新用户 end_user = EndUser( app_id=app_id, + workspace_id=workspace_id, other_id=other_id ) self.db.add(end_user) self.db.commit() self.db.refresh(end_user) - db_logger.info(f"创建新终端用户: (other_id: {other_id}) for app {app_id}") + db_logger.info(f"创建新终端用户: (other_id: {other_id}) for workspace {workspace_id}") return end_user except Exception as e: @@ -220,6 +242,88 @@ class EndUserRepository: db_logger.error(f"更新终端用户 {end_user_id} 的用户摘要缓存时出错: {str(e)}") raise + def update_rag_summary_tags( + self, + end_user_id: uuid.UUID, + user_summary: str, + rag_tags: str, + rag_personas: str, + ) -> bool: + """更新RAG模式下的用户摘要、标签和人物形象缓存 + + Args: + end_user_id: 终端用户ID + user_summary: 用户摘要文本 + rag_tags: 标签列表(JSON字符串) + rag_personas: 人物形象列表(JSON字符串) + + Returns: + bool: 更新成功返回True,否则返回False + """ + try: + updated_count = ( + self.db.query(EndUser) + .filter(EndUser.id == end_user_id) + .update( + { + EndUser.user_summary: user_summary, + EndUser.rag_tags: rag_tags, + EndUser.rag_personas: rag_personas, + EndUser.rag_summary_updated_at: datetime.datetime.now(), + }, + synchronize_session=False + ) + ) + self.db.commit() + if updated_count > 0: + db_logger.info(f"成功更新终端用户 {end_user_id} 的RAG摘要/标签/人物形象缓存") + return True + else: + db_logger.warning(f"未找到终端用户 {end_user_id},无法更新RAG摘要缓存") + return False + except Exception as e: + self.db.rollback() + db_logger.error(f"更新终端用户 {end_user_id} 的RAG摘要缓存时出错: {str(e)}") + raise + + def update_rag_insight( + self, + 
end_user_id: uuid.UUID, + memory_insight: str, + ) -> bool: + """更新RAG模式下的记忆洞察缓存 + + Args: + end_user_id: 终端用户ID + memory_insight: 洞察文本 + + Returns: + bool: 更新成功返回True,否则返回False + """ + try: + updated_count = ( + self.db.query(EndUser) + .filter(EndUser.id == end_user_id) + .update( + { + EndUser.memory_insight: memory_insight, + EndUser.memory_insight_updated_at: datetime.datetime.now(), + }, + synchronize_session=False + ) + ) + self.db.commit() + if updated_count > 0: + db_logger.info(f"成功更新终端用户 {end_user_id} 的RAG洞察缓存") + return True + else: + db_logger.warning(f"未找到终端用户 {end_user_id},无法更新RAG洞察缓存") + return False + except Exception as e: + self.db.rollback() + db_logger.error(f"更新终端用户 {end_user_id} 的RAG洞察缓存时出错: {str(e)}") + raise + def get_all_by_workspace(self, workspace_id: uuid.UUID) -> List[EndUser]: """获取工作空间的所有终端用户 @@ -232,8 +336,7 @@ class EndUserRepository: try: end_users = ( self.db.query(EndUser) - .join(App, EndUser.app_id == App.id) - .filter(App.workspace_id == workspace_id) + .filter(EndUser.workspace_id == workspace_id) .all() ) db_logger.info(f"成功查询工作空间 {workspace_id} 下的 {len(end_users)} 个终端用户") @@ -320,45 +423,79 @@ class EndUserRepository: db_logger.error(f"获取终端用户 {end_user_id} 的 memory_config_id 时出错: {str(e)}") raise - def batch_update_memory_config_id( - self, - app_id: uuid.UUID, - memory_config_id: uuid.UUID + # def batch_update_memory_config_id( + # self, + # app_id: uuid.UUID, + # memory_config_id: uuid.UUID + # ) -> int: + # """批量更新应用下所有终端用户的 memory_config_id + # + # Args: + # app_id: 应用ID + # memory_config_id: 新的记忆配置ID + # + # Returns: + # int: 更新的行数 + # """ + # try: + # from sqlalchemy import update + # + # stmt = ( + # update(EndUser) + # .where(EndUser.app_id == app_id) + # .values(memory_config_id=memory_config_id) + # ) + # + # result = self.db.execute(stmt) + # self.db.commit() + # + # updated_count = result.rowcount + # + # db_logger.info( + # f"批量更新终端用户记忆配置: app_id={app_id}, " + # f"memory_config_id={memory_config_id}, 
updated_count={updated_count}" + # ) + # + # return updated_count + # + # except Exception as e: + # self.db.rollback() + # db_logger.error( + # f"批量更新终端用户记忆配置时出错: app_id={app_id}, " + # f"memory_config_id={memory_config_id}, error={str(e)}" + # ) + # raise + + def batch_update_memory_config_id_by_workspace( + self, + workspace_id: uuid.UUID, + memory_config_id: uuid.UUID ) -> int: - """批量更新应用下所有终端用户的 memory_config_id - - Args: - app_id: 应用ID - memory_config_id: 新的记忆配置ID - - Returns: - int: 更新的行数 - """ + """批量更新工作空间下所有终端用户的 memory_config_id""" try: from sqlalchemy import update stmt = ( update(EndUser) - .where(EndUser.app_id == app_id) + .where(EndUser.workspace_id == workspace_id) .values(memory_config_id=memory_config_id) ) - + result = self.db.execute(stmt) self.db.commit() - + updated_count = result.rowcount - + db_logger.info( - f"批量更新终端用户记忆配置: app_id={app_id}, " + f"批量更新终端用户记忆配置: workspace_id={workspace_id}, " f"memory_config_id={memory_config_id}, updated_count={updated_count}" ) - + return updated_count - except Exception as e: self.db.rollback() db_logger.error( - f"批量更新终端用户记忆配置时出错: app_id={app_id}, " + f"批量更新终端用户记忆配置时出错: workspace_id={workspace_id}, " f"memory_config_id={memory_config_id}, error={str(e)}" ) raise @@ -410,7 +547,7 @@ class EndUserRepository: """ try: from sqlalchemy import update - + stmt = ( update(EndUser) .where(EndUser.memory_config_id == memory_config_id) @@ -437,10 +574,16 @@ class EndUserRepository: ) raise -def get_end_users_by_app_id(db: Session, app_id: uuid.UUID) -> List[EndUser]: - """根据应用ID查询宿主(返回 EndUser ORM 列表)""" +# def get_end_users_by_app_id(db: Session, app_id: uuid.UUID) -> List[EndUser]: +# """根据应用ID查询宿主(返回 EndUser ORM 列表)""" +# repo = EndUserRepository(db) +# end_users = repo.get_end_users_by_app_id(app_id) +# return end_users + +def get_end_users_by_workspace(db: Session, workspace_id: uuid.UUID) -> List[EndUser]: + """根据工作空间ID查询终端用户(返回 EndUser ORM 列表)""" repo = EndUserRepository(db) - end_users = 
repo.get_end_users_by_app_id(app_id) + end_users = repo.get_end_users_by_workspace(workspace_id) return end_users def get_end_user_by_id(db: Session, end_user_id: uuid.UUID) -> Optional[EndUser]: diff --git a/api/app/repositories/implicit_emotions_storage_repository.py b/api/app/repositories/implicit_emotions_storage_repository.py index 97405ab6..b6c40b40 100644 --- a/api/app/repositories/implicit_emotions_storage_repository.py +++ b/api/app/repositories/implicit_emotions_storage_repository.py @@ -5,13 +5,22 @@ Implicit Emotions Storage Repository 事务由调用方控制,仓储层只使用 flush/refresh """ import logging -from datetime import datetime, date, timezone, timedelta -from typing import Optional, Generator -from sqlalchemy.orm import Session -from sqlalchemy import select, not_, exists +from datetime import date, datetime, timezone +from typing import Generator, Optional + + +class TimeFilterUnavailableError(Exception): + """redis_client 不可用,无法执行时间轴筛选。 + + 调用方捕获此异常后可选择回退到 get_all_user_ids 进行全量处理。 + """ + +import redis +from sqlalchemy import exists, not_, select +from sqlalchemy.orm import Session -from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage from app.models.end_user_model import EndUser +from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage logger = logging.getLogger(__name__) @@ -111,6 +120,87 @@ class ImplicitEmotionsStorageRepository: logger.error(f"分批获取用户ID失败: offset={offset}, error={e}") break + def get_users_needing_refresh(self, redis_client: redis.StrictRedis, batch_size: int = 100) -> Generator[str, None, None]: + """分批次获取需要刷新隐性记忆/情绪数据的存量用户ID。 + + 筛选逻辑: + - 查询 implicit_emotions_storage 中所有用户的 end_user_id 和 updated_at + - 从 Redis 读取 write_message:last_done:{end_user_id} 的时间戳 + - 若 Redis 中无记录(该用户从未写入过记忆),跳过 + - 若 last_done > updated_at,说明上次刷新后又有新记忆写入,需要刷新 + - 若 last_done <= updated_at,说明已是最新,跳过 + + Args: + redis_client: 同步 redis.StrictRedis 实例(连接 CELERY_BACKEND DB) + batch_size: 每批次加载的数量 + + Raises: + 
TimeFilterUnavailableError: redis_client 为 None 时抛出,调用方可捕获并回退到 get_all_user_ids + + Yields: + 需要刷新的用户ID字符串 + """ + if redis_client is None: + raise TimeFilterUnavailableError("redis_client 不可用,无法执行时间轴筛选") + + from redis.exceptions import RedisError + + offset = 0 + while True: + try: + stmt = ( + select(ImplicitEmotionsStorage.end_user_id, ImplicitEmotionsStorage.updated_at) + .order_by(ImplicitEmotionsStorage.end_user_id) + .limit(batch_size) + .offset(offset) + ) + batch = self.db.execute(stmt).all() + if not batch: + break + + # 批量获取当前批次所有用户的 last_done 时间戳(一次网络往返) + keys = [f"write_message:last_done:{end_user_id}" for end_user_id, _ in batch] + + try: + raw_values = redis_client.mget(keys) + except RedisError as e: + logger.error( + f"Redis mget 操作失败: {e},当前批次降级为处理所有用户", + extra={"offset": offset, "batch_size": len(batch)} + ) + # Redis 操作失败,降级为返回当前批次所有用户 + yield from (end_user_id for end_user_id, _ in batch) + offset += batch_size + continue + + for (end_user_id, updated_at), raw in zip(batch, raw_values): + if raw is None: + continue + try: + last_done = datetime.fromisoformat(raw) + # last_done 写入时已是 UTC aware(+00:00),确保有 tzinfo + if last_done.tzinfo is None: + last_done = last_done.replace(tzinfo=timezone.utc) + + if updated_at is None: + yield end_user_id + continue + # updated_at 数据库存的是 UTC naive,补上 UTC tzinfo 再比较 + if updated_at.tzinfo is None: + updated_at_utc = updated_at.replace(tzinfo=timezone.utc) + else: + updated_at_utc = updated_at.astimezone(timezone.utc) + + if last_done > updated_at_utc: + yield end_user_id + except Exception as e: + logger.warning(f"解析 last_done 时间戳失败: end_user_id={end_user_id}, raw={raw}, error={e}") + + offset += batch_size + except Exception as e: + logger.error(f"get_users_needing_refresh 分批查询失败: offset={offset}, error={e}") + break + def get_new_user_ids_today(self, batch_size: int = 100) -> Generator[str, None, None]: """分批次获取当天新增的、尚未初始化隐性记忆和情绪建议数据的用户ID @@ -124,7 +214,8 @@ class ImplicitEmotionsStorageRepository: Yields: 
用户ID字符串 """ - from sqlalchemy import cast, String as SAString + from sqlalchemy import String as SAString + from sqlalchemy import cast CST = timezone(timedelta(hours=8)) now_cst = datetime.now(CST) today_start = now_cst.replace(hour=0, minute=0, second=0, microsecond=0).astimezone(timezone.utc).replace(tzinfo=None) diff --git a/api/app/repositories/knowledge_repository.py b/api/app/repositories/knowledge_repository.py index e3832214..aa4dd549 100644 --- a/api/app/repositories/knowledge_repository.py +++ b/api/app/repositories/knowledge_repository.py @@ -111,6 +111,20 @@ def get_knowledge_by_id(db: Session, knowledge_id: uuid.UUID) -> Knowledge | Non raise +def get_knowledges_by_parent_id(db: Session, parent_id: uuid.UUID) -> list[Knowledge]: + db_logger.debug(f"Query knowledge bases based on parent ID: parent_id={parent_id}") + try: + knowledges = db.query(Knowledge).filter(Knowledge.parent_id == parent_id).all() + if knowledges: + db_logger.debug(f"Knowledge bases query successful: count={len(knowledges)} (parent_id: {parent_id})") + else: + db_logger.debug(f"No knowledge bases found for given parent: parent_id={parent_id}") + return knowledges + except Exception as e: + db_logger.error(f"Failed to query the knowledge bases based on parent ID: parent_id={parent_id} - {str(e)}") + raise + + def get_knowledge_by_name(db: Session, name: str, workspace_id: uuid.UUID) -> Knowledge | None: db_logger.debug(f"Query knowledge base based on name and workspace_id: name={name}, workspace_id={workspace_id}") diff --git a/api/app/repositories/memory_perceptual_repository.py b/api/app/repositories/memory_perceptual_repository.py index 9fa9536e..9077af03 100644 --- a/api/app/repositories/memory_perceptual_repository.py +++ b/api/app/repositories/memory_perceptual_repository.py @@ -2,7 +2,7 @@ import uuid from datetime import datetime from typing import List, Tuple, Optional -from sqlalchemy import and_, desc +from sqlalchemy import and_, desc, select from sqlalchemy.orm import 
Session from app.core.logging_config import get_db_logger @@ -127,6 +127,17 @@ class MemoryPerceptualRepository: db_logger.error(f"Failed to query perceptual memory timeline: end_user_id={end_user_id} - {str(e)}") raise + def get_by_url( + self, + file_url: str + ) -> list[MemoryPerceptualModel]: + try: + stmt = select(MemoryPerceptualModel).where(MemoryPerceptualModel.file_path == file_url) + return list(self.db.execute(stmt).scalars()) + except Exception: + db_logger.error(f"Failed to query perceptual memories by file_url: file_url={file_url}") + raise + def get_by_type( self, end_user_id: uuid.UUID, diff --git a/api/app/repositories/neo4j/community_repository.py b/api/app/repositories/neo4j/community_repository.py new file mode 100644 index 00000000..f9c4bd92 --- /dev/null +++ b/api/app/repositories/neo4j/community_repository.py @@ -0,0 +1,282 @@ +"""Community 节点仓库 + +管理 Neo4j 中 Community 节点及 BELONGS_TO_COMMUNITY 边的 CRUD 操作。 +""" + +import logging +from typing import Dict, List, Optional + +from app.repositories.neo4j.neo4j_connector import Neo4jConnector +from app.repositories.neo4j.cypher_queries import ( + COMMUNITY_NODE_UPSERT, + ENTITY_JOIN_COMMUNITY, + ENTITY_LEAVE_ALL_COMMUNITIES, + GET_ENTITY_NEIGHBORS, + GET_ALL_ENTITIES_FOR_USER, + GET_ENTITY_COUNT_FOR_USER, + GET_ALL_ENTITY_IDS_FOR_USER, + GET_ENTITIES_PAGE, + GET_COMMUNITY_MEMBERS, + GET_ALL_COMMUNITY_MEMBERS_BATCH, + GET_ALL_ENTITY_NEIGHBORS_BATCH, + GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS, + CHECK_USER_HAS_COMMUNITIES, + UPDATE_COMMUNITY_MEMBER_COUNT, + UPDATE_COMMUNITY_METADATA, + BATCH_UPDATE_COMMUNITY_METADATA, +) + +logger = logging.getLogger(__name__) + + +class CommunityRepository: + def __init__(self, connector: Neo4jConnector): + self.connector = connector + + async def upsert_community( + self, community_id: str, end_user_id: str, member_count: int = 0 + ) -> Optional[str]: + """创建或更新 Community 节点,返回 community_id。""" + try: + result = await self.connector.execute_query( + COMMUNITY_NODE_UPSERT, 
+ community_id=community_id, + end_user_id=end_user_id, + member_count=member_count, + ) + return result[0]["community_id"] if result else None + except Exception as e: + logger.error(f"upsert_community failed: {e}") + return None + + async def assign_entity_to_community( + self, entity_id: str, community_id: str, end_user_id: str + ) -> bool: + """将实体关联到社区(先解除旧关联,再建立新关联)。""" + try: + await self.connector.execute_query( + ENTITY_LEAVE_ALL_COMMUNITIES, + entity_id=entity_id, + end_user_id=end_user_id, + ) + result = await self.connector.execute_query( + ENTITY_JOIN_COMMUNITY, + entity_id=entity_id, + community_id=community_id, + end_user_id=end_user_id, + ) + return bool(result) + except Exception as e: + logger.error(f"assign_entity_to_community failed: {e}") + return False + + async def get_entity_neighbors( + self, entity_id: str, end_user_id: str + ) -> List[Dict]: + """查询实体的直接邻居及其社区归属。""" + try: + return await self.connector.execute_query( + GET_ENTITY_NEIGHBORS, + entity_id=entity_id, + end_user_id=end_user_id, + ) + except Exception as e: + logger.error(f"get_entity_neighbors failed: {e}") + return [] + + async def get_all_entity_neighbors_batch( + self, end_user_id: str + ) -> Dict[str, List[Dict]]: + """一次性批量拉取该用户下所有实体的邻居,返回 {entity_id: [neighbors]} 字典。 + 用于全量聚类预加载,避免每个实体单独查询。""" + try: + rows = await self.connector.execute_query( + GET_ALL_ENTITY_NEIGHBORS_BATCH, + end_user_id=end_user_id, + ) + result: Dict[str, List[Dict]] = {} + for row in rows: + eid = row["entity_id"] + neighbor = {k: v for k, v in row.items() if k != "entity_id"} + result.setdefault(eid, []).append(neighbor) + return result + except Exception as e: + logger.error(f"get_all_entity_neighbors_batch failed: {e}") + return {} + + async def get_all_entities(self, end_user_id: str) -> List[Dict]: + """拉取某用户下所有实体及其当前社区归属。""" + try: + return await self.connector.execute_query( + GET_ALL_ENTITIES_FOR_USER, + end_user_id=end_user_id, + ) + except Exception as e: + 
logger.error(f"get_all_entities failed: {e}") + return [] + + async def get_entity_count(self, end_user_id: str) -> int: + """仅返回用户实体总数,不加载实体数据。""" + try: + result = await self.connector.execute_query( + GET_ENTITY_COUNT_FOR_USER, + end_user_id=end_user_id, + ) + return result[0]["entity_count"] if result else 0 + except Exception as e: + logger.error(f"get_entity_count failed: {e}") + return 0 + + async def get_all_entity_ids(self, end_user_id: str) -> List[str]: + """仅返回用户所有实体 ID 列表,不加载 embedding 等大字段。""" + try: + result = await self.connector.execute_query( + GET_ALL_ENTITY_IDS_FOR_USER, + end_user_id=end_user_id, + ) + return [r["id"] for r in result] + except Exception as e: + logger.error(f"get_all_entity_ids failed: {e}") + return [] + + async def get_entities_page( + self, end_user_id: str, skip: int, limit: int + ) -> List[Dict]: + """分页拉取实体,用于全量聚类分批处理。""" + try: + return await self.connector.execute_query( + GET_ENTITIES_PAGE, + end_user_id=end_user_id, + skip=skip, + limit=limit, + ) + except Exception as e: + logger.error(f"get_entities_page failed: {e}") + return [] + + async def get_entity_neighbors_for_ids( + self, entity_ids: List[str], end_user_id: str + ) -> Dict[str, List[Dict]]: + """批量拉取指定实体列表的邻居,返回 {entity_id: [neighbors]}。""" + try: + rows = await self.connector.execute_query( + GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS, + entity_ids=entity_ids, + end_user_id=end_user_id, + ) + result: Dict[str, List[Dict]] = {} + for row in rows: + eid = row["entity_id"] + neighbor = {k: v for k, v in row.items() if k != "entity_id"} + result.setdefault(eid, []).append(neighbor) + return result + except Exception as e: + logger.error(f"get_entity_neighbors_for_ids failed: {e}") + return {} + + async def get_community_members( + self, community_id: str, end_user_id: str + ) -> List[Dict]: + """查询社区成员列表。""" + try: + return await self.connector.execute_query( + GET_COMMUNITY_MEMBERS, + community_id=community_id, + end_user_id=end_user_id, + ) + except Exception as e: 
+ logger.error(f"get_community_members failed: {e}") + return [] + + async def get_all_community_members_batch( + self, community_ids: List[str], end_user_id: str + ) -> Dict[str, List[Dict]]: + """批量查询多个社区的成员,返回 {community_id: [members]} 字典。""" + try: + rows = await self.connector.execute_query( + GET_ALL_COMMUNITY_MEMBERS_BATCH, + community_ids=community_ids, + end_user_id=end_user_id, + ) + result: Dict[str, List[Dict]] = {} + for row in rows: + cid = row["community_id"] + result.setdefault(cid, []).append(row) + return result + except Exception as e: + logger.error(f"get_all_community_members_batch failed: {e}") + return {} + + async def has_communities(self, end_user_id: str) -> bool: + """检查该用户是否已有 Community 节点(用于判断全量 vs 增量)。""" + try: + result = await self.connector.execute_query( + CHECK_USER_HAS_COMMUNITIES, + end_user_id=end_user_id, + ) + return result[0]["community_count"] > 0 if result else False + except Exception as e: + logger.error(f"has_communities failed: {e}") + return False + + async def refresh_member_count( + self, community_id: str, end_user_id: str + ) -> int: + """重新统计并更新社区成员数,返回最新数量。""" + try: + result = await self.connector.execute_query( + UPDATE_COMMUNITY_MEMBER_COUNT, + community_id=community_id, + end_user_id=end_user_id, + ) + return result[0]["member_count"] if result else 0 + except Exception as e: + logger.error(f"refresh_member_count failed: {e}") + return 0 + + async def update_community_metadata( + self, + community_id: str, + end_user_id: str, + name: str, + summary: str, + core_entities: List[str], + summary_embedding: Optional[List[float]] = None, + ) -> bool: + """更新社区的名称、摘要、核心实体列表和摘要向量。""" + try: + result = await self.connector.execute_query( + UPDATE_COMMUNITY_METADATA, + community_id=community_id, + end_user_id=end_user_id, + name=name, + summary=summary, + core_entities=core_entities, + summary_embedding=summary_embedding, + ) + return bool(result) + except Exception as e: + logger.error(f"update_community_metadata 
failed: {e}") + return False + + async def batch_update_community_metadata( + self, + communities: List[Dict], + ) -> bool: + """批量更新多个社区的元数据。 + + Args: + communities: 每项包含 community_id, end_user_id, name, summary, + core_entities, summary_embedding + """ + if not communities: + return True + try: + await self.connector.execute_query( + BATCH_UPDATE_COMMUNITY_METADATA, + communities=communities, + ) + return True + except Exception as e: + logger.error(f"batch_update_community_metadata failed: {e}") + return False diff --git a/api/app/repositories/neo4j/create_indexes.py b/api/app/repositories/neo4j/create_indexes.py index 55dead1b..d9e94117 100644 --- a/api/app/repositories/neo4j/create_indexes.py +++ b/api/app/repositories/neo4j/create_indexes.py @@ -42,6 +42,13 @@ async def create_fulltext_indexes(): OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } """) print("✓ Created: summariesFulltext") + + # 创建 Community 索引 + await connector.execute_query(""" + CREATE FULLTEXT INDEX communitiesFulltext IF NOT EXISTS FOR (c:Community) ON EACH [c.name, c.summary] + OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } + """) + print("✓ Created: communitiesFulltext") print("\nFull-text indexes created successfully with BM25 support.") except Exception as e: @@ -112,6 +119,18 @@ async def create_vector_indexes(): }} """) print("✓ Created: summary_embedding_index") + + # Community summary embedding index + await connector.execute_query(""" + CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS + FOR (c:Community) + ON c.summary_embedding + OPTIONS {indexConfig: { + `vector.dimensions`: 1024, + `vector.similarity_function`: 'cosine' + }} + """) + print("✓ Created: community_summary_embedding_index") # Dialogue embedding index (optional) await connector.execute_query(""" @@ -124,6 +143,18 @@ async def create_vector_indexes(): }} """) print("✓ Created: dialogue_embedding_index") + + # Community summary embedding index + await connector.execute_query(""" + 
CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS + FOR (c:Community) + ON c.summary_embedding + OPTIONS {indexConfig: { + `vector.dimensions`: 1024, + `vector.similarity_function`: 'cosine' + }} + """) + print("✓ Created: community_summary_embedding_index") print("\nVector indexes created successfully!") print("\nExpected performance improvement:") diff --git a/api/app/repositories/neo4j/cypher_queries.py b/api/app/repositories/neo4j/cypher_queries.py index 651c513f..7b027ca9 100644 --- a/api/app/repositories/neo4j/cypher_queries.py +++ b/api/app/repositories/neo4j/cypher_queries.py @@ -1058,4 +1058,261 @@ Graph_Node_query = """ 3 AS priority LIMIT $limit - """ \ No newline at end of file + """ + + +# ============================================================ +# Community 节点 & BELONGS_TO_COMMUNITY 边 +# ============================================================ + +# ─── Community 聚类相关 Cypher 模板 ─────────────────────────────────────────── + +COMMUNITY_NODE_UPSERT = """ +MERGE (c:Community {community_id: $community_id}) +SET c.end_user_id = $end_user_id, + c.member_count = $member_count, + c.updated_at = datetime() +RETURN c.community_id AS community_id +""" + +ENTITY_JOIN_COMMUNITY = """ +MATCH (e:ExtractedEntity {id: $entity_id, end_user_id: $end_user_id}) +MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) +MERGE (e)-[:BELONGS_TO_COMMUNITY]->(c) +SET c.updated_at = datetime() +RETURN e.id AS entity_id, c.community_id AS community_id +""" + +ENTITY_LEAVE_ALL_COMMUNITIES = """ +MATCH (e:ExtractedEntity {id: $entity_id, end_user_id: $end_user_id}) +MATCH (e)-[r:BELONGS_TO_COMMUNITY]->(:Community) +DELETE r +""" + +GET_ENTITY_NEIGHBORS = """ +MATCH (e:ExtractedEntity {id: $entity_id, end_user_id: $end_user_id}) + +// 来源一:直接关系邻居(EXTRACTED_RELATIONSHIP 边) +OPTIONAL MATCH (e)-[:EXTRACTED_RELATIONSHIP]-(nb1:ExtractedEntity {end_user_id: $end_user_id}) + +// 来源二:同 Statement 共现邻居(REFERENCES_ENTITY 边) +OPTIONAL MATCH 
(s:Statement)-[:REFERENCES_ENTITY]->(e) +OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(nb2:ExtractedEntity {end_user_id: $end_user_id}) +WHERE nb2.id <> e.id + +WITH collect(DISTINCT nb1) + collect(DISTINCT nb2) AS all_neighbors +UNWIND all_neighbors AS nb +WITH nb WHERE nb IS NOT NULL +OPTIONAL MATCH (nb)-[:BELONGS_TO_COMMUNITY]->(c:Community) +RETURN DISTINCT + nb.id AS id, + nb.name AS name, + nb.name_embedding AS name_embedding, + nb.activation_value AS activation_value, + CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id +""" + +GET_ALL_ENTITIES_FOR_USER = """ +MATCH (e:ExtractedEntity {end_user_id: $end_user_id}) +OPTIONAL MATCH (e)-[:BELONGS_TO_COMMUNITY]->(c:Community) +RETURN e.id AS id, + e.name AS name, + e.name_embedding AS name_embedding, + e.activation_value AS activation_value, + CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id +""" + +GET_ENTITY_COUNT_FOR_USER = """ +MATCH (e:ExtractedEntity {end_user_id: $end_user_id}) +RETURN count(e) AS entity_count +""" + +GET_ALL_ENTITY_IDS_FOR_USER = """ +MATCH (e:ExtractedEntity {end_user_id: $end_user_id}) +RETURN e.id AS id +""" + +GET_COMMUNITY_MEMBERS = """ +MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community {community_id: $community_id}) +RETURN e.id AS id, e.name AS name, e.entity_type AS entity_type, + e.importance_score AS importance_score, e.activation_value AS activation_value, + e.name_embedding AS name_embedding, + e.aliases AS aliases, e.description AS description +ORDER BY coalesce(e.activation_value, 0) DESC +""" + +GET_ALL_COMMUNITY_MEMBERS_BATCH = """ +MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community) +RETURN c.community_id AS community_id, + e.id AS id, e.name AS name, e.entity_type AS entity_type, + e.importance_score AS importance_score, e.activation_value AS activation_value, + e.name_embedding AS name_embedding, + e.aliases AS aliases, e.description AS 
description +ORDER BY c.community_id, coalesce(e.activation_value, 0) DESC +""" + +CHECK_USER_HAS_COMMUNITIES = """ +MATCH (c:Community {end_user_id: $end_user_id}) +RETURN count(c) AS community_count +""" + +UPDATE_COMMUNITY_MEMBER_COUNT = """ +MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community {community_id: $community_id}) +WITH c, count(e) AS cnt +SET c.member_count = cnt +RETURN c.community_id AS community_id, cnt AS member_count +""" + +UPDATE_COMMUNITY_METADATA = """ +MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) +SET c.name = $name, + c.summary = $summary, + c.core_entities = $core_entities, + c.summary_embedding = $summary_embedding, + c.updated_at = datetime() +RETURN c.community_id AS community_id +""" + +BATCH_UPDATE_COMMUNITY_METADATA = """ +UNWIND $communities AS row +MATCH (c:Community {community_id: row.community_id, end_user_id: row.end_user_id}) +SET c.name = row.name, + c.summary = row.summary, + c.core_entities = row.core_entities, + c.summary_embedding = row.summary_embedding, + c.updated_at = datetime() +RETURN c.community_id AS community_id +""" + +GET_ENTITIES_PAGE = """ +MATCH (e:ExtractedEntity {end_user_id: $end_user_id}) +OPTIONAL MATCH (e)-[:BELONGS_TO_COMMUNITY]->(c:Community) +RETURN e.id AS id, + e.name AS name, + e.name_embedding AS name_embedding, + e.activation_value AS activation_value, + CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id +ORDER BY e.id +SKIP $skip LIMIT $limit +""" + +GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS = """ +// 批量拉取指定实体列表的邻居(用于分批全量聚类) +MATCH (e:ExtractedEntity {end_user_id: $end_user_id}) +WHERE e.id IN $entity_ids +OPTIONAL MATCH (e)-[:EXTRACTED_RELATIONSHIP]-(nb1:ExtractedEntity {end_user_id: $end_user_id}) +OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e) +OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(nb2:ExtractedEntity {end_user_id: $end_user_id}) +WHERE nb2.id <> e.id +WITH e, collect(DISTINCT nb1) + 
collect(DISTINCT nb2) AS all_neighbors +UNWIND all_neighbors AS nb +WITH e, nb WHERE nb IS NOT NULL +OPTIONAL MATCH (nb)-[:BELONGS_TO_COMMUNITY]->(c:Community) +RETURN DISTINCT + e.id AS entity_id, + nb.id AS id, + nb.name AS name, + nb.name_embedding AS name_embedding, + nb.activation_value AS activation_value, + CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id +""" + +GET_ALL_ENTITY_NEIGHBORS_BATCH = """ +// 批量拉取某用户下所有实体的邻居(用于全量聚类预加载) +MATCH (e:ExtractedEntity {end_user_id: $end_user_id}) + +// 来源一:直接关系邻居 +OPTIONAL MATCH (e)-[:EXTRACTED_RELATIONSHIP]-(nb1:ExtractedEntity {end_user_id: $end_user_id}) + +// 来源二:同 Statement 共现邻居 +OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e) +OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(nb2:ExtractedEntity {end_user_id: $end_user_id}) +WHERE nb2.id <> e.id + +WITH e, collect(DISTINCT nb1) + collect(DISTINCT nb2) AS all_neighbors +UNWIND all_neighbors AS nb +WITH e, nb WHERE nb IS NOT NULL +OPTIONAL MATCH (nb)-[:BELONGS_TO_COMMUNITY]->(c:Community) +RETURN DISTINCT + e.id AS entity_id, + nb.id AS id, + nb.name AS name, + nb.name_embedding AS name_embedding, + nb.activation_value AS activation_value, + CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id +""" + +GET_COMMUNITY_GRAPH_DATA = """ +MATCH (c:Community {end_user_id: $end_user_id}) +MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[b:BELONGS_TO_COMMUNITY]->(c) +OPTIONAL MATCH (e)-[r:EXTRACTED_RELATIONSHIP]-(e2:ExtractedEntity {end_user_id: $end_user_id}) +RETURN + elementId(c) AS c_id, + properties(c) AS c_props, + elementId(e) AS e_id, + properties(e) AS e_props, + elementId(b) AS b_id, + elementId(e2) AS e2_id, + properties(e2) AS e2_props, + elementId(r) AS r_id, + type(r) AS r_type, + properties(r) AS r_props, + startNode(r) = e AS r_from_e +""" + + +# Community keyword search: matches name or summary via fulltext index +SEARCH_COMMUNITIES_BY_KEYWORD = """ +CALL db.index.fulltext.queryNodes("communitiesFulltext", 
$q) YIELD node AS c, score +WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id) +RETURN c.community_id AS id, + c.name AS name, + c.summary AS content, + c.core_entities AS core_entities, + c.member_count AS member_count, + c.end_user_id AS end_user_id, + c.updated_at AS updated_at, + score +ORDER BY score DESC +LIMIT $limit +""" + +# Community 向量检索 ────────────────────────────────────────────────── +# Community embedding-based search: cosine similarity on Community.summary_embedding +COMMUNITY_EMBEDDING_SEARCH = """ +CALL db.index.vector.queryNodes('community_summary_embedding_index', $limit * 100, $embedding) +YIELD node AS c, score +WHERE c.summary_embedding IS NOT NULL + AND ($end_user_id IS NULL OR c.end_user_id = $end_user_id) +RETURN c.community_id AS id, + c.name AS name, + c.summary AS content, + c.core_entities AS core_entities, + c.member_count AS member_count, + c.end_user_id AS end_user_id, + c.updated_at AS updated_at, + score +ORDER BY score DESC +LIMIT $limit +""" + +# Community 展开检索 ────────────────────────────────────────────────── +# 命中社区后,拉取该社区所有成员实体关联的 Statement 节点(主题→细节两级检索) +EXPAND_COMMUNITY_STATEMENTS = """ +MATCH (c:Community {community_id: $community_id}) +MATCH (e:ExtractedEntity)-[:BELONGS_TO_COMMUNITY]->(c) +MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e) +WHERE s.end_user_id = $end_user_id +RETURN s.statement AS statement, + s.id AS id, + s.end_user_id AS end_user_id, + s.created_at AS created_at, + s.valid_at AS valid_at, + s.invalid_at AS invalid_at, + COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value, + COALESCE(s.importance_score, 0.5) AS importance_score, + e.name AS source_entity, + c.name AS community_name +ORDER BY COALESCE(s.activation_value, 0) DESC +LIMIT $limit +""" diff --git a/api/app/repositories/neo4j/graph_saver.py b/api/app/repositories/neo4j/graph_saver.py index 526d16ec..29e337f1 100644 --- a/api/app/repositories/neo4j/graph_saver.py +++ b/api/app/repositories/neo4j/graph_saver.py @@ 
-1,4 +1,5 @@ -from typing import List +import asyncio +from typing import List, Optional # 使用新的仓储层 from app.repositories.neo4j.neo4j_connector import Neo4jConnector @@ -155,10 +156,13 @@ async def save_dialog_and_statements_to_neo4j( entity_edges: List[EntityEntityEdge], statement_chunk_edges: List[StatementChunkEdge], statement_entity_edges: List[StatementEntityEdge], - connector: Neo4jConnector + connector: Neo4jConnector, ) -> bool: """Save dialogue nodes, chunk nodes, statement nodes, entities, and all relationships to Neo4j using graph models. + 只负责数据写入,不触发聚类。聚类由调用方在写入成功后通过 + schedule_clustering_after_write() 显式触发。 + Args: dialogue_nodes: List of DialogueNode objects to save chunk_nodes: List of ChunkNode objects to save @@ -288,6 +292,7 @@ async def save_dialog_and_statements_to_neo4j( } logger.info("Transaction completed. Summary: %s", summary) logger.debug("Full transaction results: %r", results) + return True except Exception as e: @@ -295,3 +300,57 @@ async def save_dialog_and_statements_to_neo4j( print(f"Neo4j integration error: {e}") print("Continuing without database storage...") return False + + +def schedule_clustering_after_write( + entity_nodes: List, + config_id: Optional[str] = None, + llm_model_id: Optional[str] = None, + embedding_model_id: Optional[str] = None, +) -> None: + """ + 写入 Neo4j 成功后,调度后台聚类任务。 + + 可通过环境变量 CLUSTERING_ENABLED=false 禁用(用于基准测试对比)。 + 使用 asyncio.create_task 异步触发,不阻塞写入响应。 + """ + if not entity_nodes: + return + + clustering_enabled = os.getenv("CLUSTERING_ENABLED", "true").lower() != "false" + if not clustering_enabled: + logger.info("[Clustering] 聚类已禁用(CLUSTERING_ENABLED=false),跳过聚类触发") + return + + end_user_id = entity_nodes[0].end_user_id + new_entity_ids = [e.id for e in entity_nodes] + logger.info(f"[Clustering] 准备触发聚类,实体数: {len(new_entity_ids)}, end_user_id: {end_user_id}") + asyncio.create_task(_trigger_clustering(new_entity_ids, end_user_id, config_id=config_id, llm_model_id=llm_model_id, 
embedding_model_id=embedding_model_id)) + + +async def _trigger_clustering( + new_entity_ids: List[str], + end_user_id: str, + config_id: Optional[str] = None, + llm_model_id: Optional[str] = None, + embedding_model_id: Optional[str] = None, +) -> None: + """ + 聚类触发函数,自动判断全量初始化还是增量更新。 + """ + connector = None + try: + from app.core.memory.storage_services.clustering_engine import LabelPropagationEngine + logger.info(f"[Clustering] 开始聚类,end_user_id={end_user_id}, 实体数={len(new_entity_ids)}") + connector = Neo4jConnector() + engine = LabelPropagationEngine(connector, config_id=config_id, llm_model_id=llm_model_id, embedding_model_id=embedding_model_id) + await engine.run(end_user_id=end_user_id, new_entity_ids=new_entity_ids) + logger.info(f"[Clustering] 聚类完成,end_user_id={end_user_id}") + except Exception as e: + logger.error(f"[Clustering] 聚类触发失败: {e}", exc_info=True) + finally: + if connector: + try: + await connector.close() + except Exception: + pass diff --git a/api/app/repositories/neo4j/graph_search.py b/api/app/repositories/neo4j/graph_search.py index e8f52535..d3aabd32 100644 --- a/api/app/repositories/neo4j/graph_search.py +++ b/api/app/repositories/neo4j/graph_search.py @@ -4,10 +4,13 @@ from typing import Any, Dict, List, Optional from app.repositories.neo4j.cypher_queries import ( CHUNK_EMBEDDING_SEARCH, + COMMUNITY_EMBEDDING_SEARCH, ENTITY_EMBEDDING_SEARCH, + EXPAND_COMMUNITY_STATEMENTS, MEMORY_SUMMARY_EMBEDDING_SEARCH, SEARCH_CHUNK_BY_CHUNK_ID, SEARCH_CHUNKS_BY_CONTENT, + SEARCH_COMMUNITIES_BY_KEYWORD, SEARCH_DIALOGUE_BY_DIALOG_ID, SEARCH_ENTITIES_BY_NAME, SEARCH_MEMORY_SUMMARIES_BY_KEYWORD, @@ -285,6 +288,15 @@ async def search_graph( limit=limit, )) task_keys.append("summaries") + + if "communities" in include: + tasks.append(connector.execute_query( + SEARCH_COMMUNITIES_BY_KEYWORD, + q=q, + end_user_id=end_user_id, + limit=limit, + )) + task_keys.append("communities") # Execute all queries in parallel task_results = await asyncio.gather(*tasks, 
return_exceptions=True) @@ -293,6 +305,7 @@ async def search_graph( results = {} for key, result in zip(task_keys, task_results): if isinstance(result, Exception): + logger.warning(f"search_graph: {key} 关键词查询异常: {result}") results[key] = [] else: results[key] = result @@ -349,7 +362,11 @@ async def search_graph_by_embedding( print(f"[PERF] Embedding generation took: {embed_time:.4f}s") if not embeddings or not embeddings[0]: - return {"statements": [], "chunks": [], "entities": [], "summaries": []} + logger.warning( + f"search_graph_by_embedding: embedding 生成失败或为空," + f"query='{query_text[:50]}', end_user_id={end_user_id},向量检索跳过" + ) + return {"statements": [], "chunks": [], "entities": [], "summaries": [], "communities": []} embedding = embeddings[0] # Prepare tasks for parallel execution @@ -396,6 +413,16 @@ async def search_graph_by_embedding( )) task_keys.append("summaries") + # Communities (向量语义匹配) + if "communities" in include: + tasks.append(connector.execute_query( + COMMUNITY_EMBEDDING_SEARCH, + embedding=embedding, + end_user_id=end_user_id, + limit=limit, + )) + task_keys.append("communities") + # Execute all queries in parallel query_start = time.time() task_results = await asyncio.gather(*tasks, return_exceptions=True) @@ -408,10 +435,12 @@ async def search_graph_by_embedding( "chunks": [], "entities": [], "summaries": [], + "communities": [], } for key, result in zip(task_keys, task_results): if isinstance(result, Exception): + logger.warning(f"search_graph_by_embedding: {key} 向量查询异常: {result}") results[key] = [] else: results[key] = result @@ -661,6 +690,62 @@ async def search_graph_by_chunk_id( return {"chunks": chunks} +async def search_graph_community_expand( + connector: Neo4jConnector, + community_ids: List[str], + end_user_id: str, + limit: int = 10, +) -> Dict[str, List[Dict[str, Any]]]: + """ + 三期:社区展开检索 —— 主题 → 细节两级检索。 + + 命中 Community 节点后,沿 BELONGS_TO_COMMUNITY 关系拉取成员实体, + 再沿 REFERENCES_ENTITY 关系拉取关联的 Statement 节点, + 按 activation_value 
降序返回,实现"主题摘要 → 具体记忆"的深度召回。 + + Args: + connector: Neo4j 连接器 + community_ids: 已命中的社区 ID 列表 + end_user_id: 用户 ID,用于数据隔离 + limit: 每个社区最多返回的 Statement 数量 + + Returns: + {"expanded_statements": [Statement 列表,含 community_name / source_entity 字段]} + """ + if not community_ids or not end_user_id: + return {"expanded_statements": []} + + tasks = [ + connector.execute_query( + EXPAND_COMMUNITY_STATEMENTS, + community_id=cid, + end_user_id=end_user_id, + limit=limit, + ) + for cid in community_ids + ] + + task_results = await asyncio.gather(*tasks, return_exceptions=True) + + expanded: List[Dict[str, Any]] = [] + for cid, result in zip(community_ids, task_results): + if isinstance(result, Exception): + logger.warning(f"社区展开检索失败 community_id={cid}: {result}") + else: + expanded.extend(result) + + # 按 activation_value 全局排序后去重 + from app.core.memory.src.search import _deduplicate_results + expanded.sort( + key=lambda x: float(x.get("activation_value") or 0), + reverse=True, + ) + expanded = _deduplicate_results(expanded) + + logger.info(f"社区展开检索完成: community_ids={community_ids}, 展开 statements={len(expanded)}") + return {"expanded_statements": expanded} + + async def search_graph_by_created_at( connector: Neo4jConnector, end_user_id: Optional[str] = None, diff --git a/api/app/repositories/tool_repository.py b/api/app/repositories/tool_repository.py index 257910c3..1a9b0b87 100644 --- a/api/app/repositories/tool_repository.py +++ b/api/app/repositories/tool_repository.py @@ -27,7 +27,7 @@ class ToolRepository: from app.models.app_model import App from app.models.workflow_model import WorkflowConfig from app.models.workspace_model import Workspace - + result = db.query(Workspace.tenant_id).join( App, App.workspace_id == Workspace.id ).join( @@ -35,7 +35,7 @@ class ToolRepository: ).filter( WorkflowConfig.id == workflow_id ).first() - + return result[0] if result else None @staticmethod @@ -67,18 +67,19 @@ class ToolRepository: @staticmethod def find_by_tenant( - db: Session, - 
tenant_id: uuid.UUID, - name: Optional[str] = None, - tool_type: Optional[ToolType] = None, - status: Optional[ToolStatus] = None, - is_enabled: Optional[bool] = None + db: Session, + tenant_id: uuid.UUID, + name: Optional[str] = None, + tool_type: Optional[ToolType] = None, + status: Optional[ToolStatus] = None, + is_enabled: Optional[bool] = None ) -> List[ToolConfig]: - """根据租户查找工具""" + """根据租户查找工具(只返回未删除的)""" query = db.query(ToolConfig).filter( - ToolConfig.tenant_id == tenant_id + ToolConfig.tenant_id == tenant_id, + ToolConfig.is_active.is_(True) ) - + if name: query = query.filter(ToolConfig.name.ilike(f"%{name}%")) if tool_type: @@ -91,8 +92,17 @@ class ToolRepository: return query.all() @staticmethod - def find_by_id_and_tenant(db:Session, tool_id: uuid.UUID, tenant_id: uuid.UUID) -> Optional[ToolConfig]: - """根据ID和租户查找工具""" + def find_by_id_and_tenant(db: Session, tool_id: uuid.UUID, tenant_id: uuid.UUID) -> Optional[ToolConfig]: + """根据ID和租户查找工具(只返回未删除的)""" + return db.query(ToolConfig).filter( + ToolConfig.id == tool_id, + ToolConfig.tenant_id == tenant_id, + ToolConfig.is_active.is_(True) + ).first() + + @staticmethod + def find_by_id_and_tenant_all(db: Session, tool_id: uuid.UUID, tenant_id: uuid.UUID) -> Optional[ToolConfig]: + """根据ID和租户查找工具(返回所有工具包括删除的)""" return db.query(ToolConfig).filter( ToolConfig.id == tool_id, ToolConfig.tenant_id == tenant_id @@ -100,29 +110,26 @@ class ToolRepository: @staticmethod def count_by_tenant(db: Session, tenant_id: uuid.UUID) -> int: - """统计租户工具数量""" + """统计租户工具数量(只统计未删除的)""" return db.query(ToolConfig).filter( - ToolConfig.tenant_id == tenant_id + ToolConfig.tenant_id == tenant_id, + ToolConfig.is_active.is_(True) ).count() @staticmethod def get_status_statistics(db: Session, tenant_id: uuid.UUID) -> List[tuple]: """获取状态统计""" - return db.query( - ToolConfig.status, - func.count(ToolConfig.id).label('count') - ).filter( - ToolConfig.tenant_id == tenant_id + return db.query(ToolConfig.status, 
func.count(ToolConfig.id).label('count')).filter( + ToolConfig.tenant_id == tenant_id, + ToolConfig.is_active.is_(True) ).group_by(ToolConfig.status).all() @staticmethod def get_type_statistics(db: Session, tenant_id: uuid.UUID) -> List[tuple]: """获取类型统计""" - return db.query( - ToolConfig.tool_type, - func.count(ToolConfig.id).label('count') - ).filter( - ToolConfig.tenant_id == tenant_id + return db.query(ToolConfig.tool_type, func.count(ToolConfig.id).label('count')).filter( + ToolConfig.tenant_id == tenant_id, + ToolConfig.is_active.is_(True) ).group_by(ToolConfig.tool_type).all() @staticmethod @@ -130,6 +137,7 @@ class ToolRepository: """统计租户启用的工具数量""" return db.query(ToolConfig).filter( ToolConfig.tenant_id == tenant_id, + ToolConfig.is_active.is_(True), ToolConfig.is_enabled == True ).count() @@ -138,7 +146,8 @@ class ToolRepository: """检查租户是否已有内置工具""" return db.query(ToolConfig).filter( ToolConfig.tenant_id == tenant_id, - ToolConfig.tool_type == ToolType.BUILTIN.value + ToolConfig.tool_type == ToolType.BUILTIN.value, + ToolConfig.is_active.is_(True) ).count() > 0 @@ -194,10 +203,10 @@ class ToolExecutionRepository: @staticmethod def find_by_tool_and_tenant( - db: Session, - tool_id: uuid.UUID, - tenant_id: uuid.UUID, - limit: int = 100 + db: Session, + tool_id: uuid.UUID, + tenant_id: uuid.UUID, + limit: int = 100 ) -> List[ToolExecution]: """根据工具和租户查找执行记录""" return db.query(ToolExecution).join( @@ -205,4 +214,4 @@ class ToolExecutionRepository: ).filter( ToolConfig.id == tool_id, ToolConfig.tenant_id == tenant_id - ).order_by(ToolExecution.started_at.desc()).limit(limit).all() \ No newline at end of file + ).order_by(ToolExecution.started_at.desc()).limit(limit).all() diff --git a/api/app/repositories/workspace_repository.py b/api/app/repositories/workspace_repository.py index 87b0e20f..68dbf13c 100644 --- a/api/app/repositories/workspace_repository.py +++ b/api/app/repositories/workspace_repository.py @@ -1,10 +1,13 @@ -from sqlalchemy.orm import Session, 
joinedload -from app.models.user_model import User -from typing import List, Optional import uuid -from app.models.workspace_model import Workspace, WorkspaceMember, WorkspaceRole -from app.schemas.workspace_schema import WorkspaceCreate, WorkspaceUpdate +from typing import List, Optional + +from sqlalchemy.orm import Session, joinedload +from sqlalchemy import select + from app.core.logging_config import get_db_logger +from app.models.user_model import User +from app.models.workspace_model import Workspace, WorkspaceMember, WorkspaceRole +from app.schemas.workspace_schema import WorkspaceCreate # 获取数据库专用日志器 db_logger = get_db_logger() @@ -19,7 +22,7 @@ class WorkspaceRepository: def create_workspace(self, workspace_data: WorkspaceCreate, tenant_id: uuid.UUID) -> Workspace: """创建工作空间""" db_logger.debug(f"创建工作空间记录: name={workspace_data.name}, tenant_id={tenant_id}") - + try: db_workspace = Workspace( name=workspace_data.name, @@ -34,7 +37,8 @@ class WorkspaceRepository: ) self.db.add(db_workspace) self.db.flush() - db_logger.info(f"工作空间记录创建成功: {workspace_data.name} (ID: {db_workspace.id}), storage_type: {workspace_data.storage_type}") + db_logger.info( + f"工作空间记录创建成功: {workspace_data.name} (ID: {db_workspace.id}), storage_type: {workspace_data.storage_type}") return db_workspace except Exception as e: db_logger.error(f"创建工作空间记录失败: name={workspace_data.name} - {str(e)}") @@ -43,7 +47,7 @@ class WorkspaceRepository: def get_workspace_by_id(self, workspace_id: uuid.UUID) -> Optional[Workspace]: """根据ID获取工作空间""" db_logger.debug(f"根据ID查询工作空间: workspace_id={workspace_id}") - + try: workspace = self.db.query(Workspace).filter(Workspace.id == workspace_id).first() if workspace: @@ -65,7 +69,7 @@ class WorkspaceRepository: 包含 llm, embedding, rerank 的字典,如果工作空间不存在则返回 None """ db_logger.debug(f"查询工作空间模型配置: workspace_id={workspace_id}") - + try: workspace = self.db.query(Workspace).filter(Workspace.id == workspace_id).first() if workspace: @@ -89,7 +93,7 @@ class 
WorkspaceRepository: def get_workspaces_by_user(self, user_id: uuid.UUID) -> List[Workspace]: """获取用户参与的所有工作空间(包括用户创建的和作为成员的)""" db_logger.debug(f"查询用户参与的工作空间: user_id={user_id}") - + try: # 首先获取用户信息以获取 tenant_id from app.models.user_model import User @@ -97,7 +101,7 @@ class WorkspaceRepository: if not user: db_logger.warning(f"用户不存在: user_id={user_id}") return [] - + if user.is_superuser: # 超级用户获取对应tenantid所有工作空间 workspaces = ( @@ -109,7 +113,7 @@ class WorkspaceRepository: ) db_logger.debug(f"超用户查询所有工作空间: user_id={user_id}, 数量={len(workspaces)}") return workspaces - + # 获取用户作为成员的工作空间 member_workspaces = ( self.db.query(Workspace) @@ -120,7 +124,7 @@ class WorkspaceRepository: .order_by(Workspace.updated_at.desc()) .all() ) - + db_logger.debug(f"用户工作空间查询成功: user_id={user_id}, 数量={len(member_workspaces)}") return member_workspaces except Exception as e: @@ -130,7 +134,7 @@ class WorkspaceRepository: def get_workspaces_by_tenant(self, tenant_id: uuid.UUID) -> List[Workspace]: """获取租户的所有工作空间""" db_logger.debug(f"查询租户的工作空间: tenant_id={tenant_id}") - + try: workspaces = ( self.db.query(Workspace) @@ -144,14 +148,32 @@ class WorkspaceRepository: db_logger.error(f"查询租户工作空间失败: tenant_id={tenant_id} - {str(e)}") raise - def add_member(self, workspace_id: uuid.UUID, user_id: uuid.UUID, role: WorkspaceRole = WorkspaceRole.member) -> WorkspaceMember: + def get_workspaces_by_name(self, tenant_id: uuid.UUID, workspace_name: str) -> List[Workspace]: + try: + stmt = ( + select(Workspace) + .where( + Workspace.tenant_id == tenant_id, + Workspace.name == workspace_name, + Workspace.is_active.is_(True) + ) + ) + + workspaces = self.db.execute(stmt).scalars().all() + return list(workspaces) + except Exception as e: + db_logger.error(f"查询工作空间失败: workspace_name={workspace_name} - {str(e)}") + raise + + def add_member(self, workspace_id: uuid.UUID, user_id: uuid.UUID, + role: WorkspaceRole = WorkspaceRole.member) -> WorkspaceMember: """添加工作空间成员""" db_logger.debug(f"添加工作空间成员: 
user_id={user_id}, workspace_id={workspace_id}, role={role}") - + try: db_member = WorkspaceMember( - user_id=user_id, - workspace_id=workspace_id, + user_id=user_id, + workspace_id=workspace_id, role=role ) self.db.add(db_member) @@ -165,7 +187,7 @@ class WorkspaceRepository: def get_member(self, user_id: uuid.UUID, workspace_id: uuid.UUID) -> Optional[WorkspaceMember]: """获取工作空间成员""" db_logger.debug(f"查询工作空间成员: user_id={user_id}, workspace_id={workspace_id}") - + try: member = self.db.query(WorkspaceMember).filter( WorkspaceMember.user_id == user_id, @@ -173,7 +195,8 @@ class WorkspaceRepository: WorkspaceMember.is_active.is_(True), ).first() if member: - db_logger.debug(f"工作空间成员查询成功: user_id={user_id}, workspace_id={workspace_id}, role={member.role}") + db_logger.debug( + f"工作空间成员查询成功: user_id={user_id}, workspace_id={workspace_id}, role={member.role}") else: db_logger.debug(f"工作空间成员不存在: user_id={user_id}, workspace_id={workspace_id}") return member @@ -199,7 +222,7 @@ class WorkspaceRepository: except Exception as e: db_logger.error(f"查询成员列表失败: workspace_id={workspace_id} - {str(e)}") raise - + def get_member_by_id(self, member_id: uuid.UUID) -> WorkspaceMember: """按成员ID获取工作空间成员,并预加载 user 与 workspace 关系""" db_logger.debug(f"查询成员的工作空间: member_id={member_id}") @@ -214,7 +237,8 @@ class WorkspaceRepository: .first() ) if member: - db_logger.debug(f"成员查询成功: member_id={member_id}, workspace_id={member.workspace_id}, role={member.role}") + db_logger.debug( + f"成员查询成功: member_id={member_id}, workspace_id={member.workspace_id}, role={member.role}") else: db_logger.debug(f"成员不存在: member_id={member_id}") return member @@ -222,7 +246,8 @@ class WorkspaceRepository: db_logger.error(f"查询成员列表失败: member_id={member_id} - {str(e)}") raise - def update_member_role(self, workspace_id: uuid.UUID, user_id: uuid.UUID, role: WorkspaceRole) -> Optional[WorkspaceMember]: + def update_member_role(self, workspace_id: uuid.UUID, user_id: uuid.UUID, role: WorkspaceRole) -> Optional[ + 
WorkspaceMember]: try: member = self.db.query(WorkspaceMember).filter( WorkspaceMember.workspace_id == workspace_id, @@ -255,7 +280,7 @@ class WorkspaceRepository: except Exception as e: db_logger.error(f"删除成员失败: workspace_id={workspace_id}, user_id={user_id} - {str(e)}") raise - + def delete_member_by_id(self, member_id: uuid.UUID) -> Optional[WorkspaceMember]: try: member = self.db.query(WorkspaceMember).filter( @@ -271,7 +296,7 @@ class WorkspaceRepository: except Exception as e: db_logger.error(f"删除成员失败: id={member_id} - {str(e)}") raise - + def update_member_role_by_id(self, id: uuid.UUID, role: WorkspaceRole) -> Optional[WorkspaceMember]: try: member = self.db.query(WorkspaceMember).filter( @@ -288,12 +313,18 @@ class WorkspaceRepository: db_logger.error(f"更新成员角色失败: id={id} - {str(e)}") raise + # 保持向后兼容的函数 def get_workspace_by_id(db: Session, workspace_id: uuid.UUID) -> Workspace | None: repo = WorkspaceRepository(db) return repo.get_workspace_by_id(workspace_id) +def get_workspaces_by_name(db: Session, tenant_id: uuid.UUID, name: str) -> List[Workspace]: + repo = WorkspaceRepository(db) + return repo.get_workspaces_by_name(tenant_id, name) + + def get_workspaces_by_user(db: Session, user_id: uuid.UUID) -> List[Workspace]: repo = WorkspaceRepository(db) return repo.get_workspaces_by_user(user_id) @@ -315,7 +346,7 @@ def create_workspace(db: Session, workspace: WorkspaceCreate, tenant_id: uuid.UU def add_member_to_workspace( - db: Session, user_id: uuid.UUID, workspace_id: uuid.UUID, role: WorkspaceRole + db: Session, user_id: uuid.UUID, workspace_id: uuid.UUID, role: WorkspaceRole ) -> WorkspaceMember: repo = WorkspaceRepository(db) return repo.add_member(workspace_id, user_id, role) @@ -325,39 +356,43 @@ def get_members_by_workspace(db: Session, workspace_id: uuid.UUID) -> List[Works repo = WorkspaceRepository(db) return repo.get_members_by_workspace(workspace_id) + def get_member_by_id(db: Session, member_id: uuid.UUID) -> WorkspaceMember | None: repo = 
WorkspaceRepository(db) return repo.get_member_by_id(member_id) + def update_member_role_in_workspace( - db: Session, - user_id: uuid.UUID, - workspace_id: uuid.UUID, - role: WorkspaceRole, + db: Session, + user_id: uuid.UUID, + workspace_id: uuid.UUID, + role: WorkspaceRole, ) -> Optional[WorkspaceMember]: repo = WorkspaceRepository(db) return repo.update_member_role(workspace_id, user_id, role) + def remove_member_from_workspace( - db: Session, - user_id: uuid.UUID, - workspace_id: uuid.UUID, + db: Session, + user_id: uuid.UUID, + workspace_id: uuid.UUID, ) -> Optional[WorkspaceMember]: repo = WorkspaceRepository(db) return repo.deactivate_member(workspace_id, user_id) + def remove_member_from_workspace_by_id( - db: Session, - member_id: uuid.UUID, + db: Session, + member_id: uuid.UUID, ) -> Optional[WorkspaceMember]: repo = WorkspaceRepository(db) return repo.delete_member_by_id(member_id) def update_member_role_by_id( - db: Session, - id: uuid.UUID, - role: WorkspaceRole, + db: Session, + id: uuid.UUID, + role: WorkspaceRole, ) -> Optional[WorkspaceMember]: repo = WorkspaceRepository(db) return repo.update_member_role_by_id(id, role) diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index f073a200..5238b978 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -45,11 +45,19 @@ class FileInput(BaseModel): url: Optional[str] = Field(None, description="远程URL(remote_url时必填)") file_type: Optional[str] = Field(None, description="具体文件格式(如image/jpg、audio/wav、document/docx、video/mp4)") + _content = None + def __init__(self, **data): if "type" in data: data['file_type'] = data['type'] super().__init__(**data) + def set_content(self, content: bytes): + self._content = content + + def get_content(self) -> bytes | None: + return self._content + @field_validator("type", mode="before") @classmethod def validate_type(cls, v): @@ -117,6 +125,85 @@ class SkillConfig(BaseModel): all_skills: Optional[bool] = Field(default=False, 
description="是否允许访问所有技能") +# ---------- App Features ---------- + +class FileUploadConfig(BaseModel): + """文件上传配置""" + enabled: bool = Field(default=False) + # 允许的传输方式:local_file / remote_url,默认两种都允许 + allowed_transfer_methods: List[str] = Field( + default=["local_file", "remote_url"], + description="允许的传输方式" + ) + # 图片文件:PNG/JPG/JPEG/GIF/WEBP,最大 20MB + image_enabled: bool = Field(default=False) + image_max_size_mb: int = Field(default=20) + image_allowed_extensions: List[str] = Field( + default=["png", "jpg", "jpeg"] + ) + # 语音文件:MP3/WAV/M4A/OGG/FLAC,最大 50MB + audio_enabled: bool = Field(default=False) + audio_max_size_mb: int = Field(default=50) + audio_allowed_extensions: List[str] = Field( + default=["mp3", "wav", "m4a"] + ) + # 通用文件:PDF/DOCX/XLSX/TXT/CSV/JSON,最大 100MB + document_enabled: bool = Field(default=False) + document_max_size_mb: int = Field(default=100) + document_allowed_extensions: List[str] = Field( + default=["pdf", "docx", "xlsx", "txt", "csv", "json", "md"] + ) + # 视频文件:MP4/MOV/AVI/WebM,最大 500MB + video_enabled: bool = Field(default=False) + video_max_size_mb: int = Field(default=500) + video_allowed_extensions: List[str] = Field( + default=["mp4", "mov"] + ) + # 最大文件数量 + max_file_count: int = Field(default=5, ge=1, le=20) + + +class OpeningStatementConfig(BaseModel): + """对话开场白配置""" + enabled: bool = Field(default=False) + statement: Optional[str] = Field(default=None, description="开场白内容") + suggested_questions: List[str] = Field(default_factory=list, description="预设问题列表") + + +class SuggestedQuestionsConfig(BaseModel): + """下一步问题建议配置""" + enabled: bool = Field(default=False) + + +class TextToSpeechConfig(BaseModel): + """文字转语音配置""" + enabled: bool = Field(default=False) + voice: Optional[str] = Field(default=None, description="语音音色") + language: Optional[str] = Field(default=None, description="语言") + autoplay: bool = Field(default=False, description="是否自动播放") + + +class CitationConfig(BaseModel): + """引用和归属配置""" + enabled: bool = 
Field(default=False) + + +class WebSearchConfig(BaseModel): + """联网搜索配置""" + enabled: bool = Field(default=False) + search_engine: Optional[str] = Field(default=None, description="搜索引擎") + + +class AppFeatures(BaseModel): + """应用功能特性配置""" + file_upload: FileUploadConfig = Field(default_factory=FileUploadConfig) + opening_statement: OpeningStatementConfig = Field(default_factory=OpeningStatementConfig) + suggested_questions_after_answer: SuggestedQuestionsConfig = Field(default_factory=SuggestedQuestionsConfig) + text_to_speech: TextToSpeechConfig = Field(default_factory=TextToSpeechConfig) + citation: CitationConfig = Field(default_factory=CitationConfig) + web_search: WebSearchConfig = Field(default_factory=WebSearchConfig) + + class ToolOldConfig(BaseModel): """工具配置""" enabled: bool = Field(default=False, description="是否启用该工具") @@ -193,6 +280,9 @@ class AgentConfigCreate(BaseModel): # 技能配置 skills: Optional[SkillConfig] = Field(default=dict, description="关联的技能列表") + # 功能特性 + features: Optional[AppFeatures] = Field(default=None, description="功能特性配置") + class AppCreate(BaseModel): name: str @@ -250,6 +340,9 @@ class AgentConfigUpdate(BaseModel): # 技能配置 skills: Optional[SkillConfig] = Field(default=dict, description="关联的技能列表") + # 功能特性 + features: Optional[AppFeatures] = Field(default=None, description="功能特性配置") + # ---------- Output Schemas ---------- @@ -269,7 +362,16 @@ class App(BaseModel): tags: List[str] = [] current_release_id: Optional[uuid.UUID] = None is_active: bool - is_shared: bool = False # 是否是共享应用(从其他工作空间共享来的) + is_shared: bool = False + share_permission: Optional[str] = None + source_workspace_name: Optional[str] = None # 共享来源工作空间名称(仅共享应用有值) + source_workspace_icon: Optional[str] = None # 共享来源工作空间图标 + source_app_version: Optional[str] = None # 应用版本号 + source_app_is_active: Optional[bool] = None # 应用是否生效 + share_id: Optional[uuid.UUID] = None # 分享记录ID(取消共享时使用) + shared_by: Optional[uuid.UUID] = None # 分享者用户ID + shared_by_name: Optional[str] = None # 
分享者名称 + shared_at: Optional[datetime.datetime] = None # 分享时间 created_at: datetime.datetime updated_at: datetime.datetime @@ -281,6 +383,10 @@ class App(BaseModel): def _serialize_updated_at(self, dt: datetime.datetime): return int(dt.timestamp() * 1000) if dt else None + @field_serializer("shared_at", when_used="json") + def _serialize_shared_at(self, dt: Optional[datetime.datetime]): + return int(dt.timestamp() * 1000) if dt else None + class AgentConfig(BaseModel): """Agent 配置输出 Schema""" @@ -310,6 +416,8 @@ class AgentConfig(BaseModel): skills: Optional[SkillConfig] = {} + features: Optional[AppFeatures] = None + is_active: bool created_at: datetime.datetime updated_at: datetime.datetime @@ -346,6 +454,14 @@ class AgentConfig(BaseModel): return {} return v + @field_validator("features", mode="before") + @classmethod + def validate_features(cls, v): + """处理 None 值,返回默认 AppFeatures""" + if v is None: + return AppFeatures() + return v + @field_serializer("created_at", when_used="json") def _serialize_created_at(self, dt: datetime.datetime): return int(dt.timestamp() * 1000) if dt else None @@ -409,11 +525,24 @@ class AppRelease(BaseModel): return int(dt.timestamp() * 1000) if dt else None +# ---------- App Copy Schema ---------- + +class CopyAppRequest(BaseModel): + """复制应用请求""" + new_name: Optional[str] = Field(None, description="新应用名称,不填则使用原名称-副本") + + # ---------- App Share Schemas ---------- class AppShareCreate(BaseModel): """应用分享请求""" target_workspace_ids: List[uuid.UUID] = Field(..., description="目标工作空间ID列表") + permission: str = Field(default="readonly", description="权限模式: readonly | editable") + + +class UpdateSharePermissionRequest(BaseModel): + """更新共享权限请求""" + permission: str = Field(..., description="新权限值: readonly | editable") class AppShare(BaseModel): @@ -425,9 +554,32 @@ class AppShare(BaseModel): source_workspace_id: uuid.UUID target_workspace_id: uuid.UUID shared_by: uuid.UUID + permission: str = "readonly" created_at: datetime.datetime 
updated_at: datetime.datetime + # 关联名称(从 relationship 读取) + source_app_name: Optional[str] = None + source_app_type: Optional[str] = None + source_app_version: Optional[str] = None + source_app_is_active: Optional[bool] = None + target_workspace_name: Optional[str] = None + target_workspace_icon: Optional[str] = None + + @classmethod + def model_validate(cls, obj, **kwargs): + instance = super().model_validate(obj, **kwargs) + if hasattr(obj, 'source_app') and obj.source_app: + instance.source_app_name = obj.source_app.name + instance.source_app_type = obj.source_app.type + instance.source_app_is_active = obj.source_app.is_active + release = obj.source_app.current_release + instance.source_app_version = release.version_name if release else None + if hasattr(obj, 'target_workspace') and obj.target_workspace: + instance.target_workspace_name = obj.target_workspace.name + instance.target_workspace_icon = obj.target_workspace.icon + return instance + @field_serializer("created_at", when_used="json") def _serialize_created_at(self, dt: datetime.datetime): return int(dt.timestamp() * 1000) if dt else None @@ -458,12 +610,35 @@ class DraftRunRequest(BaseModel): files: Optional[List[FileInput]] = Field(default_factory=list, description="附件列表(支持多文件)") +class SuggestedQuestion(BaseModel): + """建议问题""" + content: str + + +class CitationSource(BaseModel): + """引用来源""" + title: str + content: str + score: Optional[float] = None + kb_id: Optional[str] = None + + class DraftRunResponse(BaseModel): """试运行响应(非流式)""" message: str = Field(..., description="AI 回复消息") conversation_id: Optional[str] = Field(default=None, description="会话ID(用于多轮对话)") usage: Optional[Dict[str, Any]] = Field(default=None, description="Token 使用情况") elapsed_time: Optional[float] = Field(default=None, description="耗时(秒)") + suggested_questions: List[str] = Field(default_factory=list, description="下一步建议问题") + citations: List[CitationSource] = Field(default_factory=list, description="引用来源") + audio_url: 
Optional[str] = Field(default=None, description="TTS 语音URL") + + +class OpeningResponse(BaseModel): + """应用开场白响应""" + enabled: bool + statement: Optional[str] = None + suggested_questions: List[str] = Field(default_factory=list) class DraftRunStreamChunk(BaseModel): diff --git a/api/app/schemas/conversation_schema.py b/api/app/schemas/conversation_schema.py index 13766ef6..98715612 100644 --- a/api/app/schemas/conversation_schema.py +++ b/api/app/schemas/conversation_schema.py @@ -51,6 +51,10 @@ class Message(BaseModel): def _serialize_created_at(self, dt: datetime.datetime): return int(dt.timestamp() * 1000) if dt else None + @field_serializer("meta_data", when_used="json") + def _serialize_meta_data(self, data: Optional[Dict[str, Any]]): + return data or {} + class Conversation(BaseModel): """会话输出""" diff --git a/api/app/schemas/end_user_schema.py b/api/app/schemas/end_user_schema.py index 6f7498a0..bbb6fd5c 100644 --- a/api/app/schemas/end_user_schema.py +++ b/api/app/schemas/end_user_schema.py @@ -8,7 +8,7 @@ class EndUser(BaseModel): model_config = ConfigDict(from_attributes=True) id: uuid.UUID = Field(description="终端用户ID") - app_id: uuid.UUID = Field(description="应用ID") + app_id: Optional[uuid.UUID] = Field(description="应用ID", default=None) # end_user_id: str = Field(description="终端用户ID") other_id: Optional[str] = Field(description="第三方ID", default=None) other_name: Optional[str] = Field(description="其他名称", default="") diff --git a/api/app/schemas/i18n_schema.py b/api/app/schemas/i18n_schema.py new file mode 100644 index 00000000..b2ae93c6 --- /dev/null +++ b/api/app/schemas/i18n_schema.py @@ -0,0 +1,73 @@ +""" +I18n Management API Schemas + +This module defines Pydantic schemas for i18n management APIs. 
+""" + +from pydantic import BaseModel, Field +from typing import Dict, List, Optional, Any + + +# ============================================================================ +# Language Management Schemas +# ============================================================================ + +class LanguageInfo(BaseModel): + """Language information""" + code: str = Field(..., description="Language code (e.g., 'zh', 'en')") + name: str = Field(..., description="Language name (e.g., 'Chinese', 'English')") + native_name: str = Field(..., description="Native language name (e.g., '中文', 'English')") + is_enabled: bool = Field(..., description="Whether the language is enabled") + is_default: bool = Field(..., description="Whether this is the default language") + + +class LanguageListResponse(BaseModel): + """Response for language list""" + languages: List[LanguageInfo] = Field(..., description="List of available languages") + + +class LanguageCreateRequest(BaseModel): + """Request to add a new language""" + code: str = Field(..., description="Language code (e.g., 'ja', 'ko')", min_length=2, max_length=10) + name: str = Field(..., description="Language name", min_length=1, max_length=100) + native_name: str = Field(..., description="Native language name", min_length=1, max_length=100) + is_enabled: bool = Field(default=True, description="Whether to enable the language") + + +class LanguageUpdateRequest(BaseModel): + """Request to update language configuration""" + is_enabled: Optional[bool] = Field(None, description="Whether the language is enabled") + is_default: Optional[bool] = Field(None, description="Whether this is the default language") + + +# ============================================================================ +# Translation Management Schemas +# ============================================================================ + +class TranslationResponse(BaseModel): + """Response for translation data""" + translations: Dict[str, Dict[str, Any]] = Field( + ..., + 
description="Translations organized by locale and namespace" + ) + + +class TranslationUpdateRequest(BaseModel): + """Request to update a translation""" + value: str = Field(..., description="New translation value", min_length=1) + description: Optional[str] = Field(None, description="Optional description of the translation") + + +class MissingTranslationsResponse(BaseModel): + """Response for missing translations""" + missing_translations: Dict[str, List[str]] = Field( + ..., + description="Missing translation keys organized by locale" + ) + + +class ReloadResponse(BaseModel): + """Response for translation reload""" + success: bool = Field(..., description="Whether the reload was successful") + reloaded_locales: List[str] = Field(..., description="List of reloaded locales") + total_locales: int = Field(..., description="Total number of available locales") diff --git a/api/app/schemas/memory_agent_schema.py b/api/app/schemas/memory_agent_schema.py index 1a5017eb..b4efe61d 100644 --- a/api/app/schemas/memory_agent_schema.py +++ b/api/app/schemas/memory_agent_schema.py @@ -25,5 +25,8 @@ class AgentMemory_Long_Term(ABC): STRATEGY_CHUNK = "chunk" STRATEGY_TIME = "time" DEFAULT_SCOPE = 6 - + TIME_SCOPE=5 +class AgentMemoryDataset(ABC): + PRONOUN=['我','本人','在下','自己','咱','鄙人','吴','余'] + NAME='用户' diff --git a/api/app/schemas/memory_config_schema.py b/api/app/schemas/memory_config_schema.py index 0c359d70..8d7490fe 100644 --- a/api/app/schemas/memory_config_schema.py +++ b/api/app/schemas/memory_config_schema.py @@ -417,7 +417,7 @@ class MemoryConfig: # Ontology scene association scene_id: Optional[UUID] = None - ontology_classes: Optional[list] = field(default=None) + ontology_class_infos: list[dict] = field(default_factory=list) def __post_init__(self): """Validate configuration after initialization.""" diff --git a/api/app/schemas/memory_perceptual_schema.py b/api/app/schemas/memory_perceptual_schema.py index 7dfefe01..c9b741ef 100644 --- 
a/api/app/schemas/memory_perceptual_schema.py +++ b/api/app/schemas/memory_perceptual_schema.py @@ -1,5 +1,4 @@ import uuid -from datetime import datetime from typing import Optional from pydantic import BaseModel, Field @@ -85,7 +84,6 @@ class Semantic(BaseModel): class Content(BaseModel): - summary: str keywords: list[str] topic: str domain: str diff --git a/api/app/schemas/model_schema.py b/api/app/schemas/model_schema.py index 4f3878ce..058f082d 100644 --- a/api/app/schemas/model_schema.py +++ b/api/app/schemas/model_schema.py @@ -326,3 +326,14 @@ class ModelBaseQuery(BaseModel): is_official: Optional[bool] = Field(None, description="是否官方模型") is_deprecated: Optional[bool] = Field(None, description="是否弃用") search: Optional[str] = Field(None, description="搜索关键词", max_length=255) + +class ModelInfo(BaseModel): + """模型信息Schema""" + model_name: str = Field(..., description="模型名称") + provider: str = Field(..., description="模型提供商") + api_key: str = Field(..., description="API密钥") + api_base: str = Field(..., description="API基础URL") + is_omni: bool = Field(default=False, description="是否为omni模型") + model_type: ModelType = Field(..., description="模型类型") + capability: List[str] = Field(default_factory=list, description="模型能力列表") + diff --git a/api/app/schemas/tenant_schema.py b/api/app/schemas/tenant_schema.py index 6e8bd158..4f49ee88 100644 --- a/api/app/schemas/tenant_schema.py +++ b/api/app/schemas/tenant_schema.py @@ -11,6 +11,8 @@ class TenantBase(BaseModel): name: str = Field(..., description="租户名称", max_length=255) description: Optional[str] = Field(None, description="租户描述", max_length=1000) is_active: bool = Field(True, description="是否激活") + default_language: Optional[str] = Field('zh', description="租户默认语言", max_length=10) + supported_languages: Optional[List[str]] = Field(['zh', 'en'], description="租户支持的语言列表") @field_validator('name') @classmethod @@ -18,6 +20,26 @@ class TenantBase(BaseModel): if not v or not v.strip(): raise ValidationException('租户名称不能为空', 
code=BizCode.VALIDATION_FAILED) return v.strip() + + @field_validator('default_language') + @classmethod + def validate_default_language(cls, v): + if v: + # Validate language code format (2-letter code, optionally with region) + import re + if not re.match(r'^[a-z]{2}(-[A-Z]{2})?$', v): + raise ValidationException('语言代码格式不正确', code=BizCode.VALIDATION_FAILED) + return v + + @field_validator('supported_languages') + @classmethod + def validate_supported_languages(cls, v): + if v: + import re + for lang in v: + if not re.match(r'^[a-z]{2}(-[A-Z]{2})?$', lang): + raise ValidationException(f'语言代码格式不正确: {lang}', code=BizCode.VALIDATION_FAILED) + return v class TenantCreate(TenantBase): @@ -30,6 +52,8 @@ class TenantUpdate(BaseModel): name: Optional[str] = Field(None, description="租户名称", max_length=255) description: Optional[str] = Field(None, description="租户描述", max_length=1000) is_active: Optional[bool] = Field(None, description="是否激活") + default_language: Optional[str] = Field(None, description="租户默认语言", max_length=10) + supported_languages: Optional[List[str]] = Field(None, description="租户支持的语言列表") @field_validator('name') @classmethod @@ -37,6 +61,25 @@ class TenantUpdate(BaseModel): if v is not None and (not v or not v.strip()): raise ValidationException('租户名称不能为空', code=BizCode.VALIDATION_FAILED) return v.strip() if v else v + + @field_validator('default_language') + @classmethod + def validate_default_language(cls, v): + if v: + import re + if not re.match(r'^[a-z]{2}(-[A-Z]{2})?$', v): + raise ValidationException('语言代码格式不正确', code=BizCode.VALIDATION_FAILED) + return v + + @field_validator('supported_languages') + @classmethod + def validate_supported_languages(cls, v): + if v: + import re + for lang in v: + if not re.match(r'^[a-z]{2}(-[A-Z]{2})?$', lang): + raise ValidationException(f'语言代码格式不正确: {lang}', code=BizCode.VALIDATION_FAILED) + return v class Tenant(TenantBase): @@ -62,4 +105,29 @@ class TenantList(BaseModel): total: int page: int size: int - pages: 
int \ No newline at end of file + pages: int + + +class TenantLanguageConfig(BaseModel): + """租户语言配置Schema""" + default_language: str = Field(..., description="租户默认语言", max_length=10) + supported_languages: List[str] = Field(..., description="租户支持的语言列表") + + @field_validator('default_language') + @classmethod + def validate_default_language(cls, v): + import re + if not re.match(r'^[a-z]{2}(-[A-Z]{2})?$', v): + raise ValidationException('语言代码格式不正确', code=BizCode.VALIDATION_FAILED) + return v + + @field_validator('supported_languages') + @classmethod + def validate_supported_languages(cls, v): + if not v: + raise ValidationException('支持的语言列表不能为空', code=BizCode.VALIDATION_FAILED) + import re + for lang in v: + if not re.match(r'^[a-z]{2}(-[A-Z]{2})?$', lang): + raise ValidationException(f'语言代码格式不正确: {lang}', code=BizCode.VALIDATION_FAILED) + return v diff --git a/api/app/schemas/tool_schema.py b/api/app/schemas/tool_schema.py index 2ba86c2c..79e01688 100644 --- a/api/app/schemas/tool_schema.py +++ b/api/app/schemas/tool_schema.py @@ -90,6 +90,7 @@ class ToolInfo(BaseModel): parameters: List[ToolParameter] = Field(default_factory=list, description="工具参数") config_data: Dict[str, Any] = Field(default_factory=dict, description="工具配置") status: ToolStatus = Field(ToolStatus.AVAILABLE, description="工具状态") + is_active: bool = Field(True, description="是否可用(False 表示已删除)") tags: List[str] = Field(default_factory=list, description="工具标签") tenant_id: Optional[str] = Field(None, description="租户ID") created_at: datetime = Field(..., description="创建时间") @@ -212,6 +213,11 @@ class ToolUpdateRequest(BaseModel): tags: Optional[List[str]] = None +class ToolActiveUpdate(BaseModel): + """工具可用状态更新""" + is_active: bool = Field(..., description="True=启用, False=禁用(逻辑删除)") + + class ToolExecuteRequest(BaseModel): """执行工具请求""" tool_id: str diff --git a/api/app/schemas/user_schema.py b/api/app/schemas/user_schema.py index 7b9e201d..6b880696 100644 --- a/api/app/schemas/user_schema.py +++ 
b/api/app/schemas/user_schema.py @@ -58,6 +58,16 @@ class VerifyPasswordRequest(BaseModel): password: str = Field(..., description="密码") +class LanguagePreferenceRequest(BaseModel): + """语言偏好设置请求""" + language: str = Field(..., min_length=2, max_length=10, description="语言代码,如 'zh', 'en'") + + +class LanguagePreferenceResponse(BaseModel): + """语言偏好响应""" + language: str = Field(..., description="当前语言偏好") + + class ChangePasswordResponse(BaseModel): """修改密码响应""" message: str @@ -74,6 +84,7 @@ class User(UserBase): current_workspace_id: Optional[uuid.UUID] = None current_workspace_name: Optional[str] = None role: Optional[WorkspaceRole] = None + preferred_language: Optional[str] = "zh" # 用户语言偏好 # 将 datetime 转换为毫秒时间戳 @validator("created_at", pre=True) diff --git a/api/app/schemas/workflow_schema.py b/api/app/schemas/workflow_schema.py index e580833f..d878d97c 100644 --- a/api/app/schemas/workflow_schema.py +++ b/api/app/schemas/workflow_schema.py @@ -80,6 +80,7 @@ class WorkflowConfigCreate(BaseModel): variables: list[VariableDefinition] = Field(default_factory=list, description="变量列表") execution_config: ExecutionConfig = Field(default_factory=ExecutionConfig, description="执行配置") triggers: list[TriggerConfig] = Field(default_factory=list, description="触发器列表") + features: dict = Field(default_factory=dict, description="功能特性配置") class WorkflowConfigUpdate(BaseModel): @@ -87,6 +88,7 @@ class WorkflowConfigUpdate(BaseModel): nodes: list[NodeDefinition] | None = None edges: list[EdgeDefinition] | None = None variables: list[VariableDefinition] | None = None + features: dict | None = None execution_config: ExecutionConfig | None = None triggers: list[TriggerConfig] | None = None @@ -102,6 +104,7 @@ class WorkflowConfig(BaseModel): variables: list[dict[str, Any]] execution_config: dict[str, Any] triggers: list[dict[str, Any]] + features: dict | None is_active: bool created_at: datetime.datetime updated_at: datetime.datetime @@ -114,6 +117,10 @@ class WorkflowConfig(BaseModel): 
def _serialize_updated_at(self, dt: datetime.datetime): return int(dt.timestamp() * 1000) if dt else None + @field_serializer("features", when_used="json") + def _serialize_features(self, features: dict | None): + return features or {} + # ==================== 工作流执行 ==================== diff --git a/api/app/services/agent_config_converter.py b/api/app/services/agent_config_converter.py index fbc75f4c..f86b8f19 100644 --- a/api/app/services/agent_config_converter.py +++ b/api/app/services/agent_config_converter.py @@ -51,6 +51,9 @@ class AgentConfigConverter: if hasattr(config, "skills") and config.skills: result["skills"] = config.skills.model_dump() + + if hasattr(config, "features") and config.features: + result["features"] = config.features.model_dump() return result diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index f3cdde2a..604514b4 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -8,26 +8,23 @@ from typing import Optional, Dict, Any, AsyncGenerator, Annotated, List from fastapi import Depends from sqlalchemy.orm import Session -from app.core.agent.agent_middleware import AgentMiddleware from app.core.agent.langchain_agent import LangChainAgent -from app.core.exceptions import BusinessException from app.core.logging_config import get_business_logger from app.db import get_db -from app.models import MultiAgentConfig, AgentConfig +from app.models import MultiAgentConfig, AgentConfig, ModelType from app.models import WorkflowConfig from app.repositories.tool_repository import ToolRepository from app.schemas import DraftRunRequest from app.schemas.app_schema import FileInput +from app.schemas.model_schema import ModelInfo from app.schemas.prompt_schema import render_prompt_message, PromptMessageRole from app.services.conversation_service import ConversationService -from app.services.draft_run_service import create_knowledge_retrieval_tool, create_long_term_memory_tool, \ - 
AgentRunService -from app.services.draft_run_service import create_web_search_tool +from app.services.draft_run_service import AgentRunService from app.services.model_service import ModelApiKeyService from app.services.multi_agent_orchestrator import MultiAgentOrchestrator from app.services.multimodal_service import MultimodalService -from app.services.tool_service import ToolService from app.services.workflow_service import WorkflowService +from app.schemas import FileType logger = get_business_logger() @@ -53,12 +50,23 @@ class AppChatService: storage_type: Optional[str] = None, user_rag_memory_id: Optional[str] = None, workspace_id: Optional[str] = None, - files: Optional[List[FileInput]] = None # 新增:多模态文件 + files: Optional[List[FileInput]] = None ) -> Dict[str, Any]: """聊天(非流式)""" start_time = time.time() config_id = None + # 应用 features 配置 + features_config: dict = config.features or {} + if hasattr(features_config, 'model_dump'): + features_config = features_config.model_dump() + web_search_feature = features_config.get("web_search", {}) + if not (isinstance(web_search_feature, dict) and web_search_feature.get("enabled")): + web_search = False + + # 校验文件上传 + self.agent_service._validate_file_upload(features_config, files) + variables = self.agent_service.prepare_variables(variables, config.variables) # 获取模型配置ID @@ -111,23 +119,29 @@ class AppChatService: ) # 加载历史消息 - history = [] - memory_config = {"enabled": True, 'max_history': 10} - if memory_config.get("enabled"): - messages = self.conversation_service.get_messages( - conversation_id=conversation_id, - limit=memory_config.get("max_history", 10) - ) - history = [ - {"role": msg.role, "content": msg.content} - for msg in messages - ] + messages = self.conversation_service.get_messages( + conversation_id=conversation_id, + limit=10 + ) + history = [ + {"role": msg.role, "content": msg.content} + for msg in messages + ] # 处理多模态文件 processed_files = None if files: - multimodal_service = 
MultimodalService(self.db, api_key_obj.provider, is_omni=api_key_obj.is_omni) - processed_files = await multimodal_service.process_files(files) + model_info = ModelInfo( + model_name=api_key_obj.model_name, + provider=api_key_obj.provider, + api_key=api_key_obj.api_key, + api_base=api_key_obj.api_base, + capability=api_key_obj.capability, + is_omni=api_key_obj.is_omni, + model_type=ModelType.LLM + ) + multimodal_service = MultimodalService(self.db, model_info) + processed_files = await multimodal_service.process_files(user_id, files) logger.info(f"处理了 {len(processed_files)} 个文件") # 调用 Agent(支持多模态) @@ -143,24 +157,61 @@ class AppChatService: files=processed_files # 传递处理后的文件 ) - # 保存消息 - message_id = self.conversation_service.save_conversation_messages( - conversation_id=conversation_id, - user_message=message, - assistant_message=result["content"], - meta_data={ - "usage": result.get("usage", { - "prompt_tokens": 0, - "completion_tokens": 0, - "total_tokens": 0 - }) - } - ) - ModelApiKeyService.record_api_key_usage(self.db, api_key_obj.id) elapsed_time = time.time() - start_time + # suggested_questions + suggested_questions = [] + sq_config = features_config.get("suggested_questions_after_answer", {}) + if isinstance(sq_config, dict) and sq_config.get("enabled"): + suggested_questions = await self.agent_service._generate_suggested_questions( + features_config, result["content"], + {"model_name": api_key_obj.model_name, "api_key": api_key_obj.api_key, + "api_base": api_key_obj.api_base}, {} + ) + + audio_url = await self.agent_service._generate_tts( + features_config, result["content"], + {"model_name": api_key_obj.model_name, "api_key": api_key_obj.api_key, + "api_base": api_key_obj.api_base, "provider": api_key_obj.provider}, + tenant_id=tenant_id, workspace_id=workspace_id + ) + + # 构建用户消息内容(含多模态文件) + human_meta = { + "files": [] + } + assistant_meta = { + "model": api_key_obj.model_name, + "usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, 
"total_tokens": 0}), + "audio_url": None + } + if files: + for f in files: + # url = await MultimodalService(self.db).get_file_url(f) + human_meta["files"].append({ + "type": f.type, + "url": f.url + }) + + # 保存消息 + if audio_url: + assistant_meta["audio_url"] = audio_url + self.conversation_service.add_message( + conversation_id=conversation_id, + role="user", + content=message, + meta_data=human_meta + ) + ai_message = self.conversation_service.add_message( + conversation_id=conversation_id, + role="assistant", + content=result["content"], + meta_data=assistant_meta + ) + message_id = ai_message.id + return { "conversation_id": conversation_id, "message_id": str(message_id), @@ -170,7 +221,10 @@ class AppChatService: "completion_tokens": 0, "total_tokens": 0 }), - "elapsed_time": elapsed_time + "elapsed_time": elapsed_time, + "suggested_questions": suggested_questions, + "citations": self.agent_service._filter_citations(features_config, result.get("citations", [])), + "audio_url": audio_url, } async def agnet_chat_stream( @@ -185,7 +239,7 @@ class AppChatService: storage_type: Optional[str] = None, user_rag_memory_id: Optional[str] = None, workspace_id: Optional[str] = None, - files: Optional[List[FileInput]] = None # 新增:多模态文件 + files: Optional[List[FileInput]] = None ) -> AsyncGenerator[str, None]: """聊天(流式)""" @@ -193,10 +247,19 @@ class AppChatService: start_time = time.time() config_id = None message_id = uuid.uuid4() - yield f"event: start\ndata: {json.dumps({ - 'conversation_id': str(conversation_id), - "message_id": str(message_id) - }, ensure_ascii=False)}\n\n" + + # 应用 features 配置 + features_config: dict = config.features or {} + if hasattr(features_config, 'model_dump'): + features_config = features_config.model_dump() + web_search_feature = features_config.get("web_search", {}) + if not (isinstance(web_search_feature, dict) and web_search_feature.get("enabled")): + web_search = False + + # 校验文件上传 + 
self.agent_service._validate_file_upload(features_config, files) + + yield f"event: start\ndata: {json.dumps({'conversation_id': str(conversation_id), 'message_id': str(message_id)}, ensure_ascii=False)}\n\n" variables = self.agent_service.prepare_variables(variables, config.variables) # 获取模型配置ID @@ -266,13 +329,30 @@ class AppChatService: # 处理多模态文件 processed_files = None if files: - multimodal_service = MultimodalService(self.db, api_key_obj.provider, is_omni=api_key_obj.is_omni) - processed_files = await multimodal_service.process_files(files) + model_info = ModelInfo( + model_name=api_key_obj.model_name, + provider=api_key_obj.provider, + api_key=api_key_obj.api_key, + api_base=api_key_obj.api_base, + capability=api_key_obj.capability, + is_omni=api_key_obj.is_omni, + model_type=ModelType.LLM + ) + multimodal_service = MultimodalService(self.db, model_info) + processed_files = await multimodal_service.process_files(user_id, files) logger.info(f"处理了 {len(processed_files)} 个文件") - # 流式调用 Agent(支持多模态) + # 流式调用 Agent(支持多模态),同时并行启动 TTS full_content = "" total_tokens = 0 + + text_queue: asyncio.Queue = asyncio.Queue() + stream_audio_url, tts_task = await self.agent_service._generate_tts_streaming( + features_config, api_key_obj, + text_queue=text_queue, + tenant_id=tenant_id, workspace_id=workspace_id + ) + async for chunk in agent.chat_stream( message=message, history=history, @@ -282,39 +362,67 @@ class AppChatService: user_rag_memory_id=user_rag_memory_id, config_id=config_id, memory_flag=memory_flag, - files=processed_files # 传递处理后的文件 + files=processed_files ): if isinstance(chunk, int): total_tokens = chunk else: full_content += chunk - # 发送消息块事件 yield f"event: message\ndata: {json.dumps({'content': chunk}, ensure_ascii=False)}\n\n" + if tts_task is not None: + await text_queue.put(chunk) + + if tts_task is not None: + await text_queue.put(None) elapsed_time = time.time() - start_time + ModelApiKeyService.record_api_key_usage(self.db, api_key_obj.id) + + # 
发送结束事件(包含 suggested_questions、tts、citations) + end_data: dict = {"elapsed_time": elapsed_time, "message_length": len(full_content), "error": None} + sq_config = features_config.get("suggested_questions_after_answer", {}) + if isinstance(sq_config, dict) and sq_config.get("enabled"): + end_data["suggested_questions"] = await self.agent_service._generate_suggested_questions( + features_config, full_content, + {"model_name": api_key_obj.model_name, "api_key": api_key_obj.api_key, + "api_base": api_key_obj.api_base}, {} + ) + end_data["audio_url"] = stream_audio_url + end_data["citations"] = self.agent_service._filter_citations(features_config, []) # 保存消息 + human_meta = { + "files":[] + } + assistant_meta = { + "model": api_key_obj.model_name, + "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens}, + "audio_url": None + } + + if files: + for f in files: + # url = await MultimodalService(self.db).get_file_url(f) + human_meta["files"].append({ + "type": f.type, + "url": f.url + }) + + if stream_audio_url: + assistant_meta["audio_url"] = stream_audio_url self.conversation_service.add_message( conversation_id=conversation_id, role="user", - content=message + content=message, + meta_data=human_meta ) - self.conversation_service.add_message( message_id=message_id, conversation_id=conversation_id, role="assistant", content=full_content, - meta_data={ - "model": api_key_obj.model_name, - "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens} - } + meta_data=assistant_meta ) - - ModelApiKeyService.record_api_key_usage(self.db, api_key_obj.id) - - # 发送结束事件 - end_data = {"elapsed_time": elapsed_time, "message_length": len(full_content), "error": None} yield f"event: end\ndata: {json.dumps(end_data, ensure_ascii=False)}\n\n" logger.info( @@ -428,7 +536,7 @@ class AppChatService: try: message_id = uuid.uuid4() # 发送开始事件 - yield f"event: start\ndata: {json.dumps({'conversation_id': str(conversation_id), "message_id": 
str(message_id)}, ensure_ascii=False)}\n\n" + yield f"event: start\ndata: {json.dumps({'conversation_id': str(conversation_id), 'message_id': str(message_id)}, ensure_ascii=False)}\n\n" full_content = "" total_tokens = 0 @@ -520,6 +628,7 @@ class AppChatService: app_id: uuid.UUID, release_id: uuid.UUID, workspace_id: uuid.UUID, + files: Optional[List[FileInput]] = None, user_id: Optional[str] = None, variables: Optional[Dict[str, Any]] = None, web_search: bool = False, @@ -533,7 +642,8 @@ class AppChatService: variables=variables, conversation_id=str(conversation_id), stream=True, - user_id=user_id + user_id=user_id, + files=files ) return await self.workflow_service.run( app_id=app_id, diff --git a/api/app/services/app_dsl_service.py b/api/app/services/app_dsl_service.py new file mode 100644 index 00000000..a10aa70a --- /dev/null +++ b/api/app/services/app_dsl_service.py @@ -0,0 +1,457 @@ +"""应用 DSL 导入导出服务""" +import uuid +import datetime +from typing import Optional + +import yaml +from sqlalchemy.orm import Session + +from app.core.config import settings +from app.core.error_codes import BizCode +from app.core.exceptions import BusinessException, ResourceNotFoundException +from app.models import AgentConfig, MultiAgentConfig +from app.models.app_model import App, AppType +from app.models.appshare_model import AppShare +from app.models.app_release_model import AppRelease +from app.models.knowledge_model import Knowledge +from app.models.models_model import ModelConfig +from app.models.tool_model import ToolConfig as ToolConfigModel +from app.models.workflow_model import WorkflowConfig +from app.services.workflow_service import WorkflowService +from app.core.workflow.adapters.memory_bear.memory_bear_adapter import MemoryBearAdapter +from app.models.memory_config_model import MemoryConfig as MemoryConfigModel + + +class AppDslService: + + def __init__(self, db: Session): + self.db = db + + # ==================== 导出 ==================== + + def export_dsl(self, 
app_id: uuid.UUID, release_id: Optional[uuid.UUID] = None) -> tuple[str, str]: + """构建应用 DSL yaml 字符串,返回 (yaml_str, filename)""" + app = self.db.query(App).filter(App.id == app_id, App.is_active.is_(True)).first() + if not app: + raise ResourceNotFoundException("应用", str(app_id)) + + meta = { + "version": settings.SYSTEM_VERSION, + "platform": "MemoryBear", + "exported_at": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"), + } + app_meta = { + "name": app.name, + "description": app.description, + "icon": app.icon, + "icon_type": app.icon_type, + "type": app.type, + "tags": app.tags or [], + } + + if release_id is not None: + return self._export_release(app, release_id, meta, app_meta) + + return self._export_draft(app, meta, app_meta) + + def _export_release(self, app: App, release_id: uuid.UUID, meta: dict, app_meta: dict) -> tuple[str, str]: + release = self.db.query(AppRelease).filter( + AppRelease.app_id == app.id, + AppRelease.id == release_id, + AppRelease.is_active.is_(True) + ).first() + if not release: + raise ResourceNotFoundException("版本", str(release_id)) + + meta["release_version"] = release.version + meta["release_name"] = release.version_name + app_meta["name"] = release.name + app_meta["description"] = release.description + config_key = { + AppType.AGENT: "agent_config", + AppType.MULTI_AGENT: "multi_agent_config", + AppType.WORKFLOW: "workflow" + }.get(app.type, "config") + config_data = self._enrich_release_config(app.type, release.config or {}) + dsl = {**meta, "app": app_meta, config_key: config_data} + return yaml.dump(dsl, default_flow_style=False, allow_unicode=True), f"{release.name}_v{release.version_name}.yaml" + + def _enrich_release_config(self, app_type: str, cfg: dict) -> dict: + if app_type == AppType.AGENT: + enriched = {**cfg} + if "default_model_config_id" in cfg: + enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"]) + if "knowledge_retrieval" in cfg: + enriched["knowledge_retrieval"] = 
self._enrich_knowledge_retrieval(cfg["knowledge_retrieval"]) + if "tools" in cfg: + enriched["tools"] = self._enrich_tools(cfg["tools"]) + return enriched + if app_type == AppType.MULTI_AGENT: + enriched = {**cfg} + if "default_model_config_id" in cfg: + enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"]) + if "master_agent_id" in cfg: + enriched["master_agent_ref"] = self._release_ref(cfg["master_agent_id"]) + if "sub_agents" in cfg: + enriched["sub_agents"] = self._enrich_sub_agents(cfg["sub_agents"]) + if "routing_rules" in cfg: + enriched["routing_rules"] = [ + {**r, "_ref": self._agent_ref(r.get("target_agent_id"))} for r in (cfg["routing_rules"] or []) + ] + return enriched + return cfg + + def _export_draft(self, app: App, meta: dict, app_meta: dict) -> tuple[str, str]: + if app.type == AppType.WORKFLOW: + config = self.db.query(WorkflowConfig).filter(WorkflowConfig.app_id == app.id).first() + config_data = { + "variables": config.variables if config else [], + "edges": config.edges if config else [], + "nodes": config.nodes if config else [], + "execution_config": config.execution_config if config else {}, + "triggers": config.triggers if config else [], + } if config else {} + dsl = {**meta, "app": app_meta, "workflow": config_data} + + elif app.type == AppType.AGENT: + config = self.db.query(AgentConfig).filter(AgentConfig.app_id == app.id).first() + config_data = { + "system_prompt": config.system_prompt if config else None, + "model_parameters": self._to_dict(config.model_parameters) if config else None, + "default_model_config_ref": self._model_ref(config.default_model_config_id) if config else None, + "knowledge_retrieval": self._enrich_knowledge_retrieval(config.knowledge_retrieval) if config else None, + "memory": config.memory if config else None, + "variables": config.variables if config else [], + "tools": self._enrich_tools(config.tools) if config else [], + "skills": config.skills if config else {}, + } if 
config else {} + dsl = {**meta, "app": app_meta, "agent_config": config_data} + + elif app.type == AppType.MULTI_AGENT: + config = self.db.query(MultiAgentConfig).filter(MultiAgentConfig.app_id == app.id).first() + config_data = { + "orchestration_mode": config.orchestration_mode if config else None, + "master_agent_name": config.master_agent_name if config else None, + "model_parameters": self._to_dict(config.model_parameters) if config else None, + "default_model_config_ref": self._model_ref(config.default_model_config_id) if config else None, + "master_agent_ref": self._release_ref(config.master_agent_id) if config else None, + "sub_agents": self._enrich_sub_agents(config.sub_agents) if config else [], + "routing_rules": [ + {**r, "_ref": self._agent_ref(r.get("target_agent_id"))} for r in (config.routing_rules or []) + ] if config else [], + + "execution_config": config.execution_config if config else {}, + "aggregation_strategy": config.aggregation_strategy if config else "merge", + } if config else {} + dsl = {**meta, "app": app_meta, "multi_agent_config": config_data} + + else: + raise BusinessException(f"不支持的应用类型: {app.type}", BizCode.BAD_REQUEST) + + return yaml.dump(dsl, default_flow_style=False, allow_unicode=True), f"{app.name}.yaml" + + def _to_dict(self, value): + """将 Pydantic 对象转为普通 dict,供 yaml.dump 安全序列化""" + if value is None: + return None + if hasattr(value, "model_dump"): + return value.model_dump() + return value + + def _model_ref(self, model_config_id) -> Optional[dict]: + if not model_config_id: + return None + m = self.db.query(ModelConfig).filter(ModelConfig.id == model_config_id).first() + return {"id": str(model_config_id), "name": m.name, "provider": m.provider, "type": m.type} if m else {"id": str(model_config_id)} + + def _kb_ref(self, kb_id) -> Optional[dict]: + if not kb_id: + return None + kb = self.db.query(Knowledge).filter(Knowledge.id == kb_id).first() + return {"id": str(kb_id), "name": kb.name} if kb else {"id": str(kb_id)} + 
+ def _tool_ref(self, tool_id) -> Optional[dict]: + if not tool_id: + return None + t = self.db.query(ToolConfigModel).filter(ToolConfigModel.id == tool_id).first() + return {"id": str(tool_id), "name": t.name, "tool_type": t.tool_type} if t else {"id": str(tool_id)} + + def _enrich_knowledge_retrieval(self, kr: Optional[dict]) -> Optional[dict]: + if not kr: + return kr + kbs = [{**kb, "_ref": self._kb_ref(kb.get("kb_id"))} for kb in kr.get("knowledge_bases", [])] + return {**kr, "knowledge_bases": kbs} + + def _enrich_tools(self, tools: list) -> list: + return [{**t, "_ref": self._tool_ref(t.get("tool_id"))} for t in (tools or [])] + + def _agent_ref(self, agent_id) -> Optional[dict]: + if not agent_id: + return None + a = self.db.query(App).filter(App.id == agent_id).first() + return {"id": str(agent_id), "name": a.name} if a else {"id": str(agent_id)} + + def _release_ref(self, release_id) -> Optional[dict]: + if not release_id: + return None + r = self.db.query(AppRelease).filter(AppRelease.id == release_id).first() + return {"id": str(release_id), "name": r.name, "version": r.version, "app_id": str(r.app_id)} if r else {"id": str(release_id)} + + def _enrich_sub_agents(self, sub_agents: list) -> list: + return [{**s, "_ref": self._agent_ref(s.get("agent_id"))} for s in (sub_agents or [])] + + # ==================== 导入 ==================== + + def import_dsl( + self, + dsl: dict, + workspace_id: uuid.UUID, + tenant_id: uuid.UUID, + user_id: uuid.UUID, + ) -> tuple[App, list[str]]: + """解析 DSL,创建应用及配置,返回 (new_app, warnings)""" + app_meta = dsl.get("app", {}) + app_type = app_meta.get("type") + if app_type not in (AppType.AGENT, AppType.MULTI_AGENT, AppType.WORKFLOW): + raise BusinessException(f"不支持的应用类型: {app_type}", BizCode.BAD_REQUEST) + + warnings: list[str] = [] + now = datetime.datetime.now() + + new_app = App( + id=uuid.uuid4(), + workspace_id=workspace_id, + created_by=user_id, + name=self._unique_app_name(app_meta.get("name", "导入应用"), workspace_id, 
app_type), + description=app_meta.get("description"), + icon=app_meta.get("icon"), + icon_type=app_meta.get("icon_type"), + type=app_type, + visibility="private", + status="draft", + tags=app_meta.get("tags", []), + is_active=True, + created_at=now, + updated_at=now, + ) + self.db.add(new_app) + self.db.flush() + + if app_type == AppType.AGENT: + cfg = dsl.get("agent_config") or {} + self.db.add(AgentConfig( + id=uuid.uuid4(), + app_id=new_app.id, + system_prompt=cfg.get("system_prompt"), + model_parameters=cfg.get("model_parameters"), + default_model_config_id=self._resolve_model(cfg.get("default_model_config_ref"), tenant_id, warnings), + knowledge_retrieval=self._resolve_knowledge_retrieval(cfg.get("knowledge_retrieval"), workspace_id, warnings), + memory=self._resolve_memory(cfg.get("memory"), workspace_id, warnings), + variables=cfg.get("variables", []), + tools=self._resolve_tools(cfg.get("tools", []), tenant_id, warnings), + skills=cfg.get("skills", {}), + is_active=True, + created_at=now, + updated_at=now, + )) + + elif app_type == AppType.MULTI_AGENT: + cfg = dsl.get("multi_agent_config") or {} + self.db.add(MultiAgentConfig( + id=uuid.uuid4(), + app_id=new_app.id, + orchestration_mode=cfg.get("orchestration_mode", "collaboration"), + master_agent_name=cfg.get("master_agent_name"), + model_parameters=cfg.get("model_parameters"), + default_model_config_id=self._resolve_model(cfg.get("default_model_config_ref"), tenant_id, warnings), + master_agent_id=self._resolve_release(cfg.get("master_agent_ref"), warnings), + sub_agents=self._resolve_sub_agents(cfg.get("sub_agents", []), warnings), + routing_rules=self._resolve_routing_rules(cfg.get("routing_rules"), warnings), + execution_config=cfg.get("execution_config", {}), + aggregation_strategy=cfg.get("aggregation_strategy", "merge"), + is_active=True, + created_at=now, + updated_at=now, + )) + + elif app_type == AppType.WORKFLOW: + adapter = MemoryBearAdapter(dsl) + if not adapter.validate_config(): + raise 
BusinessException("工作流配置格式无效", BizCode.BAD_REQUEST) + result = adapter.parse_workflow() + for e in result.errors: + warnings.append(f"[节点错误] {e.node_name or e.node_id}: {e.detail}") + for w in result.warnings: + warnings.append(f"[节点警告] {w.node_name or w.node_id}: {w.detail}") + wf = dsl.get("workflow") or {} + WorkflowService(self.db).create_workflow_config( + app_id=new_app.id, + nodes=[n.model_dump() for n in result.nodes], + edges=[e.model_dump() for e in result.edges], + variables=[v.model_dump() for v in result.variables], + execution_config=wf.get("execution_config", {}), + triggers=wf.get("triggers", []), + validate=False, + ) + + self.db.commit() + self.db.refresh(new_app) + return new_app, warnings + + def _unique_app_name(self, name: str, workspace_id: uuid.UUID, app_type: AppType) -> str: + """生成唯一应用名称,同时检查本空间自有应用和共享到本空间的应用""" + # 本空间自有应用名 + existing = {r[0] for r in self.db.query(App.name).filter( + App.workspace_id == workspace_id, + App.type == app_type, + App.is_active.is_(True) + ).all()} + # 共享到本空间的应用名 + shared_names = {r[0] for r in self.db.query(App.name).join( + AppShare, AppShare.source_app_id == App.id + ).filter( + AppShare.target_workspace_id == workspace_id, + App.type == app_type, + App.is_active.is_(True) + ).all()} + existing |= shared_names + if name not in existing: + return name + counter = 1 + while f"{name}({counter})" in existing: + counter += 1 + return f"{name}({counter})" + + def _resolve_model(self, ref: Optional[dict], tenant_id: uuid.UUID, warnings: list) -> Optional[uuid.UUID]: + if not ref: + return None + q = self.db.query(ModelConfig).filter( + ModelConfig.tenant_id == tenant_id, + ModelConfig.name == ref.get("name"), + ModelConfig.is_active.is_(True) + ) + if ref.get("provider"): + q = q.filter(ModelConfig.provider == ref["provider"]) + if ref.get("type"): + q = q.filter(ModelConfig.type == ref["type"]) + m = q.first() + if not m: + warnings.append(f"模型 '{ref.get('name')}' 未匹配,已置空,请导入后手动配置") + return m.id if m else None 
+ + def _resolve_kb(self, ref: Optional[dict], workspace_id: uuid.UUID, warnings: list) -> Optional[str]: + if not ref: + return None + kb = self.db.query(Knowledge).filter( + Knowledge.workspace_id == workspace_id, + Knowledge.name == ref.get("name") + ).first() + if not kb: + warnings.append(f"知识库 '{ref.get('name')}' 未匹配,已置空,请导入后手动配置") + return str(kb.id) if kb else None + + def _resolve_tool(self, ref: Optional[dict], tenant_id: uuid.UUID, warnings: list) -> Optional[str]: + if not ref: + return None + q = self.db.query(ToolConfigModel).filter( + ToolConfigModel.tenant_id == tenant_id, + ToolConfigModel.name == ref.get("name") + ) + if ref.get("tool_type"): + q = q.filter(ToolConfigModel.tool_type == ref["tool_type"]) + t = q.first() + if not t: + warnings.append(f"工具 '{ref.get('name')}' 未匹配,已置空,请导入后手动配置") + return str(t.id) if t else None + + def _resolve_release(self, ref: Optional[dict], warnings: list) -> Optional[uuid.UUID]: + if not ref: + return None + r = self.db.query(AppRelease).filter( + AppRelease.app_id == ref.get("app_id"), + AppRelease.version == ref.get("version"), + AppRelease.is_active.is_(True) + ).first() + if not r: + warnings.append(f"主 Agent 发布版本 '{ref.get('name')}' 未匹配,已置空,请导入后手动配置") + return r.id if r else None + + def _resolve_sub_agents(self, sub_agents: list, warnings: list) -> list: + result = [] + for s in (sub_agents or []): + ref = s.get("_ref") + entry = {k: v for k, v in s.items() if k != "_ref"} + if ref: + a = self.db.query(App).filter(App.name == ref.get("name"), App.is_active.is_(True)).first() + if not a: + warnings.append(f"子 Agent '{ref.get('name')}' 未匹配,已置空,请导入后手动配置") + entry["agent_id"] = str(a.id) if a else None + result.append(entry) + return result + + def _resolve_routing_rules(self, rules: Optional[list], warnings: list) -> Optional[list]: + if rules is None: + return None + result = [] + for r in rules: + ref = r.get("_ref") + entry = {k: v for k, v in r.items() if k != "_ref"} + if ref: + a = 
self.db.query(App).filter(App.name == ref.get("name"), App.is_active.is_(True)).first() + if not a: + warnings.append(f"路由目标 Agent '{ref.get('name')}' 未匹配,已置空,请导入后手动配置") + entry["target_agent_id"] = str(a.id) if a else None + result.append(entry) + return result + + def _resolve_knowledge_retrieval(self, kr: Optional[dict], workspace_id: uuid.UUID, warnings: list) -> Optional[dict]: + if not kr: + return kr + resolved_kbs = [] + for kb in kr.get("knowledge_bases", []): + ref = kb.get("_ref") or ({"name": kb.get("kb_id")} if kb.get("kb_id") else None) + entry = {k: v for k, v in kb.items() if k != "_ref"} + resolved_id = self._resolve_kb(ref, workspace_id, warnings) + if resolved_id is None: + continue + entry["kb_id"] = resolved_id + resolved_kbs.append(entry) + return {k: v for k, v in kr.items() if k != "knowledge_bases"} | {"knowledge_bases": resolved_kbs} + + def _resolve_memory(self, memory: Optional[dict], workspace_id: uuid.UUID, warnings: list) -> Optional[dict]: + if not memory: + return memory + config_id = memory.get("memory_config_id") or memory.get("memory_content") + if not config_id: + return memory + try: + config_uuid = uuid.UUID(str(config_id)) + except (ValueError, AttributeError): + exists = self.db.query(MemoryConfigModel).filter( + MemoryConfigModel.config_id_old == int(config_id), + MemoryConfigModel.workspace_id == workspace_id + ).first() + if not exists: + warnings.append(f"记忆配置 '{config_id}' 未匹配,已置空,请导入后手动配置") + return {**memory, "memory_config_id": None, "enabled": False} + return memory + exists = self.db.query(MemoryConfigModel).filter( + MemoryConfigModel.config_id == config_uuid, + MemoryConfigModel.workspace_id == workspace_id + ).first() + if not exists: + warnings.append(f"记忆配置 '{config_id}' 未匹配,已置空,请导入后手动配置") + return {**memory, "memory_config_id": None, "enabled": False} + return memory + + def _resolve_tools(self, tools: list, tenant_id: uuid.UUID, warnings: list) -> list: + result = [] + for t in (tools or []): + ref = 
t.get("_ref") or ({"name": t.get("tool_id")} if t.get("tool_id") else None) + entry = {k: v for k, v in t.items() if k != "_ref"} + resolved_id = self._resolve_tool(ref, tenant_id, warnings) + if resolved_id is None: + continue + entry["tool_id"] = resolved_id + result.append(entry) + return result diff --git a/api/app/services/app_service.py b/api/app/services/app_service.py index 5a799937..68d255f8 100644 --- a/api/app/services/app_service.py +++ b/api/app/services/app_service.py @@ -7,12 +7,13 @@ - 应用发布和版本管理 - 应用回滚 """ +import copy import datetime import uuid from typing import Annotated, Any, Dict, List, Optional, Tuple from fastapi import Depends -from sqlalchemy import and_, func, or_, select +from sqlalchemy import and_, delete, func, or_, select from sqlalchemy.orm import Session from app.core.error_codes import BizCode @@ -33,7 +34,7 @@ from app.models import ( Workspace, ) from app.models.app_model import AppStatus, AppType -from app.repositories.app_repository import get_apps_by_id +from app.repositories.app_repository import get_apps_by_id, AppRepository from app.repositories.workflow_repository import WorkflowConfigRepository from app.schemas import app_schema from app.schemas.workflow_schema import WorkflowConfigUpdate @@ -59,6 +60,7 @@ class AppService: db: 数据库会话 """ self.db = db + self.app_repo = AppRepository(self.db) # ==================== 私有辅助方法 ==================== @@ -79,6 +81,8 @@ class AppService: ) raise BusinessException("应用不在指定工作空间中", BizCode.WORKSPACE_NO_ACCESS) + + def _check_app_accessible(self, app: App, workspace_id: Optional[uuid.UUID]) -> bool: """检查应用是否可访问(包括共享应用) @@ -101,13 +105,14 @@ class AppService: # 2. 
检查是否是共享给本工作空间的应用 stmt = select(AppShare).where( AppShare.source_app_id == app.id, - AppShare.target_workspace_id == workspace_id + AppShare.target_workspace_id == workspace_id, + AppShare.is_active.is_(True) ) share = self.db.scalars(stmt).first() return share is not None - def _validate_app_accessible(self, app: App, workspace_id: Optional[uuid.UUID]) -> None: + def _validate_app_accessible(self, app: App, workspace_id: Optional[uuid.UUID]) -> None: """验证应用是否可访问(包括共享应用,用于只读操作) Args: @@ -124,6 +129,77 @@ class AppService: ) raise BusinessException("应用不可访问", BizCode.WORKSPACE_NO_ACCESS) + def _unique_app_name(self, name: str, workspace_id: uuid.UUID, app_type: AppType) -> str: + """生成唯一应用名称,同时检查本空间自有应用和共享到本空间的应用""" + existing = {r[0] for r in self.db.query(App.name).filter( + App.workspace_id == workspace_id, + App.type == app_type, + App.is_active.is_(True) + ).all()} + shared_names = {r[0] for r in self.db.query(App.name).join( + AppShare, AppShare.source_app_id == App.id + ).filter( + AppShare.target_workspace_id == workspace_id, + App.type == app_type, + App.is_active.is_(True) + ).all()} + existing |= shared_names + if name not in existing: + return name + counter = 1 + while f"{name}({counter})" in existing: + counter += 1 + return f"{name}({counter})" + + def _get_share_permission(self, app: App, workspace_id: Optional[uuid.UUID]) -> Optional[str]: + """获取共享应用的权限 + + Returns: + None: 不是共享应用(是本工作空间的应用) + 'readonly': 只读共享 + 'editable': 可编辑共享 + """ + from app.models import AppShare + + if workspace_id is None or app.workspace_id == workspace_id: + return None # 本工作空间的应用,不是共享的 + + stmt = select(AppShare).where( + AppShare.source_app_id == app.id, + AppShare.target_workspace_id == workspace_id, + AppShare.is_active.is_(True) + ) + share = self.db.scalars(stmt).first() + return share.permission if share else None + + def _validate_app_writable(self, app: App, workspace_id: Optional[uuid.UUID]) -> None: + """Validate that the app config is writable. 
+ + - Own workspace app: allowed + - Shared app with editable permission: allowed + - Shared app with readonly permission: denied + + Raises: + BusinessException: when app is not writable + """ + if workspace_id is None: + return + + # Own workspace app, allow + if app.workspace_id == workspace_id: + return + + # Check share permission + permission = self._get_share_permission(app, workspace_id) + if permission == "editable": + return + + logger.warning( + "应用写操作被拒", + extra={"app_id": str(app.id), "workspace_id": str(workspace_id)} + ) + raise BusinessException("共享应用不可修改配置", BizCode.WORKSPACE_NO_ACCESS) + def _get_app_or_404(self, app_id: uuid.UUID) -> App: """获取应用或抛出404异常 @@ -314,6 +390,7 @@ class AppService: variables=storage_data.get("variables", []), tools=storage_data.get("tools", []), skills=storage_data.get("skills", {}), + features=storage_data.get("features", {}), is_active=True, created_at=now, updated_at=now, @@ -453,6 +530,42 @@ class AppService: Returns: app_schema.App: 应用 Schema """ + is_shared = app.workspace_id != current_workspace_id + share_permission = None + source_workspace_name = None + source_workspace_icon = None + source_app_version = None + source_app_is_active = None + share_id = None + shared_by = None + shared_by_name = None + shared_at = None + + if is_shared: + # 查询共享权限和来源工作空间名称 + from app.models import AppShare + stmt = select(AppShare).where( + AppShare.source_app_id == app.id, + AppShare.target_workspace_id == current_workspace_id, + AppShare.is_active.is_(True) + ) + share = self.db.scalars(stmt).first() + if share: + share_id = share.id + share_permission = share.permission + shared_by = share.shared_by + shared_at = share.created_at + if share.shared_user: + shared_by_name = share.shared_user.username + if share.source_workspace: + source_workspace_name = share.source_workspace.name + source_workspace_icon = share.source_workspace.icon + + # 版本号和生效状态 + if app.current_release: + source_app_version = 
app.current_release.version_name + source_app_is_active = app.is_active + app_dict = { "id": app.id, "workspace_id": app.workspace_id, @@ -467,7 +580,16 @@ class AppService: "tags": app.tags or [], "current_release_id": app.current_release_id, "is_active": app.is_active, - "is_shared": app.workspace_id != current_workspace_id, # 判断是否是共享应用 + "is_shared": is_shared, + "share_permission": share_permission, + "source_workspace_name": source_workspace_name, + "source_workspace_icon": source_workspace_icon, + "source_app_version": source_app_version, + "source_app_is_active": source_app_is_active, + "share_id": share_id, + "shared_by": shared_by, + "shared_by_name": shared_by_name, + "shared_at": shared_at, "created_at": app.created_at, "updated_at": app.updated_at } @@ -521,6 +643,9 @@ class AppService: "创建应用", extra={"app_name": data.name, "type": data.type, "workspace_id": str(workspace_id)} ) + apps = self.app_repo.get_apps_by_name(data.name, data.type, workspace_id) + if apps: + raise BusinessException(message="已存在同名应用", code=BizCode.RESOURCE_ALREADY_EXISTS) try: now = datetime.datetime.now() @@ -590,7 +715,7 @@ class AppService: logger.info("更新应用", extra={"app_id": str(app_id)}) app = self._get_app_or_404(app_id) - self._validate_workspace_access(app, workspace_id) + self._validate_app_writable(app, workspace_id) changed = False for field in ["name", "description", "icon", "icon_type", "visibility", "status", "tags"]: @@ -679,6 +804,7 @@ class AppService: # 确定新应用名称 if not new_name: new_name = f"{source_app.name} - 副本" + new_name = self._unique_app_name(new_name, target_workspace_id, source_app.type) now = datetime.datetime.now() @@ -702,6 +828,19 @@ class AppService: self.db.add(new_app) self.db.flush() + # 判断是否跨工作空间复制(共享应用复制到自己的工作空间) + is_cross_workspace = target_workspace_id != source_app.workspace_id + + # 跨工作空间时,获取目标工作空间的 tenant_id 用于判断模型配置是否可用 + target_tenant_id = None + available_model_ids: set = set() + available_kb_ids: set = set() + if is_cross_workspace: 
+ target_ws = self.db.get(Workspace, target_workspace_id) + if not target_ws: + raise ResourceNotFoundException("工作空间", str(target_workspace_id)) + target_tenant_id = target_ws.tenant_id + # 如果是 agent 类型,复制 AgentConfig if source_app.type == AppType.AGENT: source_config = self.db.query(AgentConfig).filter( @@ -709,16 +848,40 @@ class AppService: ).first() if source_config: + if is_cross_workspace: + # Batch-collect and preload all referenced resources + model_ids, kb_ids = self._collect_resource_ids_from_config( + source_config.default_model_config_id, + source_config.knowledge_retrieval, + source_config.tools + ) + available_model_ids, available_kb_ids = self._preload_cross_workspace_resources( + target_tenant_id, target_workspace_id, model_ids, kb_ids + ) + new_model_config_id = self._is_model_available( + source_config.default_model_config_id, available_model_ids + ) + new_knowledge_retrieval = self._clean_knowledge_retrieval( + source_config.knowledge_retrieval, available_kb_ids + ) + new_tools = self._clean_tools( + source_config.tools, available_kb_ids + ) + else: + new_model_config_id = source_config.default_model_config_id + new_knowledge_retrieval = copy.deepcopy(source_config.knowledge_retrieval) if source_config.knowledge_retrieval else None + new_tools = copy.deepcopy(source_config.tools) if source_config.tools else [] + new_config = AgentConfig( id=uuid.uuid4(), app_id=new_app.id, system_prompt=source_config.system_prompt, - default_model_config_id=source_config.default_model_config_id, - model_parameters=source_config.model_parameters.copy() if source_config.model_parameters else None, - knowledge_retrieval=source_config.knowledge_retrieval.copy() if source_config.knowledge_retrieval else None, - memory=source_config.memory.copy() if source_config.memory else None, - variables=source_config.variables.copy() if source_config.variables else [], - tools=source_config.tools.copy() if source_config.tools else [], + 
default_model_config_id=new_model_config_id, + model_parameters=copy.deepcopy(source_config.model_parameters) if source_config.model_parameters else None, + knowledge_retrieval=new_knowledge_retrieval, + memory=copy.deepcopy(source_config.memory) if source_config.memory else None, + variables=copy.deepcopy(source_config.variables) if source_config.variables else [], + tools=new_tools, is_active=True, created_at=now, updated_at=now, @@ -731,14 +894,29 @@ class AppService: ).first() if source_config: + if is_cross_workspace: + model_ids, kb_ids = self._collect_resource_ids_from_workflow_nodes( + source_config.nodes + ) + available_model_ids, available_kb_ids = self._preload_cross_workspace_resources( + target_tenant_id, target_workspace_id, model_ids, kb_ids + ) + new_nodes = self._clean_workflow_nodes_for_cross_workspace( + source_config.nodes or [], + available_model_ids, + available_kb_ids + ) + else: + new_nodes = copy.deepcopy(source_config.nodes) if source_config.nodes else [] + new_config = WorkflowConfig( id=uuid.uuid4(), app_id=new_app.id, - nodes=source_config.nodes.copy() if source_config.nodes else [], - edges=source_config.edges.copy() if source_config.edges else [], - variables=source_config.variables.copy() if source_config.variables else [], - execution_config=source_config.execution_config.copy() if source_config.execution_config else {}, - triggers=source_config.triggers.copy() if source_config.triggers else [], + nodes=new_nodes, + edges=copy.deepcopy(source_config.edges) if source_config.edges else [], + variables=copy.deepcopy(source_config.variables) if source_config.variables else [], + execution_config=copy.deepcopy(source_config.execution_config) if source_config.execution_config else {}, + triggers=copy.deepcopy(source_config.triggers) if source_config.triggers else [], is_active=True, created_at=now, updated_at=now, @@ -751,17 +929,28 @@ class AppService: ).first() if source_config: + if is_cross_workspace: + model_ids = 
{source_config.default_model_config_id} if source_config.default_model_config_id else set() + available_model_ids, _ = self._preload_cross_workspace_resources( + target_tenant_id, target_workspace_id, model_ids, set() + ) + new_model_config_id = self._is_model_available( + source_config.default_model_config_id, available_model_ids + ) + else: + new_model_config_id = source_config.default_model_config_id + new_config = MultiAgentConfig( id=uuid.uuid4(), app_id=new_app.id, - master_agent_id=source_config.master_agent_id, + master_agent_id=source_config.master_agent_id if not is_cross_workspace else None, master_agent_name=source_config.master_agent_name, - default_model_config_id=source_config.default_model_config_id, + default_model_config_id=new_model_config_id, model_parameters=source_config.model_parameters, orchestration_mode=source_config.orchestration_mode, - sub_agents=source_config.sub_agents.copy() if source_config.sub_agents else [], - routing_rules=source_config.routing_rules.copy() if source_config.routing_rules else None, - execution_config=source_config.execution_config.copy() if source_config.execution_config else {}, + sub_agents=copy.deepcopy(source_config.sub_agents) if source_config.sub_agents else [], + routing_rules=copy.deepcopy(source_config.routing_rules) if source_config.routing_rules else None, + execution_config=copy.deepcopy(source_config.execution_config) if source_config.execution_config else {}, aggregation_strategy=source_config.aggregation_strategy, is_active=True, created_at=now, @@ -791,6 +980,241 @@ class AppService: ) raise BusinessException(f"应用复制失败: {str(e)}", BizCode.INTERNAL_ERROR, cause=e) + def _preload_cross_workspace_resources( + self, + target_tenant_id: Optional[uuid.UUID], + target_workspace_id: uuid.UUID, + model_config_ids: set, + kb_ids: set + ) -> tuple: + """Batch-load model configs and knowledge bases to avoid N+1 queries. 
+ + Returns: + (available_model_ids, available_kb_ids): sets of IDs available in target workspace + """ + from app.models.models_model import ModelConfig as MC + from app.models.knowledge_model import Knowledge + from app.models.knowledgeshare_model import KnowledgeShare + + # Batch check model configs by tenant + available_model_ids: set = set() + if model_config_ids and target_tenant_id: + stmt = select(MC.id).where( + MC.id.in_(model_config_ids), + MC.tenant_id == target_tenant_id + ) + available_model_ids = set(self.db.scalars(stmt).all()) + + # Batch check knowledge bases + available_kb_ids: set = set() + if kb_ids: + kb_uuids = set() + for kid in kb_ids: + try: + kb_uuids.add(uuid.UUID(str(kid))) + except (ValueError, AttributeError): + pass + + if kb_uuids: + # KBs in target workspace + stmt = select(Knowledge.id).where( + Knowledge.id.in_(kb_uuids), + Knowledge.workspace_id == target_workspace_id + ) + available_kb_ids.update(self.db.scalars(stmt).all()) + + # KBs shared to target workspace + remaining = kb_uuids - available_kb_ids + if remaining: + stmt = select(KnowledgeShare.source_kb_id).where( + KnowledgeShare.source_kb_id.in_(remaining), + KnowledgeShare.target_workspace_id == target_workspace_id + ) + available_kb_ids.update(self.db.scalars(stmt).all()) + + return available_model_ids, available_kb_ids + + @staticmethod + def _collect_resource_ids_from_config( + model_config_id: Optional[uuid.UUID], + knowledge_retrieval: Optional[dict], + tools: Optional[list] + ) -> tuple: + """Extract all model config IDs and knowledge base IDs from an app config.""" + model_ids: set = set() + kb_ids: set = set() + + if model_config_id: + model_ids.add(model_config_id) + + if knowledge_retrieval and isinstance(knowledge_retrieval, dict): + if "kb_ids" in knowledge_retrieval: + for kid in knowledge_retrieval.get("kb_ids", []): + if kid: + kb_ids.add(str(kid)) + if knowledge_retrieval.get("knowledge_id"): + kb_ids.add(str(knowledge_retrieval["knowledge_id"])) + + if 
tools: + for tool in tools: + if isinstance(tool, dict): + kid = tool.get("knowledge_id") or tool.get("kb_id") + if kid: + kb_ids.add(str(kid)) + + return model_ids, kb_ids + + @staticmethod + def _collect_resource_ids_from_workflow_nodes(nodes: list) -> tuple: + """Extract all model config IDs and knowledge base IDs from workflow nodes.""" + model_ids: set = set() + kb_ids: set = set() + + for node in (nodes or []): + if not isinstance(node, dict): + continue + data = node.get("data", {}) + if not isinstance(data, dict): + continue + for key in ("model_config_id", "default_model_config_id"): + val = data.get(key) + if val: + try: + model_ids.add(uuid.UUID(str(val))) + except (ValueError, AttributeError): + pass + kr = data.get("knowledge_retrieval") + if isinstance(kr, dict): + for kid in kr.get("kb_ids", []): + if kid: + kb_ids.add(str(kid)) + if kr.get("knowledge_id"): + kb_ids.add(str(kr["knowledge_id"])) + if data.get("knowledge_id"): + kb_ids.add(str(data["knowledge_id"])) + for kid in data.get("kb_ids", []): + if kid: + kb_ids.add(str(kid)) + + return model_ids, kb_ids + + @staticmethod + def _is_model_available(model_config_id: Optional[uuid.UUID], available_model_ids: set) -> Optional[uuid.UUID]: + if not model_config_id: + return None + return model_config_id if model_config_id in available_model_ids else None + + @staticmethod + def _is_kb_available(kb_id: Optional[str], available_kb_ids: set) -> Optional[str]: + if not kb_id: + return None + try: + return kb_id if uuid.UUID(str(kb_id)) in available_kb_ids else None + except (ValueError, AttributeError): + return None + + def _clean_knowledge_retrieval( + self, + knowledge_retrieval: Optional[dict], + available_kb_ids: set + ) -> Optional[dict]: + """Clean knowledge retrieval config, keeping only available KBs.""" + if not knowledge_retrieval: + return None + + cleaned = copy.deepcopy(knowledge_retrieval) + + if "kb_ids" in cleaned and isinstance(cleaned["kb_ids"], list): + cleaned["kb_ids"] = [ + kid 
for kid in cleaned["kb_ids"] + if self._is_kb_available(kid, available_kb_ids) + ] + + if "knowledge_id" in cleaned: + cleaned["knowledge_id"] = self._is_kb_available( + cleaned.get("knowledge_id"), available_kb_ids + ) + + return cleaned + + def _clean_tools( + self, + tools: Optional[list], + available_kb_ids: set + ) -> list: + """Clean tools config, keeping built-in tools and tools with available KBs.""" + if not tools: + return [] + + cleaned = [] + for tool in tools: + if not isinstance(tool, dict): + cleaned.append(tool) + continue + + tool_type = tool.get("type", "") + if tool_type in ("builtin", "built_in", "system"): + cleaned.append(copy.deepcopy(tool)) + continue + + kb_id = tool.get("knowledge_id") or tool.get("kb_id") + if kb_id: + if self._is_kb_available(kb_id, available_kb_ids): + cleaned.append(copy.deepcopy(tool)) + continue + + cleaned.append(copy.deepcopy(tool)) + + return cleaned + + def _clean_workflow_nodes_for_cross_workspace( + self, + nodes: list, + available_model_ids: set, + available_kb_ids: set + ) -> list: + """Clean workflow nodes, using pre-loaded resource sets. 
Uses deepcopy to avoid mutating source.""" + if not nodes: + return [] + + cleaned = [] + for node in nodes: + if not isinstance(node, dict): + cleaned.append(node) + continue + + node_copy = copy.deepcopy(node) + data = node_copy.get("data") + if not isinstance(data, dict): + cleaned.append(node_copy) + continue + + for key in ("model_config_id", "default_model_config_id"): + if key in data and data[key]: + try: + mid = uuid.UUID(str(data[key])) + except (ValueError, AttributeError): + data[key] = None + continue + data[key] = str(mid) if mid in available_model_ids else None + + if "knowledge_retrieval" in data and data["knowledge_retrieval"]: + data["knowledge_retrieval"] = self._clean_knowledge_retrieval( + data["knowledge_retrieval"], available_kb_ids + ) + if "knowledge_id" in data: + data["knowledge_id"] = self._is_kb_available( + data.get("knowledge_id"), available_kb_ids + ) + if "kb_ids" in data and isinstance(data["kb_ids"], list): + data["kb_ids"] = [ + kid for kid in data["kb_ids"] + if self._is_kb_available(kid, available_kb_ids) + ] + + cleaned.append(node_copy) + return cleaned + def list_apps( self, *, @@ -800,6 +1224,7 @@ class AppService: status: Optional[str] = None, search: Optional[str] = None, include_shared: bool = True, + shared_only: bool = False, page: int = 1, pagesize: int = 10, ) -> Tuple[List[App], int]: @@ -845,18 +1270,24 @@ class AppService: if search: filters.append(func.lower(App.name).like(f"%{search.lower()}%")) - # 基础查询:本工作空间的应用 - if include_shared: - # 查询本工作空间的应用 + 分享给本工作空间的应用 - # 使用 OR 条件:workspace_id = current OR app_id IN (shared apps) + # shared_only implies include_shared; enforce to avoid confusing API usage + if shared_only: + include_shared = True - # 获取分享给本工作空间的应用ID列表 + # 基础查询:本工作空间的应用 + if shared_only: + # 只返回共享给本工作空间的应用,不含自有应用 shared_app_ids_stmt = ( select(AppShare.source_app_id) - .where(AppShare.target_workspace_id == workspace_id) + .where(AppShare.target_workspace_id == workspace_id, 
AppShare.is_active.is_(True)) + ) + stmt = select(App).where(App.id.in_(shared_app_ids_stmt)) + elif include_shared: + # 查询本工作空间的应用 + 分享给本工作空间的应用 + shared_app_ids_stmt = ( + select(AppShare.source_app_id) + .where(AppShare.target_workspace_id == workspace_id, AppShare.is_active.is_(True)) ) - - # 构建主查询:本工作空间的应用 OR 分享的应用 stmt = select(App).where( or_( App.workspace_id == workspace_id, @@ -948,7 +1379,7 @@ class AppService: if app.type != "agent": raise BusinessException("只有 Agent 类型应用支持 Agent 配置", BizCode.APP_TYPE_NOT_SUPPORTED) - self._validate_workspace_access(app, workspace_id) + self._validate_app_writable(app, workspace_id) stmt = select(AgentConfig).where(AgentConfig.app_id == app_id, AgentConfig.is_active.is_(True)).order_by( AgentConfig.updated_at.desc()) @@ -985,6 +1416,7 @@ class AppService: # if data.tools is not None: agent_cfg.tools = storage_data.get("tools", []) agent_cfg.skills = storage_data.get("skills", {}) + agent_cfg.features = storage_data.get("features", {}) agent_cfg.updated_at = now @@ -994,6 +1426,50 @@ class AppService: logger.info("Agent 配置更新成功", extra={"app_id": str(app_id)}) return agent_cfg + def _agent_config_from_release(self, release: "AppRelease") -> "AgentConfig": + """从发布版本快照重建 AgentConfig 对象(不入库,仅用于运行)""" + cfg = release.config or {} + now = release.created_at or datetime.datetime.now() + agent_cfg = AgentConfig( + id=uuid.uuid4(), + app_id=release.app_id, + system_prompt=cfg.get("system_prompt", ""), + default_model_config_id=release.default_model_config_id, + model_parameters=cfg.get("model_parameters"), + knowledge_retrieval=cfg.get("knowledge_retrieval"), + memory=cfg.get("memory", {}), + variables=cfg.get("variables", []), + tools=cfg.get("tools", []), + skills=cfg.get("skills", {}), + features=cfg.get("features", {}), + is_active=True, + created_at=now, + updated_at=now, + ) + return agent_cfg + + def _workflow_config_from_release(self, release: "AppRelease") -> "WorkflowConfig": + """从发布版本快照重建 WorkflowConfig 
对象(不入库,仅用于运行)""" + cfg = release.config or {} + now = release.created_at or datetime.datetime.now() + from app.models.workflow_model import WorkflowConfig as WorkflowConfigModel + # 查出源应用真实的 WorkflowConfig id,供 workflow_executions 外键使用 + real_config = WorkflowConfigRepository(self.db).get_by_app_id(release.app_id) + real_id = real_config.id if real_config else uuid.uuid4() + wf_cfg = WorkflowConfigModel( + id=real_id, + app_id=release.app_id, + nodes=cfg.get("nodes", []), + edges=cfg.get("edges", []), + variables=cfg.get("variables", []), + execution_config=cfg.get("execution_config", {}), + triggers=cfg.get("triggers", []), + is_active=True, + created_at=now, + updated_at=now, + ) + return wf_cfg + def get_agent_config( self, *, @@ -1025,6 +1501,15 @@ class AppService: # 只读操作,允许访问共享应用 self._validate_app_accessible(app, workspace_id) + # 共享应用:返回最新发布版本的配置快照,而非草稿 + if workspace_id and app.workspace_id != workspace_id: + if not app.current_release_id: + raise BusinessException("该应用尚未发布,无法使用", BizCode.AGENT_CONFIG_MISSING) + release = self.db.get(AppRelease, app.current_release_id) + if not release: + raise BusinessException("发布版本不存在", BizCode.AGENT_CONFIG_MISSING) + return self._agent_config_from_release(release) + stmt = select(AgentConfig).where( AgentConfig.app_id == app_id, AgentConfig.is_active.is_(True) @@ -1085,6 +1570,7 @@ class AppService: variables=[], tools=[], skills=[], + features={}, is_active=True, created_at=now, updated_at=now, @@ -1122,6 +1608,16 @@ class AppService: # 只读操作,允许访问共享应用 self._validate_app_accessible(app, workspace_id) + + # 共享应用:返回最新发布版本的配置快照,而非草稿 + if workspace_id and app.workspace_id != workspace_id: + if not app.current_release_id: + raise BusinessException("该应用尚未发布,无法使用", BizCode.CONFIG_MISSING) + release = self.db.get(AppRelease, app.current_release_id) + if not release: + raise BusinessException("发布版本不存在", BizCode.CONFIG_MISSING) + return self._workflow_config_from_release(release) + repo = WorkflowConfigRepository(self.db) config 
= repo.get_by_app_id(app_id) if config: @@ -1159,7 +1655,7 @@ class AppService: if app.type != AppType.WORKFLOW: raise BusinessException("只有 Workflow 类型应用支持 Workflow 配置", BizCode.APP_TYPE_NOT_SUPPORTED) - self._validate_workspace_access(app, workspace_id) + self._validate_app_writable(app, workspace_id) # 获取现有配置 repo = WorkflowConfigRepository(self.db) @@ -1176,6 +1672,7 @@ class AppService: variables=[var.model_dump() for var in data.variables] if data.variables else [], execution_config=data.execution_config.model_dump() if data.execution_config else {}, triggers=[trigger.model_dump() for trigger in data.triggers] if data.triggers else [], + features=data.features or {}, is_active=True, created_at=now, updated_at=now @@ -1189,6 +1686,7 @@ class AppService: workflow_cfg.variables = [var.model_dump() for var in data.variables] if data.variables else [] workflow_cfg.execution_config = data.execution_config.model_dump() if data.execution_config else {} workflow_cfg.triggers = [trigger.model_dump() for trigger in data.triggers] if data.triggers else [] + workflow_cfg.features = data.features or {} workflow_cfg.updated_at = now self.db.commit() @@ -1301,15 +1799,15 @@ class AppService: return config.config_id - def _update_endusers_memory_config( + def _update_endusers_memory_config_by_workspace( self, - app_id: uuid.UUID, + workspace_id: uuid.UUID, memory_config_id: uuid.UUID ) -> int: """批量更新应用下所有终端用户的 memory_config_id Args: - app_id: 应用ID + workspace_id: 工作空间ID memory_config_id: 新的记忆配置ID Returns: @@ -1318,8 +1816,8 @@ class AppService: from app.repositories.end_user_repository import EndUserRepository repo = EndUserRepository(self.db) - updated_count = repo.batch_update_memory_config_id( - app_id=app_id, + updated_count = repo.batch_update_memory_config_id_by_workspace( + workspace_id=workspace_id, memory_config_id=memory_config_id ) @@ -1368,6 +1866,15 @@ class AppService: if not agent_cfg: raise BusinessException("Agent 应用缺少配置,无法发布", BizCode.AGENT_CONFIG_MISSING) 
+ miss_params = [] + if agent_cfg.default_model_config_id is None: + miss_params.append("model config") + + if agent_cfg.memory.get("enabled") and not agent_cfg.memory.get("memory_config_id"): + miss_params.append("memory config") + if miss_params: + raise BusinessException(f"{', '.join(miss_params)} is required") + config = { "system_prompt": agent_cfg.system_prompt, "model_parameters": model_parameters_to_dict(agent_cfg.model_parameters), @@ -1376,6 +1883,7 @@ class AppService: "variables": agent_cfg.variables or [], "tools": agent_cfg.tools or [], "skills": agent_cfg.skills or {}, + "features": agent_cfg.features or {} } # config = AgentConfigConverter.from_storage_format(agent_cfg) default_model_config_id = agent_cfg.default_model_config_id @@ -1432,7 +1940,8 @@ class AppService: "edges": workflow_cfg.edges, "variables": workflow_cfg.variables, "execution_config": workflow_cfg.execution_config, - "triggers": workflow_cfg.triggers + "triggers": workflow_cfg.triggers, + "features": workflow_cfg.features or {} } is_valid, errors = WorkflowValidator.validate_for_publish(config) @@ -1481,11 +1990,15 @@ class AppService: ) if memory_config_id: - updated_count = self._update_endusers_memory_config(app_id, memory_config_id) - logger.info( - f"发布时更新终端用户记忆配置: app_id={app_id}, " - f"memory_config_id={memory_config_id}, updated_count={updated_count}" - ) + app = self.db.query(App).filter(App.id == app_id).first() + if app: + updated_count = self._update_endusers_memory_config_by_workspace( + app.workspace_id, memory_config_id + ) + logger.info( + f"发布时更新终端用户记忆配置: app_id={app_id}, workspace_id={app.workspace_id}, " + f"memory_config_id={memory_config_id}, updated_count={updated_count}" + ) # 更新当前发布版本指针 app.current_release_id = release.id @@ -1615,7 +2128,8 @@ class AppService: ) if memory_config_id: - updated_count = self._update_endusers_memory_config(app_id, memory_config_id) + + updated_count = self._update_endusers_memory_config_by_workspace(app.workspace_id, 
memory_config_id) logger.info( f"回滚时更新终端用户记忆配置: app_id={app_id}, version={version}, " f"memory_config_id={memory_config_id}, updated_count={updated_count}" @@ -1641,7 +2155,8 @@ class AppService: app_id: uuid.UUID, target_workspace_ids: List[uuid.UUID], user_id: uuid.UUID, - workspace_id: Optional[uuid.UUID] = None + workspace_id: Optional[uuid.UUID] = None, + permission: str = "readonly" ) -> list[AppShare]: """分享应用到其他工作空间 @@ -1672,6 +2187,14 @@ class AppService: app = self._get_app_or_404(app_id) self._validate_workspace_access(app, workspace_id) + # 仅允许 agent 和 workflow 类型共享,multi_agent 不支持 + from app.models.app_model import AppType + if app.type == AppType.MULTI_AGENT: + raise BusinessException( + "集群 Agent 不支持共享应用功能", + BizCode.INVALID_PARAMETER + ) + # 2. 验证目标工作空间 for target_ws_id in target_workspace_ids: target_ws = self.db.get(Workspace, target_ws_id) @@ -1693,7 +2216,8 @@ class AppService: # 检查是否已经分享过 stmt = select(AppShare).where( AppShare.source_app_id == app_id, - AppShare.target_workspace_id == target_ws_id + AppShare.target_workspace_id == target_ws_id, + AppShare.is_active.is_(True) ) existing_share = self.db.scalars(stmt).first() @@ -1712,6 +2236,7 @@ class AppService: source_workspace_id=app.workspace_id, target_workspace_id=target_ws_id, shared_by=user_id, + permission=permission, created_at=now, updated_at=now ) @@ -1771,7 +2296,8 @@ class AppService: # 2. 查找分享记录 stmt = select(AppShare).where( AppShare.source_app_id == app_id, - AppShare.target_workspace_id == target_workspace_id + AppShare.target_workspace_id == target_workspace_id, + AppShare.is_active.is_(True) ) share = self.db.scalars(stmt).first() @@ -1785,8 +2311,8 @@ class AppService: f"app_id={app_id}, target_workspace_id={target_workspace_id}" ) - # 3. 删除分享记录 - self.db.delete(share) + # 3. 
逻辑删除分享记录 + share.is_active = False self.db.commit() logger.info( @@ -1794,6 +2320,48 @@ class AppService: extra={"app_id": str(app_id), "target_workspace_id": str(target_workspace_id)} ) + def unshare_all_apps_to_workspace( + self, + *, + target_workspace_id: uuid.UUID, + workspace_id: uuid.UUID + ) -> int: + """Cancel all app shares from current workspace to a target workspace. + + Args: + target_workspace_id: Target workspace ID to cancel all shares to + workspace_id: Current workspace ID (source) + + Returns: + Number of share records deleted + """ + from app.models import AppShare + + logger.info( + "取消对目标工作空间的所有应用分享", + extra={"target_workspace_id": str(target_workspace_id), "workspace_id": str(workspace_id)} + ) + + # Query active records first for reliable count + id_stmt = select(AppShare.id).where( + AppShare.source_workspace_id == workspace_id, + AppShare.target_workspace_id == target_workspace_id, + AppShare.is_active.is_(True) + ) + ids = list(self.db.scalars(id_stmt).all()) + count = len(ids) + + if ids: + # Soft delete: mark as inactive + from sqlalchemy import update as sa_update + self.db.execute( + sa_update(AppShare).where(AppShare.id.in_(ids)).values(is_active=False) + ) + self.db.commit() + + logger.info("已取消分享记录数", extra={"count": count}) + return count + def list_app_shares( self, *, @@ -1823,7 +2391,8 @@ class AppService: # 查询分享记录 stmt = select(AppShare).where( - AppShare.source_app_id == app_id + AppShare.source_app_id == app_id, + AppShare.is_active.is_(True) ).order_by(AppShare.created_at.desc()) shares = list(self.db.scalars(stmt).all()) @@ -1835,6 +2404,166 @@ class AppService: return shares + def remove_shared_app( + self, + *, + app_id: uuid.UUID, + workspace_id: uuid.UUID + ) -> None: + """被共享者从自己的工作空间移除共享应用 + + 只删除共享记录,不影响源应用。 + + Args: + app_id: 应用ID + workspace_id: 当前工作空间ID(被共享的目标工作空间) + + Raises: + ResourceNotFoundException: 当共享记录不存在时 + """ + from app.models import AppShare + + logger.info( + "移除共享应用", + extra={"app_id": 
str(app_id), "workspace_id": str(workspace_id)} + ) + + stmt = select(AppShare).where( + AppShare.source_app_id == app_id, + AppShare.target_workspace_id == workspace_id, + AppShare.is_active.is_(True) + ) + share = self.db.scalars(stmt).first() + + if not share: + raise ResourceNotFoundException( + "共享记录", + f"app_id={app_id}, workspace_id={workspace_id}" + ) + + # Soft delete + share.is_active = False + self.db.commit() + + logger.info( + "共享应用已移除", + extra={"app_id": str(app_id), "workspace_id": str(workspace_id)} + ) + + def remove_all_shared_apps_from_workspace( + self, + *, + source_workspace_id: uuid.UUID, + workspace_id: uuid.UUID + ) -> int: + """Remove all shared apps from a specific source workspace. + + Args: + source_workspace_id: The workspace that shared the apps + workspace_id: Current workspace ID (recipient) + + Returns: + Number of share records deleted + """ + from app.models import AppShare + + logger.info( + "批量移除来源工作空间的共享应用", + extra={"source_workspace_id": str(source_workspace_id), "workspace_id": str(workspace_id)} + ) + + # Query active records for reliable count, then soft delete + id_stmt = select(AppShare.id).where( + AppShare.source_workspace_id == source_workspace_id, + AppShare.target_workspace_id == workspace_id, + AppShare.is_active.is_(True) + ) + ids = list(self.db.scalars(id_stmt).all()) + count = len(ids) + + if ids: + from sqlalchemy import update as sa_update + self.db.execute( + sa_update(AppShare).where(AppShare.id.in_(ids)).values(is_active=False) + ) + self.db.commit() + + logger.info("已移除共享记录数", extra={"count": count}) + return count + + def list_my_shared_out( + self, + *, + workspace_id: uuid.UUID + ) -> List[AppShare]: + """列出本工作空间主动分享出去的所有记录(我的共享) + + Returns: + List[AppShare]: 分享记录列表,含源应用信息 + """ + from app.models import AppShare + + stmt = ( + select(AppShare) + .where( + AppShare.source_workspace_id == workspace_id, + AppShare.is_active.is_(True) + ) + .order_by(AppShare.created_at.desc()) + ) + return 
list(self.db.scalars(stmt).all()) + def update_share_permission( + self, + *, + app_id: uuid.UUID, + target_workspace_id: uuid.UUID, + permission: str, + workspace_id: Optional[uuid.UUID] = None + ) -> "AppShare": + """更新共享权限(readonly <-> editable) + + Args: + app_id: 应用ID + target_workspace_id: 目标工作空间ID + permission: 新权限值 readonly | editable + workspace_id: 当前工作空间ID(用于权限验证) + + Returns: + AppShare: 更新后的共享记录 + """ + from app.models import AppShare + + if permission not in ("readonly", "editable"): + raise BusinessException("权限值无效,只允许 readonly 或 editable", BizCode.INVALID_PARAMETER) + + app = self._get_app_or_404(app_id) + self._validate_workspace_access(app, workspace_id) + + stmt = select(AppShare).where( + AppShare.source_app_id == app_id, + AppShare.target_workspace_id == target_workspace_id, + AppShare.is_active.is_(True) + ) + share = self.db.scalars(stmt).first() + + if not share: + raise ResourceNotFoundException( + "共享记录", + f"app_id={app_id}, target_workspace_id={target_workspace_id}" + ) + + share.permission = permission + share.updated_at = datetime.datetime.now() + self.db.commit() + self.db.refresh(share) + + logger.info( + "共享权限已更新", + extra={"app_id": str(app_id), "target_workspace_id": str(target_workspace_id), "permission": permission} + ) + return share + + # ==================== 向后兼容的函数接口 ==================== # 保留函数接口以兼容现有代码,但内部使用服务类 @@ -1929,6 +2658,7 @@ def list_apps( status: Optional[str] = None, search: Optional[str] = None, include_shared: bool = True, + shared_only: bool = False, page: int = 1, pagesize: int = 10, ) -> Tuple[List[App], int]: @@ -1941,6 +2671,7 @@ def list_apps( status=status, search=search, include_shared=include_shared, + shared_only=shared_only, page=page, pagesize=pagesize, ) diff --git a/api/app/services/audio_transcription_service.py b/api/app/services/audio_transcription_service.py index 11d13f38..8b94bbe2 100644 --- a/api/app/services/audio_transcription_service.py +++ 
b/api/app/services/audio_transcription_service.py @@ -75,7 +75,7 @@ class AudioTranscriptionService: try: # 下载音频文件 async with httpx.AsyncClient(timeout=60.0) as client: - audio_response = await client.get(audio_url) + audio_response = await client.get(audio_url, follow_redirects=True) audio_response.raise_for_status() audio_data = audio_response.content diff --git a/api/app/services/auth_service.py b/api/app/services/auth_service.py index 03e1ebc0..436a5c96 100644 --- a/api/app/services/auth_service.py +++ b/api/app/services/auth_service.py @@ -80,6 +80,7 @@ def authenticate_user_or_raise(db: Session, email: str, password: str) -> User: from app.core.exceptions import BusinessException from app.core.error_codes import BizCode from app.core.logging_config import get_auth_logger + from app.i18n.service import t logger = get_auth_logger() @@ -87,17 +88,17 @@ def authenticate_user_or_raise(db: Session, email: str, password: str) -> User: user = user_repository.get_user_by_email(db, email=email) if not user: logger.warning(f"用户不存在: {email}") - raise BusinessException("用户不存在", code=BizCode.USER_NOT_FOUND) + raise BusinessException(t("auth.user.not_found"), code=BizCode.USER_NOT_FOUND) # 检查用户状态 if not user.is_active: logger.warning(f"用户未激活: {email}") - raise BusinessException("用户未激活", code=BizCode.USER_NOT_FOUND) + raise BusinessException(t("auth.login.account_disabled"), code=BizCode.USER_NOT_FOUND) # 验证密码 if not verify_password(password, user.hashed_password): logger.warning(f"密码错误: {email}") - raise BusinessException("密码错误", code=BizCode.PASSWORD_ERROR) + raise BusinessException(t("auth.password.incorrect"), code=BizCode.PASSWORD_ERROR) logger.info(f"用户认证成功: {email}") return user @@ -254,6 +255,8 @@ def decode_access_token(token: str) -> dict: Raises: BusinessException: token 无效 """ + from app.i18n.service import t + try: payload = jwt.decode(token, TOKEN_SECRET_KEY, algorithms=[TOKEN_ALGORITHM]) return { @@ -261,4 +264,4 @@ def decode_access_token(token: str) -> dict: 
"share_token": payload["share_token"] } except jwt.InvalidTokenError: - raise BusinessException("无效的访问 token", BizCode.INVALID_TOKEN) \ No newline at end of file + raise BusinessException(t("auth.token.invalid"), BizCode.INVALID_TOKEN) \ No newline at end of file diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 5026bf27..ba41d323 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -18,14 +18,16 @@ from sqlalchemy.orm import Session from app.celery_app import celery_app from app.core.agent.agent_middleware import AgentMiddleware from app.core.agent.langchain_agent import LangChainAgent +from app.core.config import settings from app.core.error_codes import BizCode from app.core.exceptions import BusinessException from app.core.logging_config import get_business_logger from app.core.rag.nlp.search import knowledge_retrieval from app.db import get_db_context -from app.models import AgentConfig, ModelConfig +from app.models import AgentConfig, ModelConfig, ModelType from app.repositories.tool_repository import ToolRepository from app.schemas.app_schema import FileInput +from app.schemas.model_schema import ModelInfo from app.schemas.prompt_schema import PromptMessageRole, render_prompt_message from app.services import task_service from app.services.conversation_service import ConversationService @@ -35,6 +37,7 @@ from app.services.model_parameter_merger import ModelParameterMerger from app.services.model_service import ModelApiKeyService from app.services.multimodal_service import MultimodalService from app.services.tool_service import ToolService +from app.schemas import FileType logger = get_business_logger() @@ -97,7 +100,7 @@ def create_long_term_memory_tool( **重要:如果用户的问题可以直接回答,不要调用此工具。只在确实需要历史信息时才使用。** Args: - question: 需要检索的问题(保持原问题的核心语义,使用清晰的关键词) + question: 需要检索的问题(保持原问题的核心语义,使用清晰的关键词,第三人称描述的偏好、行为通常指用户本人,比如(我,本人,在下,自己,咱,鄙人,吴,余)通指用户) Returns: 检索到的历史记忆内容 @@ -261,9 +264,12 
@@ class AgentRunService: def load_tools_config(self, tools_config, web_search, tenant_id) -> list: """加载工具配置""" - if not tools_config: - return [] tools = [] + if web_search: + search_tool = create_web_search_tool({}) + tools.append(search_tool) + if not tools_config: + return tools tool_service = ToolService(self.db) if tools_config and isinstance(tools_config, list): @@ -272,24 +278,15 @@ class AgentRunService: # 根据工具名称查找工具实例 tool_instance = tool_service.get_tool_instance(tool_config.get("tool_id", ""), tenant_id) if tool_instance: - if tool_instance.name == "baidu_search_tool" and not web_search: - continue # 转换为LangChain工具 langchain_tool = tool_instance.to_langchain_tool(tool_config.get("operation", None)) tools.append(langchain_tool) - elif tools_config and isinstance(tools_config, dict): - web_search_choice = tools_config.get("web_search", {}) - web_search_enable = web_search_choice.get("enabled", False) - if web_search and web_search_enable: - search_tool = create_web_search_tool({}) - tools.append(search_tool) - - logger.debug( - "已添加网络搜索工具", - extra={ - "tool_count": len(tools) - } - ) + logger.debug( + "已添加网络搜索工具", + extra={ + "tool_count": len(tools) + } + ) return tools def load_skill_config( @@ -372,6 +369,86 @@ class AgentRunService: ) return tools, bool(memory_config.get("enabled")) + @staticmethod + def _validate_file_upload( + features_config: Dict[str, Any], + files: Optional[List[FileInput]] + ) -> None: + """校验上传文件是否符合 file_upload 配置""" + if not files or not features_config: + return + fu = features_config.get("file_upload", {}) + if not (isinstance(fu, dict) and fu.get("enabled")): + raise BusinessException("该应用未开启文件上传功能", BizCode.BAD_REQUEST) + max_count = fu.get("max_file_count", 5) + if len(files) > max_count: + raise BusinessException(f"文件数量超过限制(最多 {max_count} 个)", BizCode.BAD_REQUEST) + + # 校验传输方式 + allowed_methods = fu.get("allowed_transfer_methods", ["local_file", "remote_url"]) + for f in files: + if f.transfer_method.value not in 
allowed_methods: + raise BusinessException( + f"不支持的文件传输方式:{f.transfer_method.value},允许的方式:{', '.join(allowed_methods)}", + BizCode.BAD_REQUEST + ) + + # 各类型对应的开关和大小限制配置键 + type_cfg = { + "image": ("image_enabled", "image_max_size_mb", 20, "图片"), + "audio": ("audio_enabled", "audio_max_size_mb", 50, "音频"), + "document": ("document_enabled", "document_max_size_mb", 100, "文档"), + "video": ("video_enabled", "video_max_size_mb", 500, "视频"), + } + + for f in files: + ftype = str(f.type) # 如 "image", "audio", "document", "video" + cfg = type_cfg.get(ftype) + if cfg is None: + continue + enabled_key, size_key, default_max_mb, label = cfg + + # 校验类型开关 + if not fu.get(enabled_key): + raise BusinessException(f"该应用未开启{label}文件上传", BizCode.BAD_REQUEST) + + # 校验文件大小(仅当内容已加载时) + content = f.get_content() + if content is not None: + max_mb = fu.get(size_key, default_max_mb) + size_mb = len(content) / (1024 * 1024) + if size_mb > max_mb: + raise BusinessException( + f"{label}文件大小超过限制(最大 {max_mb}MB,当前 {size_mb:.1f}MB)", + BizCode.BAD_REQUEST + ) + + @staticmethod + def _inject_opening_statement( + features_config: Dict[str, Any], + system_prompt: str, + is_new_conversation: bool + ) -> str: + """首轮对话时将开场白注入 system_prompt""" + if not is_new_conversation: + return system_prompt + opening = features_config.get("opening_statement", {}) + if not (isinstance(opening, dict) and opening.get("enabled") and opening.get("statement")): + return system_prompt + statement = opening["statement"] + return f"{system_prompt}\n\n[对话开场白]\n{statement}" + + @staticmethod + def _filter_citations( + features_config: Dict[str, Any], + citations: List[Any] + ) -> List[Any]: + """根据 citation 开关决定是否返回引用来源""" + citation_cfg = features_config.get("citation", {}) + if isinstance(citation_cfg, dict) and citation_cfg.get("enabled"): + return citations + return [] + async def run( self, *, @@ -414,6 +491,15 @@ class AgentRunService: skills_config: dict | None = agent_config.skills knowledge_retrieval_config: dict | 
None = agent_config.knowledge_retrieval memory_config: dict | None = agent_config.memory + features_config: dict = agent_config.features or {} + + # 从 features 中读取功能开关(优先级高于参数默认值) + web_search_feature = features_config.get("web_search", {}) + if not isinstance(web_search_feature, dict) or not web_search_feature.get("enabled"): + web_search = False + + # file_upload 校验 + self._validate_file_upload(features_config, files) try: # 1. 获取 API Key 配置 @@ -448,6 +534,10 @@ class AgentRunService: # 3. 处理系统提示词(支持变量替换) system_prompt = system_prompt.get_text_content() or "你是一个专业的AI助手" + # opening_statement:首轮对话注入开场白 + is_new_conversation = not conversation_id + system_prompt = self._inject_opening_statement(features_config, system_prompt, is_new_conversation) + # 4. 准备工具列表 tools = [] @@ -490,20 +580,27 @@ class AgentRunService: ) # 6. 加载历史消息 - history = [] - if memory_config and memory_config.get("enabled"): - history = await self._load_conversation_history( - conversation_id=conversation_id, - max_history=agent_config.memory.get("max_history", 10) - ) + history = await self._load_conversation_history( + conversation_id=conversation_id, + max_history=10 + ) # 6. 处理多模态文件 processed_files = None if files: # 获取 provider 信息 + model_info = ModelInfo( + model_name=api_key_config["model_name"], + provider=api_key_config["provider"], + api_key=api_key_config["api_key"], + api_base=api_key_config["api_base"], + capability=api_key_config["capability"], + is_omni=api_key_config["is_omni"], + model_type=ModelType.LLM + ) provider = api_key_config.get("provider", "openai") - multimodal_service = MultimodalService(self.db, provider=provider, is_omni=api_key_config.get("is_omni", False)) - processed_files = await multimodal_service.process_files(files) + multimodal_service = MultimodalService(self.db, model_info) + processed_files = await multimodal_service.process_files(user_id, files) logger.info(f"处理了 {len(processed_files)} 个文件,provider={provider}") # 7. 
知识库检索 @@ -540,8 +637,14 @@ class AgentRunService: ModelApiKeyService.record_api_key_usage(self.db, api_key_config.get("api_key_id")) - # 9. 保存会话消息 - if not sub_agent and memory_config and memory_config.get("enabled"): + # 9. 生成 TTS audio_url(在保存消息前生成,以便一并存入 meta_data) + audio_url = await self._generate_tts( + features_config, result["content"], api_key_config, + tenant_id=tenant_id, workspace_id=workspace_id + ) if not sub_agent else None + + # 10. 保存会话消息 + if not sub_agent: await self._save_conversation_message( conversation_id=conversation_id, user_message=message, @@ -554,7 +657,9 @@ class AgentRunService: "completion_tokens": 0, "total_tokens": 0 }) - } + }, + files=files, + audio_url=audio_url ) response = { @@ -565,7 +670,12 @@ class AgentRunService: "completion_tokens": 0, "total_tokens": 0 }), - "elapsed_time": elapsed_time + "elapsed_time": elapsed_time, + "suggested_questions": await self._generate_suggested_questions( + features_config, result["content"], api_key_config, effective_params + ) if not sub_agent else [], + "citations": self._filter_citations(features_config, result.get("citations", [])), + "audio_url": audio_url, } logger.info( @@ -620,6 +730,15 @@ class AgentRunService: skills_config: dict | None = agent_config.skills knowledge_retrieval_config: dict | None = agent_config.knowledge_retrieval memory_config: dict | None = agent_config.memory + features_config: dict = agent_config.features or {} + + # 从 features 中读取功能开关 + web_search_feature = features_config.get("web_search", {}) + if not (isinstance(web_search_feature, dict) and web_search_feature.get("enabled")): + web_search = False + + # file_upload 校验 + self._validate_file_upload(features_config, files) start_time = time.time() @@ -649,6 +768,10 @@ class AgentRunService: # 3. 
处理系统提示词(支持变量替换) system_prompt = system_prompt.get_text_content() or "你是一个专业的AI助手" + # opening_statement:首轮对话注入开场白 + is_new_conversation = not conversation_id + system_prompt = self._inject_opening_statement(features_config, system_prompt, is_new_conversation) + # 4. 准备工具列表 tools = [] @@ -688,24 +811,32 @@ class AgentRunService: conversation_id=conversation_id, app_id=agent_config.app_id, workspace_id=workspace_id, - user_id=user_id + user_id=user_id, + sub_agent=sub_agent ) # 6. 加载历史消息 - history = [] - if memory_config and memory_config.get("enabled"): - history = await self._load_conversation_history( - conversation_id=conversation_id, - max_history=memory_config.get("max_history", 10) - ) + history = await self._load_conversation_history( + conversation_id=conversation_id, + max_history=memory_config.get("max_history", 10) + ) # 6. 处理多模态文件 processed_files = None if files: # 获取 provider 信息 + model_info = ModelInfo( + model_name=api_key_config["model_name"], + provider=api_key_config["provider"], + api_key=api_key_config["api_key"], + api_base=api_key_config["api_base"], + capability=api_key_config["capability"], + is_omni=api_key_config["is_omni"], + model_type=ModelType.LLM + ) provider = api_key_config.get("provider", "openai") - multimodal_service = MultimodalService(self.db, provider=provider, is_omni=api_key_config.get("is_omni", False)) - processed_files = await multimodal_service.process_files(files) + multimodal_service = MultimodalService(self.db, model_info) + processed_files = await multimodal_service.process_files(user_id, files) logger.info(f"处理了 {len(processed_files)} 个文件,provider={provider}") # 7. 知识库检索 @@ -721,9 +852,18 @@ class AgentRunService: # 兼容新旧字段名:优先使用 memory_config_id,回退到 memory_content config_id = memory_config_.get("memory_config_id") or memory_config_.get("memory_content", None) - # 9. 流式调用 Agent(支持多模态) + # 9. 
流式调用 Agent(支持多模态),同时并行启动 TTS full_content = "" total_tokens = 0 + + # 启动流式 TTS(文本边输出边合成) + text_queue: asyncio.Queue = asyncio.Queue() + stream_audio_url, tts_task = await self._generate_tts_streaming( + features_config, api_key_config, + text_queue=text_queue, + tenant_id=tenant_id, workspace_id=workspace_id + ) if not sub_agent else (None, None) + async for chunk in agent.chat_stream( message=message, history=history, @@ -733,28 +873,28 @@ class AgentRunService: storage_type=storage_type, user_rag_memory_id=user_rag_memory_id, memory_flag=memory_flag, - files=processed_files # 传递处理后的文件 + files=processed_files ): if isinstance(chunk, int): total_tokens = chunk else: full_content += chunk - # 发送消息块事件 - yield self._format_sse_event("message", { - "content": chunk - }) + yield self._format_sse_event("message", {"content": chunk}) + if tts_task is not None: + await text_queue.put(chunk) + + # 文本结束,通知 TTS + if tts_task is not None: + await text_queue.put(None) elapsed_time = time.time() - start_time - ModelApiKeyService.record_api_key_usage(self.db, api_key_config.get("api_key_id")) if sub_agent: - yield self._format_sse_event("sub_usage", { - "total_tokens": total_tokens - }) + yield self._format_sse_event("sub_usage", {"total_tokens": total_tokens}) - # 10. 保存会话消息 - if not sub_agent and memory_config and memory_config.get("enabled"): + # 11. 保存会话消息 + if not sub_agent: await self._save_conversation_message( conversation_id=conversation_id, user_message=message, @@ -763,15 +903,24 @@ class AgentRunService: user_id=user_id, meta_data={ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens} - } + }, + files=files, + audio_url=stream_audio_url ) - # 11. 发送结束事件 - yield self._format_sse_event("end", { + # 12. 
发送结束事件(包含 suggested_questions 和 tts) + end_data: Dict[str, Any] = { "conversation_id": conversation_id, "elapsed_time": elapsed_time, "message_length": len(full_content) - }) + } + if not sub_agent: + end_data["suggested_questions"] = await self._generate_suggested_questions( + features_config, full_content, api_key_config, effective_params + ) + end_data["audio_url"] = stream_audio_url + end_data["citations"] = self._filter_citations(features_config, []) + yield self._format_sse_event("end", end_data) logger.info( "流式试运行完成", @@ -840,7 +989,8 @@ class AgentRunService: "api_key": api_key.api_key, "api_base": api_key.api_base, "api_key_id": api_key.id, - "is_omni": api_key.is_omni + "is_omni": api_key.is_omni, + "capability": api_key.capability } async def _ensure_conversation( @@ -848,7 +998,8 @@ class AgentRunService: conversation_id: Optional[str], app_id: uuid.UUID, workspace_id: uuid.UUID, - user_id: Optional[str] + user_id: Optional[str], + sub_agent: bool = False ) -> str: """确保会话存在(创建或验证) @@ -909,20 +1060,36 @@ class AgentRunService: conv_uuid = uuid.UUID(conversation_id) conversation = conversation_service.get_conversation(conv_uuid) - # 验证会话属于当前工作空间 - if conversation.workspace_id != workspace_id: - logger.warning( - "会话不属于当前工作空间", - extra={ - "conversation_id": conversation_id, - "conversation_workspace_id": str(conversation.workspace_id), - "current_workspace_id": str(workspace_id) - } - ) - raise BusinessException( - "会话不属于当前工作空间", - BizCode.PERMISSION_DENIED - ) + # 验证会话属于当前工作空间(或属于共享应用的源工作空间) + # sub_agent 内部调用时跳过校验,已在上层验证过 + if not sub_agent and conversation.workspace_id != workspace_id: + # 检查是否是共享应用的会话(被共享者 workspace 访问源应用) + from app.models import AppShare + from sqlalchemy import select as sa_select + share = self.db.scalars( + sa_select(AppShare).where( + AppShare.source_app_id == app_id, + AppShare.target_workspace_id == workspace_id + ) + ).first() + + # 情况2:sub_agent 内部调用时,workspace_id 是源应用的 workspace, + # 而会话是被共享者创建的,只要会话属于同一个 app 即可放行 + 
same_app = (conversation.app_id == app_id) + + if not share and not same_app: + logger.warning( + "会话不属于当前工作空间", + extra={ + "conversation_id": conversation_id, + "conversation_workspace_id": str(conversation.workspace_id), + "current_workspace_id": str(workspace_id) + } + ) + raise BusinessException( + "会话不属于当前工作空间", + BizCode.PERMISSION_DENIED + ) logger.debug( "使用现有会话", @@ -990,7 +1157,9 @@ class AgentRunService: assistant_message: str, meta_data: dict, app_id: Optional[uuid.UUID] = None, - user_id: Optional[str] = None + user_id: Optional[str] = None, + files: Optional[List[FileInput]] = None, + audio_url: Optional[str] = None ) -> None: """保存会话消息(会话已通过 _ensure_conversation 确保存在) @@ -1009,13 +1178,26 @@ class AgentRunService: conv_uuid = uuid.UUID(conversation_id) # 保存消息(会话已经存在) + human_meta = { + "files": [] + } + if files: + for f in files: + # url = await MultimodalService(self.db).get_file_url(f) + human_meta["files"].append({ + "type": f.type, + "url": f.url + }) # 保存用户消息 conversation_service.add_message( conversation_id=conv_uuid, role="user", - content=user_message + content=user_message, + meta_data=human_meta ) - # 保存助手消息 + # 保存助手消息(含 audio_url) + if audio_url: + meta_data["audio_url"] = audio_url conversation_service.add_message( conversation_id=conv_uuid, role="assistant", @@ -1099,6 +1281,385 @@ class AgentRunService: logger.debug("获取配置快照失败(可能是多 Agent 应用)", exc_info=True, extra={"error": str(e)}) return {} + async def _generate_suggested_questions( + self, + features_config: Dict[str, Any], + assistant_message: str, + api_key_config: Dict[str, Any], + effective_params: Dict[str, Any] + ) -> List[str]: + """根据 suggested_questions_after_answer 配置生成下一步建议问题""" + sq_config = features_config.get("suggested_questions_after_answer", {}) + if not isinstance(sq_config, dict) or not sq_config.get("enabled"): + return [] + try: + from langchain_openai import ChatOpenAI + from langchain_core.messages import HumanMessage, SystemMessage + llm = ChatOpenAI( + 
model=api_key_config["model_name"], + api_key=api_key_config["api_key"], + base_url=api_key_config.get("api_base"), + temperature=0.5, + max_tokens=200, + ) + prompt = ( + f"根据以下AI回复,生成3个用户可能继续追问的简短问题,每行一个,不加序号:\n\n{assistant_message}" + ) + resp = await llm.ainvoke([HumanMessage(content=prompt)]) + lines = [l.strip() for l in resp.content.strip().split("\n") if l.strip()] + return lines[:3] + except Exception as e: + logger.warning(f"生成建议问题失败: {e}") + return [] + + async def _generate_tts( + self, + features_config: Dict[str, Any], + text: str, + api_key_config: Dict[str, Any], + tenant_id: Optional[uuid.UUID] = None, + workspace_id: Optional[uuid.UUID] = None, + ) -> Optional[str]: + """先注册文件元数据并返回 audio_url,再后台流式写入音频内容""" + tts_config = features_config.get("text_to_speech", {}) + if not isinstance(tts_config, dict) or not tts_config.get("enabled"): + return None + if not text or not text.strip(): + return None + + from app.models.file_metadata_model import FileMetadata + from app.services.file_storage_service import FileStorageService, generate_file_key + + provider = api_key_config.get("provider", "openai") + api_key = api_key_config.get("api_key") + api_base = api_key_config.get("api_base") + voice = tts_config.get("voice") + file_ext, content_type = ".mp3", "audio/mpeg" + + file_id = uuid.uuid4() + file_key = generate_file_key(tenant_id, workspace_id, file_id, file_ext) + + # 先写入 pending 状态的元数据,立即返回 URL + db_file = FileMetadata( + id=file_id, + tenant_id=tenant_id, + workspace_id=workspace_id, + file_key=file_key, + file_name=f"tts_{file_id}{file_ext}", + file_ext=file_ext, + file_size=0, + content_type=content_type, + status="pending", + ) + self.db.add(db_file) + self.db.commit() + + server_url = settings.FILE_LOCAL_SERVER_URL + audio_url = f"{server_url}/storage/permanent/{file_id}" + + # 后台任务:流式生成并写入存储,完成后更新状态 + async def _stream_to_storage(): + try: + storage_service = FileStorageService() + if provider == "dashscope": + stream = 
self._tts_dashscope_stream( + api_key=api_key, + text=text, + voice=voice or "longxiaochun", + tts_config=tts_config, + ) + else: + stream = self._tts_openai_stream( + api_key=api_key, + api_base=api_base, + text=text, + voice=voice or "alloy", + ) + + total_size = await storage_service.upload_stream( + tenant_id=tenant_id, + workspace_id=workspace_id, + file_id=file_id, + file_ext=file_ext, + stream=stream, + content_type=content_type, + ) + + # 更新元数据状态 + with get_db_context() as bg_db: + record = bg_db.get(FileMetadata, file_id) + if record: + record.status = "completed" + record.file_size = total_size + bg_db.commit() + logger.debug(f"TTS 流式写入完成,provider={provider}, file_key={file_key}") + except Exception as e: + logger.warning(f"TTS 流式写入失败: {e}") + with get_db_context() as bg_db: + record = bg_db.get(FileMetadata, file_id) + if record: + record.status = "failed" + bg_db.commit() + + asyncio.create_task(_stream_to_storage()) + return audio_url + + async def _generate_tts_streaming( + self, + features_config: Dict[str, Any], + api_key_config: Dict[str, Any], + text_queue: asyncio.Queue, + tenant_id: Optional[uuid.UUID] = None, + workspace_id: Optional[uuid.UUID] = None, + ) -> tuple[Optional[str], Optional[asyncio.Task]]: + """文本流式输入并行合成音频。 + 返回 (audio_url, task),audio_url 立即可用,task 完成后文件内容就绪。 + 调用方向 text_queue put 文本 chunk,结束时 put None。 + """ + tts_config = features_config.get("text_to_speech", {}) + if not isinstance(tts_config, dict) or not tts_config.get("enabled"): + return None, None + + from app.models.file_metadata_model import FileMetadata + from app.services.file_storage_service import FileStorageService, generate_file_key + + provider = api_key_config.get("provider", "openai") + api_key = api_key_config.get("api_key") + api_base = api_key_config.get("api_base") + voice = tts_config.get("voice") + file_ext, content_type = ".mp3", "audio/mpeg" + + file_id = uuid.uuid4() + file_key = generate_file_key(tenant_id, workspace_id, file_id, file_ext) + + 
db_file = FileMetadata( + id=file_id, + tenant_id=tenant_id, + workspace_id=workspace_id, + file_key=file_key, + file_name=f"tts_{file_id}{file_ext}", + file_ext=file_ext, + file_size=0, + content_type=content_type, + status="pending", + ) + self.db.add(db_file) + self.db.commit() + + server_url = settings.FILE_LOCAL_SERVER_URL + audio_url = f"{server_url}/storage/permanent/{file_id}" + + async def _run(): + try: + storage_service = FileStorageService() + if provider == "dashscope": + audio_stream = self._tts_dashscope_stream_from_queue( + api_key=api_key, + voice=voice or "longxiaochun", + tts_config=tts_config, + text_queue=text_queue, + ) + else: + audio_stream = self._tts_openai_stream_from_queue( + api_key=api_key, + api_base=api_base, + voice=voice or "alloy", + text_queue=text_queue, + ) + total_size = await storage_service.upload_stream( + tenant_id=tenant_id, + workspace_id=workspace_id, + file_id=file_id, + file_ext=file_ext, + stream=audio_stream, + content_type=content_type, + ) + with get_db_context() as bg_db: + record = bg_db.get(FileMetadata, file_id) + if record: + record.status = "completed" + record.file_size = total_size + bg_db.commit() + logger.debug(f"TTS 流式合成完成,provider={provider}, file_key={file_key}") + except Exception as e: + logger.warning(f"TTS 流式合成失败: {e}") + with get_db_context() as bg_db: + record = bg_db.get(FileMetadata, file_id) + if record: + record.status = "failed" + bg_db.commit() + + task = asyncio.create_task(_run()) + return audio_url, task + + @staticmethod + async def _tts_openai_stream_from_queue( + api_key: str, + api_base: Optional[str], + voice: str, + text_queue: asyncio.Queue, + ): + """OpenAI TTS:收集全部文本后流式合成(OpenAI 不支持增量输入)""" + from openai import AsyncOpenAI + # 收集全部文本(此时文本流已并行输出,等待时间短) + parts = [] + while True: + chunk = await text_queue.get() + if chunk is None: + break + parts.append(chunk) + full_text = "".join(parts) + if not full_text.strip(): + return + client = AsyncOpenAI(api_key=api_key, 
base_url=api_base) + async with client.audio.speech.with_streaming_response.create( + model="tts-1", + voice=voice, + input=full_text[:4096], + ) as response: + async for chunk in response.iter_bytes(chunk_size=4096): + yield chunk + + @staticmethod + async def _tts_dashscope_stream_from_queue( + api_key: str, + voice: str, + tts_config: Dict[str, Any], + text_queue: asyncio.Queue, + ): + """DashScope TTS:文本流式输入,实现真正并行合成""" + import dashscope + from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback + + model = tts_config.get("model") or "cosyvoice-v2" + is_v2 = model.endswith("-v2") + if is_v2 and not voice.endswith("_v2"): + voice = voice + "_v2" + elif not is_v2 and voice.endswith("_v2"): + voice = voice[:-3] + + audio_queue: asyncio.Queue = asyncio.Queue() + loop = asyncio.get_event_loop() + + class _Callback(ResultCallback): + def on_data(self, data: bytes): + if data: + loop.call_soon_threadsafe(audio_queue.put_nowait, data) + def on_complete(self): + loop.call_soon_threadsafe(audio_queue.put_nowait, None) + def on_error(self, message): + loop.call_soon_threadsafe(audio_queue.put_nowait, RuntimeError(str(message))) + def on_open(self): pass + def on_close(self): pass + + dashscope.api_key = api_key + synthesizer = SpeechSynthesizer( + model=model, + voice=voice, + format=AudioFormat.MP3_22050HZ_MONO_256KBPS, + callback=_Callback(), + ) + + async def _feed_text(): + """从 text_queue 取文本按句子切分后喂给 synthesizer""" + import re + buf = "" + sentence_end = re.compile(r'[\u3002\uff01\uff1f\.!?\n]') + while True: + chunk = await text_queue.get() + if chunk is None: + if buf.strip(): + await asyncio.to_thread(synthesizer.streaming_call, buf) + await asyncio.to_thread(synthesizer.streaming_complete) + break + buf += chunk + # 按句子切分喂入 + while sentence_end.search(buf): + m = sentence_end.search(buf) + sentence = buf[:m.end()] + buf = buf[m.end():] + await asyncio.to_thread(synthesizer.streaming_call, sentence) + + asyncio.create_task(_feed_text()) + 
+ while True: + item = await audio_queue.get() + if item is None: + break + if isinstance(item, Exception): + raise item + yield item + + @staticmethod + async def _tts_openai_stream( + api_key: str, + api_base: Optional[str], + text: str, + voice: str, + ): + """OpenAI 兼容 TTS 流式生成,yield bytes chunks""" + from openai import AsyncOpenAI + client = AsyncOpenAI(api_key=api_key, base_url=api_base) + async with client.audio.speech.with_streaming_response.create( + model="tts-1", + voice=voice, + input=text[:4096], + ) as response: + async for chunk in response.iter_bytes(chunk_size=4096): + yield chunk + + @staticmethod + async def _tts_dashscope_stream( + api_key: str, + text: str, + voice: str, + tts_config: Dict[str, Any], + ): + """DashScope TTS 流式生成,yield bytes chunks""" + import dashscope + from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback + + model = tts_config.get("model") or "cosyvoice-v2" + is_v2 = model.endswith("-v2") + if is_v2 and not voice.endswith("_v2"): + voice = voice + "_v2" + elif not is_v2 and voice.endswith("_v2"): + voice = voice[:-3] + + queue: asyncio.Queue = asyncio.Queue() + loop = asyncio.get_event_loop() + + class _Callback(ResultCallback): + def on_data(self, data: bytes): + if data: + loop.call_soon_threadsafe(queue.put_nowait, data) + def on_complete(self): + loop.call_soon_threadsafe(queue.put_nowait, None) + def on_error(self, message): + loop.call_soon_threadsafe(queue.put_nowait, RuntimeError(str(message))) + def on_open(self): pass + def on_close(self): pass + + def _sync_stream(): + dashscope.api_key = api_key + synthesizer = SpeechSynthesizer( + model=model, + voice=voice, + format=AudioFormat.MP3_22050HZ_MONO_256KBPS, + callback=_Callback(), + ) + synthesizer.streaming_call(text[:4096]) + synthesizer.streaming_complete() + + asyncio.create_task(asyncio.to_thread(_sync_stream)) + while True: + item = await queue.get() + if item is None: + break + if isinstance(item, Exception): + raise item + yield 
item + def _replace_variables( self, text: str, @@ -1183,6 +1744,12 @@ class AgentRunService: } ) + # 提前校验文件上传(与 run() 内部保持一致) + features_config: dict = agent_config.features or {} + if hasattr(features_config, 'model_dump'): + features_config = features_config.model_dump() + # self._validate_file_upload(features_config, files) + async def run_single_model(model_info): """运行单个模型""" try: @@ -1233,6 +1800,9 @@ class AgentRunService: if elapsed > 0 and usage.get("completion_tokens") else None ), "cost_estimate": self._estimate_cost(usage, model_info["model_config"]), + "audio_url": result.get("audio_url"), + "citations": result.get("citations", []), + "suggested_questions": result.get("suggested_questions", []), "error": None } @@ -1305,7 +1875,12 @@ class AgentRunService: ) return { - "results": results, + "results": [{ + **r, + "audio_url": r.get("audio_url"), + "citations": r.get("citations", []), + "suggested_questions": r.get("suggested_questions", []), + } for r in results], "total_elapsed_time": sum(r.get("elapsed_time", 0) for r in results), "successful_count": len(successful), "failed_count": len(failed), @@ -1396,6 +1971,12 @@ class AgentRunService: extra={"model_count": len(models), "parallel": parallel} ) + # 提前校验文件上传 + # features_config: dict = agent_config.features or {} + # if hasattr(features_config, 'model_dump'): + # features_config = features_config.model_dump() + # self._validate_file_upload(features_config, files) + # 发送开始事件 yield self._format_sse_event("compare_start", { "conversation_id": conversation_id, @@ -1427,6 +2008,9 @@ class AgentRunService: start_time = time.time() full_content = "" returned_conversation_id = model_conversation_id + audio_url = None + citations = [] + suggested_questions = [] # 临时修改参数 original_params = agent_config.model_parameters @@ -1480,6 +2064,12 @@ class AgentRunService: "content": chunk })) + # 从 end 事件中提取 features 输出字段 + if event_type == "end" and event_data: + audio_url = event_data.get("audio_url") + citations 
= event_data.get("citations", []) + suggested_questions = event_data.get("suggested_questions", []) + if event_type == "error" and event_data: await event_queue.put(self._format_sse_event("model_error", { "model_index": idx, @@ -1505,6 +2095,9 @@ class AgentRunService: "parameters_used": model_info["parameters"], "message": full_content, "elapsed_time": elapsed, + "audio_url": audio_url, + "citations": citations, + "suggested_questions": suggested_questions, "error": None } @@ -1516,6 +2109,9 @@ class AgentRunService: "conversation_id": returned_conversation_id, "elapsed_time": elapsed, "message_length": len(full_content), + "audio_url": audio_url, + "citations": citations, + "suggested_questions": suggested_questions, "timestamp": time.time() })) @@ -1647,8 +2243,11 @@ class AgentRunService: "model_name": r["model_name"], "label": r["label"], "conversation_id": r.get("conversation_id"), - "message": r.get("message"), # 包含完整消息 + "message": r.get("message"), "elapsed_time": r.get("elapsed_time", 0), + "audio_url": r.get("audio_url"), + "citations": r.get("citations", []), + "suggested_questions": r.get("suggested_questions", []), "error": r.get("error") }) diff --git a/api/app/services/file_storage_service.py b/api/app/services/file_storage_service.py index bb9f1894..2ebc5d9a 100644 --- a/api/app/services/file_storage_service.py +++ b/api/app/services/file_storage_service.py @@ -9,7 +9,7 @@ and error handling. import logging import time import uuid -from typing import Optional +from typing import AsyncIterator, Optional from app.core.storage import StorageFactory, StorageBackend from app.core.storage_exceptions import ( @@ -162,6 +162,31 @@ class FileStorageService: cause=e, ) + async def upload_stream( + self, + tenant_id: uuid.UUID, + workspace_id: uuid.UUID | None, + file_id: uuid.UUID, + file_ext: str, + stream: AsyncIterator[bytes], + content_type: Optional[str] = None, + ) -> int: + """ + Upload a file from an async byte stream. 
+ + Returns: + Total bytes written. + """ + file_key = generate_file_key(tenant_id, workspace_id, file_id, file_ext) + logger.info(f"Starting stream upload: file_key={file_key}, content_type={content_type}") + try: + total = await self.storage.upload_stream(file_key, stream, content_type) + logger.info(f"Stream upload successful: file_key={file_key}, size={total} bytes") + return total + except Exception as e: + logger.error(f"Stream upload failed: file_key={file_key}, error={str(e)}") + raise + async def download_file(self, file_key: str) -> bytes: """ Download a file from storage. diff --git a/api/app/services/memory_agent_service.py b/api/app/services/memory_agent_service.py index f272c541..1e1d9e45 100644 --- a/api/app/services/memory_agent_service.py +++ b/api/app/services/memory_agent_service.py @@ -274,7 +274,7 @@ class MemoryAgentService: Args: end_user_id: Group identifier (also used as end_user_id) - message: Message to write + messages: Message to write config_id: Configuration ID from database db: SQLAlchemy database session storage_type: Storage type (neo4j or rag) @@ -1165,6 +1165,7 @@ def get_end_user_connected_config(end_user_id: str, db: Session) -> Dict[str, An logger.info(f"Getting connected config for end_user: {end_user_id}") + # TODO: check sources for enduserid, should be one of these three: chat, draft, apikey # 1. 获取 end_user 及其 app_id end_user = db.query(EndUser).filter(EndUser.id == end_user_id).first() if not end_user: @@ -1179,10 +1180,10 @@ def get_end_user_connected_config(end_user_id: str, db: Session) -> Dict[str, An if not app: logger.warning(f"App not found: {app_id}") raise ValueError(f"应用不存在: {app_id}") - - if not app.current_release_id: - logger.warning(f"No current release for app: {app_id}") - raise ValueError(f"应用未发布: {app_id}") + # TODO: temp fix for draft run + # if not app.current_release_id: + # logger.warning(f"No current release for app: {app_id}") + # raise ValueError(f"应用未发布: {app_id}") # 3. 
兼容旧数据:如果 memory_config_id 为空,从 AppRelease.config 获取并回填 memory_config_id_to_use = end_user.memory_config_id @@ -1223,7 +1224,9 @@ def get_end_user_connected_config(end_user_id: str, db: Session) -> Dict[str, An if legacy_config_id: # 验证提取的 config_id 是否存在于数据库中 - from app.models.memory_config_model import MemoryConfig as MemoryConfigModel + from app.models.memory_config_model import ( + MemoryConfig as MemoryConfigModel, + ) existing_config = db.get(MemoryConfigModel, legacy_config_id) if existing_config: @@ -1257,7 +1260,7 @@ def get_end_user_connected_config(end_user_id: str, db: Session) -> Dict[str, An result = { "end_user_id": str(end_user_id), "app_id": str(app_id), - "release_id": str(app.current_release_id), + "release_id": str(app.current_release_id) if app.current_release_id else None, "memory_config_id": memory_config_id, "workspace_id": str(app.workspace_id) } diff --git a/api/app/services/memory_config_service.py b/api/app/services/memory_config_service.py index 00757f8c..a3751c07 100644 --- a/api/app/services/memory_config_service.py +++ b/api/app/services/memory_config_service.py @@ -107,38 +107,29 @@ def _validate_config_id(config_id, db: Session = None): ) -# 专门场景的内置 key 集合,直接从 SceneConfigRegistry 派生,避免重复维护 -# 使用懒加载函数避免模块级循环导入 -def _get_builtin_pruning_scenes() -> set: - from app.core.memory.storage_services.extraction_engine.data_preprocessing.scene_config import SceneConfigRegistry - return set(SceneConfigRegistry.get_all_scenes()) - - -def _load_ontology_classes(db: Session, scene_id, pruning_scene: Optional[str]) -> Optional[list]: - """当 pruning_scene 不是内置场景时,从 ontology_class 表加载类型名称列表。 +def _load_ontology_class_infos(db: Session, scene_id) -> list: + """从 ontology_class 表加载完整本体类型信息(name + description),用于注入剪枝提示词。 Args: db: 数据库会话 scene_id: 本体场景 UUID - pruning_scene: 语义剪枝场景名称 Returns: - class_name 字符串列表,或 None(内置场景 / 无数据时) + [{"class_name": ..., "class_description": ...}, ...] 
或空列表 """ if not scene_id: - return None - # 内置场景走 SceneConfigRegistry,不需要注入类型列表 - if pruning_scene in _get_builtin_pruning_scenes(): - return None + return [] try: from app.repositories.ontology_class_repository import OntologyClassRepository repo = OntologyClassRepository(db) classes = repo.get_classes_by_scene(scene_id) - names = [c.class_name for c in classes if c.class_name] - return names if names else None + return [ + {"class_name": c.class_name, "class_description": c.class_description or ""} + for c in classes if c.class_name + ] except Exception as e: - logger.warning(f"Failed to load ontology classes for scene_id={scene_id}: {e}") - return None + logger.warning(f"Failed to load ontology class infos for scene_id={scene_id}: {e}") + return [] class MemoryConfigService: @@ -393,7 +384,7 @@ class MemoryConfigService: pruning_threshold=float(memory_config.pruning_threshold) if memory_config.pruning_threshold is not None else 0.5, # Ontology scene association scene_id=memory_config.scene_id, - ontology_classes=_load_ontology_classes(self.db, memory_config.scene_id, memory_config.pruning_scene), + ontology_class_infos=_load_ontology_class_infos(self.db, memory_config.scene_id), ) elapsed_ms = (time.time() - start_time) * 1000 @@ -560,11 +551,13 @@ class MemoryConfigService: - pruning_switch: bool - pruning_scene: str - pruning_threshold: float + - ontology_class_infos: list of {class_name, class_description} dicts """ return { "pruning_switch": memory_config.pruning_enabled, "pruning_scene": memory_config.pruning_scene, "pruning_threshold": memory_config.pruning_threshold, + "ontology_class_infos": memory_config.ontology_class_infos or [], } def get_ontology_types(self, memory_config: MemoryConfig): diff --git a/api/app/services/memory_dashboard_service.py b/api/app/services/memory_dashboard_service.py index 05aed57e..d0078088 100644 --- a/api/app/services/memory_dashboard_service.py +++ b/api/app/services/memory_dashboard_service.py @@ -68,14 +68,14 @@ def 
get_workspace_end_users( return [] # 提取所有 app_id - app_ids = [app.id for app in apps_orm] + # app_ids = [app.id for app in apps_orm] # 批量查询所有 end_users(一次查询而非循环查询) # 按 created_at 降序排序,NULL 值排在最后;id 作为次级排序键保证确定性 from app.models.end_user_model import EndUser as EndUserModel from sqlalchemy import desc, nullslast end_users_orm = db.query(EndUserModel).filter( - EndUserModel.app_id.in_(app_ids) + EndUserModel.workspace_id == workspace_id ).order_by( nullslast(desc(EndUserModel.created_at)), desc(EndUserModel.id) @@ -535,7 +535,8 @@ def get_users_total_chunk_batch( def get_rag_content( end_user_id: str, - limit: int, + page: int, + pagesize: int, db: Session, current_user: User ) -> dict: @@ -543,9 +544,9 @@ def get_rag_content( 先在documents表中查询file_name=='end_user_id'+'.txt'的id和kb_id, 然后调用/chunks/{kb_id}/{document_id}/chunks接口的相关代码获取所有内容, 接着对获取的内容进行提取,只要page_content的内容, - 最后返回数据 + 最后返回分页数据 """ - business_logger.info(f"获取RAG内容: end_user_id={end_user_id}, limit={limit}, 操作者: {current_user.username}") + business_logger.info(f"获取RAG内容: end_user_id={end_user_id}, page={page}, pagesize={pagesize}, 操作者: {current_user.username}") try: from app.models.document_model import Document @@ -562,63 +563,76 @@ def get_rag_content( if not documents: business_logger.warning(f"未找到文件: {file_name}") return { - "total": 0, - "contents": [] + "page": { + "page": page, + "pagesize": pagesize, + "total": 0, + "hasnext": False, + }, + "items": [] } business_logger.info(f"找到 {len(documents)} 个文档记录") - # 3. 获取所有chunks的page_content - all_contents = [] - total_chunks = 0 + # 3. 
按全局偏移量计算当前页数据 + # 全局偏移范围:[offset_start, offset_end) + offset_start = (page - 1) * pagesize + offset_end = offset_start + pagesize + + global_total = 0 # 所有文档的 chunk 总数 + page_contents = [] # 当前页的内容 for document in documents: try: - # 获取知识库信息 kb = knowledge_repository.get_knowledge_by_id(db, document.kb_id) if not kb: business_logger.warning(f"知识库不存在: kb_id={document.kb_id}") continue - # 初始化向量服务 vector_service = ElasticSearchVectorFactory().init_vector(knowledge=kb) - # 获取该文档的所有chunks(分页获取) - page = 1 - pagesize = 100 # 每页100条 + # 先用 pagesize=1 获取该文档的 chunk 总数 + doc_total, _ = vector_service.search_by_segment( + document_id=str(document.id), + query=None, + pagesize=1, + page=1, + asc=True + ) - while True: - total, items = vector_service.search_by_segment( + doc_offset_start = global_total # 该文档在全局中的起始偏移 + doc_offset_end = global_total + doc_total # 该文档在全局中的结束偏移 + global_total += doc_total + + # 当前页与该文档无交集,跳过 + if doc_offset_end <= offset_start or doc_offset_start >= offset_end: + continue + + # 计算需要从该文档取的局部范围 + local_start = max(offset_start - doc_offset_start, 0) + local_end = min(offset_end - doc_offset_start, doc_total) + need_count = local_end - local_start + + # 换算成 ES 分页参数(ES page 从1开始) + es_page = (local_start // pagesize) + 1 + es_offset_in_page = local_start % pagesize + + fetched = [] + while len(fetched) < es_offset_in_page + need_count: + _, items = vector_service.search_by_segment( document_id=str(document.id), query=None, pagesize=pagesize, - page=page, + page=es_page, asc=True ) - if not items: break - - # 提取page_content - for item in items: - all_contents.append(item.page_content) - total_chunks += 1 - - # # 如果达到limit限制,直接返回 - # if limit > 0 and total_chunks >= limit: - # business_logger.info(f"已达到limit限制: {limit}") - # return { - # "total": total_chunks, - # "contents": all_contents[:limit] - # } - - # 检查是否还有下一页 - if page * pagesize >= total: - break - - page += 1 + fetched.extend(items) + es_page += 1 - business_logger.info(f"文档 {document.id} 
获取了 {len(items)} 个chunks") + slice_items = fetched[es_offset_in_page: es_offset_in_page + need_count] + page_contents.extend([item.page_content for item in slice_items]) except Exception as e: business_logger.error(f"获取文档 {document.id} 的chunks失败: {str(e)}") @@ -626,11 +640,16 @@ def get_rag_content( # 4. 返回结果 result = { - "total": total_chunks, - "contents": all_contents[:limit] if limit > 0 else all_contents + "page": { + "page": page, + "pagesize": pagesize, + "total": global_total, + "hasnext": offset_end < global_total, + }, + "items": page_contents } - business_logger.info(f"成功获取RAG内容: total={total_chunks}, 返回={len(result['contents'])} 条") + business_logger.info(f"成功获取RAG内容: total={global_total}, page={page}, 返回={len(page_contents)} 条") return result except Exception as e: @@ -646,59 +665,26 @@ async def get_chunk_summary_and_tags( current_user: User ) -> dict: """ - 获取chunk的总结、标签和人物形象 - - Args: - end_user_id: 宿主ID - limit: 返回的chunk数量限制 - max_tags: 最大标签数量 - db: 数据库会话 - current_user: 当前用户 - - Returns: - 包含summary、tags和personas的字典 + 纯读库:从end_user表返回RAG摘要、标签和人物形象缓存。 + 无数据时返回空结构,不触发LLM生成。 """ - business_logger.info(f"获取chunk摘要、标签和人物形象: end_user_id={end_user_id}, limit={limit}, 操作者: {current_user.username}") - - try: - # 1. 获取chunk内容 - rag_content = get_rag_content(end_user_id, limit, db, current_user) - chunks = rag_content.get("contents", []) - - if not chunks: - business_logger.warning(f"未找到chunk内容: end_user_id={end_user_id}") - return { - "summary": "暂无内容", - "tags": [], - "personas": [] - } - - # 2. 导入RAG工具函数 - from app.core.rag_utils import generate_chunk_summary, extract_chunk_tags, extract_chunk_persona - - # 3. 
并发生成摘要、提取标签和人物形象 - import asyncio - summary_task = generate_chunk_summary(chunks, max_chunks=limit) - tags_task = extract_chunk_tags(chunks, max_tags=max_tags, max_chunks=limit) - personas_task = extract_chunk_persona(chunks, max_personas=5, max_chunks=limit) - - summary, tags_with_freq, personas = await asyncio.gather(summary_task, tags_task, personas_task) - - # 4. 格式化标签数据 - tags = [{"tag": tag, "frequency": freq} for tag, freq in tags_with_freq] - - result = { - "summary": summary, - "tags": tags, - "personas": personas - } - - business_logger.info(f"成功获取chunk摘要、{len(tags)} 个标签和 {len(personas)} 个人物形象") - return result - - except Exception as e: - business_logger.error(f"获取chunk摘要、标签和人物形象失败: end_user_id={end_user_id} - {str(e)}") - raise + import json + from app.repositories.end_user_repository import EndUserRepository + + business_logger.info(f"读取chunk摘要/标签/人物形象缓存: end_user_id={end_user_id}") + + repo = EndUserRepository(db) + end_user = repo.get_by_id(uuid.UUID(end_user_id)) + + if not end_user: + return {"summary": "", "tags": [], "personas": [], "generated": False} + + return { + "summary": end_user.user_summary or "", + "tags": json.loads(end_user.rag_tags) if end_user.rag_tags else [], + "personas": json.loads(end_user.rag_personas) if end_user.rag_personas else [], + "generated": bool(end_user.user_summary), + } async def get_chunk_insight( @@ -708,43 +694,98 @@ async def get_chunk_insight( current_user: User ) -> dict: """ - 获取chunk的洞察分析 - - Args: - end_user_id: 宿主ID - limit: 返回的chunk数量限制 - db: 数据库会话 - current_user: 当前用户 - - Returns: - 包含insight的字典 + 纯读库:从end_user表返回RAG洞察缓存。 + 无数据时返回空结构,不触发LLM生成。 """ - business_logger.info(f"获取chunk洞察: end_user_id={end_user_id}, limit={limit}, 操作者: {current_user.username}") - - try: - # 1. 
获取chunk内容 - rag_content = get_rag_content(end_user_id, limit, db, current_user) - chunks = rag_content.get("contents", []) - - if not chunks: - business_logger.warning(f"未找到chunk内容: end_user_id={end_user_id}") - return { - "insight": "暂无足够数据生成洞察报告" - } - - # 2. 导入RAG工具函数 - from app.core.rag_utils import generate_chunk_insight - - # 3. 生成洞察 - insight = await generate_chunk_insight(chunks, max_chunks=limit) - - result = { - "insight": insight - } - - business_logger.info("成功获取chunk洞察") - return result - - except Exception as e: - business_logger.error(f"获取chunk洞察失败: end_user_id={end_user_id} - {str(e)}") - raise \ No newline at end of file + from app.repositories.end_user_repository import EndUserRepository + + business_logger.info(f"读取chunk洞察缓存: end_user_id={end_user_id}") + + repo = EndUserRepository(db) + end_user = repo.get_by_id(uuid.UUID(end_user_id)) + + if not end_user: + return {"insight": "", "behavior_pattern": "", "key_findings": "", "growth_trajectory": "", "generated": False} + + return { + "insight": end_user.memory_insight or "", + "behavior_pattern": end_user.behavior_pattern or "", + "key_findings": end_user.key_findings or "", + "growth_trajectory": end_user.growth_trajectory or "", + "generated": bool(end_user.memory_insight), + } + + +async def generate_rag_profile( + end_user_id: str, + limit: int, + max_tags: int, + db: Session, + current_user: User, +) -> dict: + """ + 生产接口:为RAG存储模式的end_user全量重新生成并持久化完整画像数据。 + 每次调用都会重新生成,覆盖已有数据。 + + 生成内容: + - user_summary / rag_tags / rag_personas + - memory_insight / behavior_pattern / key_findings / growth_trajectory + """ + import json + import asyncio + from app.repositories.end_user_repository import EndUserRepository + from app.core.rag_utils import ( + generate_chunk_summary, + extract_chunk_tags, + extract_chunk_persona, + generate_chunk_insight_sections, + ) + + business_logger.info(f"开始生产RAG画像: end_user_id={end_user_id}, 操作者: {current_user.username}") + + repo = EndUserRepository(db) + end_user = 
repo.get_by_id(uuid.UUID(end_user_id)) + + if not end_user: + raise ValueError(f"end_user {end_user_id} 不存在") + + rag_content = get_rag_content(end_user_id, page=1, pagesize=limit, db=db, current_user=current_user) + chunks = rag_content.get("items", []) + + if not chunks: + business_logger.warning(f"未找到chunk内容,无法生产RAG画像: end_user_id={end_user_id}") + raise ValueError("暂无chunk内容,无法生成画像") + + summary, tags_with_freq, personas, insight_sections = await asyncio.gather( + generate_chunk_summary(chunks, max_chunks=limit, end_user_id=end_user_id), + extract_chunk_tags(chunks, max_tags=max_tags, max_chunks=limit, end_user_id=end_user_id), + extract_chunk_persona(chunks, max_personas=5, max_chunks=limit, end_user_id=end_user_id), + generate_chunk_insight_sections(chunks, max_chunks=limit, end_user_id=end_user_id), + ) + + tags = [{"tag": tag, "frequency": freq} for tag, freq in tags_with_freq] + + repo.update_rag_summary_tags( + end_user_id=end_user.id, + user_summary=summary, + rag_tags=json.dumps(tags, ensure_ascii=False), + rag_personas=json.dumps(personas, ensure_ascii=False), + ) + + repo.update_memory_insight( + end_user_id=end_user.id, + memory_insight=insight_sections.get("memory_insight", ""), + behavior_pattern=insight_sections.get("behavior_pattern", ""), + key_findings=insight_sections.get("key_findings", ""), + growth_trajectory=insight_sections.get("growth_trajectory", ""), + ) + + business_logger.info(f"RAG画像生产完成: end_user_id={end_user_id}, tags={len(tags)}, personas={len(personas)}") + + return { + "end_user_id": end_user_id, + "summary_length": len(summary), + "tags_count": len(tags), + "personas_count": len(personas), + "insight_generated": bool(insight_sections.get("memory_insight")), + } \ No newline at end of file diff --git a/api/app/services/memory_forget_service.py b/api/app/services/memory_forget_service.py index e1030b24..84c4aff6 100644 --- a/api/app/services/memory_forget_service.py +++ b/api/app/services/memory_forget_service.py @@ -518,7 
+518,7 @@ class MemoryForgetService: 'total_nodes': result['total_nodes'] or 0, 'nodes_with_activation': result['nodes_with_activation'] or 0, 'nodes_without_activation': result['nodes_without_activation'] or 0, - 'average_activation_value': result['average_activation'], + 'average_activation_value': round(result['average_activation'], 2) if result['average_activation'] is not None else None, 'low_activation_nodes': result['low_activation_nodes'] or 0, 'forgetting_threshold': forgetting_threshold, 'timestamp': int(datetime.now().timestamp() * 1000) diff --git a/api/app/services/memory_perceptual_service.py b/api/app/services/memory_perceptual_service.py index b9d96a0b..8a7c86e2 100644 --- a/api/app/services/memory_perceptual_service.py +++ b/api/app/services/memory_perceptual_service.py @@ -1,19 +1,29 @@ +import os import uuid from typing import Dict, Any, Optional +from urllib.parse import urlparse, unquote +import json_repair +from jinja2 import Template +from sqlalchemy import select from sqlalchemy.orm import Session from app.core.error_codes import BizCode from app.core.exceptions import BusinessException from app.core.logging_config import get_business_logger +from app.core.models import RedBearLLM, RedBearModelConfig +from app.models import FileMetadata from app.models.memory_perceptual_model import PerceptualType, FileStorageService +from app.models.prompt_optimizer_model import RoleType from app.repositories.memory_perceptual_repository import MemoryPerceptualRepository +from app.schemas import FileType from app.schemas.memory_perceptual_schema import ( PerceptualQuerySchema, PerceptualTimelineResponse, PerceptualMemoryItem, AudioModal, Content, VideoModal, TextModal ) +from app.schemas.model_schema import ModelInfo business_logger = get_business_logger() @@ -99,7 +109,7 @@ class MemoryPerceptualService: "keywords": content.keywords, "topic": content.topic, "domain": content.domain, - "created_time": int(memory.created_time.timestamp()*1000), + 
"created_time": int(memory.created_time.timestamp() * 1000), **detail } @@ -108,7 +118,8 @@ class MemoryPerceptualService: return result except Exception as e: - business_logger.error(f"Failed to fetch latest {perceptual_type.name.lower()} memory: {str(e)}") + business_logger.error(f"Failed to fetch latest {perceptual_type.name.lower()} memory: {str(e)}", + exc_info=True) raise BusinessException(f"Failed to fetch latest {perceptual_type.name.lower()} memory: {str(e)}", BizCode.DB_ERROR) @@ -138,7 +149,7 @@ class MemoryPerceptualService: for memory in memories: meta_data = memory.meta_data or {} content = meta_data.get("content", {}) - + # 安全地提取 content 字段,提供默认值 if content: content_obj = Content(**content) @@ -149,7 +160,7 @@ class MemoryPerceptualService: topic = "Unknown" domain = "Unknown" keywords = [] - + memory_item = PerceptualMemoryItem( id=memory.id, perceptual_type=PerceptualType(memory.perceptual_type), @@ -161,7 +172,7 @@ class MemoryPerceptualService: topic=topic, domain=domain, keywords=keywords, - created_time=int(memory.created_time.timestamp()*1000), + created_time=int(memory.created_time.timestamp() * 1000), storage_service=FileStorageService(memory.storage_service), ) memory_items.append(memory_item) @@ -183,3 +194,110 @@ class MemoryPerceptualService: except Exception as e: business_logger.error(f"Failed to fetch perceptual memory timeline: {str(e)}") raise BusinessException(f"Failed to fetch perceptual memory timeline: {str(e)}", BizCode.DB_ERROR) + + async def generate_perceptual_memory( + self, + end_user_id: str, + model_config: ModelInfo, + file_type: str, + file_url: str, + file_message: dict, + ): + memories = self.repository.get_by_url(file_url) + if memories: + business_logger.info(f"Perceptual memory already exists: {file_url}") + if end_user_id not in [memory.end_user_id for memory in memories]: + business_logger.info(f"Copy perceptual memory end_user_id: {end_user_id}") + memory_cache = memories[0] + 
self.repository.create_perceptual_memory( + end_user_id=uuid.UUID(end_user_id), + perceptual_type=PerceptualType(memory_cache.perceptual_type), + file_path=memory_cache.file_path, + file_name=memory_cache.file_name, + file_ext=memory_cache.file_ext, + summary=memory_cache.summary, + meta_data=memory_cache.meta_data + ) + self.db.commit() + + return + llm = RedBearLLM(RedBearModelConfig( + model_name=model_config.model_name, + provider=model_config.provider, + api_key=model_config.api_key, + base_url=model_config.api_base, + is_omni=model_config.is_omni + ), type=model_config.model_type) + try: + prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompt') + with open(os.path.join(prompt_path, 'perceptual_summary_system.jinja2'), 'r', encoding='utf-8') as f: + opt_system_prompt = f.read() + rendered_system_message = Template(opt_system_prompt).render(file_type=file_type, language='zh') + except FileNotFoundError: + raise BusinessException(message="System prompt template not found", code=BizCode.NOT_FOUND) + messages = [ + {"role": RoleType.SYSTEM.value, "content": [{"type": "text", "text": rendered_system_message}]}, + {"role": RoleType.USER.value, "content": [ + {"type": "text", "text": "Summarize the following file"}, file_message + ]} + ] + result = await llm.ainvoke(messages) + content = json_repair.repair_json(result.content, return_objects=True) + path = urlparse(file_url).path + filename = os.path.basename(path) + filename = unquote(filename) + file_ext = os.path.splitext(filename)[1] + try: + file_id = uuid.UUID(filename) + stmt = select(FileMetadata).where( + FileMetadata.id == file_id + ) + file = self.db.execute(stmt).scalar_one_or_none() + + if file: + filename = file.file_name + file_ext = file.file_ext + except ValueError: + business_logger.debug(f"Remote file, file_id={filename}") + if not file_ext: + if file_type == FileType.AUDIO: + file_ext = ".mp3" + elif file_type == FileType.VIDEO: + file_ext = ".mp4" + elif file_type == 
FileType.DOCUMENT: + file_ext = ".txt" + elif file_type == FileType.IMAGE: + file_ext = ".jpg" + filename += file_ext + file_content = { + "keywords": content.get("keywords", []), + "topic": content.get("topic"), + "domain": content.get("domain") + } + if file_type in [FileType.IMAGE, FileType.VIDEO]: + file_modalities = { + "scene": content.get("scene", []) + } + elif file_type in [FileType.DOCUMENT]: + file_modalities = { + "section_count": content.get("section_count", 0), + "title": content.get("title", ""), + "first_line": content.get("first_line", "") + } + else: + file_modalities = { + "speaker_count": content.get("speaker_count", 0) + } + self.repository.create_perceptual_memory( + end_user_id=uuid.UUID(end_user_id), + perceptual_type=PerceptualType.trans_from_file_type(file_type), + file_path=file_url, + file_name=filename, + file_ext=file_ext, + summary=content.get('summary', ""), + meta_data={ + "content": file_content, + "modalities": file_modalities + } + ) + self.db.commit() diff --git a/api/app/services/multi_agent_orchestrator.py b/api/app/services/multi_agent_orchestrator.py index f42ee95a..60a3b5b8 100644 --- a/api/app/services/multi_agent_orchestrator.py +++ b/api/app/services/multi_agent_orchestrator.py @@ -1638,6 +1638,7 @@ class MultiAgentOrchestrator: self.variables = config_data.get("variables", []) self.tools = config_data.get("tools", {}) self.skills = config_data.get("skills", {}) + self.features = config_data.get("features", {}) self.default_model_config_id = release.default_model_config_id return AgentConfigProxy(release, app, config_data) diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index 9b06c287..f0c7cee2 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -8,47 +8,74 @@ - Bedrock/Anthropic: 仅支持 base64 格式 - OpenAI: 支持 URL 和 base64 格式 """ -import uuid -import httpx import base64 -from typing import List, Dict, Any, Optional -from abc import 
ABC, abstractmethod -from sqlalchemy.orm import Session -from docx import Document import io -import PyPDF2 +import uuid +from abc import ABC, abstractmethod +from typing import List, Dict, Any, Optional + +import csv +import json + +import PyPDF2 +import httpx +import magic +import openpyxl +from docx import Document +from sqlalchemy.orm import Session -from app.core.logging_config import get_business_logger -from app.core.exceptions import BusinessException -from app.core.error_codes import BizCode -from app.schemas.app_schema import FileInput, FileType, TransferMethod -from app.models.file_metadata_model import FileMetadata from app.core.config import settings +from app.core.error_codes import BizCode +from app.core.exceptions import BusinessException +from app.core.logging_config import get_business_logger +from app.models import ModelApiKey +from app.models.file_metadata_model import FileMetadata +from app.schemas.app_schema import FileInput, FileType, TransferMethod +from app.schemas.model_schema import ModelInfo from app.services.audio_transcription_service import AudioTranscriptionService +from app.tasks import write_perceptual_memory logger = get_business_logger() +TEXT_MIME = ['text/plain', 'text/x-markdown'] +PDF_MIME = ['application/pdf'] +DOC_MIME = [ + 'application/msword', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/zip' +] +XLSX_MIME = [ + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.ms-excel', + 'application/zip' +] +CSV_MIME = ['text/csv', 'application/csv'] +JSON_MIME = ['application/json'] + class MultimodalFormatStrategy(ABC): """多模态格式策略基类""" + def __init__(self, file: FileInput): + self.file = file + @abstractmethod - async def format_image(self, url: str) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """格式化图片""" pass @abstractmethod - async def format_document(self, file_name: 
str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """格式化文档""" pass @abstractmethod - async def format_audio(self, file_type: str, url: str) -> Dict[str, Any]: + async def format_audio(self, file_type: str, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """格式化音频""" pass @abstractmethod - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """格式化视频""" pass @@ -56,40 +83,46 @@ class MultimodalFormatStrategy(ABC): class DashScopeFormatStrategy(MultimodalFormatStrategy): """通义千问策略""" - async def format_image(self, url: str) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """通义千问图片格式:{"type": "image", "image": "url"}""" - return { + return True, { "type": "image", "image": url } - async def format_document(self, file_name: str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """通义千问文档格式""" - return { + return True, { "type": "text", "text": f"\n{text}\n" } - async def format_audio(self, file_type: str, url: str, transcription: Optional[str] = None) -> Dict[str, Any]: + async def format_audio( + self, + file_type: str, + url: str, + content: bytes | None = None, + transcription: Optional[str] = None + ) -> tuple[bool, Dict[str, Any]]: """ 通义千问音频格式 - 原生支持: qwen-audio 系列 - 其他模型: 需要转录为文本 """ if transcription: - return { + return True, { "type": "text", - "text": f"" + "text": f"" } # 通义千问音频格式:{"type": "audio", "audio": "url"} - return { + return True, { "type": "audio", "audio": url } - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """通义千问视频格式(qwen-vl 系列原生支持)""" - return { + return True, { "type": "video", "video": url } @@ -98,52 +131,45 @@ class 
DashScopeFormatStrategy(MultimodalFormatStrategy): class BedrockFormatStrategy(MultimodalFormatStrategy): """Bedrock/Anthropic 策略""" - async def format_image(self, url: str) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """ Bedrock/Anthropic 格式: base64 编码 {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}} """ - from mimetypes import guess_type logger.info(f"下载并编码图片: {url}") # 下载图片 - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get(url) - response.raise_for_status() - - # 获取图片数据 - image_data = response.content + if content is None: + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, follow_redirects=True) + response.raise_for_status() + content = response.content + self.file.set_content(content) # 确定 media type - content_type = response.headers.get("content-type") - if content_type and content_type.startswith("image/"): - media_type = content_type - else: - guessed_type, _ = guess_type(url) - media_type = guessed_type if guessed_type and guessed_type.startswith("image/") else "image/jpeg" + content_type = magic.from_buffer(content, mime=True) + media_type = content_type if content_type.startswith("image/") else "image/jpeg" + base64_data = base64.b64encode(content).decode("utf-8") - # 转换为 base64 - base64_data = base64.b64encode(image_data).decode("utf-8") + logger.info(f"图片编码完成: media_type={media_type}, size={len(base64_data)}") - logger.info(f"图片编码完成: media_type={media_type}, size={len(base64_data)}") - - return { - "type": "image", - "source": { - "type": "base64", - "media_type": media_type, - "data": base64_data - } + return True, { + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": base64_data } + } - async def format_document(self, file_name: str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> 
tuple[bool, Dict[str, Any]]: """Bedrock/Anthropic 文档格式(需要 base64 编码)""" # Bedrock 文档需要 base64 编码 text_bytes = text.encode('utf-8') base64_text = base64.b64encode(text_bytes).decode('utf-8') - return { + return True, { "type": "document", "source": { "type": "base64", @@ -152,24 +178,29 @@ class BedrockFormatStrategy(MultimodalFormatStrategy): } } - async def format_audio(self, file_type: str, url: str, transcription: Optional[str] = None) -> Dict[str, Any]: + async def format_audio( + self, file_type: str, + url: str, + content: bytes | None = None, + transcription: Optional[str] = None + ) -> tuple[bool, Dict[str, Any]]: """ Bedrock/Anthropic 音频格式 不支持原生音频,必须转录为文本 """ if transcription: - return { + return True, { "type": "text", "text": f"[音频转录]\n{transcription}" } - return { + return False, { "type": "text", "text": "[音频文件:Bedrock 不支持原生音频,请启用音频转文本功能]" } - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """Bedrock/Anthropic 视频格式""" - return { + return False, { "type": "text", "text": f"" } @@ -178,71 +209,81 @@ class BedrockFormatStrategy(MultimodalFormatStrategy): class OpenAIFormatStrategy(MultimodalFormatStrategy): """OpenAI 策略""" - async def format_image(self, url: str) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """OpenAI 格式: {"type": "image_url", "image_url": {"url": "..."}}""" - return { + return True, { "type": "image_url", "image_url": { "url": url } } - async def format_document(self, file_name: str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """OpenAI 文档格式""" - return { + return True, { "type": "text", "text": f"\n{text}\n" } - async def format_audio(self, file_type: str, url: str, transcription: Optional[str] = None) -> Dict[str, Any]: + async def format_audio( + self, + file_type: str, + url: str, + content: bytes | 
None = None, + transcription: Optional[str] = None + ) -> tuple[bool, Dict[str, Any]]: """ OpenAI 音频格式 - gpt-4o-audio 系列支持原生音频(需要 base64 编码) - 其他模型使用转录文本 """ if transcription: - return { + return True, { "type": "text", "text": f"" } # OpenAI 音频需要 base64 编码 try: - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get(url) - response.raise_for_status() - audio_data = response.content - base64_audio = base64.b64encode(audio_data).decode('utf-8') - # 1. 优先从 file_type (MIME) 取扩展名 - file_ext = file_type.split('/')[-1] if file_type and '/' in file_type else None - # 2. 从响应头 content-type 取 - if not file_ext: - ct = response.headers.get("content-type", "") - file_ext = ct.split('/')[-1].split(';')[0].strip() if '/' in ct else None - # 3. 从 URL 路径取扩展名 - if not file_ext: - file_ext = url.split('?')[0].rsplit('.', 1)[-1].lower() or None - # 4. 默认 wav - # supported_ext = {"wav", "mp3", "mp4", "ogg", "flac", "webm", "m4a", "wave", "x-m4a"} - file_ext = "wav" if not file_ext else file_ext + audio_data = content + if content is None: + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, follow_redirects=True) + response.raise_for_status() + audio_data = response.content + self.file.set_content(audio_data) + base64_audio = base64.b64encode(audio_data).decode('utf-8') - return { - "type": "input_audio", - "input_audio": { - "data": f"data:;base64,{base64_audio}", - "format": file_ext - } + # 1. 优先从 file_type (MIME) 取扩展名 + file_ext = file_type.split('/')[-1] if file_type and '/' in file_type else None + # 2. 从响应头 content-type 取 + if not file_ext: + content_type = magic.from_buffer(audio_data, mime=True) + file_ext = content_type.split('/')[-1].split(';')[0].strip() if '/' in content_type else None + # 3. 从 URL 路径取扩展名 + if not file_ext: + file_ext = url.split('?')[0].rsplit('.', 1)[-1].lower() or None + # 4. 
默认 wav + # supported_ext = {"wav", "mp3", "mp4", "ogg", "flac", "webm", "m4a", "wave", "x-m4a"} + file_ext = "wav" if not file_ext else file_ext + + return True, { + "type": "input_audio", + "input_audio": { + "data": f"data:;base64,{base64_audio}", + "format": file_ext } + } except Exception as e: logger.error(f"下载音频失败: {e}") - return { + return False, { "type": "text", "text": f"[音频处理失败: {str(e)}]" } - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """OpenAI 视频格式""" - return { + return True, { "type": "video_url", "video_url": { "url": url @@ -260,33 +301,56 @@ PROVIDER_STRATEGIES = { class MultimodalService: - """多模态文件处理服务""" + """ + Service for handling multimodal file processing. - def __init__(self, db: Session, provider: str = "dashscope", api_key: Optional[str] = None, enable_audio_transcription: bool = False, is_omni: bool = False): + Attributes: + db (Session): Database session. + model_api_key (str): API key for the model provider. + provider (str): Name of the model provider. + is_omni (bool): Indicates whether the model supports full multimodal capability. + capability (list): Capability configuration of the model. + audio_api_key (str | None): API key used for audio transcription. + enable_audio_transcription (bool): Whether audio transcription is enabled. + """ + + def __init__( + self, + db: Session, + api_config: ModelInfo | None = None, + audio_api_key: Optional[str] = None, + enable_audio_transcription: bool = False, + ): """ - 初始化多模态服务 - + Initialize the multimodal service. + Args: - db: 数据库会话 - provider: 模型提供商(dashscope, bedrock, anthropic, openai 等) - api_key: API 密钥(用于音频转文本) - enable_audio_transcription: 是否启用音频转文本 - is_omni: 是否为 Omni 模型(dashscope 的 omni 模型需要使用 OpenAI 兼容格式) + db (Session): Database session. + api_config (ModelApiKey | None): Model API configuration. + audio_api_key (str | None): API key for audio transcription. 
+ enable_audio_transcription (bool): Enable audio transcription. """ self.db = db - self.provider = provider.lower() - self.api_key = api_key + self.api_config = api_config + if self.api_config is not None: + self.model_api_key = api_config.api_key + self.provider = api_config.provider.lower() + self.is_omni = api_config.is_omni + self.capability = api_config.capability + self.audio_api_key = audio_api_key self.enable_audio_transcription = enable_audio_transcription - self.is_omni = is_omni async def process_files( self, - files: Optional[List[FileInput]] + end_user_id: uuid.UUID | str, + files: Optional[List[FileInput]], + ) -> List[Dict[str, Any]]: """ 处理文件列表,返回 LLM 可用的格式 Args: + end_user_id: 用户ID files: 文件输入列表 Returns: @@ -294,6 +358,8 @@ class MultimodalService: """ if not files: return [] + if isinstance(end_user_id, uuid.UUID): + end_user_id = str(end_user_id) # 获取对应的策略 # dashscope 的 omni 模型使用 OpenAI 兼容格式 @@ -305,23 +371,32 @@ class MultimodalService: logger.warning(f"未找到 provider '{self.provider}' 的策略,使用默认策略") strategy_class = DashScopeFormatStrategy - strategy = strategy_class() - result = [] for idx, file in enumerate(files): + strategy = strategy_class(file) + if not file.url: + file.url = await self.get_file_url(file) try: - if file.type == FileType.IMAGE: - content = await self._process_image(file, strategy) + if file.type == FileType.IMAGE and "vision" in self.capability: + is_support, content = await self._process_image(file, strategy) result.append(content) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) elif file.type == FileType.DOCUMENT: - content = await self._process_document(file, strategy) + is_support, content = await self._process_document(file, strategy) result.append(content) - elif file.type == FileType.AUDIO: - content = await self._process_audio(file, strategy) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) + elif file.type == FileType.AUDIO and 
"audio" in self.capability: + is_support, content = await self._process_audio(file, strategy) result.append(content) - elif file.type == FileType.VIDEO: - content = await self._process_video(file, strategy) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) + elif file.type == FileType.VIDEO and "video" in self.capability: + is_support, content = await self._process_video(file, strategy) result.append(content) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) else: logger.warning(f"不支持的文件类型: {file.type}") except Exception as e: @@ -331,7 +406,8 @@ class MultimodalService: "file_index": idx, "file_type": file.type, "error": str(e) - } + }, + exc_info=True ) # 继续处理其他文件,不中断整个流程 result.append({ @@ -342,7 +418,18 @@ class MultimodalService: logger.info(f"成功处理 {len(result)}/{len(files)} 个文件,provider={self.provider}") return result - async def _process_image(self, file: FileInput, strategy) -> Dict[str, Any]: + def write_perceptual_memory( + self, + end_user_id: str, + file_type: str, + file_url: str, + file_message: dict + ): + """写入感知记忆""" + if end_user_id and self.api_config: + write_perceptual_memory.delay(end_user_id, self.api_config.model_dump(), file_type, file_url, file_message) + + async def _process_image(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理图片文件 @@ -354,53 +441,16 @@ class MultimodalService: Dict: 根据 provider 返回不同格式的图片内容 """ try: - url = await self.get_file_url(file) - return await strategy.format_image(url) + # url = await self.get_file_url(file) + return await strategy.format_image(file.url, content=file.get_content()) except Exception as e: logger.error(f"处理图片失败: {e}", exc_info=True) - return { + return False, { "type": "text", "text": f"[图片处理失败: {str(e)}]" } - @staticmethod - async def _download_and_encode_image(url: str) -> tuple[str, str]: - """ - 下载图片并转换为 base64 - - Args: - url: 图片 URL - - Returns: - tuple: (base64_data, media_type) - """ - 
from mimetypes import guess_type - - # 下载图片 - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get(url) - response.raise_for_status() - - # 获取图片数据 - image_data = response.content - - # 确定 media type - content_type = response.headers.get("content-type") - if content_type and content_type.startswith("image/"): - media_type = content_type - else: - # 从 URL 推断 - guessed_type, _ = guess_type(url) - media_type = guessed_type if guessed_type and guessed_type.startswith("image/") else "image/jpeg" - - # 转换为 base64 - base64_data = base64.b64encode(image_data).decode("utf-8") - - logger.debug(f"图片编码完成: media_type={media_type}, size={len(base64_data)}") - - return base64_data, media_type - - async def _process_document(self, file: FileInput, strategy) -> Dict[str, Any]: + async def _process_document(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理文档文件(PDF、Word 等) @@ -412,14 +462,15 @@ class MultimodalService: Dict: 根据 provider 返回不同格式的文档内容 """ if file.transfer_method == TransferMethod.REMOTE_URL: - # 远程文档暂不支持提取 - return { + return True, { "type": "text", - "text": f"\n[远程文档,暂不支持内容提取]\n" + "text": f"\n{await self._extract_document_text(file)}\n" } else: # 本地文件,提取文本内容 - text = await self._extract_document_text(file.upload_file_id) + server_url = settings.FILE_LOCAL_SERVER_URL + file.url = f"{server_url}/storage/permanent/{file.upload_file_id}" + text = await self._extract_document_text(file) file_metadata = self.db.query(FileMetadata).filter( FileMetadata.id == file.upload_file_id ).first() @@ -429,7 +480,7 @@ class MultimodalService: # 使用策略格式化文档 return await strategy.format_document(file_name, text) - async def _process_audio(self, file: FileInput, strategy) -> Dict[str, Any]: + async def _process_audio(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理音频文件 @@ -441,28 +492,28 @@ class MultimodalService: Dict: 根据 provider 返回不同格式的音频内容 """ try: - url = await self.get_file_url(file) + # url = await 
self.get_file_url(file) # 如果启用音频转文本且有 API Key transcription = None - if self.enable_audio_transcription and self.api_key: - logger.info(f"开始音频转文本: {url}") + if self.enable_audio_transcription and self.audio_api_key: + logger.info(f"开始音频转文本: {file.url}") if self.provider == "dashscope": - transcription = await AudioTranscriptionService.transcribe_dashscope(url, self.api_key) + transcription = await AudioTranscriptionService.transcribe_dashscope(file.url, self.audio_api_key) elif self.provider == "openai": - transcription = await AudioTranscriptionService.transcribe_openai(url, self.api_key) + transcription = await AudioTranscriptionService.transcribe_openai(file.url, self.audio_api_key) else: logger.warning(f"Provider {self.provider} 不支持音频转文本") - return await strategy.format_audio(file.file_type, url, transcription) + return await strategy.format_audio(file.file_type, file.url, file.get_content(), transcription) except Exception as e: logger.error(f"处理音频失败: {e}", exc_info=True) - return { + return False, { "type": "text", "text": f"[音频处理失败: {str(e)}]" } - async def _process_video(self, file: FileInput, strategy) -> Dict[str, Any]: + async def _process_video(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理视频文件 @@ -474,11 +525,11 @@ class MultimodalService: Dict: 根据 provider 返回不同格式的视频内容 """ try: - url = await self.get_file_url(file) - return await strategy.format_video(url) + # url = await self.get_file_url(file) + return await strategy.format_video(file.url) except Exception as e: logger.error(f"处理视频失败: {e}", exc_info=True) - return { + return False, { "type": "text", "text": f"[视频处理失败: {str(e)}]" } @@ -500,8 +551,6 @@ class MultimodalService: return file.url else: file_id = file.upload_file_id - print("="*50) - print("file_id",file_id) # 查询 FileMetadata file_metadata = self.db.query(FileMetadata).filter( @@ -519,66 +568,50 @@ class MultimodalService: server_url = settings.FILE_LOCAL_SERVER_URL return f"{server_url}/storage/permanent/{file_id}" 
- async def _extract_document_text(self, file_id: uuid.UUID) -> str: + async def _extract_document_text(self, file: FileInput) -> str: """ 提取文档文本内容 Args: - file_id: 文件ID + file: 文件输入 Returns: str: 提取的文本内容 """ - file_metadata = self.db.query(FileMetadata).filter( - FileMetadata.id == file_id, - FileMetadata.status == "completed" - ).first() - - if not file_metadata: - raise BusinessException( - f"文件不存在或已删除: {file_id}", - BizCode.NOT_FOUND - ) - - file_ext = file_metadata.file_ext.lower() - server_url = settings.FILE_LOCAL_SERVER_URL - file_url = f"{server_url}/storage/permanent/{file_id}" - - if file_ext in ['.txt', '.md', '.markdown']: - return await self._read_text_file(file_url) - elif file_ext == '.pdf': - return await self._extract_pdf_text(file_url) - elif file_ext in ['.doc', '.docx']: - return await self._extract_word_text(file_url) - else: - return f"[不支持的文档格式: {file_ext}]" - - @staticmethod - async def _read_text_file(file_url: str) -> str: - """读取纯文本文件""" try: - # 下载文件 - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get(file_url) - response.raise_for_status() - return response.text + file_content = file.get_content() + if not file_content: + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(file.url, follow_redirects=True) + response.raise_for_status() + file_content = response.content + file.set_content(file_content) + file_mime_type = magic.from_buffer(file_content, mime=True) + if file_mime_type in TEXT_MIME: + return file_content.decode("utf-8") + elif file_mime_type in PDF_MIME: + return await self._extract_pdf_text(file_content) + elif file_mime_type in DOC_MIME and file.file_type.endswith(('docx', 'doc')): + return await self._extract_word_text(file_content) + elif file_mime_type in XLSX_MIME and file.file_type.endswith(("xlsx", "xls")): + return await self._extract_xlsx_text(file_content) + elif file_mime_type in CSV_MIME: + return await self._extract_csv_text(file_content) + elif 
file_mime_type in JSON_MIME: + return await self._extract_json_text(file_content) + else: + return f"[Unsupported file type: {file_mime_type}]" except Exception as e: - logger.error(f"读取文本文件失败: {e}") - return f"[文件读取失败: {str(e)}]" + logger.error(f"Failed to load file. - {e}") + return "[Failed to load file.]" @staticmethod - async def _extract_pdf_text(file_url: str) -> str: + async def _extract_pdf_text(file_content: bytes) -> str: """提取 PDF 文本""" try: - # 下载 PDF 文件 - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get(file_url) - response.raise_for_status() - pdf_data = response.content - # 使用 BytesIO 读取 PDF text_parts = [] - pdf_file = io.BytesIO(pdf_data) + pdf_file = io.BytesIO(file_content) pdf_reader = PyPDF2.PdfReader(pdf_file) for page in pdf_reader.pages: text_parts.append(page.extract_text()) @@ -588,17 +621,10 @@ class MultimodalService: return f"[PDF 提取失败: {str(e)}]" @staticmethod - async def _extract_word_text(file_url: str) -> str: + async def _extract_word_text(file_content: bytes) -> str: """提取 Word 文档文本""" try: - # 下载 Word 文件 - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get(file_url) - response.raise_for_status() - word_data = response.content - - # 使用 BytesIO 读取 Word 文档 - word_file = io.BytesIO(word_data) + word_file = io.BytesIO(file_content) doc = Document(word_file) text_parts = [paragraph.text for paragraph in doc.paragraphs] return '\n'.join(text_parts) @@ -606,6 +632,42 @@ class MultimodalService: logger.error(f"提取 Word 文本失败: {e}") return f"[Word 提取失败: {str(e)}]" + @staticmethod + async def _extract_xlsx_text(file_content: bytes) -> str: + """提取 Excel 文本""" + try: + wb = openpyxl.load_workbook(io.BytesIO(file_content), read_only=True, data_only=True) + parts = [] + for sheet in wb.worksheets: + parts.append(f"[Sheet: {sheet.title}]") + for row in sheet.iter_rows(values_only=True): + parts.append('\t'.join('' if v is None else str(v) for v in row)) + return '\n'.join(parts) 
+ except Exception as e: + logger.error(f"提取 Excel 文本失败: {e}") + return f"[Excel 提取失败: {str(e)}]" + + @staticmethod + async def _extract_csv_text(file_content: bytes) -> str: + """提取 CSV 文本""" + try: + text = file_content.decode('utf-8-sig') + reader = csv.reader(io.StringIO(text)) + return '\n'.join('\t'.join(row) for row in reader) + except Exception as e: + logger.error(f"提取 CSV 文本失败: {e}") + return f"[CSV 提取失败: {str(e)}]" + + @staticmethod + async def _extract_json_text(file_content: bytes) -> str: + """提取 JSON 文本""" + try: + data = json.loads(file_content.decode('utf-8')) + return json.dumps(data, ensure_ascii=False, indent=2) + except Exception as e: + logger.error(f"提取 JSON 文本失败: {e}") + return f"[JSON 提取失败: {str(e)}]" + def get_multimodal_service(db: Session) -> MultimodalService: """获取多模态服务实例(依赖注入)""" diff --git a/api/app/services/pilot_run_service.py b/api/app/services/pilot_run_service.py index 5d00d8a5..fc749157 100644 --- a/api/app/services/pilot_run_service.py +++ b/api/app/services/pilot_run_service.py @@ -120,7 +120,8 @@ async def run_pilot_extraction( "pruning_switch": memory_config.pruning_enabled, "pruning_scene": memory_config.pruning_scene, "pruning_threshold": memory_config.pruning_threshold, - "llm_model_id": str(memory_config.llm_model_id), + "scene_id": str(memory_config.scene_id) if memory_config.scene_id else None, + "ontology_class_infos": memory_config.ontology_class_infos, } config = PruningConfig(**pruning_config_dict) @@ -231,9 +232,11 @@ async def run_pilot_extraction( "chunker_strategy": memory_config.chunker_strategy, } - # 添加剪枝统计信息 - if pruning_stats: - preprocessing_summary["pruning"] = pruning_stats + # 添加剪枝统计信息(始终包含 pruning 字段,确保前端不会因字段缺失报错) + preprocessing_summary["pruning"] = pruning_stats if pruning_stats else { + "enabled": memory_config.pruning_enabled, + "deleted_count": 0, + } await progress_callback("text_preprocessing_complete", "预处理文本完成(剪枝 + 分块)", preprocessing_summary) diff --git 
a/api/app/services/prompt/perceptual_summary_system.jinja2 b/api/app/services/prompt/perceptual_summary_system.jinja2 new file mode 100644 index 00000000..ee5d3eb5 --- /dev/null +++ b/api/app/services/prompt/perceptual_summary_system.jinja2 @@ -0,0 +1,53 @@ +{% raw %}You are a professional information extraction system. + +Your task is to analyze the provided document content and generate structured metadata. + +Extract the following fields: + +* **summary**: A concise summary of the document in 2–4 sentences. +* **keywords**: 5–10 important keywords or key phrases that best represent the document. This field MUST be a JSON array of strings. +* **topic**: The primary topic of the document expressed as a short phrase (3–8 words). +* **domain**: The broader knowledge domain or field the document belongs to (e.g., Artificial Intelligence, Computer Science, Finance, Healthcare, Education, Law, etc.). + +STRICT RULES: + +1. Output MUST be valid JSON. +2. Do NOT output markdown. +3. Do NOT output explanations. +4. Do NOT output any text before or after the JSON. +5. The JSON MUST contain EXACTLY these four keys: + * summary + * keywords + * topic + * domain{% endraw %} +{% if file_type == 'image' or file_type == 'video' %} * scene {% endif %} +{% if file_type == 'audio' %} * speaker_count {% endif %} +{% if file_type == 'document' %} * section_count + * title + * first_line +{% endif %} +{% raw %} +6. `keywords` MUST be a JSON array of strings. +7. If the document content is insufficient, infer the best possible answer based on context. +8. Ensure the JSON is syntactically correct. +{% endraw %} +9. 
Output using the language {{ language }} +{% raw %} +Required JSON format: + +{ +"summary": "string", +"keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"], +"topic": "string", +"domain": "string", +{% endraw %} +{% if file_type == 'image' or file_type == 'video' %} "scene": ["string", "string"] {% endif %} +{% if file_type == 'document' %} "section_count": integer +"title": "string", +"first_line": "string" +{% endif %} +{% if file_type == 'audio' %} "speaker_count": integer {% endif %} +{% raw %} +} + +Now analyze the following document and return the JSON result.{% endraw %} diff --git a/api/app/services/tenant_service.py b/api/app/services/tenant_service.py index 2edb46df..066edf57 100644 --- a/api/app/services/tenant_service.py +++ b/api/app/services/tenant_service.py @@ -217,4 +217,55 @@ class TenantService: skip=skip, limit=limit, is_active=is_active - ) \ No newline at end of file + ) + + def get_tenant_language_config(self, tenant_id: uuid.UUID) -> Optional[dict]: + """获取租户语言配置""" + tenant = self.tenant_repo.get_tenant_by_id(tenant_id) + if not tenant: + raise BusinessException("租户不存在", code=BizCode.TENANT_NOT_FOUND) + + return { + "default_language": tenant.default_language, + "supported_languages": tenant.supported_languages + } + + def update_tenant_language_config( + self, + tenant_id: uuid.UUID, + default_language: str, + supported_languages: list + ) -> Optional[dict]: + """更新租户语言配置""" + # 检查租户是否存在 + tenant = self.tenant_repo.get_tenant_by_id(tenant_id) + if not tenant: + raise BusinessException("租户不存在", code=BizCode.TENANT_NOT_FOUND) + + # 验证默认语言在支持的语言列表中 + if default_language not in supported_languages: + raise BusinessException( + "默认语言必须在支持的语言列表中", + code=BizCode.VALIDATION_FAILED + ) + + try: + # 更新语言配置 + tenant.default_language = default_language + tenant.supported_languages = supported_languages + self.db.commit() + self.db.refresh(tenant) + + business_logger.info( + f"更新租户语言配置成功: {tenant.name} (ID: {tenant.id}), " + 
f"默认语言: {default_language}, 支持语言: {supported_languages}" + ) + + return { + "default_language": tenant.default_language, + "supported_languages": tenant.supported_languages + } + except Exception as e: + self.db.rollback() + business_logger.error(f"更新租户语言配置失败: {str(e)}") + raise BusinessException(f"更新租户语言配置失败: {str(e)}", code=BizCode.DB_ERROR) diff --git a/api/app/services/tool_service.py b/api/app/services/tool_service.py index 4fe1e9e6..089f0ec5 100644 --- a/api/app/services/tool_service.py +++ b/api/app/services/tool_service.py @@ -78,7 +78,7 @@ class ToolService: def get_tool_info(self, tool_id: str, tenant_id: uuid.UUID) -> Optional[ToolInfo]: """获取工具详情""" - config = self.tool_repo.find_by_id_and_tenant(self.db, uuid.UUID(tool_id), tenant_id) + config = self.tool_repo.find_by_id_and_tenant_all(self.db, uuid.UUID(tool_id), tenant_id) return self._config_to_info(config) if config else None def _check_name_duplicate(self, name: str, tool_type: ToolType, tenant_id: uuid.UUID, exclude_id: Optional[uuid.UUID] = None): @@ -93,7 +93,44 @@ class ToolService: if query.first(): raise BusinessException(f"工具名称 '{name}' 已存在", BizCode.DUPLICATE_NAME) - def create_tool( + def _check_mcp_duplicate(self, name: str, tool_type: ToolType, tenant_id: uuid.UUID, config: Dict[str, Any]): + """检查MCP工具是否重复:市场来源按market_id+market_config_id+mcp_service_id判断(名称无关),自建按name+tool_type判断""" + from app.models.tool_model import MCPSourceChannel + source_channel = config.get("source_channel") + is_market_source = ( + source_channel is not None + and source_channel != MCPSourceChannel.SELF_HOSTED + ) + if is_market_source: + exists = ( + self.db.query(ToolConfig) + .join(MCPToolConfig, MCPToolConfig.id == ToolConfig.id) + .filter( + ToolConfig.tenant_id == tenant_id, + ToolConfig.tool_type == tool_type, + MCPToolConfig.source_channel == source_channel, + MCPToolConfig.market_id == config.get("market_id"), + MCPToolConfig.market_config_id == config.get("market_config_id"), + 
MCPToolConfig.mcp_service_id == config.get("mcp_service_id"), + ) + .first() + ) + if exists: + raise BusinessException(f"该MCP服务已添加", BizCode.DUPLICATE_NAME) + else: + exists = ( + self.db.query(ToolConfig) + .filter( + ToolConfig.name == name, + ToolConfig.tool_type == tool_type, + ToolConfig.tenant_id == tenant_id, + ) + .first() + ) + if exists: + raise BusinessException(f"工具 '{name}' 已存在", BizCode.DUPLICATE_NAME) + + async def create_tool( self, name: str, tool_type: ToolType, @@ -106,7 +143,19 @@ class ToolService: """创建工具""" if tool_type == ToolType.BUILTIN: raise ValueError("内置工具不允许创建") - self._check_name_duplicate(name, tool_type, tenant_id) + + cfg = config or {} + if tool_type == ToolType.MCP: + self._check_mcp_duplicate(name, tool_type, tenant_id, cfg) + # 创建前测试连接 + test_result = await self._test_mcp_connection_by_config(cfg) + if not test_result["success"]: + raise BusinessException(f"MCP连接测试失败: {test_result['message']}", BizCode.INVALID_PARAMETER) + # 将发现的工具列表写回 config + if "available_tools" in test_result: + cfg["available_tools"] = test_result["available_tools"] + else: + self._check_name_duplicate(name, tool_type, tenant_id) try: # 创建基础配置 @@ -117,19 +166,22 @@ class ToolService: tool_type=tool_type.value, tenant_id=tenant_id, status=ToolStatus.AVAILABLE.value, - config_data=config or {}, + config_data=cfg, tags=tags ) self.db.add(tool_config) self.db.flush() # 创建类型特定配置 - self._create_type_config(tool_config, config or {}) + self._create_type_config(tool_config, cfg) self.db.commit() logger.info(f"工具创建成功: {tool_config.id}") return str(tool_config.id) + except BusinessException: + self.db.rollback() + raise except Exception as e: self.db.rollback() logger.error(f"创建工具失败: {e}") @@ -185,7 +237,7 @@ class ToolService: return False def delete_tool(self, tool_id: str, tenant_id: uuid.UUID) -> bool: - """删除工具""" + """删除工具(逻辑删除)""" config = self._get_tool_config(tool_id, tenant_id) if not config: return False @@ -194,14 +246,7 @@ class ToolService: raise 
ValueError("内置工具不允许删除") try: - # 删除关联表记录 - if config.tool_type == ToolType.CUSTOM.value: - self.db.query(CustomToolConfig).filter(CustomToolConfig.id == config.id).delete() - elif config.tool_type == ToolType.MCP.value: - self.db.query(MCPToolConfig).filter(MCPToolConfig.id == config.id).delete() - - # 删除主表记录(ToolExecution会通过cascade自动删除) - self.db.delete(config) + config.is_active = False self._clear_tool_cache(tool_id) self.db.commit() return True @@ -210,6 +255,27 @@ class ToolService: logger.error(f"删除工具失败: {tool_id}, {e}") return False + def set_tool_active(self, tool_id: str, tenant_id: uuid.UUID, is_active: bool) -> bool: + """设置工具可用状态(启用/禁用)""" + # 直接查询,包含 is_active=False 的记录 + config = self.db.query(ToolConfig).filter( + ToolConfig.id == uuid.UUID(tool_id), + ToolConfig.tenant_id == tenant_id + ).first() + if not config: + return False + if config.tool_type == ToolType.BUILTIN.value: + raise ValueError("内置工具不允许修改可用状态") + try: + config.is_active = is_active + self._clear_tool_cache(tool_id) + self.db.commit() + return True + except Exception as e: + self.db.rollback() + logger.error(f"设置工具状态失败: {tool_id}, {e}") + return False + async def execute_tool( self, tool_id: str, @@ -326,7 +392,7 @@ class ToolService: Returns: 方法列表或None """ - config = self._get_tool_config(tool_id, tenant_id) + config = self._get_tool_config_all(tool_id, tenant_id) if not config: return None @@ -805,16 +871,20 @@ class ToolService: } def _get_tool_config(self, tool_id: str, tenant_id: uuid.UUID) -> Optional[ToolConfig]: - """获取工具配置""" + """获取工具配置(仅返回 is_active=True)""" return self.tool_repo.find_by_id_and_tenant(self.db, uuid.UUID(tool_id), tenant_id) + def _get_tool_config_all(self, tool_id: str, tenant_id: uuid.UUID) -> Optional[ToolConfig]: + """获取工具配置(返回所有)""" + return self.tool_repo.find_by_id_and_tenant_all(self.db, uuid.UUID(tool_id), tenant_id) + def get_tool_instance(self, tool_id: str, tenant_id: uuid.UUID) -> Optional[BaseTool]: - """获取工具实例""" + """获取工具实例(仅返回 
is_active=True 的工具)""" if tool_id in self._tool_cache: return self._tool_cache[tool_id] config = self._get_tool_config(tool_id, tenant_id) - if not config: + if not config or not config.is_active: return None try: @@ -928,6 +998,7 @@ class ToolService: tags=config.tags or [], tenant_id=str(config.tenant_id) if config.tenant_id else None, config_data=config_data, + is_active=config.is_active, created_at=config.created_at ) @@ -1165,6 +1236,27 @@ class ToolService: logger.error(f"加载内置工具配置失败: {e}") return {} + async def _test_mcp_connection_by_config(self, config: Dict[str, Any]) -> Dict[str, Any]: + """根据配置参数直接测试MCP连接(创建前调用,无需已存在的工具记录)""" + server_url = config.get("server_url") + if not server_url: + return {"success": False, "message": "server_url不能为空"} + connection_config = config.get("connection_config") or {} + try: + test_result = await self.mcp_tool_manager.test_tool_connection(server_url, connection_config) + if not test_result["success"]: + return test_result + success_flag, tools, error = await self.mcp_tool_manager.discover_tools(server_url, connection_config) + if not success_flag: + return {"success": False, "message": f"获取工具列表失败: {error}"} + tool_list = [ + {tool["name"]: {"description": tool.get("description", ""), "inputSchema": tool.get("inputSchema", {})}} + for tool in tools if tool.get("name") + ] + return {"success": True, "message": "MCP连接测试成功", "available_tools": tool_list} + except Exception as e: + return {"success": False, "message": f"连接测试异常: {str(e)}"} + async def _test_mcp_connection(self, config: ToolConfig) -> Dict[str, Any]: """测试MCP连接并自动同步工具列表""" try: diff --git a/api/app/services/user_memory_service.py b/api/app/services/user_memory_service.py index 8bacc112..d5d19e0d 100644 --- a/api/app/services/user_memory_service.py +++ b/api/app/services/user_memory_service.py @@ -1727,6 +1727,150 @@ async def analytics_graph_data( # 辅助函数 +async def analytics_community_graph_data( + db: Session, + end_user_id: str, +) -> Dict[str, Any]: + """ + 
获取社区图谱数据,包含 Community 节点、ExtractedEntity 节点及其关系。 + + Returns: + 包含 nodes、edges、statistics 的字典,格式与 analytics_graph_data 一致 + """ + try: + user_uuid = uuid.UUID(end_user_id) + repo = EndUserRepository(db) + end_user = repo.get_by_id(user_uuid) + if not end_user: + return { + "nodes": [], "edges": [], + "statistics": {"total_nodes": 0, "total_edges": 0, "node_types": {}, "edge_types": {}}, + "message": "用户不存在" + } + + # 查询社区节点、实体节点、BELONGS_TO_COMMUNITY 边、实体间关系 + from app.repositories.neo4j.cypher_queries import GET_COMMUNITY_GRAPH_DATA + rows = await _neo4j_connector.execute_query(GET_COMMUNITY_GRAPH_DATA, end_user_id=end_user_id) + + nodes_map: Dict[str, dict] = {} + edges_map: Dict[str, dict] = {} + # 记录每个 Community 对应的实体 id 列表 + community_members: Dict[str, list] = {} + + for row in rows: + # Community 节点 + c_id = row["c_id"] + if c_id and c_id not in nodes_map: + raw = row["c_props"] or {} + props = {k: _clean_neo4j_value(raw.get(k)) for k in ( + "community_id", "end_user_id", "member_count", "updated_at", + "name", "summary", "core_entities", + ) if k in raw} + nodes_map[c_id] = { + "id": c_id, + "label": "Community", + "properties": props, + } + + # ExtractedEntity 节点 (e) + e_id = row["e_id"] + if e_id and e_id not in nodes_map: + raw = row["e_props"] or {} + props = {k: _clean_neo4j_value(raw.get(k)) for k in ( + "name", "end_user_id", "description", "created_at", "entity_type", + ) if k in raw} + # 注入所属社区名称(c 是 e 直接归属的社区) + c_raw = row["c_props"] or {} + props["community_name"] = _clean_neo4j_value(c_raw.get("name")) or "" + nodes_map[e_id] = { + "id": e_id, + "label": "ExtractedEntity", + "properties": props, + } + + # ExtractedEntity 节点 (e2,可选) + e2_id = row.get("e2_id") + if e2_id and e2_id not in nodes_map: + raw = row["e2_props"] or {} + props = {k: _clean_neo4j_value(raw.get(k)) for k in ( + "name", "end_user_id", "description", "created_at", "entity_type", + ) if k in raw} + # e2 的社区归属在后处理阶段通过 community_members 补充 + props["community_name"] = "" + 
nodes_map[e2_id] = { + "id": e2_id, + "label": "ExtractedEntity", + "properties": props, + } + + # BELONGS_TO_COMMUNITY 边 + b_id = row["b_id"] + if b_id and b_id not in edges_map: + edges_map[b_id] = { + "id": b_id, + "source": e_id, + "target": c_id, + } + # 收集社区成员 id + if c_id and e_id: + community_members.setdefault(c_id, []) + if e_id not in community_members[c_id]: + community_members[c_id].append(e_id) + + # EXTRACTED_RELATIONSHIP 边(可选) + r_id = row.get("r_id") + if r_id and r_id not in edges_map and e2_id: + r_props = {k: _clean_neo4j_value(v) for k, v in (row["r_props"] or {}).items()} + source = e_id if row.get("r_from_e") else e2_id + target = e2_id if row.get("r_from_e") else e_id + edges_map[r_id] = { + "id": r_id, + "source": source, + "target": target, + } + + nodes = list(nodes_map.values()) + edges = list(edges_map.values()) + + # 为每个 Community 节点注入 member_entity_ids,同时补全 e2 节点的 community_name + for c_id, member_ids in community_members.items(): + c_node = nodes_map.get(c_id) + if c_node: + c_node["properties"]["member_entity_ids"] = member_ids + c_name = c_node["properties"].get("name") or "" + # 补全属于该社区但 community_name 为空的实体(即 e2 节点) + for eid in member_ids: + e_node = nodes_map.get(eid) + if e_node and e_node["label"] == "ExtractedEntity": + if not e_node["properties"].get("community_name"): + e_node["properties"]["community_name"] = c_name + + node_type_counts: Dict[str, int] = {} + for n in nodes: + node_type_counts[n["label"]] = node_type_counts.get(n["label"], 0) + 1 + + return { + "nodes": nodes, + "edges": edges, + "statistics": { + "total_nodes": len(nodes), + "total_edges": len(edges), + "node_types": node_type_counts, + } + } + + except ValueError: + logger.error(f"无效的 end_user_id 格式: {end_user_id}") + return { + "nodes": [], "edges": [], + "statistics": {"total_nodes": 0, "total_edges": 0, "node_types": {}, "edge_types": {}}, + "message": "无效的用户ID格式" + } + except Exception as e: + logger.error(f"获取社区图谱数据失败: {str(e)}", exc_info=True) + 
raise + + async def _extract_node_properties(label: str, properties: Dict[str, Any],node_id: str) -> Dict[str, Any]: """ 根据节点类型提取需要的属性字段 diff --git a/api/app/services/user_service.py b/api/app/services/user_service.py index 22dabed7..e23b1ac3 100644 --- a/api/app/services/user_service.py +++ b/api/app/services/user_service.py @@ -438,24 +438,26 @@ def update_last_login_time(db: Session, user_id: uuid.UUID) -> User: async def change_password(db: Session, user_id: uuid.UUID, old_password: str, new_password: str, current_user: User) -> User: """普通用户修改自己的密码""" + from app.i18n.service import t + business_logger.info(f"用户修改密码请求: user_id={user_id}, current_user={current_user.id}") # 检查权限:只能修改自己的密码 if current_user.id != user_id: business_logger.warning(f"用户尝试修改他人密码: current_user={current_user.id}, target_user={user_id}") - raise PermissionDeniedException("You can only change your own password") + raise PermissionDeniedException(t("auth.password.change_failed")) try: # 获取用户 db_user = user_repository.get_user_by_id(db=db, user_id=user_id) if not db_user: business_logger.warning(f"用户不存在: {user_id}") - raise BusinessException("User not found", code=BizCode.USER_NOT_FOUND) + raise BusinessException(t("auth.user.not_found"), code=BizCode.USER_NOT_FOUND) # 验证旧密码 if not verify_password(old_password, db_user.hashed_password): business_logger.warning(f"用户旧密码验证失败: {user_id}") - raise BusinessException("当前密码不正确", code=BizCode.VALIDATION_FAILED) + raise BusinessException(t("auth.password.incorrect"), code=BizCode.VALIDATION_FAILED) # 更新密码 db_user.hashed_password = get_password_hash(new_password) @@ -471,7 +473,7 @@ async def change_password(db: Session, user_id: uuid.UUID, old_password: str, ne except Exception as e: business_logger.error(f"修改用户密码失败: user_id={user_id} - {str(e)}") db.rollback() - raise BusinessException(f"修改用户密码失败: user_id={user_id} - {str(e)}", code=BizCode.DB_ERROR) + raise BusinessException(t("auth.password.change_failed"), code=BizCode.DB_ERROR) async def 
admin_change_password(db: Session, target_user_id: uuid.UUID, new_password: str = None, current_user: User = None) -> tuple[User, str]: @@ -487,6 +489,8 @@ async def admin_change_password(db: Session, target_user_id: uuid.UUID, new_pass Returns: tuple[User, str]: (更新后的用户对象, 实际使用的密码) """ + from app.i18n.service import t + business_logger.info(f"管理员修改用户密码请求: admin={current_user.id}, target_user={target_user_id}") # 检查权限:只有超级管理员可以修改他人密码 @@ -496,7 +500,7 @@ async def admin_change_password(db: Session, target_user_id: uuid.UUID, new_pass try: permission_service.check_superuser( subject, - error_message="只有超级管理员可以修改他人密码" + error_message=t("auth.password.change_failed") ) except PermissionDeniedException as e: business_logger.warning(f"非超管用户尝试修改他人密码: current_user={current_user.id}") @@ -507,12 +511,12 @@ async def admin_change_password(db: Session, target_user_id: uuid.UUID, new_pass target_user = user_repository.get_user_by_id(db=db, user_id=target_user_id) if not target_user: business_logger.warning(f"目标用户不存在: {target_user_id}") - raise BusinessException("目标用户不存在", code=BizCode.USER_NOT_FOUND) + raise BusinessException(t("auth.user.not_found"), code=BizCode.USER_NOT_FOUND) # 检查租户权限:超管只能修改同租户用户的密码 if current_user.tenant_id != target_user.tenant_id: business_logger.warning(f"跨租户密码修改尝试: admin_tenant={current_user.tenant_id}, target_tenant={target_user.tenant_id}") - raise BusinessException("不可跨租户修改用户密码", code=BizCode.FORBIDDEN) + raise BusinessException(t("auth.password.change_failed"), code=BizCode.FORBIDDEN) # 如果没有提供新密码,则生成随机密码 actual_password = new_password if new_password else generate_random_password() @@ -532,7 +536,7 @@ async def admin_change_password(db: Session, target_user_id: uuid.UUID, new_pass except Exception as e: business_logger.error(f"管理员修改用户密码失败: admin={current_user.id}, target_user={target_user_id} - {str(e)}") db.rollback() - raise BusinessException(f"管理员修改用户密码失败: admin={current_user.id}, target_user={target_user_id} - {str(e)}", code=BizCode.DB_ERROR) 
+ raise BusinessException(t("auth.password.change_failed"), code=BizCode.DB_ERROR) def generate_random_password(length: int = 12) -> str: @@ -740,3 +744,54 @@ async def verify_and_change_email(db: Session, user_id: uuid.UUID, new_email: Em # # business_logger.info(f"用户邮箱修改成功: {db_user.username}, new_email={new_email}") # return db_user + + +def get_user_language_preference(db: Session, user_id: uuid.UUID, current_user: User) -> str: + """获取用户语言偏好""" + business_logger.info(f"获取用户语言偏好: user_id={user_id}") + + # 权限检查:只能获取自己的语言偏好 + if current_user.id != user_id: + raise PermissionDeniedException("只能获取自己的语言偏好") + + db_user = user_repository.get_user_by_id(db=db, user_id=user_id) + if not db_user: + raise BusinessException("用户不存在", code=BizCode.USER_NOT_FOUND) + + language = db_user.preferred_language or "zh" + business_logger.info(f"用户语言偏好: {db_user.username}, language={language}") + return language + + +def update_user_language_preference( + db: Session, + user_id: uuid.UUID, + language: str, + current_user: User +) -> User: + """更新用户语言偏好""" + business_logger.info(f"更新用户语言偏好: user_id={user_id}, language={language}") + + # 权限检查:只能修改自己的语言偏好 + if current_user.id != user_id: + raise PermissionDeniedException("只能修改自己的语言偏好") + + # 验证语言代码是否支持 + from app.core.config import settings + if language not in settings.I18N_SUPPORTED_LANGUAGES: + raise BusinessException( + f"不支持的语言代码: {language}。支持的语言: {', '.join(settings.I18N_SUPPORTED_LANGUAGES)}", + code=BizCode.VALIDATION_FAILED + ) + + db_user = user_repository.get_user_by_id(db=db, user_id=user_id) + if not db_user: + raise BusinessException("用户不存在", code=BizCode.USER_NOT_FOUND) + + # 更新语言偏好 + db_user.preferred_language = language + db.commit() + db.refresh(db_user) + + business_logger.info(f"用户语言偏好更新成功: {db_user.username}, language={language}") + return db_user diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py index eaf78b90..04a778a1 100644 --- a/api/app/services/workflow_service.py +++ 
b/api/app/services/workflow_service.py @@ -458,7 +458,7 @@ class WorkflowService: type=file.type, url=await self.multimodal_service.get_file_url(file), transfer_method=file.transfer_method, - file_id=str(file.upload_file_id), + file_id=str(file.upload_file_id) if file.upload_file_id else None, origin_file_type=file.file_type, is_file=True ).model_dump() @@ -570,6 +570,9 @@ class WorkflowService: message=f"工作流配置不存在: app_id={app_id}" ) + feature_configs = config.features or {} + self._validate_file_upload(feature_configs, payload.files) + input_data = { "message": payload.message, "variables": payload.variables, "conversation_id": payload.conversation_id, @@ -633,30 +636,33 @@ class WorkflowService: final_messages = result.get("messages", [])[init_message_length:] human_message = "" assistant_message = "" + human_meta = { + "files": [] + } for message in final_messages: if message["role"] == "user": if isinstance(message["content"], str): human_message += message["content"] elif isinstance(message["content"], list): for file in message["content"]: - if file.get("type") == FileType.IMAGE: - human_message += f"![image]({file.get('url', '')})" - else: - human_message += f"[{file.get('type')}]({file.get('url', '')})" + human_meta["files"].append({ + "type": file.get("type"), + "url": file.get("url") + }) if message["role"] == "assistant": assistant_message = message["content"] self.conversation_service.add_message( conversation_id=conversation_id_uuid, role="user", content=human_message, - meta_data=None + meta_data=human_meta ) self.conversation_service.add_message( message_id=message_id, conversation_id=conversation_id_uuid, role="assistant", content=assistant_message, - meta_data={"usage": token_usage} + meta_data={"usage": token_usage, "audio_url": None} ) self.update_execution_status( execution.execution_id, @@ -737,6 +743,8 @@ class WorkflowService: code=BizCode.CONFIG_MISSING, message=f"工作流配置不存在: app_id={app_id}" ) + feature_configs = config.features or {} + 
self._validate_file_upload(feature_configs, payload.files) input_data = { "message": payload.message, "variables": payload.variables, @@ -797,30 +805,33 @@ class WorkflowService: final_messages = event.get("data", {}).get("messages", [])[init_message_length:] human_message = "" assistant_message = "" + human_meta = { + "files": [] + } for message in final_messages: if message["role"] == "user": if isinstance(message["content"], str): human_message += message["content"] elif isinstance(message["content"], list): for file in message["content"]: - if file.get("type") == FileType.IMAGE: - human_message += f"![image]({file.get('url', '')})" - else: - human_message += f"[{file.get('type')}]({file.get('url', '')})" + human_meta["files"].append({ + "type": file.get("type"), + "url": file.get("url") + }) if message["role"] == "assistant": assistant_message = message["content"] self.conversation_service.add_message( conversation_id=conversation_id_uuid, role="user", content=human_message, - meta_data=None + meta_data=human_meta ) self.conversation_service.add_message( message_id=message_id, conversation_id=conversation_id_uuid, role="assistant", content=assistant_message, - meta_data={"usage": token_usage} + meta_data={"usage": token_usage, "audio_url": None} ) self.update_execution_status( execution.execution_id, @@ -845,7 +856,10 @@ class WorkflowService: yield event except Exception as e: - logger.error(f"工作流流式执行失败: execution_id={execution.execution_id}, error={e}", exc_info=True) + logger.error( + f"Workflow streaming execution failed: execution_id={execution.execution_id}, error={e}", + exc_info=True + ) self.update_execution_status( execution.execution_id, "failed", @@ -868,6 +882,80 @@ class WorkflowService: return node.get("config", {}).get("variables", []) raise BusinessException("workflow config error - start node not found") + @staticmethod + def is_memory_enable(config: dict) -> bool: + nodes = config.get("nodes", []) + for node in nodes: + if node.get("type") in 
[NodeType.MEMORY_READ, NodeType.MEMORY_WRITE]: + return True + return False + + @staticmethod + def _validate_file_upload( + features_config: dict[str, Any], + files: Optional[list[FileInput]] + ) -> None: + """校验上传文件是否符合 file_upload 配置""" + if not files: + return + fu = features_config.get("file_upload") + if fu is None: + return + if not (isinstance(fu, dict) and fu.get("enabled")): + raise BusinessException( + "The application does not have file upload functionality enabled", + BizCode.BAD_REQUEST + ) + max_count = fu.get("max_file_count", 5) + if len(files) > max_count: + raise BusinessException( + f"File count exceeds limit (maximum {max_count} files)", + BizCode.BAD_REQUEST + ) + + # 校验传输方式 + allowed_methods = fu.get("allowed_transfer_methods", ["local_file", "remote_url"]) + for f in files: + if f.transfer_method.value not in allowed_methods: + raise BusinessException( + f"Unsupport file transfer method:{f.transfer_method.value}," + f"allowed method:{', '.join(allowed_methods)}", + BizCode.BAD_REQUEST + ) + + # 各类型对应的开关和大小限制配置键 + type_cfg = { + "image": ("image_enabled", "image_max_size_mb", 20, "image"), + "audio": ("audio_enabled", "audio_max_size_mb", 50, "audio"), + "document": ("document_enabled", "document_max_size_mb", 100, "document"), + "video": ("video_enabled", "video_max_size_mb", 500, "video"), + } + + for f in files: + ftype = str(f.type) # 如 "image", "audio", "document", "video" + cfg = type_cfg.get(ftype) + if cfg is None: + continue + enabled_key, size_key, default_max_mb, label = cfg + + # 校验类型开关 + if not fu.get(enabled_key): + raise BusinessException( + f"The application has not enabled {label} file upload", + BizCode.BAD_REQUEST + ) + + # 校验文件大小(仅当内容已加载时) + content = f.get_content() + if content is not None: + max_mb = fu.get(size_key, default_max_mb) + size_mb = len(content) / (1024 * 1024) + if size_mb > max_mb: + raise BusinessException( + f"{label} File size exceeds the limit (maximum {max_mb} MB, current {size_mb:.1f} MB)", + 
BizCode.BAD_REQUEST + ) + # ==================== 依赖注入函数 ==================== diff --git a/api/app/services/workspace_service.py b/api/app/services/workspace_service.py index 74880410..cefb8380 100644 --- a/api/app/services/workspace_service.py +++ b/api/app/services/workspace_service.py @@ -2,11 +2,11 @@ import datetime import hashlib import secrets import uuid -from os import getenv from typing import List, Optional from sqlalchemy.orm import Session +from app.config.default_ontology_initializer import DefaultOntologyInitializer from app.core.config import settings from app.core.error_codes import BizCode from app.core.exceptions import BusinessException, PermissionDeniedException @@ -30,17 +30,15 @@ from app.schemas.workspace_schema import ( WorkspaceModelsUpdate, WorkspaceUpdate, ) -from app.config.default_ontology_initializer import DefaultOntologyInitializer # 获取业务逻辑专用日志器 business_logger = get_business_logger() -from dotenv import load_dotenv -load_dotenv() + def switch_workspace( - db: Session, - workspace_id: uuid.UUID, - user: User, + db: Session, + workspace_id: uuid.UUID, + user: User, ): """切换工作空间""" business_logger.debug(f"用户 {user.username} 请求切换工作空间为 {workspace_id}") @@ -60,31 +58,32 @@ def switch_workspace( raise BusinessException(f"切换工作空间失败: {str(e)}", BizCode.INTERNAL_ERROR) -def delete_workspace_member( - db: Session, - workspace_id: uuid.UUID, - member_id: uuid.UUID, - user: User, - ): - """删除工作空间成员""" - business_logger.debug(f"用户 {user.username} 请求删除工作空间 {workspace_id} 的成员 {member_id}") - _check_workspace_admin_permission(db, workspace_id, user) - workspace_member = workspace_repository.get_member_by_id(db=db, member_id=member_id) - if not workspace_member: - raise BusinessException(f"工作空间成员 {member_id} 不存在", BizCode.WORKSPACE_NOT_FOUND) +def delete_workspace_member( + db: Session, + workspace_id: uuid.UUID, + member_id: uuid.UUID, + user: User, +): + """删除工作空间成员""" + business_logger.debug(f"用户 {user.username} 请求删除工作空间 {workspace_id} 的成员 
{member_id}") + _check_workspace_admin_permission(db, workspace_id, user) + workspace_member = workspace_repository.get_member_by_id(db=db, member_id=member_id) + if not workspace_member: + raise BusinessException(f"工作空间成员 {member_id} 不存在", BizCode.WORKSPACE_NOT_FOUND) - if workspace_member.workspace_id != workspace_id: - raise BusinessException(f"工作空间成员 {member_id} 不存在于工作空间 {workspace_id}", BizCode.WORKSPACE_NOT_FOUND) + if workspace_member.workspace_id != workspace_id: + raise BusinessException(f"工作空间成员 {member_id} 不存在于工作空间 {workspace_id}", + BizCode.WORKSPACE_NOT_FOUND) - try: - workspace_member.is_active = False - workspace_member.user.current_workspace_id = None - db.commit() - business_logger.info(f"用户 {user.username} 成功删除工作空间 {workspace_id} 的成员 {member_id}") - except Exception as e: - db.rollback() - business_logger.error(f"删除工作空间成员失败 - 工作空间: {workspace_id}, 成员: {member_id}, 错误: {str(e)}") - raise BusinessException(f"删除工作空间成员失败: {str(e)}", BizCode.INTERNAL_ERROR) + try: + workspace_member.is_active = False + workspace_member.user.current_workspace_id = None + db.commit() + business_logger.info(f"用户 {user.username} 成功删除工作空间 {workspace_id} 的成员 {member_id}") + except Exception as e: + db.rollback() + business_logger.error(f"删除工作空间成员失败 - 工作空间: {workspace_id}, 成员: {member_id}, 错误: {str(e)}") + raise BusinessException(f"删除工作空间成员失败: {str(e)}", BizCode.INTERNAL_ERROR) def get_user_workspaces(db: Session, user: User) -> List[Workspace]: @@ -102,19 +101,19 @@ def get_user_workspaces(db: Session, user: User) -> List[Workspace]: """ business_logger.debug(f"获取用户工作空间列表: {user.username} (ID: {user.id})") workspaces = workspace_repository.get_workspaces_by_user(db=db, user_id=user.id) - + # Ensure each neo4j workspace has a default memory config for workspace in workspaces: if workspace.storage_type == 'neo4j': _ensure_default_memory_config(db, workspace) _ensure_default_ontology_scenes(db, workspace) - + business_logger.info(f"用户 {user.username} 的工作空间数量: 
{len(workspaces)}") return workspaces def _create_workspace_only( - db: Session, workspace: WorkspaceCreate, owner: User + db: Session, workspace: WorkspaceCreate, owner: User ) -> Workspace: business_logger.debug(f"创建工作空间: {workspace.name}, 创建者: {owner.username}") @@ -130,6 +129,7 @@ def _create_workspace_only( business_logger.error(f"创建工作空间失败: {workspace.name} - {str(e)}") raise + def create_workspace( db: Session, workspace: WorkspaceCreate, user: User, language: str = "zh" ) -> Workspace: @@ -137,9 +137,14 @@ def create_workspace( f"创建工作空间: {workspace.name}, 创建者: {user.username}, " f"storage_type: {workspace.storage_type}" ) - llm=workspace.llm - embedding=workspace.embedding - rerank=workspace.rerank + if workspace_repository.get_workspaces_by_name(db=db, name=workspace.name, tenant_id=user.tenant_id): + raise BusinessException( + message="同名工作空间已存在", + code=BizCode.RESOURCE_ALREADY_EXISTS + ) + llm = workspace.llm + embedding = workspace.embedding + rerank = workspace.rerank try: # Create the workspace without adding any members business_logger.debug(f"创建工作空间: {workspace.name}") @@ -158,26 +163,26 @@ def create_workspace( success, error_msg = initializer.initialize_default_scenes( db_workspace.id, language=language ) - + if success: business_logger.info( f"为工作空间 {db_workspace.id} 创建默认本体场景成功 (language={language})" ) - - # 获取默认场景ID,优先使用"在线教育"场景,如果不存在则使用"情感陪伴"场景 + + # 获取默认场景ID,优先使用"在线教育"场景,如果不存在则使用"情感陪伴"场景 from app.repositories.ontology_scene_repository import OntologySceneRepository from app.config.default_ontology_config import ( - ONLINE_EDUCATION_SCENE, + ONLINE_EDUCATION_SCENE, EMOTIONAL_COMPANION_SCENE, get_scene_name ) - + scene_repo = OntologySceneRepository(db) - + # 优先尝试获取教育场景 education_scene_name = get_scene_name(ONLINE_EDUCATION_SCENE, language) education_scene = scene_repo.get_by_name(education_scene_name, db_workspace.id) - + if education_scene: default_scene_id = education_scene.scene_id default_scene_name = education_scene.scene_name @@ -188,7 
+193,7 @@ def create_workspace( # 如果教育场景不存在,尝试获取情感陪伴场景 companion_scene_name = get_scene_name(EMOTIONAL_COMPANION_SCENE, language) companion_scene = scene_repo.get_by_name(companion_scene_name, db_workspace.id) - + if companion_scene: default_scene_id = companion_scene.scene_id default_scene_name = companion_scene.scene_name @@ -255,10 +260,10 @@ def create_workspace( avatar='', type=KnowledgeType.General, permission_id=PermissionType.Memory, - embedding_id=uuid.UUID(getenv('KB_embedding_id')) if None else embedding, - reranker_id=uuid.UUID(getenv('KB_reranker_id')) if None else rerank, - llm_id=uuid.UUID(getenv('KB_llm_id')) if None else llm, - image2text_id=uuid.UUID(getenv('KB_llm_id')) if None else llm, + embedding_id=embedding, + reranker_id=rerank, + llm_id=llm, + image2text_id=llm, parser_config={ "layout_recognize": "DeepDOC", "chunk_token_num": 256, @@ -293,7 +298,7 @@ def create_workspace( business_logger.info( f"工作空间 {db_workspace.id} 及相关资源创建完成并已提交" ) - + return db_workspace except Exception as e: @@ -303,11 +308,11 @@ def create_workspace( def update_workspace( - db: Session, workspace_id: uuid.UUID, workspace_in: WorkspaceUpdate, user: User + db: Session, workspace_id: uuid.UUID, workspace_in: WorkspaceUpdate, user: User ) -> Workspace: business_logger.info(f"更新工作空间: workspace_id={workspace_id}, 操作者: {user.username}") - db_workspace = _check_workspace_admin_permission(db,workspace_id,user) + db_workspace = _check_workspace_admin_permission(db, workspace_id, user) try: # 更新工作空间 business_logger.debug(f"执行工作空间更新: {db_workspace.name} (ID: {workspace_id})") @@ -327,7 +332,7 @@ def update_workspace( def get_workspace_members( - db: Session, workspace_id: uuid.UUID, user: User + db: Session, workspace_id: uuid.UUID, user: User ) -> List[WorkspaceMember]: """获取某工作空间的成员列表(关系序列化由模型关系支持)""" business_logger.info(f"获取工作空间成员: workspace_id={workspace_id}, 操作者: {user.username}") @@ -371,7 +376,6 @@ def get_workspace_members( return members - # ==================== 
邀请相关服务方法 ==================== def _generate_invite_token() -> tuple[str, str]: @@ -464,13 +468,14 @@ def _check_workspace_admin_permission(db: Session, workspace_id: uuid.UUID, user def create_workspace_invite( - db: Session, - workspace_id: uuid.UUID, - invite_data: WorkspaceInviteCreate, - user: User + db: Session, + workspace_id: uuid.UUID, + invite_data: WorkspaceInviteCreate, + user: User ) -> WorkspaceInviteResponse: """创建工作空间邀请""" - business_logger.info(f"创建工作空间邀请: workspace_id={workspace_id}, email={invite_data.email}, 创建者: {user.username}") + business_logger.info( + f"创建工作空间邀请: workspace_id={workspace_id}, email={invite_data.email}, 创建者: {user.username}") try: # 检查权限 @@ -533,17 +538,18 @@ def create_workspace_invite( except Exception as e: db.rollback() - business_logger.error(f"创建工作空间邀请失败: workspace_id={workspace_id}, email={invite_data.email} - {str(e)}") + business_logger.error( + f"创建工作空间邀请失败: workspace_id={workspace_id}, email={invite_data.email} - {str(e)}") raise def get_workspace_invites( - db: Session, - workspace_id: uuid.UUID, - user: User, - status: Optional[InviteStatus] = None, - limit: int = 50, - offset: int = 0 + db: Session, + workspace_id: uuid.UUID, + user: User, + status: Optional[InviteStatus] = None, + limit: int = 50, + offset: int = 0 ) -> List[WorkspaceInviteResponse]: """获取工作空间邀请列表""" business_logger.info(f"获取工作空间邀请列表: workspace_id={workspace_id}, 操作者: {user.username}") @@ -604,9 +610,9 @@ def validate_invite_token(db: Session, token: str) -> InviteValidateResponse: def accept_workspace_invite( - db: Session, - accept_request: InviteAcceptRequest, - user: User + db: Session, + accept_request: InviteAcceptRequest, + user: User ) -> dict: """接受工作空间邀请""" business_logger.info(f"接受工作空间邀请: 用户 {user.username}") @@ -694,7 +700,8 @@ def accept_workspace_invite( # 获取工作空间信息 workspace = workspace_repository.get_workspace_by_id(db=db, workspace_id=invite.workspace_id) - business_logger.info(f"用户成功加入工作空间: user={user.username}, 
workspace={workspace.name}, role={workspace_role}") + business_logger.info( + f"用户成功加入工作空间: user={user.username}, workspace={workspace.name}, role={workspace_role}") return { "message": "Successfully joined the workspace", @@ -709,13 +716,14 @@ def accept_workspace_invite( def revoke_workspace_invite( - db: Session, - workspace_id: uuid.UUID, - invite_id: uuid.UUID, - user: User + db: Session, + workspace_id: uuid.UUID, + invite_id: uuid.UUID, + user: User ) -> dict: """撤销工作空间邀请""" - business_logger.info(f"撤销工作空间邀请: workspace_id={workspace_id}, invite_id={invite_id}, 操作者: {user.username}") + business_logger.info( + f"撤销工作空间邀请: workspace_id={workspace_id}, invite_id={invite_id}, 操作者: {user.username}") try: # 检查权限 @@ -744,13 +752,14 @@ def revoke_workspace_invite( def update_workspace_member_roles( - db: Session, - workspace_id: uuid.UUID, - updates: List[WorkspaceMemberUpdate], - user: User, + db: Session, + workspace_id: uuid.UUID, + updates: List[WorkspaceMemberUpdate], + user: User, ) -> List[WorkspaceMember]: """更新工作空间成员角色""" - business_logger.info(f"更新工作空间成员角色: workspace_id={workspace_id}, 操作者: {user.username}, 更新数量: {len(updates)}") + business_logger.info( + f"更新工作空间成员角色: workspace_id={workspace_id}, 操作者: {user.username}, 更新数量: {len(updates)}") # 检查管理员权限 _check_workspace_admin_permission(db, workspace_id, user) @@ -764,7 +773,8 @@ def update_workspace_member_roles( for upd in updates: # 检查成员是否存在 if upd.id not in member_map: - raise BusinessException(f"成员 {upd.id} 不存在于工作空间 {workspace_id}", BizCode.WORKSPACE_MEMBER_NOT_FOUND) + raise BusinessException(f"成员 {upd.id} 不存在于工作空间 {workspace_id}", + BizCode.WORKSPACE_MEMBER_NOT_FOUND) member = member_map[upd.id] @@ -916,10 +926,10 @@ def get_workspace_models_configs( def update_workspace_models_configs( - db: Session, - workspace_id: uuid.UUID, - models_update: WorkspaceModelsUpdate, - user: User, + db: Session, + workspace_id: uuid.UUID, + models_update: WorkspaceModelsUpdate, + user: User, ) -> Workspace: 
"""更新工作空间的模型配置(llm, embedding, rerank) @@ -966,6 +976,126 @@ def update_workspace_models_configs( raise BusinessException(f"更新模型配置失败: {str(e)}", BizCode.INTERNAL_ERROR) +def _fill_workspace_configs_model_defaults( + db: Session, + workspace: Workspace +) -> None: + """Fill empty model fields for all memory configs in a workspace. + + Updates llm_id, embedding_id, rerank_id, reflection_model_id, and emotion_model_id + if they are None, using the corresponding workspace default models. + + Args: + db: Database session + workspace: The workspace containing default model settings + """ + from app.models.memory_config_model import MemoryConfig + + # Get all configs for this workspace + configs = db.query(MemoryConfig).filter( + MemoryConfig.workspace_id == workspace.id + ).all() + + if not configs: + return + + # Map of memory_config field -> workspace field + model_field_mappings = [ + ("llm_id", "llm"), + ("embedding_id", "embedding"), + ("rerank_id", "rerank"), + ("reflection_model_id", "llm"), # reflection uses LLM + ("emotion_model_id", "llm"), # emotion uses LLM + ] + + configs_updated = 0 + + for memory_config in configs: + updated_fields = [] + + for config_field, workspace_field in model_field_mappings: + config_value = getattr(memory_config, config_field, None) + workspace_value = getattr(workspace, workspace_field, None) + + if not config_value and workspace_value: + setattr(memory_config, config_field, workspace_value) + updated_fields.append(config_field) + + if updated_fields: + configs_updated += 1 + business_logger.debug( + f"Updated memory config {memory_config.config_id} fields: {updated_fields}" + ) + + if configs_updated > 0: + try: + db.commit() + business_logger.info( + f"Updated {configs_updated} memory configs in workspace {workspace.id} with default models" + ) + except Exception as e: + db.rollback() + business_logger.error( + f"Failed to update memory configs in workspace {workspace.id}: {str(e)}" + ) + + +def _create_default_memory_config( + 
db: Session, + workspace_id: uuid.UUID, + workspace_name: str, + llm_id: Optional[uuid.UUID] = None, + embedding_id: Optional[uuid.UUID] = None, + rerank_id: Optional[uuid.UUID] = None, + scene_id: Optional[uuid.UUID] = None, + pruning_scene_name: Optional[str] = None, +) -> None: + """Create a default memory config for a newly created workspace. + + Args: + db: Database session + workspace_id: The workspace ID + workspace_name: The workspace name (used for config naming) + llm_id: Optional LLM model ID + embedding_id: Optional embedding model ID + rerank_id: Optional rerank model ID + scene_id: Optional ontology scene ID (默认关联教育场景) + pruning_scene_name: Optional pruning scene name,取自 ontology_scene.scene_name + """ + from app.models.memory_config_model import MemoryConfig + + config_id = uuid.uuid4() + + default_config = MemoryConfig( + config_id=config_id, + config_name=f"{workspace_name} 默认配置", + config_desc="工作空间创建时自动生成的默认记忆配置", + workspace_id=workspace_id, + llm_id=str(llm_id) if llm_id else None, + embedding_id=str(embedding_id) if embedding_id else None, + rerank_id=str(rerank_id) if rerank_id else None, + scene_id=scene_id, # 关联本体场景ID(默认为"在线教育"场景) + pruning_scene=pruning_scene_name, # 语义剪枝场景直接使用 scene_name + state=True, # Active by default + is_default=True, # Mark as workspace default + ) + + db.add(default_config) + db.flush() # 使用 flush 而不是 commit,让调用者统一提交 + + business_logger.info( + "Created default memory config for workspace", + extra={ + "workspace_id": str(workspace_id), + "config_id": str(config_id), + "config_name": default_config.config_name, + "scene_id": str(scene_id) if scene_id else None, + } + ) + + +# ==================== 检查配置相关服务 ==================== + def _ensure_default_memory_config(db: Session, workspace: Workspace) -> None: """Ensure a workspace has a default memory config, creating one if missing. 
@@ -976,19 +1106,19 @@ def _ensure_default_memory_config(db: Session, workspace: Workspace) -> None: workspace: The workspace to check """ from app.models.memory_config_model import MemoryConfig - + # Check if default config exists for this workspace existing_default = db.query(MemoryConfig).filter( MemoryConfig.workspace_id == workspace.id, MemoryConfig.is_default == True ).first() - + if not existing_default: # No default config exists, create one business_logger.info( f"Workspace {workspace.id} missing default memory config, creating one" ) - + # 尝试获取默认场景ID,优先教育场景,其次情感陪伴场景 default_scene_id = None try: @@ -998,7 +1128,7 @@ def _ensure_default_memory_config(db: Session, workspace: Workspace) -> None: EMOTIONAL_COMPANION_SCENE, get_scene_name ) - + scene_repo = OntologySceneRepository(db) # 尝试中文和英文场景名称 for language in ["zh", "en"]: @@ -1011,7 +1141,7 @@ def _ensure_default_memory_config(db: Session, workspace: Workspace) -> None: f"找到教育场景用于默认记忆配置: scene_id={default_scene_id}, scene_name={education_scene_name}" ) break - + # 如果教育场景不存在,尝试情感陪伴场景 companion_scene_name = get_scene_name(EMOTIONAL_COMPANION_SCENE, language) companion_scene = scene_repo.get_by_name(companion_scene_name, workspace.id) @@ -1025,7 +1155,7 @@ def _ensure_default_memory_config(db: Session, workspace: Workspace) -> None: business_logger.warning( f"获取默认场景失败,将创建不关联场景的记忆配置: {str(scene_error)}" ) - + try: _create_default_memory_config( db=db, @@ -1040,75 +1170,11 @@ def _ensure_default_memory_config(db: Session, workspace: Workspace) -> None: business_logger.error( f"Failed to create default memory config for workspace {workspace.id}: {str(e)}" ) - + # Fill empty model fields for ALL configs in this workspace _fill_workspace_configs_model_defaults(db, workspace) -def _fill_workspace_configs_model_defaults( - db: Session, - workspace: Workspace -) -> None: - """Fill empty model fields for all memory configs in a workspace. 
- - Updates llm_id, embedding_id, rerank_id, reflection_model_id, and emotion_model_id - if they are None, using the corresponding workspace default models. - - Args: - db: Database session - workspace: The workspace containing default model settings - """ - from app.models.memory_config_model import MemoryConfig - - # Get all configs for this workspace - configs = db.query(MemoryConfig).filter( - MemoryConfig.workspace_id == workspace.id - ).all() - - if not configs: - return - - # Map of memory_config field -> workspace field - model_field_mappings = [ - ("llm_id", "llm"), - ("embedding_id", "embedding"), - ("rerank_id", "rerank"), - ("reflection_model_id", "llm"), # reflection uses LLM - ("emotion_model_id", "llm"), # emotion uses LLM - ] - - configs_updated = 0 - - for memory_config in configs: - updated_fields = [] - - for config_field, workspace_field in model_field_mappings: - config_value = getattr(memory_config, config_field, None) - workspace_value = getattr(workspace, workspace_field, None) - - if not config_value and workspace_value: - setattr(memory_config, config_field, workspace_value) - updated_fields.append(config_field) - - if updated_fields: - configs_updated += 1 - business_logger.debug( - f"Updated memory config {memory_config.config_id} fields: {updated_fields}" - ) - - if configs_updated > 0: - try: - db.commit() - business_logger.info( - f"Updated {configs_updated} memory configs in workspace {workspace.id} with default models" - ) - except Exception as e: - db.rollback() - business_logger.error( - f"Failed to update memory configs in workspace {workspace.id}: {str(e)}" - ) - - def _ensure_default_ontology_scenes(db: Session, workspace: Workspace) -> None: """Ensure a workspace has default ontology scenes, creating them if missing. 
@@ -1153,57 +1219,3 @@ def _ensure_default_ontology_scenes(db: Session, workspace: Workspace) -> None: business_logger.error( f"为工作空间 {workspace.id} 补建默认本体场景异常: {str(e)}" ) - - -def _create_default_memory_config( - db: Session, - workspace_id: uuid.UUID, - workspace_name: str, - llm_id: Optional[uuid.UUID] = None, - embedding_id: Optional[uuid.UUID] = None, - rerank_id: Optional[uuid.UUID] = None, - scene_id: Optional[uuid.UUID] = None, - pruning_scene_name: Optional[str] = None, -) -> None: - """Create a default memory config for a newly created workspace. - - Args: - db: Database session - workspace_id: The workspace ID - workspace_name: The workspace name (used for config naming) - llm_id: Optional LLM model ID - embedding_id: Optional embedding model ID - rerank_id: Optional rerank model ID - scene_id: Optional ontology scene ID (默认关联教育场景) - pruning_scene_name: Optional pruning scene name,取自 ontology_scene.scene_name - """ - from app.models.memory_config_model import MemoryConfig - - config_id = uuid.uuid4() - - default_config = MemoryConfig( - config_id=config_id, - config_name=f"{workspace_name} 默认配置", - config_desc="工作空间创建时自动生成的默认记忆配置", - workspace_id=workspace_id, - llm_id=str(llm_id) if llm_id else None, - embedding_id=str(embedding_id) if embedding_id else None, - rerank_id=str(rerank_id) if rerank_id else None, - scene_id=scene_id, # 关联本体场景ID(默认为"在线教育"场景) - pruning_scene=pruning_scene_name, # 语义剪枝场景直接使用 scene_name - state=True, # Active by default - is_default=True, # Mark as workspace default - ) - - db.add(default_config) - db.flush() # 使用 flush 而不是 commit,让调用者统一提交 - - business_logger.info( - "Created default memory config for workspace", - extra={ - "workspace_id": str(workspace_id), - "config_id": str(config_id), - "config_name": default_config.config_name, - "scene_id": str(scene_id) if scene_id else None, - } - ) diff --git a/api/app/tasks.py b/api/app/tasks.py index a6ebbb8e..f5258330 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -1,5 
+1,5 @@ import asyncio -import json +import hashlib import os import re import shutil @@ -10,14 +10,14 @@ from datetime import datetime, timezone from math import ceil from pathlib import Path from typing import Any, Dict, List, Optional -from uuid import UUID import redis -import requests +from redis.exceptions import RedisError # Import a unified Celery instance from app.celery_app import celery_app from app.core.config import settings +from app.core.logging_config import get_logger from app.core.rag.crawler.web_crawler import WebCrawler from app.core.rag.graphrag.general.index import init_graphrag, run_graphrag_for_kb from app.core.rag.graphrag.utils import get_llm_cache, set_llm_cache @@ -35,12 +35,83 @@ from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ( ElasticSearchVectorFactory, ) from app.db import get_db, get_db_context -from app.models.document_model import Document -from app.models.file_model import File -from app.models.knowledge_model import Knowledge +from app.models import Document, File, Knowledge from app.schemas import document_schema, file_schema +from app.schemas.model_schema import ModelInfo from app.services.memory_agent_service import MemoryAgentService +from app.services.memory_perceptual_service import MemoryPerceptualService from app.utils.config_utils import resolve_config_id +from app.utils.redis_lock import RedisLock + +logger = get_logger(__name__) + +# 模块级同步 Redis 连接池,供 Celery 任务共享使用 +# 连接 CELERY_BACKEND DB,与 write_message:last_done 时间戳写入保持一致 +# 使用连接池而非单例客户端,提供更好的并发性能和自动重连 +_sync_redis_pool: redis.ConnectionPool | None = None + + +def _get_or_create_redis_pool() -> redis.ConnectionPool | None: + """获取或创建 Redis 连接池(懒初始化)""" + global _sync_redis_pool + if _sync_redis_pool is None: + try: + _sync_redis_pool = redis.ConnectionPool( + host=settings.REDIS_HOST, + port=settings.REDIS_PORT, + db=settings.REDIS_DB_CELERY_BACKEND, + password=settings.REDIS_PASSWORD, + decode_responses=True, + max_connections=10, + 
socket_connect_timeout=5, + socket_timeout=5, + retry_on_timeout=True, + health_check_interval=30, + ) + logger.info("Redis connection pool created for Celery tasks") + except Exception as e: + logger.error(f"Failed to create Redis connection pool: {e}", exc_info=True) + return None + return _sync_redis_pool + + +def get_sync_redis_client() -> Optional[redis.StrictRedis]: + """获取同步 Redis 客户端(使用连接池) + + 使用连接池提供的客户端,支持自动重连和健康检查。 + 如果 Redis 不可用,返回 None,调用方应优雅降级。 + + Returns: + redis.StrictRedis: Redis 客户端实例,如果连接失败则返回 None + """ + try: + pool = _get_or_create_redis_pool() + if pool is None: + return None + + client = redis.StrictRedis(connection_pool=pool) + # 验证连接可用性 + client.ping() + return client + except RedisError as e: + logger.error(f"Redis connection failed: {e}", exc_info=True) + return None + except Exception as e: + logger.error(f"Unexpected error getting Redis client: {e}", exc_info=True) + return None + + +def set_asyncio_event_loop(): + """Set the asyncio event loop for the current thread.""" + try: + loop = asyncio.get_event_loop() + if loop.is_closed(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + return loop @celery_app.task(name="tasks.process_item") @@ -237,9 +308,18 @@ def parse_document(file_path: str, document_id: uuid.UUID): vector_size = len(vts[0]) init_graphrag(task, vector_size) - async def _run(row: dict, document_ids: list[str], language: str, parser_config: dict, vector_service, - chat_model, embedding_model, callback, with_resolution: bool = True, - with_community: bool = True, ) -> dict: + async def _run( + row: dict, + document_ids: list[str], + language: str, + parser_config: dict, + vector_service, + chat_model, + embedding_model, + callback, + with_resolution: bool = True, + with_community: bool = True + ) -> dict: await trio.sleep(5) # Delay for 10 seconds nonlocal progress_msg # Declare the use of an external progress_msg 
variable result = await run_graphrag_for_kb( @@ -272,6 +352,7 @@ def parse_document(file_path: str, document_id: uuid.UUID): with_community=with_community, ) ) + try: with ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(sync_task) @@ -391,6 +472,7 @@ def build_graphrag_for_kb(kb_id: uuid.UUID): with_community=with_community, ) ) + try: with ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(sync_task) @@ -945,29 +1027,21 @@ def read_message_task(self, end_user_id: str, message: str, history: List[Dict[s # Log but continue - will fail later with proper error pass - async def _run() -> str: + async def _run() -> dict: with get_db_context() as db: service = MemoryAgentService() - return await service.read_memory(end_user_id, message, history, search_switch, actual_config_id, db, - storage_type, user_rag_memory_id) + return await service.read_memory( + end_user_id, + message, + history, + search_switch, + actual_config_id, db, + storage_type, user_rag_memory_id + ) try: - # 使用 nest_asyncio 来避免事件循环冲突 - try: - import nest_asyncio - nest_asyncio.apply() - except ImportError: - pass - # 尝试获取现有事件循环,如果不存在则创建新的 - try: - loop = asyncio.get_event_loop() - if loop.is_closed(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + loop = set_asyncio_event_loop() result = loop.run_until_complete(_run()) elapsed_time = time.time() - start_time @@ -999,7 +1073,8 @@ def read_message_task(self, end_user_id: str, message: str, history: List[Dict[s @celery_app.task(name="app.core.memory.agent.write_message", bind=True) -def write_message_task(self, end_user_id: str, message: list[dict], config_id: str | int, storage_type: str, user_rag_memory_id: str, +def write_message_task(self, end_user_id: str, message: list[dict], config_id: str | int, storage_type: str, + user_rag_memory_id: str, language: str = "zh") -> Dict[str, Any]: """Celery task to 
process a write message via MemoryAgentService. Args: @@ -1016,10 +1091,11 @@ def write_message_task(self, end_user_id: str, message: list[dict], config_id: s Raises: Exception on failure """ - from app.core.logging_config import get_logger - logger = get_logger(__name__) - logger.info(f"[CELERY WRITE] Starting write task - end_user_id={end_user_id}, config_id={config_id} (type: {type(config_id).__name__}), storage_type={storage_type}, language={language}") + logger.info( + f"[CELERY WRITE] Starting write task - end_user_id={end_user_id}, " + f"config_id={config_id} (type: {type(config_id).__name__}), " + f"storage_type={storage_type}, language={language}") start_time = time.time() # Convert config_id to UUID @@ -1029,13 +1105,14 @@ def write_message_task(self, end_user_id: str, message: list[dict], config_id: s try: with get_db_context() as db: actual_config_id = resolve_config_id(config_id, db) - print(100*'-') + print(100 * '-') print(actual_config_id) - print(100*'-') + print(100 * '-') logger.info( f"[CELERY WRITE] Converted config_id to UUID: {actual_config_id} (type: {type(actual_config_id).__name__})") except (ValueError, AttributeError) as e: - logger.error(f"[CELERY WRITE] Invalid config_id format: {config_id} (type: {type(config_id).__name__}), error: {e}") + logger.error( + f"[CELERY WRITE] Invalid config_id format: {config_id} (type: {type(config_id).__name__}), error: {e}") return { "status": "FAILURE", "error": f"Invalid config_id format: {config_id} - {str(e)}", @@ -1059,7 +1136,8 @@ def write_message_task(self, end_user_id: str, message: list[dict], config_id: s async def _run() -> str: with get_db_context() as db: logger.info( - f"[CELERY WRITE] Executing MemoryAgentService.write_memory with config_id={actual_config_id} (type: {type(actual_config_id).__name__}), language={language}") + f"[CELERY WRITE] Executing MemoryAgentService.write_memory " + f"with config_id={actual_config_id} (type: {type(actual_config_id).__name__}), language={language}") 
service = MemoryAgentService() result = await service.write_memory(end_user_id, message, actual_config_id, db, storage_type, user_rag_memory_id, language) @@ -1067,22 +1145,8 @@ def write_message_task(self, end_user_id: str, message: list[dict], config_id: s return result try: - # 使用 nest_asyncio 来避免事件循环冲突 - try: - import nest_asyncio - nest_asyncio.apply() - except ImportError: - pass - # 尝试获取现有事件循环,如果不存在则创建新的 - try: - loop = asyncio.get_event_loop() - if loop.is_closed(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + loop = set_asyncio_event_loop() result = loop.run_until_complete(_run()) elapsed_time = time.time() - start_time @@ -1090,6 +1154,20 @@ def write_message_task(self, end_user_id: str, message: list[dict], config_id: s logger.info( f"[CELERY WRITE] Task completed successfully - elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}") + # 记录该用户最后一次 write_message 成功的时间,供时间轴筛选使用 + try: + _r = get_sync_redis_client() + if _r is not None: + from datetime import timezone as _tz + _now_utc = datetime.now(_tz.utc).isoformat() + _r.set( + f"write_message:last_done:{end_user_id}", + _now_utc, + ex=86400 * 30, + ) + except Exception as _e: + logger.warning(f"[CELERY WRITE] 写入 last_done 时间戳失败(不影响主流程): {_e}") + return { "status": "SUCCESS", "result": result, @@ -1120,28 +1198,6 @@ def write_message_task(self, end_user_id: str, message: list[dict], config_id: s } -def reflection_engine() -> None: - """Empty function placeholder for timed background reflection. - - Intentionally left blank; replace with real reflection logic later. 
- """ - import asyncio - - from app.core.memory.utils.self_reflexion_utils.self_reflexion import self_reflexion - - host_id = uuid.UUID("2f6ff1eb-50c7-4765-8e89-e4566be19122") - asyncio.run(self_reflexion(host_id)) - - -@celery_app.task(name="app.core.memory.agent.reflection.timer") -def reflection_timer_task() -> None: - """Periodic Celery task that invokes reflection_engine. - - Raises an exception on failure. - """ - reflection_engine() - - # unused task # @celery_app.task(name="app.core.memory.agent.health.check_read_service") # def check_read_service_task() -> Dict[str, str]: @@ -1236,9 +1292,9 @@ def write_total_memory_task(workspace_id: str) -> Dict[str, Any]: } # 2. 查询所有app下的end_user_id(去重) - app_ids = [app.id for app in apps] + # app_ids = [app.id for app in apps] end_users = db.query(EndUser.id).filter( - EndUser.app_id.in_(app_ids) + EndUser.workspace_id == workspace_id ).distinct().all() # 3. 遍历所有end_user,查询每个宿主的记忆总量并累加 @@ -1295,6 +1351,8 @@ def write_total_memory_task(workspace_id: str) -> Dict[str, Any]: "workspace_id": workspace_id, "elapsed_time": elapsed_time, } + + @celery_app.task( name="app.tasks.write_all_workspaces_memory_task", bind=True, @@ -1318,15 +1376,12 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: start_time = time.time() async def _run() -> Dict[str, Any]: - from app.core.logging_config import get_api_logger from app.models.app_model import App from app.models.end_user_model import EndUser from app.models.workspace_model import Workspace from app.repositories.memory_increment_repository import write_memory_increment from app.services.memory_storage_service import search_all - api_logger = get_api_logger() - with get_db_context() as db: try: # 获取所有活跃的工作空间 @@ -1335,7 +1390,7 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: ).all() if not workspaces: - api_logger.warning("没有找到活跃的工作空间") + logger.warning("没有找到活跃的工作空间") return { "status": "SUCCESS", "message": "没有找到活跃的工作空间", @@ -1343,13 +1398,13 @@ def 
write_all_workspaces_memory_task(self) -> Dict[str, Any]: "workspace_results": [] } - api_logger.info(f"开始统计 {len(workspaces)} 个工作空间的记忆增量") + logger.info(f"开始统计 {len(workspaces)} 个工作空间的记忆增量") all_workspace_results = [] # 遍历每个工作空间 for workspace in workspaces: workspace_id = workspace.id - api_logger.info(f"开始处理工作空间: {workspace.name} (ID: {workspace_id})") + logger.info(f"开始处理工作空间: {workspace.name} (ID: {workspace_id})") try: # 1. 查询当前workspace下的所有app(仅未删除的) @@ -1374,13 +1429,13 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: "memory_increment_id": str(memory_increment.id), "created_at": memory_increment.created_at.isoformat(), }) - api_logger.info(f"工作空间 {workspace.name} 没有应用,记录总量为0") + logger.info(f"工作空间 {workspace.name} 没有应用,记录总量为0") continue # 2. 查询所有app下的end_user_id(去重) - app_ids = [app.id for app in apps] + # app_ids = [app.id for app in apps] end_users = db.query(EndUser.id).filter( - EndUser.app_id.in_(app_ids) + EndUser.workspace_id == workspace_id ).distinct().all() # 3. 
遍历所有end_user,查询每个宿主的记忆总量并累加 @@ -1399,7 +1454,7 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: }) except Exception as e: # 记录单个用户查询失败,但继续处理其他用户 - api_logger.warning(f"查询用户 {end_user_id} 记忆失败: {str(e)}") + logger.warning(f"查询用户 {end_user_id} 记忆失败: {str(e)}") end_user_details.append({ "end_user_id": str(end_user_id), "total": 0, @@ -1423,13 +1478,13 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: "created_at": memory_increment.created_at.isoformat(), }) - api_logger.info( + logger.info( f"工作空间 {workspace.name} 统计完成: 总量={total_num}, 用户数={len(end_users)}" ) except Exception as e: db.rollback() # 回滚失败的事务,允许继续处理下一个工作空间 - api_logger.error(f"处理工作空间 {workspace.name} (ID: {workspace_id}) 失败: {str(e)}") + logger.error(f"处理工作空间 {workspace.name} (ID: {workspace_id}) 失败: {str(e)}") all_workspace_results.append({ "workspace_id": str(workspace_id), "workspace_name": workspace.name, @@ -1452,7 +1507,7 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: } except Exception as e: - api_logger.error(f"记忆增量统计任务执行失败: {str(e)}") + logger.error(f"记忆增量统计任务执行失败: {str(e)}") return { "status": "FAILURE", "error": str(e), @@ -1461,22 +1516,8 @@ def write_all_workspaces_memory_task(self) -> Dict[str, Any]: } try: - # 使用 nest_asyncio 来避免事件循环冲突 - try: - import nest_asyncio - nest_asyncio.apply() - except ImportError: - pass - # 尝试获取现有事件循环,如果不存在则创建新的 - try: - loop = asyncio.get_event_loop() - if loop.is_closed(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + loop = set_asyncio_event_loop() result = loop.run_until_complete(_run()) elapsed_time = time.time() - start_time @@ -1524,11 +1565,9 @@ def regenerate_memory_cache(self) -> Dict[str, Any]: start_time = time.time() async def _run() -> Dict[str, Any]: - from app.core.logging_config import get_logger from app.repositories.end_user_repository import EndUserRepository from 
app.services.user_memory_service import UserMemoryService - logger = get_logger(__name__) logger.info("开始执行记忆缓存重新生成定时任务") service = UserMemoryService() @@ -1661,22 +1700,8 @@ def regenerate_memory_cache(self) -> Dict[str, Any]: } try: - # 使用 nest_asyncio 来避免事件循环冲突 - try: - import nest_asyncio - nest_asyncio.apply() - except ImportError: - pass - # 尝试获取现有事件循环,如果不存在则创建新的 - try: - loop = asyncio.get_event_loop() - if loop.is_closed(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + loop = set_asyncio_event_loop() result = loop.run_until_complete(_run()) elapsed_time = time.time() - start_time @@ -1712,15 +1737,12 @@ def workspace_reflection_task(self) -> Dict[str, Any]: start_time = time.time() async def _run() -> Dict[str, Any]: - from app.core.logging_config import get_api_logger from app.models.workspace_model import Workspace from app.services.memory_reflection_service import ( MemoryReflectionService, WorkspaceAppService, ) - api_logger = get_api_logger() - with get_db_context() as db: try: # 获取所有工作空间 @@ -1739,7 +1761,7 @@ def workspace_reflection_task(self) -> Dict[str, Any]: # 遍历每个工作空间 for workspace in workspaces: workspace_id = workspace.id - api_logger.info(f"开始处理工作空间反思,workspace_id: {workspace_id}") + logger.info(f"开始处理工作空间反思,workspace_id: {workspace_id}") try: reflection_service = MemoryReflectionService(db) @@ -1751,7 +1773,7 @@ def workspace_reflection_task(self) -> Dict[str, Any]: workspace_reflection_results = [] for data in result['apps_detailed_info']: - if data['memory_configs'] == []: + if not data['memory_configs']: continue releases = data['releases'] @@ -1762,7 +1784,7 @@ def workspace_reflection_task(self) -> Dict[str, Any]: if str(base['config']) == str(config['config_id']) and str(base['app_id']) == str( user['app_id']): # 调用反思服务 - api_logger.info(f"为用户 {user['id']} 启动反思,config_id: {config['config_id']}") + logger.info(f"为用户 {user['id']} 
启动反思,config_id: {config['config_id']}") reflection_result = await reflection_service.start_reflection_from_data( config_data=config, @@ -1782,12 +1804,12 @@ def workspace_reflection_task(self) -> Dict[str, Any]: "reflection_results": workspace_reflection_results }) - api_logger.info( + logger.info( f"工作空间 {workspace_id} 反思处理完成,处理了 {len(workspace_reflection_results)} 个任务") except Exception as e: db.rollback() # Rollback failed transaction to allow next query - api_logger.error(f"处理工作空间 {workspace_id} 反思失败: {str(e)}") + logger.error(f"处理工作空间 {workspace_id} 反思失败: {str(e)}") all_reflection_results.append({ "workspace_id": str(workspace_id), "error": str(e), @@ -1806,7 +1828,7 @@ def workspace_reflection_task(self) -> Dict[str, Any]: } except Exception as e: - api_logger.error(f"工作空间反思任务执行失败: {str(e)}") + logger.error(f"工作空间反思任务执行失败: {str(e)}") return { "status": "FAILURE", "error": str(e), @@ -1815,22 +1837,8 @@ def workspace_reflection_task(self) -> Dict[str, Any]: } try: - # 使用 nest_asyncio 来避免事件循环冲突 - try: - import nest_asyncio - nest_asyncio.apply() - except ImportError: - pass - # 尝试获取现有事件循环,如果不存在则创建新的 - try: - loop = asyncio.get_event_loop() - if loop.is_closed(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + loop = set_asyncio_event_loop() result = loop.run_until_complete(_run()) elapsed_time = time.time() - start_time @@ -1871,18 +1879,16 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di start_time = time.time() async def _run() -> Dict[str, Any]: - from app.core.logging_config import get_api_logger from app.services.memory_forget_service import MemoryForgetService - api_logger = get_api_logger() - with get_db_context() as db: try: - api_logger.info(f"开始执行遗忘周期定时任务,config_id: {config_id}") + logger.info(f"开始执行遗忘周期定时任务,config_id: {config_id}") forget_service = MemoryForgetService() # 运行遗忘周期 + # FIXME: MemeoryForgetService 
report = await forget_service.trigger_forgetting( db=db, end_user_id=None, # 处理所有组 @@ -1891,7 +1897,7 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di duration = time.time() - start_time - api_logger.info( + logger.info( f"遗忘周期定时任务完成: " f"融合 {report['merged_count']} 对节点, " f"失败 {report['failed_count']} 对, " @@ -1907,7 +1913,7 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di except Exception as e: duration = time.time() - start_time - api_logger.error(f"遗忘周期定时任务失败: {str(e)}", exc_info=True) + logger.error(f"遗忘周期定时任务失败: {str(e)}", exc_info=True) return { "status": "FAILED", @@ -1924,6 +1930,7 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di finally: loop.close() + # ============================================================================= # Long-term Memory Storage Tasks (Batched Write Strategies) # ============================================================================= @@ -2149,14 +2156,16 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: start_time = time.time() async def _run() -> Dict[str, Any]: - from app.core.logging_config import get_logger - from app.repositories.implicit_emotions_storage_repository import ImplicitEmotionsStorageRepository - from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage - from sqlalchemy import select, func - from app.services.implicit_memory_service import ImplicitMemoryService - from app.services.emotion_analytics_service import EmotionAnalyticsService + from sqlalchemy import select + + from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage + from app.repositories.implicit_emotions_storage_repository import ( + ImplicitEmotionsStorageRepository, + TimeFilterUnavailableError, + ) + from app.services.emotion_analytics_service import EmotionAnalyticsService + from app.services.implicit_memory_service import ImplicitMemoryService - logger = 
get_logger(__name__) logger.info("开始执行隐性记忆和情绪数据更新定时任务") total_users = 0 @@ -2167,21 +2176,30 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: with get_db_context() as db: try: - # 获取所有已存储数据的用户ID(分批次处理) repo = ImplicitEmotionsStorageRepository(db) - + # 先统计总数用于日志 from sqlalchemy import func total_users = db.execute( select(func.count()).select_from(ImplicitEmotionsStorage) ).scalar() or 0 - logger.info(f"找到 {total_users} 个需要更新的用户") + logger.info(f"表中存量用户总数: {total_users},开始时间轴筛选") - # 遍历每个用户并更新数据(分批次,避免一次性加载所有ID) - for end_user_id in repo.get_all_user_ids(batch_size=100): + # 构建 Redis 同步客户端,用于时间轴筛选 + _redis_client = get_sync_redis_client() + + # 只处理 last_done > updated_at 的用户(有新记忆写入的用户) + # Redis 不可用时回退到全量处理 + try: + refresh_iter = repo.get_users_needing_refresh(_redis_client, batch_size=100) + except TimeFilterUnavailableError as e: + logger.warning(f"时间轴筛选不可用,回退到全量刷新: {e}") + refresh_iter = repo.get_all_user_ids(batch_size=100) + + for end_user_id in refresh_iter: logger.info(f"开始处理用户: {end_user_id}") user_start_time = time.time() - + implicit_success = False emotion_success = False errors = [] @@ -2232,7 +2250,7 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: failed += 1 user_elapsed = time.time() - user_start_time - + # 记录用户处理结果 user_result = { "end_user_id": end_user_id, @@ -2264,10 +2282,10 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: user_results.append(error_info) logger.error(f"处理用户 {end_user_id} 时出错: {str(e)}") - # ---- 处理增量用户(当天新增、尚未初始化的用户)---- + # ---- 当天新增用户兜底初始化 ---- new_users_initialized = 0 new_users_failed = 0 - logger.info("开始处理当天新增的增量用户初始化") + logger.info("开始处理当天新增用户的兜底初始化") for end_user_id in repo.get_new_user_ids_today(batch_size=100): logger.info(f"开始初始化新用户: {end_user_id}") @@ -2281,35 +2299,27 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: implicit_service = ImplicitMemoryService(db=db, end_user_id=end_user_id) profile_data = await 
implicit_service.generate_complete_profile(user_id=end_user_id) await implicit_service.save_profile_cache( - end_user_id=end_user_id, - profile_data=profile_data, - db=db + end_user_id=end_user_id, profile_data=profile_data, db=db ) implicit_success = True logger.info(f"成功初始化新用户 {end_user_id} 的隐性记忆画像") except Exception as e: - error_msg = f"隐性记忆初始化失败: {str(e)}" - errors.append(error_msg) - logger.error(f"新用户 {end_user_id} {error_msg}") + errors.append(f"隐性记忆初始化失败: {str(e)}") + logger.error(f"新用户 {end_user_id} 隐性记忆初始化失败: {e}") try: emotion_service = EmotionAnalyticsService() suggestions_data = await emotion_service.generate_emotion_suggestions( - end_user_id=end_user_id, - db=db, - language="zh" + end_user_id=end_user_id, db=db, language="zh" ) await emotion_service.save_suggestions_cache( - end_user_id=end_user_id, - suggestions_data=suggestions_data, - db=db + end_user_id=end_user_id, suggestions_data=suggestions_data, db=db ) emotion_success = True logger.info(f"成功初始化新用户 {end_user_id} 的情绪建议") except Exception as e: - error_msg = f"情绪建议初始化失败: {str(e)}" - errors.append(error_msg) - logger.error(f"新用户 {end_user_id} {error_msg}") + errors.append(f"情绪建议初始化失败: {str(e)}") + logger.error(f"新用户 {end_user_id} 情绪建议初始化失败: {e}") if implicit_success or emotion_success: new_users_initialized += 1 @@ -2319,7 +2329,7 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: user_elapsed = time.time() - user_start_time user_results.append({ "end_user_id": end_user_id, - "type": "init", + "type": "new_user_init", "implicit_success": implicit_success, "emotion_success": emotion_success, "errors": errors, @@ -2331,7 +2341,7 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: user_elapsed = time.time() - user_start_time user_results.append({ "end_user_id": end_user_id, - "type": "init", + "type": "new_user_init", "implicit_success": False, "emotion_success": False, "errors": [str(e)], @@ -2339,27 +2349,24 @@ def update_implicit_emotions_storage(self) -> Dict[str, 
Any]: }) logger.error(f"初始化新用户 {end_user_id} 时出错: {str(e)}") - logger.info( - f"增量用户初始化完成: 成功={new_users_initialized}, 失败={new_users_failed}" - ) - # ---- 增量用户处理结束 ---- + logger.info(f"当天新增用户兜底初始化完成: 成功={new_users_initialized}, 失败={new_users_failed}") + # ---- 新增用户兜底初始化结束 ---- - # 记录总体统计信息 logger.info( f"隐性记忆和情绪数据更新定时任务完成: " f"存量用户总数={total_users}, " f"隐性记忆成功={successful_implicit}, " f"情绪建议成功={successful_emotion}, " f"存量失败={failed}, " - f"增量初始化成功={new_users_initialized}, " - f"增量初始化失败={new_users_failed}" + f"新增用户初始化成功={new_users_initialized}, " + f"新增用户初始化失败={new_users_failed}" ) return { "status": "SUCCESS", "message": ( f"存量用户 {total_users} 个,隐性记忆 {successful_implicit} 个成功,情绪建议 {successful_emotion} 个成功;" - f"增量新用户初始化 {new_users_initialized} 个成功,{new_users_failed} 个失败" + f"当天新增用户初始化 {new_users_initialized} 个成功,{new_users_failed} 个失败" ), "total_users": total_users, "successful_implicit": successful_implicit, @@ -2367,7 +2374,7 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: "failed": failed, "new_users_initialized": new_users_initialized, "new_users_failed": new_users_failed, - "user_results": user_results[:50] # 只保留前50个用户的详细结果 + "user_results": user_results[:50] } except Exception as e: @@ -2385,22 +2392,8 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: } try: - # 使用 nest_asyncio 来避免事件循环冲突 - try: - import nest_asyncio - nest_asyncio.apply() - except ImportError: - pass - # 尝试获取现有事件循环,如果不存在则创建新的 - try: - loop = asyncio.get_event_loop() - if loop.is_closed(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + loop = set_asyncio_event_loop() result = loop.run_until_complete(_run()) elapsed_time = time.time() - start_time @@ -2416,3 +2409,385 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]: "elapsed_time": elapsed_time, "task_id": self.request.id } + + +# 
============================================================================= + +@celery_app.task( + name="app.tasks.init_implicit_emotions_for_users", + bind=True, + ignore_result=True, + max_retries=0, + acks_late=False, + time_limit=3600, + soft_time_limit=3300, + # 触发型任务标识,区别于 periodic_tasks 队列中的定时任务 + triggered=True, +) +def init_implicit_emotions_for_users(self, end_user_ids: List[str]) -> Dict[str, Any]: + """事件触发任务:对指定用户列表做存在性检查,无记录则执行首次初始化。 + + 由 /dashboard/end_users 接口触发,已有数据的用户直接跳过。 + 存量用户的数据刷新由定时任务 update_implicit_emotions_storage 负责。 + + Args: + end_user_ids: 需要检查的用户ID列表 + + Returns: + 包含任务执行结果的字典 + """ + start_time = time.time() + + async def _run() -> Dict[str, Any]: + from app.repositories.implicit_emotions_storage_repository import ( + ImplicitEmotionsStorageRepository, + ) + from app.services.emotion_analytics_service import EmotionAnalyticsService + from app.services.implicit_memory_service import ImplicitMemoryService + + logger.info(f"开始按需初始化隐性记忆/情绪数据,候选用户数: {len(end_user_ids)}") + + initialized = 0 + failed = 0 + skipped = 0 + + with get_db_context() as db: + repo = ImplicitEmotionsStorageRepository(db) + + for end_user_id in end_user_ids: + existing = repo.get_by_end_user_id(end_user_id) + if existing is not None: + skipped += 1 + continue + + logger.info(f"用户 {end_user_id} 无记录,开始初始化") + implicit_ok = False + emotion_ok = False + try: + try: + implicit_service = ImplicitMemoryService(db=db, end_user_id=end_user_id) + profile_data = await implicit_service.generate_complete_profile(user_id=end_user_id) + await implicit_service.save_profile_cache( + end_user_id=end_user_id, profile_data=profile_data, db=db + ) + implicit_ok = True + except Exception as e: + logger.error(f"用户 {end_user_id} 隐性记忆初始化失败: {e}") + + try: + emotion_service = EmotionAnalyticsService() + suggestions_data = await emotion_service.generate_emotion_suggestions( + end_user_id=end_user_id, db=db, language="zh" + ) + await emotion_service.save_suggestions_cache( + 
end_user_id=end_user_id, suggestions_data=suggestions_data, db=db + ) + emotion_ok = True + except Exception as e: + logger.error(f"用户 {end_user_id} 情绪建议初始化失败: {e}") + + if implicit_ok or emotion_ok: + initialized += 1 + else: + failed += 1 + except Exception as e: + failed += 1 + logger.error(f"用户 {end_user_id} 初始化异常: {e}") + + logger.info(f"按需初始化完成: 初始化={initialized}, 跳过={skipped}, 失败={failed}") + return { + "status": "SUCCESS", + "initialized": initialized, + "skipped": skipped, + "failed": failed, + } + + try: + loop = set_asyncio_event_loop() + + result = loop.run_until_complete(_run()) + result["elapsed_time"] = time.time() - start_time + result["task_id"] = self.request.id + return result + except Exception as e: + return { + "status": "FAILURE", + "error": str(e), + "elapsed_time": time.time() - start_time, + "task_id": self.request.id, + } + + +# ============================================================================= + +@celery_app.task( + name="app.tasks.init_interest_distribution_for_users", + bind=True, + ignore_result=True, + max_retries=0, + acks_late=False, + time_limit=3600, + soft_time_limit=3300, +) +def init_interest_distribution_for_users(self, end_user_ids: List[str]) -> Dict[str, Any]: + """事件触发任务:检查指定用户列表的兴趣分布缓存,无缓存则生成并写入 Redis。 + + 由 /dashboard/end_users 接口触发,已有缓存的用户直接跳过。 + 默认生成中文(zh)兴趣分布数据。 + + Args: + self: task object + end_user_ids: 需要检查的用户ID列表 + + Returns: + 包含任务执行结果的字典 + """ + start_time = time.time() + + async def _run() -> Dict[str, Any]: + from app.cache.memory.interest_memory import InterestMemoryCache, INTEREST_CACHE_EXPIRE + from app.services.memory_agent_service import MemoryAgentService + + logger.info(f"开始按需初始化兴趣分布缓存,候选用户数: {len(end_user_ids)}") + + initialized = 0 + failed = 0 + skipped = 0 + language = "zh" + + service = MemoryAgentService() + + with get_db_context() as db: + for end_user_id in end_user_ids: + # 存在性检查:缓存有数据则跳过 + cached = await InterestMemoryCache.get_interest_distribution( + end_user_id=end_user_id, + 
@celery_app.task(
    name="app.tasks.write_perceptual_memory",
    bind=True,
    ignore_result=True,
    max_retries=0,
    acks_late=False,
    time_limit=3600,
    soft_time_limit=3300,
)
def write_perceptual_memory(
        self,
        end_user_id: str,
        model_api_config: dict,
        file_type: str,
        file_url: str,
        file_message: dict
):
    """
    Generate perceptual memory for one file of an end user.

    The work is serialized per file URL through a Redis lock so that two
    workers never process the same file concurrently. It is intended to be
    executed asynchronously via Celery.

    Args:
        self: Bound Celery task instance.
        end_user_id (str): Identifier of the end user.
        model_api_config (dict): Keyword arguments used to build the
            ``ModelInfo`` passed to the perceptual memory service.
        file_type (str): The file type.
        file_url (str): The URL of the file; its MD5 digest forms the lock key.
        file_message (dict): The file message containing details about the
            file to be processed.

    Returns:
        Whatever ``MemoryPerceptualService.generate_perceptual_memory``
        returns (the task is declared with ``ignore_result=True``, so Celery
        does not store it).

    Raises:
        RuntimeError: If the per-file lock cannot be acquired within the
            lock's wait timeout (another worker is handling the same file).
    """
    # The digest is only a compact, deterministic suffix for the lock key.
    file_url_md5 = hashlib.md5(file_url.encode("utf-8")).hexdigest()

    # Use the loop returned by the helper, exactly like the sibling tasks in
    # this module, instead of discarding it and spinning up a second loop
    # with asyncio.run().
    loop = set_asyncio_event_loop()

    lock = RedisLock(
        f"perceptual:{file_url_md5}",
        redis_client=get_sync_redis_client(),
        # RedisLock defaults to a 60 s TTL and never renews it. Keep the key
        # alive for the task's hard time limit so the lock cannot expire
        # while generation is still running.
        expire=3600,
    )
    with lock:
        model_info = ModelInfo(**model_api_config)
        with get_db_context() as db:
            memory_perceptual_service = MemoryPerceptualService(db)
            return loop.run_until_complete(
                memory_perceptual_service.generate_perceptual_memory(
                    end_user_id,
                    model_info,
                    file_type,
                    file_url,
                    file_message,
                )
            )
app.services.memory_config_service import MemoryConfigService + batch_configs = get_end_users_connected_configs_batch(end_user_ids, db) + for uid, cfg_info in batch_configs.items(): + config_id = cfg_info.get("memory_config_id") + if config_id: + try: + cfg = MemoryConfigService(db).load_memory_config(config_id=config_id) + user_llm_map[uid] = str(cfg.llm_model_id) if cfg.llm_model_id else None + except Exception as e: + logger.warning(f"[CommunityCluster] 用户 {uid} 加载 LLM 配置失败,将使用 None: {e}") + user_llm_map[uid] = None + else: + user_llm_map[uid] = None + except Exception as e: + logger.warning(f"[CommunityCluster] 批量获取 LLM 配置失败,所有用户将使用 None: {e}") + + for end_user_id in end_user_ids: + try: + # 已有社区节点则跳过 + has_communities = await repo.has_communities(end_user_id) + if has_communities: + skipped += 1 + logger.debug(f"[CommunityCluster] 用户 {end_user_id} 已有社区节点,跳过") + continue + + # 检查是否有 ExtractedEntity 节点 + entities = await repo.get_all_entities(end_user_id) + if not entities: + skipped += 1 + logger.debug(f"[CommunityCluster] 用户 {end_user_id} 无实体节点,跳过") + continue + + # 每个用户使用自己的 llm_model_id + llm_model_id = user_llm_map.get(end_user_id) + engine = LabelPropagationEngine( + connector=connector, + llm_model_id=llm_model_id, + ) + + logger.info(f"[CommunityCluster] 用户 {end_user_id} 有 {len(entities)} 个实体,开始全量聚类,llm_model_id={llm_model_id}") + await engine.full_clustering(end_user_id) + initialized += 1 + logger.info(f"[CommunityCluster] 用户 {end_user_id} 聚类完成") + + except Exception as e: + failed += 1 + logger.error(f"[CommunityCluster] 用户 {end_user_id} 聚类失败: {e}") + + finally: + await connector.close() + + logger.info( + f"[CommunityCluster] 任务完成: 初始化={initialized}, 跳过={skipped}, 失败={failed}" + ) + return { + "status": "SUCCESS", + "initialized": initialized, + "skipped": skipped, + "failed": failed, + } + + try: + try: + import nest_asyncio + nest_asyncio.apply() + except ImportError: + pass + + loop = set_asyncio_event_loop() + result = 
class RedisLock:
    """Blocking mutual-exclusion lock backed by a single Redis key.

    The key is created with ``SET key token NX EX expire``; the random token
    identifies the owner, and release goes through ``UNLOCK_SCRIPT`` so that
    only the owner's token can delete the key. The TTL is never renewed, so
    ``expire`` must cover the longest critical section the caller expects.

    Usable directly (``acquire`` / ``release``) or as a context manager.
    """

    def __init__(
            self,
            key: str,
            redis_client: "redis.StrictRedis",
            expire: int = 60,
            retry_interval: float = 0.1,
            timeout: float = 30

    ):
        """Configure the lock; no Redis call is made here.

        Args:
            key: Redis key that represents the lock.
            redis_client: Synchronous Redis client used for SET / EVAL.
            expire: TTL of the lock key in seconds.
            retry_interval: Seconds to sleep between acquisition attempts.
            timeout: Maximum number of seconds ``acquire`` keeps retrying.
        """
        self.key = key
        self.expire = expire
        # Unique owner token, compared by the unlock script before deleting.
        self.value = str(uuid.uuid4())
        self._locked = False
        self.retry_interval = retry_interval
        self.timeout = timeout
        self.redis_client = redis_client

    def acquire(self) -> bool:
        """Try to take the lock, retrying until ``timeout`` elapses.

        Returns:
            True when the key was set by this instance, False on timeout.
        """
        # A monotonic clock keeps the timeout immune to wall-clock
        # adjustments (NTP steps, manual changes).
        deadline = time.monotonic() + self.timeout
        while True:
            if self.redis_client.set(self.key, self.value, ex=self.expire, nx=True):
                self._locked = True
                return True
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return False
            # Never sleep past the deadline.
            time.sleep(min(self.retry_interval, remaining))

    def release(self) -> bool:
        """Release the lock if this instance currently holds it.

        Returns:
            True when the key was deleted by this call; False when this
            instance did not hold the lock or the key no longer carried its
            token (for example because the TTL had already expired).
        """
        if not self._locked:
            return False
        deleted = self.redis_client.eval(
            UNLOCK_SCRIPT,
            1,
            self.key,
            self.value
        )
        self._locked = False
        return bool(deleted)

    def __enter__(self):
        """Acquire the lock or raise ``RuntimeError`` on timeout."""
        ok = self.acquire()
        if not ok:
            raise RuntimeError(f"Get redis lock timeout: {self.key}")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Release the lock; exceptions from the body are not suppressed."""
        self.release()
* 应用导入/导出:全面支持 Agent 配置和工作流定义的导入导出,实现跨环境无缝迁移、备份和共享", + "2. 工具生态扩展 🔌
* MCP 广场集成:工具管理接入 MCP 广场,提供集中式工具发现、浏览和集成枢纽", + "3. 工作流增强 📝
* 备注节点:新增备注节点类型,支持工作流图中的内联文档和上下文说明,提升协作效率", + "4. 记忆智能精细化 🧠
* 隐性记忆与情绪记忆生成逻辑优化:含数据存在性校验、时间轴筛选和兴趣分布缓存校验
* 兴趣分布生成逻辑改进:优化算法产生更准确的用户兴趣画像", + "5. 用户体验改进 🎨
* 知识库分享加载状态:增加加载指示器,改善感知响应速度", + "6. 稳健性与缺陷修复 🔧
* 应用调试终端用户管理:修复调试会话错误创建 end_user 记录问题
* 知识库数据集创建流程:解决创建数据集后无法进入下一步的缺陷
* RAG 空间记忆生成失败:修复记忆生成失败和存储中断的关键问题
* 应用字符限制强制执行:增加条件校验防止过长输入
* 语义剪枝情绪/兴趣保留:优化剪枝逻辑防止误删情绪和兴趣片段
* 语义剪枝效果优化:增强算法平衡记忆压缩与信息保留", + "
", + "v0.2.8 及更远的未来将引入多模态记忆能力,实现知识库和模型的分服务部署,为应用增加语音输入支持,并扩展应用能力至语音回复、BI 可视化、PPT 生成和直接生图。应用会话分享和联网搜索功能将得到修复和增强。记忆检索基准测试和情景记忆聚类算法将增强上下文召回和时序推理能力。通往真正智能、多模态、上下文感知应用的旅程仍在继续。", + "记忆熊,智慧致远 🐻✨" + ] + }, + "introduction_en": { + "codeName": "WuLing", + "releaseDate": "2026-3-13", + "upgradePosition": "🐻 Application portability, tool ecosystem expansion, and memory intelligence refinement", + "coreUpgrades": [ + "1. Application Management & Portability
* Application Import/Export: Full support for importing and exporting agent configurations and workflow definitions, enabling seamless cross-environment migration, backup, and sharing", + "2. Tool Ecosystem Expansion 🔌
* MCP Marketplace Integration: Tool management now includes MCP Marketplace access for centralized tool discovery, browsing, and integration", + "3. Workflow Enhancements 📝
* Annotation Node: Introduced annotation node type for inline documentation and contextual notes within workflow graphs, improving collaboration", + "4. Memory Intelligence Refinement 🧠
* Implicit & Emotional Memory Generation Logic: Comprehensive optimization including data existence validation, timeline filtering, and interest distribution cache validation
* Interest Distribution Generation Logic: Refined algorithm for more accurate user interest profiles", + "5. User Experience Improvements 🎨
* Knowledge Base Sharing Loading State: Added loading indicators to improve perceived responsiveness", + "6. Robustness & Bug Fixes 🔧
* End User Management in App Debugging: Fixed incorrect end_user record creation during debugging sessions
* Knowledge Base Dataset Creation Flow: Resolved bug preventing next step after dataset creation
* RAG Space Memory Generation Failure: Fixed critical memory generation and storage interruption issue
* Application Character Limit Enforcement: Added conditional validation to prevent excessively long input
* Semantic Pruning Emotion/Interest Preservation: Optimized pruning logic to prevent incorrect deletion of emotional and interest fragments
* Semantic Pruning Effectiveness: Enhanced algorithm balance between memory compression and information retention", + "
", + "Looking forward to v0.2.8 and beyond, we will introduce multimodal memory capabilities with distributed service deployment for knowledge bases and models, enabling voice input for applications and expanding application capabilities with voice responses, BI visualizations, PPT generation, and direct image creation. Application conversation sharing and web search functionality will be restored and enhanced. Memory retrieval benchmarking and episodic memory clustering algorithms will enhance contextual recall and temporal reasoning. The journey toward truly intelligent, multimodal, context-aware applications continues.", + "MemoryBear, Wisdom Reaching Far 🐻✨" + ] + } + }, "v0.2.6": { "introduction": { "codeName": "听剑", diff --git a/api/docker-compose.yml b/api/docker-compose.yml index 69763de2..5d358f2c 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -49,7 +49,7 @@ services: networks: - celery - # Periodic worker - Scheduled/beat tasks (prefork, low concurrency) + # Periodic worker - Scheduled/beat tasks + API-triggered tasks (prefork, low concurrency) worker-periodic: image: redbear-mem-open:latest container_name: worker-periodic diff --git a/api/env.example b/api/env.example index bd7f3dae..e324d1e5 100644 --- a/api/env.example +++ b/api/env.example @@ -75,7 +75,7 @@ REFRESH_TOKEN_EXPIRE_DAYS=7 ENABLE_SINGLE_SESSION= # File Upload -MAX_FILE_SIZE=52428800 # 50MB:10 * 1024 * 1024 +MAX_FILE_SIZE=52428800 # 50MB:50 * 1024 * 1024 FILE_PATH=/files FILE_LOCAL_SERVER_URL="http://localhost:8000/api" diff --git a/api/migrations/versions/01587a13522f_202603131028.py b/api/migrations/versions/01587a13522f_202603131028.py new file mode 100644 index 00000000..6412dedd --- /dev/null +++ b/api/migrations/versions/01587a13522f_202603131028.py @@ -0,0 +1,38 @@ +"""202603131028 + +Revision ID: 01587a13522f +Revises: fb834419b18f +Create Date: 2026-03-13 10:28:43.601370 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as 
sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '01587a13522f' +down_revision: Union[str, None] = 'fb834419b18f' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('tenants', sa.Column('default_language', sa.String(length=10), server_default='zh', nullable=False)) + op.add_column('tenants', sa.Column('supported_languages', postgresql.ARRAY(sa.String(length=10)), server_default=sa.text("'{zh,en}'"), nullable=False)) + op.create_index(op.f('ix_tenants_default_language'), 'tenants', ['default_language'], unique=False) + op.add_column('users', sa.Column('preferred_language', sa.String(length=10), server_default=sa.text("'zh'"), nullable=False)) + op.create_index(op.f('ix_users_preferred_language'), 'users', ['preferred_language'], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_users_preferred_language'), table_name='users') + op.drop_column('users', 'preferred_language') + op.drop_index(op.f('ix_tenants_default_language'), table_name='tenants') + op.drop_column('tenants', 'supported_languages') + op.drop_column('tenants', 'default_language') + # ### end Alembic commands ### diff --git a/api/migrations/versions/12114b3e953c_202603131647.py b/api/migrations/versions/12114b3e953c_202603131647.py new file mode 100644 index 00000000..dec07664 --- /dev/null +++ b/api/migrations/versions/12114b3e953c_202603131647.py @@ -0,0 +1,50 @@ +"""202603131647 + +Revision ID: 12114b3e953c +Revises: ef9d172cb753 +Create Date: 2026-03-13 08:47:30.455956 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text + + +# revision identifiers, used by Alembic.
+revision: str = '12114b3e953c' +down_revision: Union[str, None] = 'ef9d172cb753' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + print("Step 1: 添加 workspace_id 列...") + op.add_column('end_users', sa.Column('workspace_id', sa.UUID(), nullable=True)) + print("Step 2: 回填 workspace_id...") + conn.execute(text(""" + UPDATE end_users + SET workspace_id = apps.workspace_id + FROM apps + WHERE end_users.app_id = apps.id + """)) + # Step 3: 设置 workspace_id 为 NOT NULL + print("Step 3: 设置 workspace_id 为 NOT NULL...") + op.alter_column('end_users', 'workspace_id', nullable=False) + op.alter_column('end_users', 'app_id', existing_type=sa.UUID(), nullable=True) + # Step 4: 添加外键约束 + print("Step 4: 添加外键约束...") + op.create_foreign_key('fk_end_users_workspace_id','end_users', 'workspaces', + ['workspace_id'], ['id'] + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint('fk_end_users_workspace_id', 'end_users', type_='foreignkey') + op.alter_column('end_users', 'app_id', existing_type=sa.UUID(), nullable=False) + op.drop_column('end_users', 'workspace_id') + # ### end Alembic commands ### diff --git a/api/migrations/versions/74b51dfece29_20260311000.py b/api/migrations/versions/74b51dfece29_20260311000.py new file mode 100644 index 00000000..aa9feab1 --- /dev/null +++ b/api/migrations/versions/74b51dfece29_20260311000.py @@ -0,0 +1,156 @@ +"""20260311000 + +Revision ID: 74b51dfece29 +Revises: f017efe4831c +Create Date: 2026-03-19 10:15:42.488027 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
def upgrade() -> None:
    """Install the trigger that keeps ``knowledges`` statistics in sync.

    Creates (or replaces) the PL/pgSQL function ``update_knowledge_stats``
    and attaches it to ``documents`` as the row-level AFTER trigger
    ``tr_documents_update_stats``. On every insert, update or delete of a
    document the function:

    1. recomputes ``doc_num`` / ``chunk_num`` of the affected knowledge base,
    2. copies those numbers to knowledge bases that receive it through
       ``knowledge_shares``,
    3. refreshes the totals of every ancestor ``Folder`` knowledge base.

    Fixes relative to the first version of this function:

    * the recursive CTE joined ``k.id = k.parent_id`` (a row with itself), so
      it never climbed past the direct parent folder; it now joins on the
      previous level's ``parent_id``;
    * folder totals used ``UNION ALL ... LIMIT 1`` as a scalar sub-query,
      which yields a single operand rather than the intended sum; the two
      parts are now added explicitly;
    * ancestors are aggregated ``ORDER BY depth`` so a folder is refreshed
      before its own parent reads its totals.
    """
    # Drop the previous trigger first (if any) so it can be recreated below.
    op.execute("DROP TRIGGER IF EXISTS tr_documents_update_stats ON documents;")

    # Create or replace the function that maintains the knowledges statistics.
    op.execute("""
CREATE OR REPLACE FUNCTION update_knowledge_stats()
RETURNS TRIGGER AS $$
DECLARE
    -- 声明变量用于存储当前处理的知识库ID
    current_kb_id UUID;
    -- 声明变量用于存储文件夹知识库ID(如果存在)
    folder_kb_id UUID;
    -- 声明变量用于存储递归查询结果
    folder_ids UUID[];
BEGIN
    -- 处理 documents 表的插入、更新或删除
    IF TG_TABLE_NAME = 'documents' THEN
        -- 1. 更新 knowledges 表的 doc_num
        UPDATE knowledges SET doc_num = (
            SELECT COUNT(*) FROM documents
            WHERE kb_id = knowledges.id AND status = 1
        )
        WHERE id = NEW.kb_id OR id = OLD.kb_id;

        -- 2. 更新 knowledges 表的 chunk_num
        UPDATE knowledges SET chunk_num = (
            SELECT COALESCE(SUM(chunk_num), 0) FROM documents
            WHERE kb_id = knowledges.id AND status = 1
        )
        WHERE id = NEW.kb_id OR id = OLD.kb_id;

        -- 通过 knowledge_shares 表同步统计信息
        -- 1. 使用 source_kb_id 的 doc_num 更新 target_kb_id 的 doc_num
        UPDATE knowledges AS target
        SET doc_num = source.doc_num
        FROM knowledge_shares ks
        JOIN knowledges AS source ON source.id = ks.source_kb_id
        WHERE ks.target_kb_id = target.id
          AND (source.id = NEW.kb_id OR source.id = OLD.kb_id);

        -- 2. 使用 source_kb_id 的 chunk_num 更新 target_kb_id 的 chunk_num
        UPDATE knowledges AS target
        SET chunk_num = source.chunk_num
        FROM knowledge_shares ks
        JOIN knowledges AS source ON source.id = ks.source_kb_id
        WHERE ks.target_kb_id = target.id
          AND (source.id = NEW.kb_id OR source.id = OLD.kb_id);

        -- 处理文件夹知识库的统计更新
        -- 获取当前处理的知识库ID(可能是NEW或OLD中的kb_id)
        IF NEW.kb_id IS NOT NULL THEN
            current_kb_id := NEW.kb_id;
        ELSIF OLD.kb_id IS NOT NULL THEN
            current_kb_id := OLD.kb_id;
        ELSE
            RETURN NULL;
        END IF;

        -- 查找当前知识库的父文件夹(如果有)
        SELECT id INTO folder_kb_id FROM knowledges
        WHERE id IN (
            SELECT parent_id FROM knowledges WHERE id = current_kb_id
        ) AND type = 'Folder';

        -- 如果存在父文件夹,递归处理所有父文件夹
        IF folder_kb_id IS NOT NULL THEN
            -- 使用递归CTE获取所有父文件夹ID(包括多级嵌套)
            WITH RECURSIVE folder_hierarchy AS (
                -- 基础查询:获取直接父文件夹
                SELECT id, parent_id, 1 AS depth FROM knowledges
                WHERE id = folder_kb_id AND type = 'Folder'
                UNION ALL
                -- 递归查询:获取父文件夹的父文件夹
                -- FIX: join on the previous level's parent_id (was k.id = k.parent_id)
                SELECT k.id, k.parent_id, fh.depth + 1 FROM knowledges k
                JOIN folder_hierarchy fh ON k.id = fh.parent_id
                WHERE k.type = 'Folder'
            )
            -- 将结果存入数组以便处理
            -- Nearest ancestor first, so each folder is refreshed before its parent
            SELECT array_agg(id ORDER BY depth) INTO folder_ids FROM folder_hierarchy;

            -- 遍历所有父文件夹并更新统计信息
            FOR i IN 1..COALESCE(array_length(folder_ids, 1), 0) LOOP
                -- 更新文件夹的doc_num(汇总所有子知识库的doc_num)
                -- FIX: add both parts (was UNION ALL ... LIMIT 1, which kept only one)
                UPDATE knowledges SET doc_num = (
                    -- 汇总直接子知识库的doc_num
                    SELECT COALESCE(SUM(child.doc_num), 0)
                    FROM knowledges child
                    WHERE child.parent_id = folder_ids[i] AND child.status = 1
                ) + (
                    -- 加上直接属于该文件夹的文档数(如果有)
                    SELECT COUNT(*)
                    FROM documents d
                    WHERE d.kb_id = folder_ids[i] AND d.status = 1
                )
                WHERE id = folder_ids[i];

                -- 更新文件夹的chunk_num(汇总所有子知识库的chunk_num)
                UPDATE knowledges SET chunk_num = (
                    -- 汇总直接子知识库的chunk_num
                    SELECT COALESCE(SUM(child.chunk_num), 0)
                    FROM knowledges child
                    WHERE child.parent_id = folder_ids[i] AND child.status = 1
                ) + (
                    -- 加上直接属于该文件夹的文档的chunk_num(如果有)
                    SELECT COALESCE(SUM(d.chunk_num), 0)
                    FROM documents d
                    WHERE d.kb_id = folder_ids[i] AND d.status = 1
                )
                WHERE id = folder_ids[i];
            END LOOP;
        END IF;
    END IF;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;
    """)

    # Row-level trigger on documents, fired after insert, update and delete.
    op.execute("""
CREATE TRIGGER tr_documents_update_stats
    AFTER INSERT OR UPDATE OR DELETE ON documents
    FOR EACH ROW
    EXECUTE FUNCTION update_knowledge_stats();
    """)
def upgrade() -> None:
    """Add the NOT NULL ``permission`` column to ``app_shares``.

    The column is created with a server default so the statement also
    succeeds when ``app_shares`` already contains rows: PostgreSQL rejects
    adding a NOT NULL column without a default to a populated table. This
    mirrors the later ``is_active`` migration on the same table, which also
    pairs ``nullable=False`` with a ``server_default``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        'app_shares',
        sa.Column(
            'permission',
            sa.String(),
            # NOTE(review): 'readonly' is the less privileged of the two
            # documented modes; confirm it matches the model's default.
            server_default='readonly',
            nullable=False,
            comment='权限模式: readonly | editable',
        ),
    )
    # ### end Alembic commands ###
+revision: str = 'ef9d172cb753' +down_revision: Union[str, None] = 'ea31b4e347d8' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('app_shares', sa.Column('is_active', sa.Boolean(), server_default='true', nullable=False, comment='是否有效,False 表示逻辑删除')) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('app_shares', 'is_active') + # ### end Alembic commands ### diff --git a/api/migrations/versions/f017efe4831c_202603181652.py b/api/migrations/versions/f017efe4831c_202603181652.py new file mode 100644 index 00000000..833d29c0 --- /dev/null +++ b/api/migrations/versions/f017efe4831c_202603181652.py @@ -0,0 +1,30 @@ +"""202603181652 + +Revision ID: f017efe4831c +Revises: 818c6c535e14 +Create Date: 2026-03-18 16:52:21.639695 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = 'f017efe4831c' +down_revision: Union[str, None] = '818c6c535e14' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('workflow_configs', sa.Column('features', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('workflow_configs', 'features') + # ### end Alembic commands ### diff --git a/api/migrations/versions/fb834419b18f_202603101453.py b/api/migrations/versions/fb834419b18f_202603101453.py new file mode 100644 index 00000000..8b17e2e0 --- /dev/null +++ b/api/migrations/versions/fb834419b18f_202603101453.py @@ -0,0 +1,34 @@ +"""202603101453 + +Revision ID: fb834419b18f +Revises: 1ac07dc7366f +Create Date: 2026-03-10 14:46:48.038643 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'fb834419b18f' +down_revision: Union[str, None] = '1ac07dc7366f' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('end_users', sa.Column('rag_tags', sa.Text(), nullable=True, comment='RAG模式下提取的标签列表(JSON格式)')) + op.add_column('end_users', sa.Column('rag_personas', sa.Text(), nullable=True, comment='RAG模式下提取的人物形象列表(JSON格式)')) + op.add_column('end_users', sa.Column('rag_summary_updated_at', sa.DateTime(), nullable=True, comment='RAG摘要/标签/人物形象最后更新时间')) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('end_users', 'rag_summary_updated_at') + op.drop_column('end_users', 'rag_personas') + op.drop_column('end_users', 'rag_tags') + # ### end Alembic commands ### diff --git a/api/pyproject.toml b/api/pyproject.toml index 0bb232c3..e6fddea8 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -145,6 +145,8 @@ dependencies = [ "lxml>=4.9.0", "httpx>=0.28.0", "modelscope>=1.34.0", + "python-magic>=0.4.14; sys_platform == 'linux' or sys_platform == 'darwin'", + "python-magic-bin>=0.4.14; sys_platform=='win32'", ] [tool.pytest.ini_options] diff --git a/api/uv.lock b/api/uv.lock index a9bde1ed..e040f78b 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -7,6 +7,18 @@ resolution-markers = [ "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] +[[package]] +name = "aiofile" +version = "3.9.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "caio" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/e2/d7cb819de8df6b5c1968a2756c3cb4122d4fa2b8fc768b53b7c9e5edb646/aiofile-3.9.0.tar.gz", hash = "sha256:e5ad718bb148b265b6df1b3752c4d1d83024b93da9bd599df74b9d9ffcf7919b", size = 17943, upload-time = "2024-10-08T10:39:35.846Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/25/da1f0b4dd970e52bf5a36c204c107e11a0c6d3ed195eba0bfbc664c312b2/aiofile-3.9.0-py3-none-any.whl", hash = "sha256:ce2f6c1571538cbdfa0143b04e16b208ecb0e9cb4148e528af8a640ed51cc8aa", size = 19539, upload-time = "2024-10-08T10:39:32.955Z" }, +] + [[package]] name = "aiofiles" version = "25.1.0" @@ -172,14 +184,14 @@ wheels = [ [[package]] name = "authlib" -version = "1.6.6" +version = "1.6.9" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/bb/9b/b1661026ff24bc641b76b78c5222d614776b0c085bcfdac9bd15a1cb4b35/authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e", size = 164894, upload-time = "2025-12-12T08:01:41.464Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/af/98/00d3dd826d46959ad8e32af2dbb2398868fd9fd0683c26e56d0789bd0e68/authlib-1.6.9.tar.gz", hash = "sha256:d8f2421e7e5980cc1ddb4e32d3f5fa659cfaf60d8eaf3281ebed192e4ab74f04", size = 165134, upload-time = "2026-03-02T07:44:01.998Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/51/321e821856452f7386c4e9df866f196720b1ad0c5ea1623ea7399969ae3b/authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd", size = 244005, upload-time = "2025-12-12T08:01:40.209Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/23/b65f568ed0c22f1efacb744d2db1a33c8068f384b8c9b482b52ebdbc3ef6/authlib-1.6.9-py2.py3-none-any.whl", hash = "sha256:f08b4c14e08f0861dc18a32357b33fbcfd2ea86cfe3fe149484b4d764c4a0ac3", size = 244197, upload-time = "2026-03-02T07:44:00.307Z" }, ] [[package]] @@ -283,30 +295,30 @@ wheels = [ [[package]] name = "boto3" -version = "1.42.32" +version = "1.42.60" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "botocore" }, { name = "jmespath" }, { name = "s3transfer" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/73/2a8065918dcc9f07046f7e87e17f54a62914a8b7f1f9e506799ec533d2e9/boto3-1.42.32.tar.gz", hash = "sha256:0ba535985f139cf38455efd91f3801fe72e5cce6ded2df5aadfd63177d509675", size = 112830, upload-time = "2026-01-21T20:40:10.891Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/e0/071e00265d3d8127b28c27ba3918ba283f49b39943864a389ac3f5096ef3/boto3-1.42.60.tar.gz", hash = "sha256:3d549d15c821dcc871a0821319049e7d493ae3317121eb01e4b1f5230c19d5d4", size = 112786, upload-time = 
"2026-03-03T21:21:07.199Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/e3/c86658f1fd0191aa8131cb1baacd337b037546d902980ea5a9c8f0c5cd9b/boto3-1.42.32-py3-none-any.whl", hash = "sha256:695ac7e62dfde28cc1d3b28a581cce37c53c729d48ea0f4cd0dbf599856850cf", size = 140573, upload-time = "2026-01-21T20:40:09.1Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/30/156ff2b5afb7dd03383b5f97d0e32535e9c0e783917380c476fe2fbc1874/boto3-1.42.60-py3-none-any.whl", hash = "sha256:c0cc3d93cd76c99461f6e109e04bb020defe3ffcd04c6163c72836dff5591614", size = 140554, upload-time = "2026-03-03T21:21:05.194Z" }, ] [[package]] name = "botocore" -version = "1.42.32" +version = "1.42.60" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/5e/84404e094be8e2145c7f6bb8b3709193bc4488c385edffc6cc6890b5c88b/botocore-1.42.32.tar.gz", hash = "sha256:4c0a9fe23e060c019e327cd5e4ea1976a1343faba74e5301ebfc9549cc584ccb", size = 14898756, upload-time = "2026-01-21T20:39:59.698Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/d7/bfe8413cc7dc167e04ca9c68ea136251307960f662ec5889512615565b25/botocore-1.42.60.tar.gz", hash = "sha256:de9278810fb2e92a9ffe3dc8ffa68f1066e6d2caf19da9460760743b39ca5215", size = 14950855, upload-time = "2026-03-03T21:20:51.529Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/ab/55062f6eaf9fc537b62b7425ab53ef4366032256e1dda8ef52a9a31f7a6e/botocore-1.42.32-py3-none-any.whl", hash = "sha256:9c1ce43687cc4c0bba12054b229b3464265c699e2de4723998d86791254a5a37", size = 14573367, upload-time = "2026-01-21T20:39:56.65Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/63/5cf970a00e9ddcbb9e65ecc79276717a9555a77d3d0571bd962676e19c3b/botocore-1.42.60-py3-none-any.whl", hash = 
"sha256:d8b4aab06cc134e21d294c068cb94e0eeb59bacd27c836fb6b882b61433df2f4", size = 14621726, upload-time = "2026-03-03T21:20:47.935Z" }, ] [[package]] @@ -318,6 +330,19 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl", hash = "sha256:09868944b6dde876dfd44e1d47e18484541eaf12f26f29b7af91b26cc892d701", size = 11280, upload-time = "2025-10-12T14:55:28.382Z" }, ] +[[package]] +name = "caio" +version = "0.9.25" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/88/b8527e1b00c1811db339a1df8bd1ae49d146fcea9d6a5c40e3a80aaeb38d/caio-0.9.25.tar.gz", hash = "sha256:16498e7f81d1d0f5a4c0ad3f2540e65fe25691376e0a5bd367f558067113ed10", size = 26781, upload-time = "2025-12-26T15:21:36.501Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/25/79c98ebe12df31548ba4eaf44db11b7cad6b3e7b4203718335620939083c/caio-0.9.25-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fb7ff95af4c31ad3f03179149aab61097a71fd85e05f89b4786de0359dffd044", size = 36983, upload-time = "2025-12-26T15:21:36.075Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/2b/21288691f16d479945968a0a4f2856818c1c5be56881d51d4dac9b255d26/caio-0.9.25-cp312-cp312-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:97084e4e30dfa598449d874c4d8e0c8d5ea17d2f752ef5e48e150ff9d240cd64", size = 82012, upload-time = "2025-12-26T15:22:20.983Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/c4/8a1b580875303500a9c12b9e0af58cb82e47f5bcf888c2457742a138273c/caio-0.9.25-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:4fa69eba47e0f041b9d4f336e2ad40740681c43e686b18b191b6c5f4c5544bfb", size = 81502, upload-time = "2026-03-04T22:08:22.381Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d1/1c/0fe770b8ffc8362c48134d1592d653a81a3d8748d764bec33864db36319d/caio-0.9.25-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:6bebf6f079f1341d19f7386db9b8b1f07e8cc15ae13bfdaff573371ba0575d69", size = 80200, upload-time = "2026-03-04T22:08:23.382Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/93/1f76c8d1bafe3b0614e06b2195784a3765bbf7b0a067661af9e2dd47fc33/caio-0.9.25-py3-none-any.whl", hash = "sha256:06c0bb02d6b929119b1cfbe1ca403c768b2013a369e2db46bfa2a5761cf82e40", size = 19087, upload-time = "2025-12-26T15:22:00.221Z" }, +] + [[package]] name = "celery" version = "5.5.3" @@ -339,11 +364,11 @@ wheels = [ [[package]] name = "certifi" -version = "2026.1.4" +version = "2026.2.25" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = 
"2026-02-25T02:54:15.766Z" }, ] [[package]] @@ -373,9 +398,9 @@ wheels = [ name = "chardet" version = "5.2.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, ] [[package]] @@ -466,15 +491,6 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/40/9d857001228658f0d59e97ebd4c346fe73e138c6de1bce61dc568a57c7f8/click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812" }, ] -[[package]] -name = "cloudpickle" -version = "3.1.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, -] - [[package]] name = "cn2an" version = "0.5.23" @@ -519,14 +535,14 @@ wheels = [ [[package]] name = "concurrent-log-handler" -version = "0.9.28" +version = "0.9.29" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "portalocker" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/ed/68b9c3a07a2331361a09a194e4375c4ee680a799391cfb1ca924ca2b6523/concurrent_log_handler-0.9.28.tar.gz", hash = "sha256:4cc27969b3420239bd153779266f40d9713ece814e312b7aa753ce62c6eacdb8", size = 30935, upload-time = "2025-06-10T19:02:15.622Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/2c/ba185acc438cff6b58cd8f8dec27e7f4fcabf6968a1facbb6d0cacbde7fe/concurrent_log_handler-0.9.29.tar.gz", hash = "sha256:bc37a76d3f384cbf4a98f693ebd770543edc0f4cd5c6ab6bc70e9e1d7d582265", size = 42114, upload-time = "2026-02-22T18:18:25.758Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/a0/1331c3f12d95adc8d0385dc620001054c509db88376d2e17be36b6353020/concurrent_log_handler-0.9.28-py3-none-any.whl", hash = "sha256:65db25d05506651a61573937880789fc51c7555e7452303042b5a402fd78939c", size = 28983, upload-time = "2025-06-10T19:02:14.223Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/f3/3e3188fdb3e53c6343fd1c7de41c55d4db626f07db3877eae77b28d58bd2/concurrent_log_handler-0.9.29-py3-none-any.whl", hash = "sha256:0d6c077fbaef2dae49a25975dcf72a602fe0a6a4ce80a3b7c37696d37e10459a", size = 32052, upload-time = "2026-02-22T18:18:24.558Z" }, ] [[package]] @@ -609,7 +625,7 @@ wheels = [ [[package]] name = "cyclopts" -version = "4.5.0" +version = "4.6.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } 
dependencies = [ { name = "attrs" }, @@ -617,14 +633,14 @@ dependencies = [ { name = "rich" }, { name = "rich-rst" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/7b/663f3285c1ac0e5d0854bd9db2c87caa6fa3d1a063185e3394a6cdca9151/cyclopts-4.5.0.tar.gz", hash = "sha256:717ac4235548b58d500baf7e688aa4d024caf0ee68f61a012ffd5e29db3099f9", size = 161980, upload-time = "2026-01-16T02:07:16.171Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/5c/88a4068c660a096bbe87efc5b7c190080c9e86919c36ec5f092cb08d852f/cyclopts-4.6.0.tar.gz", hash = "sha256:483c4704b953ea6da742e8de15972f405d2e748d19a848a4d61595e8e5360ee5", size = 162724, upload-time = "2026-02-23T15:44:49.286Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/a3/2e00fececc34a99ae3a5d5702a5dd29c5371e4ed016647301a2b9bcc1976/cyclopts-4.5.0-py3-none-any.whl", hash = "sha256:305b9aa90a9cd0916f0a450b43e50ad5df9c252680731a0719edfb9b20381bf5", size = 199772, upload-time = "2026-01-16T02:07:14.707Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/eb/1e8337755a70dc7d7ff10a73dc8f20e9352c9ad6c2256ed863ac95cd3539/cyclopts-4.6.0-py3-none-any.whl", hash = "sha256:0a891cb55bfd79a3cdce024db8987b33316aba11071e5258c21ac12a640ba9f2", size = 200518, upload-time = "2026-02-23T15:44:47.854Z" }, ] [[package]] name = "dashscope" -version = "1.25.9" +version = "1.25.13" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "aiohttp" }, @@ -634,7 +650,7 @@ dependencies = [ { name = "websocket-client" }, ] wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/bf/503587663b909427c1906b3b75fc2982bf9e42161d8b687f6e38ad12d042/dashscope-1.25.9-py3-none-any.whl", hash = "sha256:03b587bcb58a2f0a76fa5102925c16609b50af176198af0aeb0fd85aa44d6cfe", size = 1335755, upload-time = "2026-01-21T06:58:14.496Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/f9/35/f1fbc93189da944ea1505647dd2ae3b7f8c2688e1f8087e08ffe043d3c3e/dashscope-1.25.13-py3-none-any.whl", hash = "sha256:03e7ce41954bacc3786ec3f45d5dc01e1a8ccf2741503ce3dc9e4ce60866386e", size = 1342636, upload-time = "2026-03-03T01:55:03.866Z" }, ] [[package]] @@ -674,15 +690,6 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" }, ] -[[package]] -name = "diskcache" -version = "5.6.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19" }, -] - [[package]] name = "distro" version = "1.9.0" @@ -824,24 +831,6 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" }, ] -[[package]] -name = "fakeredis" -version = "2.33.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "redis" }, - { name = "sortedcontainers" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/f9/57464119936414d60697fcbd32f38909bb5688b616ae13de6e98384433e0/fakeredis-2.33.0.tar.gz", hash = 
"sha256:d7bc9a69d21df108a6451bbffee23b3eba432c21a654afc7ff2d295428ec5770", size = 175187, upload-time = "2025-12-16T19:45:52.269Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/78/a850fed8aeef96d4a99043c90b818b2ed5419cd5b24a4049fd7cfb9f1471/fakeredis-2.33.0-py3-none-any.whl", hash = "sha256:de535f3f9ccde1c56672ab2fdd6a8efbc4f2619fc2f1acc87b8737177d71c965", size = 119605, upload-time = "2025-12-16T19:45:51.08Z" }, -] - -[package.optional-dependencies] -lua = [ - { name = "lupa" }, -] - [[package]] name = "fastapi" version = "0.119.0" @@ -858,38 +847,43 @@ wheels = [ [[package]] name = "fastmcp" -version = "2.14.3" +version = "3.1.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "authlib" }, { name = "cyclopts" }, { name = "exceptiongroup" }, { name = "httpx" }, + { name = "jsonref" }, { name = "jsonschema-path" }, { name = "mcp" }, { name = "openapi-pydantic" }, + { name = "opentelemetry-api" }, + { name = "packaging" }, { name = "platformdirs" }, - { name = "py-key-value-aio", extra = ["disk", "keyring", "memory"] }, + { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"] }, { name = "pydantic", extra = ["email"] }, - { name = "pydocket" }, { name = "pyperclip" }, { name = "python-dotenv" }, + { name = "pyyaml" }, { name = "rich" }, + { name = "uncalled-for" }, { name = "uvicorn" }, + { name = "watchfiles" }, { name = "websockets" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/b5/7c4744dc41390ed2c17fd462ef2d42f4448a1ec53dda8fe3a01ff2872313/fastmcp-2.14.3.tar.gz", hash = "sha256:abc9113d5fcf79dfb4c060a1e1c55fccb0d4bce4a2e3eab15ca352341eec8dd6", size = 8279206, upload-time = "2026-01-12T20:00:40.789Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/70/862026c4589441f86ad3108f05bfb2f781c6b322ad60a982f40b303b47d7/fastmcp-3.1.0.tar.gz", hash = "sha256:e25264794c734b9977502a51466961eeecff92a0c2f3b49c40c070993628d6d0", size = 17347083, 
upload-time = "2026-03-03T02:43:11.283Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/dc/f7dd14213bf511690dccaa5094d436947c253b418c86c86211d1c76e6e44/fastmcp-2.14.3-py3-none-any.whl", hash = "sha256:103c6b4c6e97a9acc251c81d303f110fe4f2bdba31353df515d66272bf1b9414", size = 416220, upload-time = "2026-01-12T20:00:42.543Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/07/516f5b20d88932e5a466c2216b628e5358a71b3a9f522215607c3281de05/fastmcp-3.1.0-py3-none-any.whl", hash = "sha256:b1f73b56fd3b0cb2bd9e2a144fc650d5cc31587ed129d996db7710e464ae8010", size = 633749, upload-time = "2026-03-03T02:43:09.06Z" }, ] [[package]] name = "filelock" -version = "3.20.3" +version = "3.25.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/18/a1fd2231c679dcb9726204645721b12498aeac28e1ad0601038f94b42556/filelock-3.25.0.tar.gz", hash = "sha256:8f00faf3abf9dc730a1ffe9c354ae5c04e079ab7d3a683b7c32da5dd05f26af3", size = 40158, upload-time = "2026-03-01T15:08:45.916Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/0b/de6f54d4a8bedfe8645c41497f3c18d749f0bd3218170c667bf4b81d0cdd/filelock-3.25.0-py3-none-any.whl", hash = "sha256:5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047", size = 26427, upload-time = "2026-03-01T15:08:44.593Z" }, ] [[package]] @@ -977,11 
+971,11 @@ wheels = [ [[package]] name = "fsspec" -version = "2026.1.0" +version = "2026.2.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/7d/5df2650c57d47c57232af5ef4b4fdbff182070421e405e0d62c6cdbfaa87/fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b", size = 310496, upload-time = "2026-01-09T15:21:35.562Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/7c/f60c259dcbf4f0c47cc4ddb8f7720d2dcdc8888c8e5ad84c73ea4531cc5b/fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff", size = 313441, upload-time = "2026-02-05T21:50:53.743Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/c9/97cc5aae1648dcb851958a3ddf73ccd7dbe5650d95203ecb4d7720b4cdbf/fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc", size = 201838, upload-time = "2026-01-09T15:21:34.041Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, ] [[package]] @@ -1322,27 +1316,27 @@ wheels = [ [[package]] name = "jiter" -version = "0.12.0" +version = "0.13.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294, upload-time = "2025-11-09T20:49:23.302Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = 
"sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449, upload-time = "2025-11-09T20:47:22.999Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855, upload-time = "2025-11-09T20:47:24.779Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171, upload-time = "2025-11-09T20:47:26.469Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590, upload-time = "2025-11-09T20:47:27.918Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462, upload-time = "2025-11-09T20:47:29.654Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983, upload-time = "2025-11-09T20:47:31.026Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328, upload-time = "2025-11-09T20:47:33.286Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740, upload-time = "2025-11-09T20:47:34.703Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875, upload-time = "2025-11-09T20:47:36.058Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457, upload-time = "2025-11-09T20:47:37.932Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546, upload-time = "2025-11-09T20:47:40.47Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196, upload-time = 
"2025-11-09T20:47:41.794Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100, upload-time = "2025-11-09T20:47:43.007Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/f5/12efb8ada5f5c9edc1d4555fe383c1fb2eac05ac5859258a72d61981d999/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb", size = 309974, upload-time = "2025-11-09T20:49:17.187Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/15/d6eb3b770f6a0d332675141ab3962fd4a7c270ede3515d9f3583e1d28276/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b", size = 304233, upload-time = "2025-11-09T20:49:18.734Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/3e/e7e06743294eea2cf02ced6aa0ff2ad237367394e37a0e2b4a1108c67a36/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f", size = 338537, upload-time = "2025-11-09T20:49:20.317Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2e/30/7687e4f87086829955013ca12a9233523349767f69653ebc27036313def9/jiter-0.13.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0a2bd69fc1d902e89925fc34d1da51b2128019423d7b339a45d9e99c894e0663", size = 307958, upload-time = 
"2026-02-02T12:35:57.165Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/27/e57f9a783246ed95481e6749cc5002a8a767a73177a83c63ea71f0528b90/jiter-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f917a04240ef31898182f76a332f508f2cc4b57d2b4d7ad2dbfebbfe167eb505", size = 318597, upload-time = "2026-02-02T12:35:58.591Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/52/e5719a60ac5d4d7c5995461a94ad5ef962a37c8bf5b088390e6fad59b2ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e2b199f446d3e82246b4fd9236d7cb502dc2222b18698ba0d986d2fecc6152", size = 348821, upload-time = "2026-02-02T12:36:00.093Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/db/c1efc32b8ba4c740ab3fc2d037d8753f67685f475e26b9d6536a4322bcdd/jiter-0.13.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04670992b576fa65bd056dbac0c39fe8bd67681c380cb2b48efa885711d9d726", size = 364163, upload-time = "2026-02-02T12:36:01.937Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/8a/fb75556236047c8806995671a18e4a0ad646ed255276f51a20f32dceaeec/jiter-0.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a1aff1fbdb803a376d4d22a8f63f8e7ccbce0b4890c26cc7af9e501ab339ef0", size = 483709, upload-time = "2026-02-02T12:36:03.41Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/16/43512e6ee863875693a8e6f6d532e19d650779d6ba9a81593ae40a9088ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b3fb8c2053acaef8580809ac1d1f7481a0a0bdc012fd7f5d8b18fb696a5a089", size = 370480, upload-time = "2026-02-02T12:36:04.791Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/4c/09b93e30e984a187bc8aaa3510e1ec8dcbdcd71ca05d2f56aac0492453aa/jiter-0.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaba7d87e66f26a2c45d8cbadcbfc4bf7884182317907baf39cfe9775bb4d93", size = 360735, upload-time = 
"2026-02-02T12:36:06.994Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/1b/46c5e349019874ec5dfa508c14c37e29864ea108d376ae26d90bee238cd7/jiter-0.13.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b88d649135aca526da172e48083da915ec086b54e8e73a425ba50999468cc08", size = 391814, upload-time = "2026-02-02T12:36:08.368Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/9e/26184760e85baee7162ad37b7912797d2077718476bf91517641c92b3639/jiter-0.13.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e404ea551d35438013c64b4f357b0474c7abf9f781c06d44fcaf7a14c69ff9e2", size = 513990, upload-time = "2026-02-02T12:36:09.993Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/34/2c9355247d6debad57a0a15e76ab1566ab799388042743656e566b3b7de1/jiter-0.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f4748aad1b4a93c8bdd70f604d0f748cdc0e8744c5547798acfa52f10e79228", size = 548021, upload-time = "2026-02-02T12:36:11.376Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/4a/9f2c23255d04a834398b9c2e0e665382116911dc4d06b795710503cdad25/jiter-0.13.0-cp312-cp312-win32.whl", hash = "sha256:0bf670e3b1445fc4d31612199f1744f67f889ee1bbae703c4b54dc097e5dd394", size = 203024, upload-time = "2026-02-02T12:36:12.682Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/ee/f0ae675a957ae5a8f160be3e87acea6b11dc7b89f6b7ab057e77b2d2b13a/jiter-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:15db60e121e11fe186c0b15236bd5d18381b9ddacdcf4e659feb96fc6c969c92", size = 205424, upload-time = "2026-02-02T12:36:13.93Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/02/ae611edf913d3cbf02c97cdb90374af2082c48d7190d74c1111dde08bcdd/jiter-0.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:41f92313d17989102f3cb5dd533a02787cdb99454d494344b0361355da52fcb9", size = 186818, upload-time = "2026-02-02T12:36:15.308Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/80/60/e50fa45dd7e2eae049f0ce964663849e897300433921198aef94b6ffa23a/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:3d744a6061afba08dd7ae375dcde870cffb14429b7477e10f67e9e6d68772a0a", size = 305169, upload-time = "2026-02-02T12:37:50.376Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/73/a009f41c5eed71c49bec53036c4b33555afcdee70682a18c6f66e396c039/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:ff732bd0a0e778f43d5009840f20b935e79087b4dc65bd36f1cd0f9b04b8ff7f", size = 303808, upload-time = "2026-02-02T12:37:52.092Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/10/528b439290763bff3d939268085d03382471b442f212dca4ff5f12802d43/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab44b178f7981fcaea7e0a5df20e773c663d06ffda0198f1a524e91b2fde7e59", size = 337384, upload-time = "2026-02-02T12:37:53.582Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, ] [[package]] @@ -1393,6 +1387,15 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" }, ] +[[package]] +name = "jsonref" +version = "1.1.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = 
"sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -1410,17 +1413,16 @@ wheels = [ [[package]] name = "jsonschema-path" -version = "0.3.4" +version = "0.4.5" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "pathable" }, { name = "pyyaml" }, { name = "referencing" }, - { name = "requests" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/45/41ebc679c2a4fced6a722f624c18d658dee42612b83ea24c1caf7c0eb3a8/jsonschema_path-0.3.4.tar.gz", hash = "sha256:8365356039f16cc65fddffafda5f58766e34bebab7d6d105616ab52bc4297001", size = 11159, upload-time = "2025-01-24T14:33:16.547Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/8a/7e6102f2b8bdc6705a9eb5294f8f6f9ccd3a8420e8e8e19671d1dd773251/jsonschema_path-0.4.5.tar.gz", hash = "sha256:c6cd7d577ae290c7defd4f4029e86fdb248ca1bd41a07557795b3c95e5144918", size = 15113, upload-time = "2026-03-03T09:56:46.87Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/58/3485da8cb93d2f393bce453adeef16896751f14ba3e2024bc21dc9597646/jsonschema_path-0.3.4-py3-none-any.whl", hash = "sha256:f502191fdc2b22050f9a81c9237be9d27145b9001c55842bece5e94e382e52f8", size = 14810, upload-time = "2025-01-24T14:33:14.652Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/d5/4e96c44f6c1ea3d812cf5391d81a4f5abaa540abf8d04ecd7f66e0ed11df/jsonschema_path-0.4.5-py3-none-any.whl", hash = "sha256:7d77a2c3f3ec569a40efe5c5f942c44c1af2a6f96fe0866794c9ef5b8f87fd65", size = 19368, upload-time = "2026-03-03T09:56:45.39Z" }, ] 
[[package]] @@ -1490,21 +1492,21 @@ wheels = [ [[package]] name = "langchain" -version = "1.2.6" +version = "1.2.10" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "langchain-core" }, { name = "langgraph" }, { name = "pydantic" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/bc/d8f506a525baadee99a65c6cc28c1c35c9eaf1cb2009f048e9861d81a600/langchain-1.2.6.tar.gz", hash = "sha256:7d46cbf719d860a16f6fc182d5d3de17453dda187f3d43e9c40ac352a5094fdd", size = 553127, upload-time = "2026-01-16T19:21:19.611Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/22/a4d4ac98fc2e393537130bbfba0d71a8113e6f884d96f935923e247397fe/langchain-1.2.10.tar.gz", hash = "sha256:bdcd7218d9c79a413cf15e106e4eb94408ac0963df9333ccd095b9ed43bf3be7", size = 570071, upload-time = "2026-02-10T14:56:49.74Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/28/d5dc4cb06ccb29d62a590d446072964766555e85863f5044c6e644c07d0d/langchain-1.2.6-py3-none-any.whl", hash = "sha256:a9a6c39f03c09b6eb0f1b47e267ad2a2fd04e124dfaa9753bd6c11d2fe7d944e", size = 108458, upload-time = "2026-01-16T19:21:18.085Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/06/c3394327f815fade875724c0f6cff529777c96a1e17fea066deb997f8cf5/langchain-1.2.10-py3-none-any.whl", hash = "sha256:e07a377204451fffaed88276b8193e894893b1003e25c5bca6539288ccca3698", size = 111738, upload-time = "2026-02-10T14:56:47.985Z" }, ] [[package]] name = "langchain-aws" -version = "1.0.0a1" +version = "1.4.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "boto3" }, @@ -1512,9 +1514,9 @@ dependencies = [ { name = "numpy" }, { name = "pydantic" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/c3/a98c0849c13c6880b5629409cadb22d4070e9c611013da127be975f8c0dc/langchain_aws-1.0.0a1.tar.gz", hash = "sha256:3bb193a5fa915520c52bb47581e892d11ac4d114939a1b3ecfeca56fe153fff7", size = 121650, 
upload-time = "2025-09-18T20:52:36.098Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/1d/9bc0e523b8d13dcaf301cdd85ae05ce0687fdbeff2000b50346410b260d0/langchain_aws-1.4.0.tar.gz", hash = "sha256:370dcba824d68af96372a1c979c94f8d7062d268615b5395d80d1562a05fca37", size = 435753, upload-time = "2026-03-09T20:02:54.472Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/7b/be49a224fe3aa07ed869801356f06e1d7a321bb7f22b6f7935dce86d258a/langchain_aws-1.0.0a1-py3-none-any.whl", hash = "sha256:24207d05c619ea61dfeab0a0f7086ae388cc3f2f5c03a8ae56b12d1b77d72585", size = 146839, upload-time = "2025-09-18T20:52:35.013Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cc/5d/8a52c067137c26db4c0ff6c0847d75cc5f34c42d21dfb031f27fa705fd52/langchain_aws-1.4.0-py3-none-any.whl", hash = "sha256:68e07276cd85bb45dc4415e417e04fbe1c76c3cf029ddd6d96e2fa981dd29cc1", size = 174854, upload-time = "2026-03-09T20:02:53.381Z" }, ] [[package]] @@ -1560,7 +1562,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.2.7" +version = "1.2.17" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "jsonpatch" }, @@ -1572,9 +1574,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "uuid-utils" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/0e/664d8d81b3493e09cbab72448d2f9d693d1fa5aa2bcc488602203a9b6da0/langchain_core-1.2.7.tar.gz", hash = "sha256:e1460639f96c352b4a41c375f25aeb8d16ffc1769499fb1c20503aad59305ced", size = 837039, upload-time = "2026-01-09T17:44:25.505Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/93/36226f593df52b871fc24d494c274f3a6b2ac76763a2806e7d35611634a1/langchain_core-1.2.17.tar.gz", hash = "sha256:54aa267f3311e347fb2e50951fe08e53761cebfb999ab80e6748d70525bbe872", size = 836130, upload-time = "2026-03-02T22:47:55.846Z" } wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/6e/6f/34a9fba14d191a67f7e2ee3dbce3e9b86d2fa7310e2c7f2c713583481bd2/langchain_core-1.2.7-py3-none-any.whl", hash = "sha256:452f4fef7a3d883357b22600788d37e3d8854ef29da345b7ac7099f33c31828b", size = 490232, upload-time = "2026-01-09T17:44:24.236Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/90/073f33ab383a62908eca7ea699586dfea280e77182176e33199c80ddf22a/langchain_core-1.2.17-py3-none-any.whl", hash = "sha256:bf6bd6ce503874e9c2da1669a69383e967c3de1ea808921d19a9a6bff1a9fbbe", size = 502727, upload-time = "2026-03-02T22:47:54.537Z" }, ] [[package]] @@ -1606,33 +1608,33 @@ wheels = [ [[package]] name = "langchain-openai" -version = "1.1.7" +version = "1.1.10" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "langchain-core" }, { name = "openai" }, { name = "tiktoken" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/b7/30bfc4d1b658a9ee524bcce3b0b2ec9c45a11c853a13c4f0c9da9882784b/langchain_openai-1.1.7.tar.gz", hash = "sha256:f5ec31961ed24777548b63a5fe313548bc6e0eb9730d6552b8c6418765254c81", size = 1039134, upload-time = "2026-01-07T19:44:59.728Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/0f/01147f842499338ae3b0dd0a351fb83006d9ed623cf3a999bd68ba5bbe2d/langchain_openai-1.1.10.tar.gz", hash = "sha256:ca6fae7cf19425acc81814efed59c7d205ec9a1f284fd1d08aae9bda85d6501b", size = 1059755, upload-time = "2026-02-17T18:03:44.506Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/a1/50e7596aca775d8c3883eceeaf47489fac26c57c1abe243c00174f715a8a/langchain_openai-1.1.7-py3-none-any.whl", hash = "sha256:34e9cd686aac1a120d6472804422792bf8080a2103b5d21ee450c9e42d053815", size = 84753, upload-time = "2026-01-07T19:44:58.629Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/17/3785cbcdc81c451179247e4176d2697879cb4f45ab2c59d949ca574e072d/langchain_openai-1.1.10-py3-none-any.whl", hash = 
"sha256:d91b2c09e9fbc70f7af45345d3aa477744962d41c73a029beb46b4f83b824827", size = 87205, upload-time = "2026-02-17T18:03:43.502Z" }, ] [[package]] name = "langchain-text-splitters" -version = "1.1.0" +version = "1.1.1" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "langchain-core" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/42/c178dcdc157b473330eb7cc30883ea69b8ec60078c7b85e2d521054c4831/langchain_text_splitters-1.1.0.tar.gz", hash = "sha256:75e58acb7585dc9508f3cd9d9809cb14751283226c2d6e21fb3a9ae57582ca22", size = 272230, upload-time = "2025-12-14T01:15:38.659Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/38/14121ead61e0e75f79c3a35e5148ac7c2fe754a55f76eab3eed573269524/langchain_text_splitters-1.1.1.tar.gz", hash = "sha256:34861abe7c07d9e49d4dc852d0129e26b32738b60a74486853ec9b6d6a8e01d2", size = 279352, upload-time = "2026-02-18T23:02:42.798Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/1a/a84ed1c046deecf271356b0179c1b9fba95bfdaa6f934e1849dee26fad7b/langchain_text_splitters-1.1.0-py3-none-any.whl", hash = "sha256:f00341fe883358786104a5f881375ac830a4dd40253ecd42b4c10536c6e4693f", size = 34182, upload-time = "2025-12-14T01:15:37.382Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/84/66/d9e0c3b83b0ad75ee746c51ba347cacecb8d656b96e1d513f3e334d1ccab/langchain_text_splitters-1.1.1-py3-none-any.whl", hash = "sha256:5ed0d7bf314ba925041e7d7d17cd8b10f688300d5415fb26c29442f061e329dc", size = 35734, upload-time = "2026-02-18T23:02:41.913Z" }, ] [[package]] name = "langfuse" -version = "3.12.0" +version = "3.14.5" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "backoff" }, @@ -1646,14 +1648,14 @@ dependencies = [ { name = "requests" }, { name = "wrapt" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/d2/33991342653d101715faae8f82c14eb3f0a5c2d22d8c99df9dbb8d099802/langfuse-3.12.0.tar.gz", 
hash = "sha256:0f75b3d21d4ef4014ebeaa8188eb0c855200412b4e4fb8cceca609a7ce465f91", size = 232651, upload-time = "2026-01-13T14:17:33.659Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/6b/7a945e8bc56cbf343b6f6171fd45870b0ea80ea38463b2db8dd5a9dc04a2/langfuse-3.14.5.tar.gz", hash = "sha256:2f543ec1540053d39b08a50ed5992caf1cd54d472a55cb8e5dcf6d4fcb7ff631", size = 235474, upload-time = "2026-02-23T10:42:47.721Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/87/141689c2c2b352ed100de4a63f64f24b4df7f883ba2a3fc0c6733d9d0451/langfuse-3.12.0-py3-none-any.whl", hash = "sha256:644d9bbfa842eb6775b1e069e23f77ad1087f5241682966b8168bbb01f9c357e", size = 416875, upload-time = "2026-01-13T14:17:31.791Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/a1/10f04224542d6a57073c4f339b6763836a0899c98966f1d4ffcf56d2cf61/langfuse-3.14.5-py3-none-any.whl", hash = "sha256:5054b1c705ec69bce2d7077ce7419727ac629159428da013790979ca9cae77d5", size = 421240, upload-time = "2026-02-23T10:42:46.085Z" }, ] [[package]] name = "langgraph" -version = "1.0.6" +version = "1.0.10" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "langchain-core" }, @@ -1663,53 +1665,53 @@ dependencies = [ { name = "pydantic" }, { name = "xxhash" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/9c/dac99ab1732e9fb2d3b673482ac28f02bee222c0319a3b8f8f73d90727e6/langgraph-1.0.6.tar.gz", hash = "sha256:dd8e754c76d34a07485308d7117221acf63990e7de8f46ddf5fe256b0a22e6c5", size = 495092, upload-time = "2026-01-12T20:33:30.778Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/92/14df6fefba28c10caf1cb05aa5b8c7bf005838fe32a86d903b6c7cc4018d/langgraph-1.0.10.tar.gz", hash = "sha256:73bd10ee14a8020f31ef07e9cd4c1a70c35cc07b9c2b9cd637509a10d9d51e29", size = 511644, upload-time = "2026-02-27T21:04:38.743Z" } wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/10/45/9960747781416bed4e531ed0c6b2f2c739bc7b5397d8e92155463735a40e/langgraph-1.0.6-py3-none-any.whl", hash = "sha256:bcfce190974519c72e29f6e5b17f0023914fd6f936bfab8894083215b271eb89", size = 157356, upload-time = "2026-01-12T20:33:29.191Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/60/260e0c04620a37ba8916b712766c341cc5fc685dabc6948c899494bbc2ae/langgraph-1.0.10-py3-none-any.whl", hash = "sha256:7c298bef4f6ea292fcf9824d6088fe41a6727e2904ad6066f240c4095af12247", size = 160920, upload-time = "2026-02-27T21:04:35.932Z" }, ] [[package]] name = "langgraph-checkpoint" -version = "4.0.0" +version = "4.0.1" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "langchain-core" }, { name = "ormsgpack" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/76/55a18c59dedf39688d72c4b06af73a5e3ea0d1a01bc867b88fbf0659f203/langgraph_checkpoint-4.0.0.tar.gz", hash = "sha256:814d1bd050fac029476558d8e68d87bce9009a0262d04a2c14b918255954a624", size = 137320, upload-time = "2026-01-12T20:30:26.38Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/44/a8df45d1e8b4637e29789fa8bae1db022c953cc7ac80093cfc52e923547e/langgraph_checkpoint-4.0.1.tar.gz", hash = "sha256:b433123735df11ade28829e40ce25b9be614930cd50245ff2af60629234befd9", size = 158135, upload-time = "2026-02-27T21:06:16.092Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/de/ddd53b7032e623f3c7bcdab2b44e8bf635e468f62e10e5ff1946f62c9356/langgraph_checkpoint-4.0.0-py3-none-any.whl", hash = "sha256:3fa9b2635a7c5ac28b338f631abf6a030c3b508b7b9ce17c22611513b589c784", size = 46329, upload-time = "2026-01-12T20:30:25.2Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/4c/09a4a0c42f5d2fc38d6c4d67884788eff7fd2cfdf367fdf7033de908b4c0/langgraph_checkpoint-4.0.1-py3-none-any.whl", hash = "sha256:e3adcd7a0e0166f3b48b8cf508ce0ea366e7420b5a73aa81289888727769b034", size = 50453, 
upload-time = "2026-02-27T21:06:14.293Z" }, ] [[package]] name = "langgraph-prebuilt" -version = "1.0.6" +version = "1.0.8" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "langchain-core" }, { name = "langgraph-checkpoint" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/f5/8c75dace0d729561dce2966e630c5e312193df7e5df41a7e10cd7378c3a7/langgraph_prebuilt-1.0.6.tar.gz", hash = "sha256:c5f6cf0f5a0ac47643d2e26ae6faa38cb28885ecde67911190df9e30c4f72361", size = 162623, upload-time = "2026-01-12T20:31:28.425Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/06/dd61a5c2dce009d1b03b1d56f2a85b3127659fdddf5b3be5d8f1d60820fb/langgraph_prebuilt-1.0.8.tar.gz", hash = "sha256:0cd3cf5473ced8a6cd687cc5294e08d3de57529d8dd14fdc6ae4899549efcf69", size = 164442, upload-time = "2026-02-19T18:14:39.083Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/6c/4045822b0630cfc0f8624c4499ceaf90644142143c063a8dc385a7424fc3/langgraph_prebuilt-1.0.6-py3-none-any.whl", hash = "sha256:9fdc35048ff4ac985a55bd2a019a86d45b8184551504aff6780d096c678b39ae", size = 35322, upload-time = "2026-01-12T20:31:27.161Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/41/ec966424ad3f2ed3996d24079d3342c8cd6c0bd0653c12b2a917a685ec6c/langgraph_prebuilt-1.0.8-py3-none-any.whl", hash = "sha256:d16a731e591ba4470f3e313a319c7eee7dbc40895bcf15c821f985a3522a7ce0", size = 35648, upload-time = "2026-02-19T18:14:37.611Z" }, ] [[package]] name = "langgraph-sdk" -version = "0.3.3" +version = "0.3.9" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "httpx" }, { name = "orjson" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/0f/ed0634c222eed48a31ba48eab6881f94ad690d65e44fe7ca838240a260c1/langgraph_sdk-0.3.3.tar.gz", hash = "sha256:c34c3dce3b6848755eb61f0c94369d1ba04aceeb1b76015db1ea7362c544fb26", size = 130589, upload-time = "2026-01-13T00:30:43.894Z" } 
+sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/bd/ca8ae5c6a34be6d4f7aa86016e010ff96b3a939456041565797952e3014d/langgraph_sdk-0.3.9.tar.gz", hash = "sha256:8be8958529b3f6d493ec248fdb46e539362efda75784654a42a7091d22504e0e", size = 184287, upload-time = "2026-02-24T18:39:03.276Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/be/4ad511bacfdd854afb12974f407cb30010dceb982dc20c55491867b34526/langgraph_sdk-0.3.3-py3-none-any.whl", hash = "sha256:a52ebaf09d91143e55378bb2d0b033ed98f57f48c9ad35c8f81493b88705fc7b", size = 67021, upload-time = "2026-01-13T00:30:42.264Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/4c/7a7510260fbda788efd13bf4650d3e7d80988118441ac811ec78e0aa03ac/langgraph_sdk-0.3.9-py3-none-any.whl", hash = "sha256:94654294250c920789b6ed0d8a70c0117fed5736b61efc24ff647157359453c5", size = 90511, upload-time = "2026-02-24T18:39:02.012Z" }, ] [[package]] name = "langsmith" -version = "0.6.4" +version = "0.7.11" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "httpx" }, @@ -1719,11 +1721,12 @@ dependencies = [ { name = "requests" }, { name = "requests-toolbelt" }, { name = "uuid-utils" }, + { name = "xxhash" }, { name = "zstandard" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/85/9c7933052a997da1b85bc5c774f3865e9b1da1c8d71541ea133178b13229/langsmith-0.6.4.tar.gz", hash = "sha256:36f7223a01c218079fbb17da5e536ebbaf5c1468c028abe070aa3ae59bc99ec8", size = 919964, upload-time = "2026-01-15T20:02:28.873Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/43/db660d35fb59577490b072fa7bee4043ee4ba9d21c3185882efb3713fe59/langsmith-0.7.11.tar.gz", hash = "sha256:71df5fb9fa1ee0d3b494c14393566d33130739656de5ef96486bcbb0b5e4d329", size = 1109819, upload-time = "2026-03-03T20:29:18.406Z" } wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/66/0f/09a6637a7ba777eb307b7c80852d9ee26438e2bdafbad6fcc849ff9d9192/langsmith-0.6.4-py3-none-any.whl", hash = "sha256:ac4835860160be371042c7adbba3cb267bcf8d96a5ea976c33a8a4acad6c5486", size = 283503, upload-time = "2026-01-15T20:02:26.662Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/c1/aec40ba797c3ce0f9c41536491394704ae2d7253794405cb813748dcddbe/langsmith-0.7.11-py3-none-any.whl", hash = "sha256:0aff5b4316341d6ab6bcb6abf405a6a098f469020bad4889cafb6098650b8603", size = 346485, upload-time = "2026-03-03T20:29:16.685Z" }, ] [[package]] @@ -1738,25 +1741,6 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/6b/d139535d7590a1bba1ceb68751bef22fadaa5b815bbdf0e858e3875726b2/llvmlite-0.46.0-cp312-cp312-win_amd64.whl", hash = "sha256:398b39db462c39563a97b912d4f2866cd37cba60537975a09679b28fbbc0fb38", size = 38138940, upload-time = "2025-12-08T18:15:10.162Z" }, ] -[[package]] -name = "lupa" -version = "2.6" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/1c/191c3e6ec6502e3dbe25a53e27f69a5daeac3e56de1f73c0138224171ead/lupa-2.6.tar.gz", hash = "sha256:9a770a6e89576be3447668d7ced312cd6fd41d3c13c2462c9dc2c2ab570e45d9", size = 7240282, upload-time = "2025-10-24T07:20:29.738Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/86/ce243390535c39d53ea17ccf0240815e6e457e413e40428a658ea4ee4b8d/lupa-2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47ce718817ef1cc0c40d87c3d5ae56a800d61af00fbc0fad1ca9be12df2f3b56", size = 951707, upload-time = "2025-10-24T07:18:03.884Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/85/cedea5e6cbeb54396fdcc55f6b741696f3f036d23cfaf986d50d680446da/lupa-2.6-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7aba985b15b101495aa4b07112cdc08baa0c545390d560ad5cfde2e9e34f4d58", size = 1916703, upload-time = "2025-10-24T07:18:05.6Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/24/be/3d6b5f9a8588c01a4d88129284c726017b2089f3a3fd3ba8bd977292fea0/lupa-2.6-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:b766f62f95b2739f2248977d29b0722e589dcf4f0ccfa827ccbd29f0148bd2e5", size = 985152, upload-time = "2025-10-24T07:18:08.561Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/23/9f9a05beee5d5dce9deca4cb07c91c40a90541fc0a8e09db4ee670da550f/lupa-2.6-cp312-cp312-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:00a934c23331f94cb51760097ebfab14b005d55a6b30a2b480e3c53dd2fa290d", size = 1159599, upload-time = "2025-10-24T07:18:10.346Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/4e/e7c0583083db9d7f1fd023800a9767d8e4391e8330d56c2373d890ac971b/lupa-2.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21de9f38bd475303e34a042b7081aabdf50bd9bafd36ce4faea2f90fd9f15c31", size = 1038686, upload-time = "2025-10-24T07:18:12.112Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/9f/5a4f7d959d4feba5e203ff0c31889e74d1ca3153122be4a46dca7d92bf7c/lupa-2.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf3bda96d3fc41237e964a69c23647d50d4e28421111360274d4799832c560e9", size = 2071956, upload-time = "2025-10-24T07:18:14.572Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/34/2f4f13ca65d01169b1720176aedc4af17bc19ee834598c7292db232cb6dc/lupa-2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a76ead245da54801a81053794aa3975f213221f6542d14ec4b859ee2e7e0323", size = 1057199, upload-time = "2025-10-24T07:18:16.379Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/2a/5f7d2eebec6993b0dcd428e0184ad71afb06a45ba13e717f6501bfed1da3/lupa-2.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8dd0861741caa20886ddbda0a121d8e52fb9b5bb153d82fa9bba796962bf30e8", size = 1173693, upload-time = "2025-10-24T07:18:18.153Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e4/29/089b4d2f8e34417349af3904bb40bec40b65c8731f45e3fd8d497ca573e5/lupa-2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:239e63948b0b23023f81d9a19a395e768ed3da6a299f84e7963b8f813f6e3f9c", size = 2164394, upload-time = "2025-10-24T07:18:20.403Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/1b/79c17b23c921f81468a111cad843b076a17ef4b684c4a8dff32a7969c3f0/lupa-2.6-cp312-cp312-win32.whl", hash = "sha256:325894e1099499e7a6f9c351147661a2011887603c71086d36fe0f964d52d1ce", size = 1420647, upload-time = "2025-10-24T07:18:23.368Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/15/5121e68aad3584e26e1425a5c9a79cd898f8a152292059e128c206ee817c/lupa-2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c735a1ce8ee60edb0fe71d665f1e6b7c55c6021f1d340eb8c865952c602cd36f", size = 1688529, upload-time = "2025-10-24T07:18:25.523Z" }, -] - [[package]] name = "lxml" version = "6.0.2" @@ -1909,7 +1893,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.25.0" +version = "1.26.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "anyio" }, @@ -1927,9 +1911,9 @@ dependencies = [ { name = "typing-inspection" }, { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/2d/649d80a0ecf6a1f82632ca44bec21c0461a9d9fc8934d38cb5b319f2db5e/mcp-1.25.0.tar.gz", hash = "sha256:56310361ebf0364e2d438e5b45f7668cbb124e158bb358333cd06e49e83a6802", size = 605387, upload-time = "2025-12-19T10:19:56.985Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" } wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e2/fc/6dc7659c2ae5ddf280477011f4213a74f806862856b796ef08f028e664bf/mcp-1.25.0-py3-none-any.whl", hash = "sha256:b37c38144a666add0862614cc79ec276e97d72aa8ca26d622818d4e278b9721a", size = 233076, upload-time = "2025-12-19T10:19:55.416Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" }, ] [[package]] @@ -1941,6 +1925,22 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "modelscope" +version = "1.34.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "filelock" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "tqdm" }, + { name = "urllib3" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/89/7a13bf70090e631f81a943797e875272d19d47e0a6984eda0a5a3f04e7e3/modelscope-1.34.0.tar.gz", hash = "sha256:c3041af301334aa9ca3f66f5b23e11ca33a2bdf28cc415dcceb75f68e4732aac", size = 4560273, upload-time = "2026-01-19T02:50:23.274Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/df/2c112a7c4160aa5e74dad87060019be5eca197d910af3f5b12e68ec090a9/modelscope-1.34.0-py3-none-any.whl", hash = "sha256:4629ace145972520b71b0ad02e4604282426c0cfae6a4b0922509898f3b269c8", size = 6050825, upload-time = "2026-01-19T02:50:20.018Z" }, +] + [[package]] name = "more-itertools" version = "10.8.0" @@ -1961,29 +1961,29 @@ wheels = [ [[package]] name = "multidict" -version = "6.7.0" +version = "6.7.1" source = { registry = 
"https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/1e/5492c365f222f907de1039b91f922b93fa4f764c713ee858d235495d8f50/multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5", size = 101834, upload-time = "2025-10-06T14:52:30.657Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/9e/9f61ac18d9c8b475889f32ccfa91c9f59363480613fc807b6e3023d6f60b/multidict-6.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8a3862568a36d26e650a19bb5cbbba14b71789032aebc0423f8cc5f150730184", size = 76877, upload-time = "2025-10-06T14:49:20.884Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/6f/614f09a04e6184f8824268fce4bc925e9849edfa654ddd59f0b64508c595/multidict-6.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:960c60b5849b9b4f9dcc9bea6e3626143c252c74113df2c1540aebce70209b45", size = 45467, upload-time = "2025-10-06T14:49:22.054Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/93/c4f67a436dd026f2e780c433277fff72be79152894d9fc36f44569cab1a6/multidict-6.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2049be98fb57a31b4ccf870bf377af2504d4ae35646a19037ec271e4c07998aa", size = 43834, upload-time = "2025-10-06T14:49:23.566Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/f5/013798161ca665e4a422afbc5e2d9e4070142a9ff8905e482139cd09e4d0/multidict-6.7.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0934f3843a1860dd465d38895c17fce1f1cb37295149ab05cd1b9a03afacb2a7", size = 250545, upload-time = "2025-10-06T14:49:24.882Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/71/2f/91dbac13e0ba94669ea5119ba267c9a832f0cb65419aca75549fcf09a3dc/multidict-6.7.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3e34f3a1b8131ba06f1a73adab24f30934d148afcd5f5de9a73565a4404384e", size = 258305, upload-time = "2025-10-06T14:49:26.778Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/b0/754038b26f6e04488b48ac621f779c341338d78503fb45403755af2df477/multidict-6.7.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:efbb54e98446892590dc2458c19c10344ee9a883a79b5cec4bc34d6656e8d546", size = 242363, upload-time = "2025-10-06T14:49:28.562Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/15/9da40b9336a7c9fa606c4cf2ed80a649dffeb42b905d4f63a1d7eb17d746/multidict-6.7.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a35c5fc61d4f51eb045061e7967cfe3123d622cd500e8868e7c0c592a09fedc4", size = 268375, upload-time = "2025-10-06T14:49:29.96Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/72/c53fcade0cc94dfaad583105fd92b3a783af2091eddcb41a6d5a52474000/multidict-6.7.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29fe6740ebccba4175af1b9b87bf553e9c15cd5868ee967e010efcf94e4fd0f1", size = 269346, upload-time = "2025-10-06T14:49:31.404Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/e2/9baffdae21a76f77ef8447f1a05a96ec4bc0a24dae08767abc0a2fe680b8/multidict-6.7.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:123e2a72e20537add2f33a79e605f6191fba2afda4cbb876e35c1a7074298a7d", size = 256107, upload-time = "2025-10-06T14:49:32.974Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/06/3f06f611087dc60d65ef775f1fb5aca7c6d61c6db4990e7cda0cef9b1651/multidict-6.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:b284e319754366c1aee2267a2036248b24eeb17ecd5dc16022095e747f2f4304", size = 253592, upload-time = "2025-10-06T14:49:34.52Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/24/54e804ec7945b6023b340c412ce9c3f81e91b3bf5fa5ce65558740141bee/multidict-6.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:803d685de7be4303b5a657b76e2f6d1240e7e0a8aa2968ad5811fa2285553a12", size = 251024, upload-time = "2025-10-06T14:49:35.956Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/48/011cba467ea0b17ceb938315d219391d3e421dfd35928e5dbdc3f4ae76ef/multidict-6.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c04a328260dfd5db8c39538f999f02779012268f54614902d0afc775d44e0a62", size = 251484, upload-time = "2025-10-06T14:49:37.631Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/2f/919258b43bb35b99fa127435cfb2d91798eb3a943396631ef43e3720dcf4/multidict-6.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8a19cdb57cd3df4cd865849d93ee14920fb97224300c88501f16ecfa2604b4e0", size = 263579, upload-time = "2025-10-06T14:49:39.502Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/22/a0e884d86b5242b5a74cf08e876bdf299e413016b66e55511f7a804a366e/multidict-6.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b2fd74c52accced7e75de26023b7dccee62511a600e62311b918ec5c168fc2a", size = 259654, upload-time = "2025-10-06T14:49:41.32Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/e5/17e10e1b5c5f5a40f2fcbb45953c9b215f8a4098003915e46a93f5fcaa8f/multidict-6.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3e8bfdd0e487acf992407a140d2589fe598238eaeffa3da8448d63a63cd363f8", size = 251511, upload-time = "2025-10-06T14:49:46.021Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/9a/201bb1e17e7af53139597069c375e7b0dcbd47594604f65c2d5359508566/multidict-6.7.0-cp312-cp312-win32.whl", hash = "sha256:dd32a49400a2c3d52088e120ee00c1e3576cbff7e10b98467962c74fdb762ed4", size = 41895, upload-time = 
"2025-10-06T14:49:48.718Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/e2/348cd32faad84eaf1d20cce80e2bb0ef8d312c55bca1f7fa9865e7770aaf/multidict-6.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:92abb658ef2d7ef22ac9f8bb88e8b6c3e571671534e029359b6d9e845923eb1b", size = 46073, upload-time = "2025-10-06T14:49:50.28Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/25/ec/aad2613c1910dce907480e0c3aa306905830f25df2e54ccc9dea450cb5aa/multidict-6.7.0-cp312-cp312-win_arm64.whl", hash = "sha256:490dab541a6a642ce1a9d61a4781656b346a55c13038f0b1244653828e3a83ec", size = 43226, upload-time = "2025-10-06T14:49:52.304Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/9c/f20e0e2cf80e4b2e4b1c365bf5fe104ee633c751a724246262db8f1a0b13/multidict-6.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172", size = 76893, upload-time = "2026-01-26T02:43:52.754Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd", size = 45456, upload-time = "2026-01-26T02:43:53.893Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7", size = 43872, upload-time = "2026-01-26T02:43:55.041Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/cf/3b/d6bd75dc4f3ff7c73766e04e705b00ed6dbbaccf670d9e05a12b006f5a21/multidict-6.7.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53", size = 251018, upload-time = "2026-01-26T02:43:56.198Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75", size = 258883, upload-time = "2026-01-26T02:43:57.499Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/85/7ed40adafea3d4f1c8b916e3b5cc3a8e07dfcdcb9cd72800f4ed3ca1b387/multidict-6.7.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b", size = 242413, upload-time = "2026-01-26T02:43:58.755Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/57/b8565ff533e48595503c785f8361ff9a4fde4d67de25c207cd0ba3befd03/multidict-6.7.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733", size = 268404, upload-time = "2026-01-26T02:44:00.216Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/50/9810c5c29350f7258180dfdcb2e52783a0632862eb334c4896ac717cebcb/multidict-6.7.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a", size = 269456, upload-time = "2026-01-26T02:44:02.202Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961", size = 256322, upload-time = "2026-01-26T02:44:03.56Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/6e/d8a26d81ac166a5592782d208dd90dfdc0a7a218adaa52b45a672b46c122/multidict-6.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582", size = 253955, upload-time = "2026-01-26T02:44:04.845Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/4c/7c672c8aad41534ba619bcd4ade7a0dc87ed6b8b5c06149b85d3dd03f0cd/multidict-6.7.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e", size = 251254, upload-time = "2026-01-26T02:44:06.133Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/bd/84c24de512cbafbdbc39439f74e967f19570ce7924e3007174a29c348916/multidict-6.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3", size = 252059, upload-time = "2026-01-26T02:44:07.518Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/ba/f5449385510825b73d01c2d4087bf6d2fccc20a2d42ac34df93191d3dd03/multidict-6.7.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6", size = 263588, upload-time = "2026-01-26T02:44:09.382Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/11/afc7c677f68f75c84a69fe37184f0f82fce13ce4b92f49f3db280b7e92b3/multidict-6.7.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a", size = 259642, upload-time = "2026-01-26T02:44:10.73Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/2b/17/ebb9644da78c4ab36403739e0e6e0e30ebb135b9caf3440825001a0bddcb/multidict-6.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba", size = 251377, upload-time = "2026-01-26T02:44:12.042Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/a4/840f5b97339e27846c46307f2530a2805d9d537d8b8bd416af031cad7fa0/multidict-6.7.1-cp312-cp312-win32.whl", hash = "sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511", size = 41887, upload-time = "2026-01-26T02:44:14.245Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19", size = 46053, upload-time = "2026-01-26T02:44:15.371Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/5b/aba28e4ee4006ae4c7df8d327d31025d760ffa992ea23812a601d226e682/multidict-6.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf", size = 43307, upload-time = "2026-01-26T02:44:16.852Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, ] [[package]] @@ -2033,18 +2033,18 @@ wheels = [ [[package]] name = "numba" -version = "0.63.1" +version = "0.64.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "llvmlite" }, { name = "numpy" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/60/0145d479b2209bd8fdae5f44201eceb8ce5a23e0ed54c71f57db24618665/numba-0.63.1.tar.gz", hash = "sha256:b320aa675d0e3b17b40364935ea52a7b1c670c9037c39cf92c49502a75902f4b", size = 
2761666, upload-time = "2025-12-10T02:57:39.002Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/c9/a0fb41787d01d621046138da30f6c2100d80857bf34b3390dd68040f27a3/numba-0.64.0.tar.gz", hash = "sha256:95e7300af648baa3308127b1955b52ce6d11889d16e8cfe637b4f85d2fca52b1", size = 2765679, upload-time = "2026-02-18T18:41:20.974Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/9c/c0974cd3d00ff70d30e8ff90522ba5fbb2bcee168a867d2321d8d0457676/numba-0.63.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2819cd52afa5d8d04e057bdfd54367575105f8829350d8fb5e4066fb7591cc71", size = 2680981, upload-time = "2025-12-10T02:57:17.579Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/70/ea2bc45205f206b7a24ee68a159f5097c9ca7e6466806e7c213587e0c2b1/numba-0.63.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5cfd45dbd3d409e713b1ccfdc2ee72ca82006860254429f4ef01867fdba5845f", size = 3801656, upload-time = "2025-12-10T02:57:19.106Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/82/4f4ba4fd0f99825cbf3cdefd682ca3678be1702b63362011de6e5f71f831/numba-0.63.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69a599df6976c03b7ecf15d05302696f79f7e6d10d620367407517943355bcb0", size = 3501857, upload-time = "2025-12-10T02:57:20.721Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/af/fd/6540456efa90b5f6604a86ff50dabefb187e43557e9081adcad3be44f048/numba-0.63.1-cp312-cp312-win_amd64.whl", hash = "sha256:bbad8c63e4fc7eb3cdb2c2da52178e180419f7969f9a685f283b313a70b92af3", size = 2750282, upload-time = "2025-12-10T02:57:22.474Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/a6/9fc52cb4f0d5e6d8b5f4d81615bc01012e3cf24e1052a60f17a68deb8092/numba-0.64.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:69440a8e8bc1a81028446f06b363e28635aa67bd51b1e498023f03b812e0ce68", size = 2683418, upload-time = "2026-02-18T18:40:59.886Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/9b/89/1a74ea99b180b7a5587b0301ed1b183a2937c4b4b67f7994689b5d36fc34/numba-0.64.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13721011f693ba558b8dd4e4db7f2640462bba1b855bdc804be45bbeb55031a", size = 3804087, upload-time = "2026-02-18T18:41:01.699Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/e1/583c647404b15f807410510fec1eb9b80cb8474165940b7749f026f21cbc/numba-0.64.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0b180b1133f2b5d8b3f09d96b6d7a9e51a7da5dda3c09e998b5bcfac85d222c", size = 3504309, upload-time = "2026-02-18T18:41:03.252Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/23/0fce5789b8a5035e7ace21216a468143f3144e02013252116616c58339aa/numba-0.64.0-cp312-cp312-win_amd64.whl", hash = "sha256:e63dc94023b47894849b8b106db28ccb98b49d5498b98878fac1a38f83ac007a", size = 2752740, upload-time = "2026-02-18T18:41:05.097Z" }, ] [[package]] @@ -2214,7 +2214,7 @@ wheels = [ [[package]] name = "openai" -version = "2.15.0" +version = "2.24.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "anyio" }, @@ -2226,9 +2226,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 627383, upload-time = "2026-01-09T22:10:08.603Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/13/17e87641b89b74552ed408a92b231283786523edddc95f3545809fab673c/openai-2.24.0.tar.gz", hash = "sha256:1e5769f540dbd01cb33bc4716a23e67b9d695161a734aff9c5f925e2bf99a673", size = 658717, upload-time = "2026-02-24T20:02:07.958Z" } wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879, upload-time = "2026-01-09T22:10:06.446Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/30/844dc675ee6902579b8eef01ed23917cc9319a1c9c0c14ec6e39340c96d0/openai-2.24.0-py3-none-any.whl", hash = "sha256:fed30480d7d6c884303287bde864980a4b137b60553ffbcf9ab4a233b7a73d94", size = 1120122, upload-time = "2026-02-24T20:02:05.669Z" }, ] [[package]] @@ -2315,35 +2315,6 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/f1/b27d3e2e003cd9a3592c43d099d2ed8d0a947c15281bf8463a256db0b46c/opentelemetry_exporter_otlp_proto_http-1.39.1-py3-none-any.whl", hash = "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985", size = 19641, upload-time = "2025-12-11T13:32:22.248Z" }, ] -[[package]] -name = "opentelemetry-exporter-prometheus" -version = "0.60b1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-sdk" }, - { name = "prometheus-client" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/39/7dafa6fff210737267bed35a8855b6ac7399b9e582b8cf1f25f842517012/opentelemetry_exporter_prometheus-0.60b1.tar.gz", hash = "sha256:a4011b46906323f71724649d301b4dc188aaa068852e814f4df38cc76eac616b", size = 14976, upload-time = "2025-12-11T13:32:42.944Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/0d/4be6bf5477a3eb3d917d2f17d3c0b6720cd6cb97898444a61d43cc983f5c/opentelemetry_exporter_prometheus-0.60b1-py3-none-any.whl", hash = "sha256:49f59178de4f4590e3cef0b8b95cf6e071aae70e1f060566df5546fad773b8fd", size = 13019, upload-time = "2025-12-11T13:32:23.974Z" }, -] - -[[package]] -name = "opentelemetry-instrumentation" -version = "0.60b1" -source = { registry = 
"https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "packaging" }, - { name = "wrapt" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/0f/7e6b713ac117c1f5e4e3300748af699b9902a2e5e34c9cf443dde25a01fa/opentelemetry_instrumentation-0.60b1.tar.gz", hash = "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a", size = 31706, upload-time = "2025-12-11T13:36:42.515Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/d2/6788e83c5c86a2690101681aeef27eeb2a6bf22df52d3f263a22cee20915/opentelemetry_instrumentation-0.60b1-py3-none-any.whl", hash = "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", size = 33096, upload-time = "2025-12-11T13:35:33.067Z" }, -] - [[package]] name = "opentelemetry-proto" version = "1.39.1" @@ -2385,25 +2356,25 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.5" +version = "3.11.7" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = 
"2025-12-06T15:54:22.061Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = 
"2025-12-06T15:54:29.856Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = "2025-12-06T15:54:33.361Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" }, ] [[package]] @@ -2451,9 +2422,9 @@ wheels = [ [[package]] name = "owlready2" -version = "0.49" +version = "0.50" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/06/de9eab54845aeb1a60d1b9bfe115e55540e00bab50d1ec52a4c503f33097/owlready2-0.49.tar.gz", hash = "sha256:f076f0a89f64cf27088b69f2ff65c7d5c27da15c0ac6c5ac57ec726e89baf928", size = 27305575, upload-time = "2025-11-24T16:50:48.735Z" } 
+sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/67/6fb3a628889d97f8a3d3c64d4597f0637db61a1856c835fe7c65d8e89658/owlready2-0.50.tar.gz", hash = "sha256:7be74c53a35497df138c8223dddcf65510def9b38494ae1fac4aa2a61c4677eb", size = 27328931, upload-time = "2026-02-05T10:42:01.733Z" } [[package]] name = "packaging" @@ -2496,20 +2467,11 @@ wheels = [ [[package]] name = "pathable" -version = "0.4.4" +version = "0.5.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/93/8f2c2075b180c12c1e9f6a09d1a985bc2036906b13dff1d8917e395f2048/pathable-0.4.4.tar.gz", hash = "sha256:6905a3cd17804edfac7875b5f6c9142a218c7caef78693c2dbbbfbac186d88b2", size = 8124, upload-time = "2025-01-10T18:43:13.247Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/55/b748445cb4ea6b125626f15379be7c96d1035d4fa3e8fee362fa92298abf/pathable-0.5.0.tar.gz", hash = "sha256:d81938348a1cacb525e7c75166270644782c0fb9c8cecc16be033e71427e0ef1", size = 16655, upload-time = "2026-02-20T08:47:00.748Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7d/eb/b6260b31b1a96386c0a880edebe26f89669098acea8e0318bff6adb378fd/pathable-0.4.4-py3-none-any.whl", hash = "sha256:5ae9e94793b6ef5a4cbe0a7ce9dbbefc1eec38df253763fd0aeeacf2762dbbc2", size = 9592, upload-time = "2025-01-10T18:43:11.88Z" }, -] - -[[package]] -name = "pathvalidate" -version = "3.3.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = 
"sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/96/5a770e5c461462575474468e5af931cff9de036e7c2b4fea23c1c58d2cbe/pathable-0.5.0-py3-none-any.whl", hash = "sha256:646e3d09491a6351a0c82632a09c02cdf70a252e73196b36d8a15ba0a114f0a6", size = 16867, upload-time = "2026-02-20T08:46:59.536Z" }, ] [[package]] @@ -2572,11 +2534,11 @@ wheels = [ [[package]] name = "platformdirs" -version = "4.5.1" +version = "4.9.2" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/04/fea538adf7dbbd6d186f551d595961e564a3b6715bdf276b477460858672/platformdirs-4.9.2.tar.gz", hash = "sha256:9a33809944b9db043ad67ca0db94b14bf452cc6aeaac46a88ea55b26e2e9d291", size = 28394, upload-time = "2026-02-16T03:56:10.574Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" }, ] [[package]] @@ -2675,17 +2637,17 @@ wheels = [ [[package]] name = "protobuf" -version = "6.33.4" +version = "6.33.5" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" 
} -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/b8/cda15d9d46d03d4aa3a67cb6bffe05173440ccf86a9541afaf7ac59a1b6b/protobuf-6.33.4.tar.gz", hash = "sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91", size = 444346, upload-time = "2026-01-12T18:33:40.109Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/25/7c72c307aafc96fa87062aa6291d9f7c94836e43214d43722e86037aac02/protobuf-6.33.5.tar.gz", hash = "sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c", size = 444465, upload-time = "2026-01-29T21:51:33.494Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/be/24ef9f3095bacdf95b458543334d0c4908ccdaee5130420bf064492c325f/protobuf-6.33.4-cp310-abi3-win32.whl", hash = "sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d", size = 425612, upload-time = "2026-01-12T18:33:29.656Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/ad/e5693e1974a28869e7cd244302911955c1cebc0161eb32dfa2b25b6e96f0/protobuf-6.33.4-cp310-abi3-win_amd64.whl", hash = "sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc", size = 436962, upload-time = "2026-01-12T18:33:31.345Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/15/6ee23553b6bfd82670207ead921f4d8ef14c107e5e11443b04caeb5ab5ec/protobuf-6.33.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0", size = 427612, upload-time = "2026-01-12T18:33:32.646Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/48/d301907ce6d0db75f959ca74f44b475a9caa8fcba102d098d3c3dd0f2d3f/protobuf-6.33.4-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e", size = 324484, upload-time = "2026-01-12T18:33:33.789Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/92/1c/e53078d3f7fe710572ab2dcffd993e1e3b438ae71cfc031b71bae44fcb2d/protobuf-6.33.4-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6", size = 339256, upload-time = "2026-01-12T18:33:35.231Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/8e/971c0edd084914f7ee7c23aa70ba89e8903918adca179319ee94403701d5/protobuf-6.33.4-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9", size = 323311, upload-time = "2026-01-12T18:33:36.305Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/b1/1dc83c2c661b4c62d56cc081706ee33a4fc2835bd90f965baa2663ef7676/protobuf-6.33.4-py3-none-any.whl", hash = "sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc", size = 170532, upload-time = "2026-01-12T18:33:39.199Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/79/af92d0a8369732b027e6d6084251dd8e782c685c72da161bd4a2e00fbabb/protobuf-6.33.5-cp310-abi3-win32.whl", hash = "sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b", size = 425769, upload-time = "2026-01-29T21:51:21.751Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/75/bb9bc917d10e9ee13dee8607eb9ab963b7cf8be607c46e7862c748aa2af7/protobuf-6.33.5-cp310-abi3-win_amd64.whl", hash = "sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c", size = 437118, upload-time = "2026-01-29T21:51:24.022Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/6b/e48dfc1191bc5b52950246275bf4089773e91cb5ba3592621723cdddca62/protobuf-6.33.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5", size = 427766, upload-time = "2026-01-29T21:51:25.413Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/4e/b1/c79468184310de09d75095ed1314b839eb2f72df71097db9d1404a1b2717/protobuf-6.33.5-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190", size = 324638, upload-time = "2026-01-29T21:51:26.423Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/f5/65d838092fd01c44d16037953fd4c2cc851e783de9b8f02b27ec4ffd906f/protobuf-6.33.5-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd", size = 339411, upload-time = "2026-01-29T21:51:27.446Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/53/a9443aa3ca9ba8724fdfa02dd1887c1bcd8e89556b715cfbacca6b63dbec/protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0", size = 323465, upload-time = "2026-01-29T21:51:28.925Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" }, ] [[package]] @@ -2709,21 +2671,21 @@ wheels = [ [[package]] name = "py-key-value-aio" -version = "0.3.0" +version = "0.4.4" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "beartype" }, - { name = "py-key-value-shared" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/ce/3136b771dddf5ac905cc193b461eb67967cf3979688c6696e1f2cdcde7ea/py_key_value_aio-0.3.0.tar.gz", hash = "sha256:858e852fcf6d696d231266da66042d3355a7f9871650415feef9fca7a6cd4155", size = 50801, upload-time = "2025-11-17T16:50:04.711Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/3c/0397c072a38d4bc580994b42e0c90c5f44f679303489e4376289534735e5/py_key_value_aio-0.4.4.tar.gz", hash 
= "sha256:e3012e6243ed7cc09bb05457bd4d03b1ba5c2b1ca8700096b3927db79ffbbe55", size = 92300, upload-time = "2026-02-16T21:21:43.245Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/99/10/72f6f213b8f0bce36eff21fda0a13271834e9eeff7f9609b01afdc253c79/py_key_value_aio-0.3.0-py3-none-any.whl", hash = "sha256:1c781915766078bfd608daa769fefb97e65d1d73746a3dfb640460e322071b64", size = 96342, upload-time = "2025-11-17T16:50:03.801Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/69/f1b537ee70b7def42d63124a539ed3026a11a3ffc3086947a1ca6e861868/py_key_value_aio-0.4.4-py3-none-any.whl", hash = "sha256:18e17564ecae61b987f909fc2cd41ee2012c84b4b1dcb8c055cf8b4bc1bf3f5d", size = 152291, upload-time = "2026-02-16T21:21:44.241Z" }, ] [package.optional-dependencies] -disk = [ - { name = "diskcache" }, - { name = "pathvalidate" }, +filetree = [ + { name = "aiofile" }, + { name = "anyio" }, ] keyring = [ { name = "keyring" }, @@ -2731,22 +2693,6 @@ keyring = [ memory = [ { name = "cachetools" }, ] -redis = [ - { name = "redis" }, -] - -[[package]] -name = "py-key-value-shared" -version = "0.3.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "beartype" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/e4/1971dfc4620a3a15b4579fe99e024f5edd6e0967a71154771a059daff4db/py_key_value_shared-0.3.0.tar.gz", hash = "sha256:8fdd786cf96c3e900102945f92aa1473138ebe960ef49da1c833790160c28a4b", size = 11666, upload-time = "2025-11-17T16:50:06.849Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/e4/b8b0a03ece72f47dce2307d36e1c34725b7223d209fc679315ffe6a4e2c3/py_key_value_shared-0.3.0-py3-none-any.whl", hash = "sha256:5b0efba7ebca08bb158b1e93afc2f07d30b8f40c2fc12ce24a4c0d84f42f9298", size = 19560, upload-time = "2025-11-17T16:50:05.954Z" }, -] [[package]] name = "pyasn1" @@ -2850,39 +2796,16 @@ wheels = [ [[package]] name = "pydantic-settings" 
-version = "2.12.0" +version = "2.13.1" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, { name = "typing-inspection" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, -] - -[[package]] -name = "pydocket" -version = "0.16.6" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "cloudpickle" }, - { name = "fakeredis", extra = ["lua"] }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-prometheus" }, - { name = "opentelemetry-instrumentation" }, - { name = "prometheus-client" }, - { name = "py-key-value-aio", extra = ["memory", "redis"] }, - { name = "python-json-logger" }, - { name = "redis" }, - { name = "rich" }, - { name = "typer" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/00/26befe5f58df7cd1aeda4a8d10bc7d1908ffd86b80fd995e57a2a7b3f7bd/pydocket-0.16.6.tar.gz", hash = "sha256:b96c96ad7692827214ed4ff25fcf941ec38371314db5dcc1ae792b3e9d3a0294", size = 299054, upload-time = "2026-01-09T22:09:15.405Z" } 
-wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/3f/7483e5a6dc6326b6e0c640619b5c5bd1d6e3c20e54d58f5fb86267cef00e/pydocket-0.16.6-py3-none-any.whl", hash = "sha256:683d21e2e846aa5106274e7d59210331b242d7fb0dce5b08d3b82065663ed183", size = 67697, upload-time = "2026-01-09T22:09:13.436Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, ] [[package]] @@ -2896,11 +2819,11 @@ wheels = [ [[package]] name = "pyjwt" -version = "2.10.1" +version = "2.11.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" }, ] 
[package.optional-dependencies] @@ -2953,31 +2876,31 @@ wheels = [ [[package]] name = "pypdfium2" -version = "5.3.0" +version = "5.5.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/83/173dab58beb6c7e772b838199014c173a2436018dd7cfde9bbf4a3be15da/pypdfium2-5.3.0.tar.gz", hash = "sha256:2873ffc95fcb01f329257ebc64a5fdce44b36447b6b171fe62f7db5dc3269885", size = 268742, upload-time = "2026-01-05T16:29:03.02Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/f6/42f5f1b9beb7e036f5532832b9c590fd107c52a78f704302c03bc6793954/pypdfium2-5.5.0.tar.gz", hash = "sha256:3283c61f54c3c546d140da201ef48a51c18b0ad54293091a010029ac13ece23a", size = 270502, upload-time = "2026-02-18T23:22:37.643Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/a4/6bb5b5918c7fc236ec426be8a0205a984fe0a26ae23d5e4dd497398a6571/pypdfium2-5.3.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:885df6c78d41600cb086dc0c76b912d165b5bd6931ca08138329ea5a991b3540", size = 2763287, upload-time = "2026-01-05T16:28:24.21Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/64/24b41b906006bf07099b095f0420ee1f01a3a83a899f3e3731e4da99c06a/pypdfium2-5.3.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:6e53dee6b333ee77582499eff800300fb5aa0c7eb8f52f95ccb5ca35ebc86d48", size = 2303285, upload-time = "2026-01-05T16:28:26.274Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/c0/3ec73f4ded83ba6c02acf6e9d228501759d5d74fe57f1b93849ab92dcc20/pypdfium2-5.3.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:ce4466bdd62119fe25a5f74d107acc9db8652062bf217057630c6ff0bb419523", size = 2816066, upload-time = "2026-01-05T16:28:28.099Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/ca/e553b3b8b5c2cdc3d955cc313493ac27bbe63fc22624769d56ded585dd5e/pypdfium2-5.3.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:cc2647fd03db42b8a56a8835e8bc7899e604e2042cd6fedeea53483185612907", size 
= 2945545, upload-time = "2026-01-05T16:28:29.489Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/56/615b776071e95c8570d579038256d0c77969ff2ff381e427be4ab8967f44/pypdfium2-5.3.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35e205f537ddb4069e4b4e22af7ffe84fcf2d686c3fee5e5349f73268a0ef1ca", size = 2979892, upload-time = "2026-01-05T16:28:31.088Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/10/27114199b765bdb7d19a9514c07036ad2fc3a579b910e7823ba167ead6de/pypdfium2-5.3.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5795298f44050797ac030994fc2525ea35d2d714efe70058e0ee22e5f613f27", size = 2765738, upload-time = "2026-01-05T16:28:33.18Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/d7/2a3afa35e6c205a4f6264c33b8d2f659707989f93c30b336aa58575f66fa/pypdfium2-5.3.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7cd43dfceb77137e69e74c933d41506da1dddaff70f3a794fb0ad0d73e90d75", size = 3064338, upload-time = "2026-01-05T16:28:34.731Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/f1/6658755cf6e369bb51d0bccb81c51c300404fbe67c2f894c90000b6442dd/pypdfium2-5.3.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5956867558fd3a793e58691cf169718864610becb765bfe74dd83f05cbf1ae3", size = 3415059, upload-time = "2026-01-05T16:28:37.313Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/34/f86482134fa641deb1f524c45ec7ebd6fc8d404df40c5657ddfce528593e/pypdfium2-5.3.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ff1071e9a782625822658dfe6e29e3a644a66960f8713bb17819f5a0ac5987", size = 2998517, upload-time = "2026-01-05T16:28:38.873Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/34/40ab99425dcf503c172885904c5dc356c052bfdbd085f9f3cc920e0b8b25/pypdfium2-5.3.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:f319c46ead49d289ab8c1ed2ea63c91e684f35bdc4cf4dc52191c441182ac481", size = 3673154, upload-time = "2026-01-05T16:28:40.347Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/67/0f7532f80825a7728a5cbff3f1104857f8f9fe49ebfd6cb25582a89ae8e1/pypdfium2-5.3.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6dc67a186da0962294321cace6ccc0a4d212dbc5e9522c640d35725a812324b8", size = 2965002, upload-time = "2026-01-05T16:28:42.143Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/6c/c03d2a3d6621b77aac9604bce1c060de2af94950448787298501eac6c6a2/pypdfium2-5.3.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0ad0afd3d2b5b54d86287266fd6ae3fef0e0a1a3df9d2c4984b3e3f8f70e6330", size = 4130530, upload-time = "2026-01-05T16:28:44.264Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/af/39/9ad1f958cbe35d4693ae87c09ebafda4bb3e4709c7ccaec86c1a829163a3/pypdfium2-5.3.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1afe35230dc3951b3e79b934c0c35a2e79e2372d06503fce6cf1926d2a816f47", size = 3746568, upload-time = "2026-01-05T16:28:45.897Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/e2/4d32310166c2d6955d924737df8b0a3e3efc8d133344a98b10f96320157d/pypdfium2-5.3.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:00385793030cadce08469085cd21b168fd8ff981b009685fef3103bdc5fc4686", size = 4336683, upload-time = "2026-01-05T16:28:47.584Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/ea/38c337ff12a8cec4b00fd4fdb0a63a70597a344581e20b02addbd301ab56/pypdfium2-5.3.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:d911e82676398949697fef80b7f412078df14d725a91c10e383b727051530285", size = 4375030, upload-time = "2026-01-05T16:28:49.5Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/77/9d8de90c35d2fc383be8819bcde52f5821dacbd7404a0225e4010b99d080/pypdfium2-5.3.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:ca1dc625ed347fac3d9002a3ed33d521d5803409bd572e7b3f823c12ab2ef58f", size = 
3928914, upload-time = "2026-01-05T16:28:51.433Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/39/9d4a6fbd78fcb6803b0ea5e4952a31d6182a0aaa2609cfcd0eb88446fdb8/pypdfium2-5.3.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:ea4f9db2d3575f22cd41f4c7a855240ded842f135e59a961b5b1351a65ce2b6e", size = 4997777, upload-time = "2026-01-05T16:28:53.589Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/38/cdd4ed085c264234a59ad32df1dfe432c77a7403da2381e0fcc1ba60b74e/pypdfium2-5.3.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0ea24409613df350223c6afc50911c99dca0d43ddaf2616c5a1ebdffa3e1bcb5", size = 4179895, upload-time = "2026-01-05T16:28:55.322Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/4c/d2f40145c9012482699664f615d7ae540a346c84f68a8179449e69dcc4d8/pypdfium2-5.3.0-py3-none-win32.whl", hash = "sha256:5bf695d603f9eb8fdd7c1786add5cf420d57fbc81df142ed63c029ce29614df9", size = 2993570, upload-time = "2026-01-05T16:28:58.37Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/dc/1388ea650020c26ef3f68856b9227e7f153dcaf445e7e4674a0b8f26891e/pypdfium2-5.3.0-py3-none-win_amd64.whl", hash = "sha256:8365af22a39d4373c265f8e90e561cd64d4ddeaf5e6a66546a8caed216ab9574", size = 3102340, upload-time = "2026-01-05T16:28:59.933Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/71/a433668d33999b3aeb2c2dda18aaf24948e862ea2ee148078a35daac6c1c/pypdfium2-5.3.0-py3-none-win_arm64.whl", hash = "sha256:0b2c6bf825e084d91d34456be54921da31e9199d9530b05435d69d1a80501a12", size = 2940987, upload-time = "2026-01-05T16:29:01.511Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/c0/cdddce35108c118cc110c1c2ed16de82d74d7646b9bcf98eae2fa440966b/pypdfium2-5.5.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:414f0b4aef7413e04df7355043fb752f2efb6f9777e04fd880d302612dacf89f", size = 2760984, upload-time = "2026-02-18T23:21:56.668Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d0/c7/23a6fbd6d23fd8dbe657696acd81fba858639ef221254ce05970152ad1d8/pypdfium2-5.5.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:126ff8b131d12f16ce96b3e85b7f413e5073212be06b571f157fe11ad221c274", size = 2303146, upload-time = "2026-02-18T23:21:58.466Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/a9/379ec56c4481f39f0e37a7ce42f4844e6ddd7662571922e2b348105960ab/pypdfium2-5.5.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0770bd3f0be5c68443fc4017e43b1b1fe8f36877481cab70fd29b68b2c362e1b", size = 2815036, upload-time = "2026-02-18T23:22:00.288Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/a4/b0cc01aaae1fdf1ca4e080cc55bb432f5a2234f33209a602bc498a47850d/pypdfium2-5.5.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:5ab41a3b9953d9be44be35c36a2340f1d67c602db98a0d6f70006610871ae43a", size = 2948686, upload-time = "2026-02-18T23:22:02.213Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/99/25a0c71b551d100b505c618910afec0df402b230e087078c8078f8b1fcff/pypdfium2-5.5.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2492a22c3126a004cee2fa208ea4aa03ede2c7e205d05814934ab18f83d073e9", size = 2977311, upload-time = "2026-02-18T23:22:03.603Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/64/691e21539566f7a0521295948b5589d2fdfe3df5acab9c29ff410633a839/pypdfium2-5.5.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83ff93e08b1fadb00040564e2eccc99147fc1a632ba5daff745126b373d78446", size = 2762449, upload-time = "2026-02-18T23:22:05.044Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/74/b1/9af288557291e2964bf5ffd460b7ed1090fcb8c54addfd6c7c5deb9ba7c7/pypdfium2-5.5.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7e85de3332bedf8e5f157c248063b4eaf968660e1e490353b6e581d9f96a4c6", size = 3074851, upload-time = "2026-02-18T23:22:07.431Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a4/1e/c61fddbdea5ea1ba478dc7ecc9d68069d17b858e5fed04e4e071811f0858/pypdfium2-5.5.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e258365f34b6e334bb415e44dd9b1ee78a6e525bf854a1e74af67af7ede7555b", size = 3423003, upload-time = "2026-02-18T23:22:09.749Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/5f/d2eb58c54abba3a6c3bc4c297b3a11348dd4b4deb073f1aa8a872a298278/pypdfium2-5.5.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec21d833404ca771f02fa5cefb0b73e2148f05cbdb3b5b9989bdd51d9b5cbac", size = 3002104, upload-time = "2026-02-18T23:22:12.035Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/33/87423eec4f5d4287d5a1726dbb9f06fb1f1aebc38ff75dcff817c492769d/pypdfium2-5.5.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1dd6ccbe1b5e2e778e8b021e47f9485b4fd42eaa6c9bdda2631641724e1fcc04", size = 3097209, upload-time = "2026-02-18T23:22:13.809Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/0a/a3fd71f00838bba7922691107219bee67f50fbda6d12df330ef485a97848/pypdfium2-5.5.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:da3eada345570cec5e34872d1472d4ac542f0e650ccdb6c2eac08ae1a5f07c82", size = 2965027, upload-time = "2026-02-18T23:22:16.324Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/4a/2181260bd8a0b1b30ac50b7fd6ee3366e04f3a9f1c29351d882652da7fa7/pypdfium2-5.5.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a087fb4088c7433fd3d78833dbe42cfb66df3d5ac98e3edf66110520fb33c0f0", size = 4131431, upload-time = "2026-02-18T23:22:18.469Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/bb/3ccf481191346eda11c0c208bd4e46f8de019ae7d9e9c1b660633f0bb3f4/pypdfium2-5.5.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e6418cdc500ef85a90319f9bc7f1c54fc133460379f509429403225d8a4c157f", size = 3747468, upload-time = "2026-02-18T23:22:20.679Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/15/51/17e50ec72cf2235ac18d9cbe907859501c769d3e964818fefac6a3e10727/pypdfium2-5.5.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:8f7b66eedfac26eb2df4b00936e081b0a1c76fb8ee1c12639d85c2e73b0769ef", size = 4337579, upload-time = "2026-02-18T23:22:23.245Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/e4/f9bdf06f4d3f1e56eff9d997392a00a4b66cbc9c20f33934c4edc2a7943f/pypdfium2-5.5.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:faea3246591ce2ea6218cd06679071275e3c65f11c3f5c9091eb7fb07610af6a", size = 4376104, upload-time = "2026-02-18T23:22:25.337Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/20/06baf1f5d494e035f50fc895fa1da5ed652d03ecc59aeb3aabb0daa5adfc/pypdfium2-5.5.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:aba26d404b51a9de3d3e80c867a95c71abf1c79552001ae22707451e59186b3d", size = 3929824, upload-time = "2026-02-18T23:22:26.889Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/01/28940e54e6936674e9a05eb58ccce7c54d8e2ac81cd84ec0b76e7d32a010/pypdfium2-5.5.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e0fa8f81679e6e71f26806f4db853571ee6435dc3bde7a46acdd182ef886a5b9", size = 4270200, upload-time = "2026-02-18T23:22:28.668Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/d4/1f36c505a3770aad9a88c895a46d61fd4c0535f79548f02c93b97ff89604/pypdfium2-5.5.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ee22df3376d350eeb64d2002a1071e3a02c0d874c557a3cd8229a8fc572cdaac", size = 4180794, upload-time = "2026-02-18T23:22:30.11Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/38/f77e7792b4fba37f0e3d78db52fb7288d41db3c46ed28906fb940bc3e325/pypdfium2-5.5.0-py3-none-win32.whl", hash = "sha256:ec62a00223d1222d2f35c0866dd79cdc24da070738544cdf51b17d332d4a7389", size = 3001772, upload-time = "2026-02-18T23:22:32.367Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/3e/c5/0d7ba53148262f78d8eee528a504764f78ae7bebf434a53714294b1fd973/pypdfium2-5.5.0-py3-none-win_amd64.whl", hash = "sha256:15c32fbeebb5198afa785dd03e98906ebb4eded9ef8862e10f833c37b4a18786", size = 3107710, upload-time = "2026-02-18T23:22:33.925Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/ad/fae449d2ed7b3088c6ab088f53fc6a9e9af26ccc9e0477d4182e373c4dd8/pypdfium2-5.5.0-py3-none-win_arm64.whl", hash = "sha256:f618af0884c16c768539c44933a255039131dbbf39d68eded020da4f14958d73", size = 2938315, upload-time = "2026-02-18T23:22:35.907Z" }, ] [[package]] @@ -3029,24 +2952,24 @@ wheels = [ [[package]] name = "python-calamine" -version = "0.6.1" +version = "0.6.2" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/32/99a794a1ca7b654cecdb76d4d61f21658b6f76574321341eb47df4365807/python_calamine-0.6.1.tar.gz", hash = "sha256:5974989919aa0bb55a136c1822d6f8b967d13c0fd0f245e3293abb4e63ab0f4b", size = 138354, upload-time = "2025-11-26T10:48:35.331Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/18/e1e53ade001b30a3c6642d876e5defe8431da8c31fb7798909e6c8ab8c34/python_calamine-0.6.2.tar.gz", hash = "sha256:2c90e5224c5e92db9fcd8f22b6085ce63b935cfe7a893ac9a1c3c56793bafd9d", size = 138000, upload-time = "2026-02-18T13:38:17.389Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/ad/f7cd7281dbd15c63c106963bdc2474354eeac58afb5484da23cfb89f650e/python_calamine-0.6.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b06e10ce5a83ed32d7322b79b929eccde02fa69cdca74a0af69f373f4a0ba38e", size = 877325, upload-time = "2025-11-26T10:46:25.994Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/4f/d29f20e48adc1e7bab38f74498935dd3047c3ffc31fdf8424a68d821965b/python_calamine-0.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57fc3dd9a4b293ad1300c35b10f4f6bdffb80861b6b4fe7e5bb05ef12dc6bc43", size = 854967, 
upload-time = "2025-11-26T10:46:27.38Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/04/c8eac3245010eaa0a39b27c4c53d401eae8719a0a8044106d7cb7761d57d/python_calamine-0.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6b44d98d29769595af6d17443607156da55b8ee7338011abd20f51a3c540d1", size = 928722, upload-time = "2025-11-26T10:46:28.807Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/0d/a08871caf15673a7af94a42ae7af183ef9f6790851c027e97d425a7285ba/python_calamine-0.6.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:599928d30ef294c688c2a2db0c24e05a81a7dff08fec7865f6724694ab68950a", size = 912566, upload-time = "2025-11-26T10:46:30.26Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/7b/5547c90b5d9b0ca10dd81398673968a08040ad0b6a757e2ca05d8deef6eb/python_calamine-0.6.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28a4799efc9d163130edb8b4f7b35a0e51f46b40e3ce57c024fa2c52d10bbe4b", size = 1073608, upload-time = "2025-11-26T10:46:31.784Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/f3/4b8007cab8084d5d5c1b3da1f4490035033692d12b66a5fcc2903fb76554/python_calamine-0.6.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a57a1876748746c9e41237fd1dd49c2f231628c5f97ca1ef1b100db97af7a0e2", size = 964662, upload-time = "2025-11-26T10:46:33.193Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/d2/71ea99fd1b06864791267c9ff43480fa569d0f7700506bbb84d9a17cb749/python_calamine-0.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c73c9b06cac54d0b4350d6935bab6fead954b997062854aeaba3c7a966db5ac0", size = 933579, upload-time = "2025-11-26T10:46:34.62Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/68/5556f44fdd1ed3e48c043e407e4ca7cd311787934b1ded9870d2dd1e5f4e/python_calamine-0.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:c9e3db8502f59234bcd72cb3042c628fb2a99e59e721dbd11e8ee6106cee3513", size = 975141, upload-time = "2025-11-26T10:46:36.026Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/fa/595c254014c863b8f9ed68cef6dcdb58c3ea3bb0166fe6f120808441b427/python_calamine-0.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:978006312127727bb0f481992aa1e2f0d2109efe5d4a3fe248471efb1591d06d", size = 1110935, upload-time = "2025-11-26T10:46:37.531Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/ae/9377b92cf380f7d5843348de148646c630665a32c2efcc7a88f3e8056eaf/python_calamine-0.6.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:8a39d1e58610674f4fcc3648aff885897998228f6bb6d09e09dccd73c4b59e64", size = 1179688, upload-time = "2025-11-26T10:46:39.14Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/23/d439d9dc61aa6bb5dcae4ee95de8cded53d2099d9d309531159e7050be26/python_calamine-0.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7d5874a1d83361a32099bfe6dce806498a4d9cf070dde0b48fd3e691789c1322", size = 1108864, upload-time = "2025-11-26T10:46:41.53Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/c0/b54f124f03fff0c5439e899f6e3fb89636def08ac04f5c24184d2bfdc17f/python_calamine-0.6.1-cp312-cp312-win32.whl", hash = "sha256:9dca5bc0490b377fc619b4e93bff91a3ba296fefa2aab3eb7a652c7c7606ad61", size = 695346, upload-time = "2025-11-26T10:46:44.203Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/d2/2df6e2ae9c63a7ffb6ceb3f8f36e2711e772bb96ddb0785e37107996d562/python_calamine-0.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:1675ff630d439144ad5805a28bf4f65afd100b38f2a8703ceebe7c7e47039bc5", size = 747324, upload-time = "2025-11-26T10:46:45.478Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f7/3f/1e55ccab357f653dfe5f7991ff7f7a38b1892e88610a8873db1549e7c0c5/python_calamine-0.6.1-cp312-cp312-win_arm64.whl", hash = "sha256:4f7a68b31474a39a0f22e1f1464857222877e740255db196e141ff9db0d3229c", size = 716731, 
upload-time = "2025-11-26T10:46:47.351Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/ec/e111c1a3a4c138ebc41e416e33730ee6d7c54e714af21c2a4e59b41715a5/python_calamine-0.6.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:857e4cddadba9b55c76dc583c58c5dc101a6cd5320190c10f8b2ab98d66c9040", size = 879539, upload-time = "2026-02-18T13:36:21.674Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/26/fe4c2138ff21542e2f1130a4d83c330d7f9486b62775196e998b88a03de6/python_calamine-0.6.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cd89d6a53e4b22328cd685fc054c31d359cb3ae67bd24bc57e1c1db62a4cfc97", size = 858642, upload-time = "2026-02-18T13:36:23.847Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/b0/bfeaf45ac5e2f6553723dd2fbe127d1d17c6f26496db5781de42a933776a/python_calamine-0.6.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d6c9af39db39e0c70710ae79cd1b5d980f9c0aea55fc16d194460c1561a0c6a", size = 925242, upload-time = "2026-02-18T13:36:25.236Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/6e/81106aa80609075015d400584030605b05f5e12931717160dcc58fdc4980/python_calamine-0.6.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a2382dbc410dd48c99d89ee460662cc70892fe1b2901ab982604b923e8eb8f6", size = 905295, upload-time = "2026-02-18T13:36:27.152Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/ba/6311b24f9889246be63b664630c5601039ef771f7ed04c8f51aace39b7a9/python_calamine-0.6.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ebb93255709874ede5b5e62828cb5758e60097e5390b6c9a3eb7751b617b12e", size = 1063473, upload-time = "2026-02-18T13:36:29.226Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/e4/027a1b046d30768872307ebe808dc4cdc5357295cdcda98b30b3ea924904/python_calamine-0.6.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:837bca19bd945cb83aded433f4cf76e80d70a5400404d876400ca7e88e5ea311", size = 965355, upload-time = "2026-02-18T13:36:31.376Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/4d/da8716a1b3a66938aaabe36873f6fa210fa063bab1b20c2ec236013de6b3/python_calamine-0.6.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:723990a47668cb819f307ccc634741370d3cd3804a0ee8cda392a522ae6d5016", size = 935091, upload-time = "2026-02-18T13:36:32.777Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/40/9521e8da5496cbc4b18027626a40018301f546b3e9802ca2f3a6cb5b4739/python_calamine-0.6.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b067630d693e1d7de41e3d44a99c7dd3feebb52db8dda8636ac3f70d8b6a4ad6", size = 974070, upload-time = "2026-02-18T13:36:34.055Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/b0/7a63963512c5ba7e9539b7452e2b1561625e63e4e29c044e487e2e93dcbe/python_calamine-0.6.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6ab09c9da53a2b33633e9f940aed11c08e083810a0fd6885826cdc52ba4f86a5", size = 1100321, upload-time = "2026-02-18T13:36:35.475Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/81/e2bc38a5cf9629f656adcdabe8e134028f60c236e4bb96375dda90db3fdd/python_calamine-0.6.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:ae08e1308a0d0c6b8b4cc0a039ed8a85fc9ee2f8a3ca9ea57b1af9f97ed68fe4", size = 1181039, upload-time = "2026-02-18T13:36:37.195Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/ea/513117015fd5903ca6dde9c8fb8502af60af6965642f4e3311623943e673/python_calamine-0.6.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c441a20c7aff0e904ca01b5cdc1e5be2c6d4a41a24a0ea4d5ea6d211343bb95f", size = 1144843, upload-time = "2026-02-18T13:36:38.393Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/14/8846478dacf31535f5f15448ade3bc688b51f3183f1b52844451aa27b0e6/python_calamine-0.6.2-cp312-cp312-win32.whl", hash = 
"sha256:39cae8e66f8bce499f5f965f4575ddf61e30184cc97f02e1c7031a57abe0903b", size = 692411, upload-time = "2026-02-18T13:36:39.741Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bb/e2/2d2dcf4ec7e5ec08e33bf966ab010a7be178a4b623bd5f7601d47f2c734c/python_calamine-0.6.2-cp312-cp312-win_amd64.whl", hash = "sha256:1617efa24532f2420934a8cf77e6d33ff1740cae1d39355cab4f4cf141fdab49", size = 748960, upload-time = "2026-02-18T13:36:40.922Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/eb/2f50f3395c0435e6186cab56c36d04c06581ba827264bca1f1acae523aa3/python_calamine-0.6.2-cp312-cp312-win_arm64.whl", hash = "sha256:c2b378db494740e540e8157a7e5fe61dadae69ad2d988a7c80f9583f434acf07", size = 718992, upload-time = "2026-02-18T13:36:42.671Z" }, ] [[package]] @@ -3097,22 +3020,13 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/c3/0bd11992072e6a1c513b16500a5d07f91a24017c5909b02c72c62d7ad024/python_jose-3.5.0-py2.py3-none-any.whl", hash = "sha256:abd1202f23d34dfad2c3d28cb8617b90acf34132c7afd60abd0b0b7d3cb55771", size = 34624, upload-time = "2025-05-28T17:31:52.802Z" }, ] -[[package]] -name = "python-json-logger" -version = "4.0.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/bf/eca6a3d43db1dae7070f70e160ab20b807627ba953663ba07928cdd3dc58/python_json_logger-4.0.0.tar.gz", hash = "sha256:f58e68eb46e1faed27e0f574a55a0455eecd7b8a5b88b85a784519ba3cff047f", size = 17683, upload-time = "2025-10-06T04:15:18.984Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, -] - [[package]] name = "python-multipart" -version = "0.0.21" +version = "0.0.22" source = { registry = 
"https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/96/804520d0850c7db98e5ccb70282e29208723f0964e88ffd9d0da2f52ea09/python_multipart-0.0.21.tar.gz", hash = "sha256:7137ebd4d3bbf70ea1622998f902b97a29434a9e8dc40eb203bbcf7c2a2cba92", size = 37196, upload-time = "2025-12-17T09:24:22.446Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, ] [[package]] @@ -3132,11 +3046,11 @@ wheels = [ [[package]] name = "pytz" -version = "2025.2" +version = "2026.1.post1" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, 
upload-time = "2026-03-03T07:47:50.683Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" }, ] [[package]] @@ -3176,6 +3090,18 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, ] +[[package]] +name = "rdflib" +version = "7.6.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "pyparsing" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/f5/18bb77b7af9526add0c727a3b2048959847dc5fb030913e2918bf384fec3/rdflib-7.6.0.tar.gz", hash = "sha256:6c831288d5e4a5a7ece85d0ccde9877d512a3d0f02d7c06455d00d6d0ea379df", size = 4943826, upload-time = "2026-02-13T07:15:55.938Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/c2/6604a71269e0c1bd75656d5a001432d16f2cc5b8c057140ec797155c295e/rdflib-7.6.0-py3-none-any.whl", hash = "sha256:30c0a3ebf4c0e09215f066be7246794b6492e054e782d7ac2a34c9f70a15e0dd", size = 615416, upload-time = "2026-02-13T07:15:46.487Z" }, +] + [[package]] name = "redbear-mem" version = "0.1.0" @@ -3247,6 +3173,7 @@ dependencies = [ { name = "markupsafe" }, { name = "matplotlib" }, { name = "mcp" }, + { name = "modelscope" }, { name = "neo4j" }, { name = "networkx" }, 
{ name = "nltk" }, @@ -3281,6 +3208,7 @@ dependencies = [ { name = "python-multipart" }, { name = "python-pptx" }, { name = "pyyaml" }, + { name = "rdflib" }, { name = "redis" }, { name = "requests" }, { name = "roman-numbers" }, @@ -3387,6 +3315,7 @@ requires-dist = [ { name = "markupsafe", specifier = "==3.0.3" }, { name = "matplotlib", specifier = ">=3.10.7" }, { name = "mcp", specifier = ">=1.21.1" }, + { name = "modelscope", specifier = ">=1.34.0" }, { name = "neo4j", specifier = ">=6.0.3" }, { name = "networkx", specifier = ">=3.4.2" }, { name = "nltk", specifier = "==3.9.2" }, @@ -3422,6 +3351,7 @@ requires-dist = [ { name = "python-multipart", specifier = ">=0.0.20" }, { name = "python-pptx", specifier = "==1.0.2" }, { name = "pyyaml", specifier = "==6.0.3" }, + { name = "rdflib", specifier = ">=7.0.0" }, { name = "redis", specifier = "==6.4.0" }, { name = "requests", specifier = "==2.32.5" }, { name = "roman-numbers", specifier = "==1.0.2" }, @@ -3470,40 +3400,40 @@ wheels = [ [[package]] name = "referencing" -version = "0.36.2" +version = "0.37.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, { name = "typing-extensions" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash 
= "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload-time = "2025-01-25T08:48:14.241Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, ] [[package]] name = "regex" -version = "2026.1.15" +version = "2026.2.28" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/86/07d5056945f9ec4590b518171c4254a5925832eb727b56d3c38a7476f316/regex-2026.1.15.tar.gz", hash = "sha256:164759aa25575cbc0651bef59a0b18353e54300d79ace8084c818ad8ac72b7d5", size = 414811, upload-time = "2026-01-14T23:18:02.775Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/71/41455aa99a5a5ac1eaf311f5d8efd9ce6433c03ac1e0962de163350d0d97/regex-2026.2.28.tar.gz", hash = "sha256:a729e47d418ea11d03469f321aaf67cdee8954cde3ff2cf8403ab87951ad10f2", size = 415184, upload-time = "2026-02-28T02:19:42.792Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/81/10d8cf43c807d0326efe874c1b79f22bfb0fb226027b0b19ebc26d301408/regex-2026.1.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4c8fcc5793dde01641a35905d6731ee1548f02b956815f8f1cab89e515a5bdf1", size = 489398, upload-time = "2026-01-14T23:14:43.741Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/b0/7c2a74e74ef2a7c32de724658a69a862880e3e4155cba992ba04d1c70400/regex-2026.1.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bfd876041a956e6a90ad7cdb3f6a630c07d491280bfeed4544053cd434901681", size = 291339, upload-time = "2026-01-14T23:14:45.183Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/4d/16d0773d0c818417f4cc20aa0da90064b966d22cd62a8c46765b5bd2d643/regex-2026.1.15-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:9250d087bc92b7d4899ccd5539a1b2334e44eee85d848c4c1aef8e221d3f8c8f", size = 289003, upload-time = "2026-01-14T23:14:47.25Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/e4/1fc4599450c9f0863d9406e944592d968b8d6dfd0d552a7d569e43bceada/regex-2026.1.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8a154cf6537ebbc110e24dabe53095e714245c272da9c1be05734bdad4a61aa", size = 798656, upload-time = "2026-01-14T23:14:48.77Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/e6/59650d73a73fa8a60b3a590545bfcf1172b4384a7df2e7fe7b9aab4e2da9/regex-2026.1.15-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8050ba2e3ea1d8731a549e83c18d2f0999fbc99a5f6bd06b4c91449f55291804", size = 864252, upload-time = "2026-01-14T23:14:50.528Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/ab/1d0f4d50a1638849a97d731364c9a80fa304fec46325e48330c170ee8e80/regex-2026.1.15-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf065240704cb8951cc04972cf107063917022511273e0969bdb34fc173456c", size = 912268, upload-time = "2026-01-14T23:14:52.952Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/df/0d722c030c82faa1d331d1921ee268a4e8fb55ca8b9042c9341c352f17fa/regex-2026.1.15-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c32bef3e7aeee75746748643667668ef941d28b003bfc89994ecf09a10f7a1b5", size = 803589, upload-time = "2026-01-14T23:14:55.182Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/23/33289beba7ccb8b805c6610a8913d0131f834928afc555b241caabd422a9/regex-2026.1.15-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d5eaa4a4c5b1906bd0d2508d68927f15b81821f85092e06f1a34a4254b0e1af3", size = 775700, upload-time = "2026-01-14T23:14:56.707Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e7/65/bf3a42fa6897a0d3afa81acb25c42f4b71c274f698ceabd75523259f6688/regex-2026.1.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:86c1077a3cc60d453d4084d5b9649065f3bf1184e22992bd322e1f081d3117fb", size = 787928, upload-time = "2026-01-14T23:14:58.312Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/f5/13bf65864fc314f68cdd6d8ca94adcab064d4d39dbd0b10fef29a9da48fc/regex-2026.1.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:2b091aefc05c78d286657cd4db95f2e6313375ff65dcf085e42e4c04d9c8d410", size = 858607, upload-time = "2026-01-14T23:15:00.657Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/31/040e589834d7a439ee43fb0e1e902bc81bd58a5ba81acffe586bb3321d35/regex-2026.1.15-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:57e7d17f59f9ebfa9667e6e5a1c0127b96b87cb9cede8335482451ed00788ba4", size = 763729, upload-time = "2026-01-14T23:15:02.248Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/84/6921e8129687a427edf25a34a5594b588b6d88f491320b9de5b6339a4fcb/regex-2026.1.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c6c4dcdfff2c08509faa15d36ba7e5ef5fcfab25f1e8f85a0c8f45bc3a30725d", size = 850697, upload-time = "2026-01-14T23:15:03.878Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/87/3d06143d4b128f4229158f2de5de6c8f2485170c7221e61bf381313314b2/regex-2026.1.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf8ff04c642716a7f2048713ddc6278c5fd41faa3b9cab12607c7abecd012c22", size = 789849, upload-time = "2026-01-14T23:15:06.102Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/69/c50a63842b6bd48850ebc7ab22d46e7a2a32d824ad6c605b218441814639/regex-2026.1.15-cp312-cp312-win32.whl", hash = "sha256:82345326b1d8d56afbe41d881fdf62f1926d7264b2fc1537f99ae5da9aad7913", size = 266279, upload-time = "2026-01-14T23:15:07.678Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/f2/36/39d0b29d087e2b11fd8191e15e81cce1b635fcc845297c67f11d0d19274d/regex-2026.1.15-cp312-cp312-win_amd64.whl", hash = "sha256:4def140aa6156bc64ee9912383d4038f3fdd18fee03a6f222abd4de6357ce42a", size = 277166, upload-time = "2026-01-14T23:15:09.257Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/28/32/5b8e476a12262748851fa8ab1b0be540360692325975b094e594dfebbb52/regex-2026.1.15-cp312-cp312-win_arm64.whl", hash = "sha256:c6c565d9a6e1a8d783c1948937ffc377dd5771e83bd56de8317c450a954d2056", size = 270415, upload-time = "2026-01-14T23:15:10.743Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/42/9061b03cf0fc4b5fa2c3984cbbaed54324377e440a5c5a29d29a72518d62/regex-2026.2.28-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fcf26c3c6d0da98fada8ae4ef0aa1c3405a431c0a77eb17306d38a89b02adcd7", size = 489574, upload-time = "2026-02-28T02:16:50.455Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/83/0c8a5623a233015595e3da499c5a1c13720ac63c107897a6037bb97af248/regex-2026.2.28-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02473c954af35dd2defeb07e44182f5705b30ea3f351a7cbffa9177beb14da5d", size = 291426, upload-time = "2026-02-28T02:16:52.52Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/06/3ef1ac6910dc3295ebd71b1f9bfa737e82cfead211a18b319d45f85ddd09/regex-2026.2.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b65d33a17101569f86d9c5966a8b1d7fbf8afdda5a8aa219301b0a80f58cf7d", size = 289200, upload-time = "2026-02-28T02:16:54.08Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/c9/8cc8d850b35ab5650ff6756a1cb85286e2000b66c97520b29c1587455344/regex-2026.2.28-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e71dcecaa113eebcc96622c17692672c2d104b1d71ddf7adeda90da7ddeb26fc", size = 796765, upload-time = "2026-02-28T02:16:55.905Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e9/5d/57702597627fc23278ebf36fbb497ac91c0ce7fec89ac6c81e420ca3e38c/regex-2026.2.28-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:481df4623fa4969c8b11f3433ed7d5e3dc9cec0f008356c3212b3933fb77e3d8", size = 863093, upload-time = "2026-02-28T02:16:58.094Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/6d/f3ecad537ca2811b4d26b54ca848cf70e04fcfc138667c146a9f3157779c/regex-2026.2.28-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64e7c6ad614573e0640f271e811a408d79a9e1fe62a46adb602f598df42a818d", size = 909455, upload-time = "2026-02-28T02:17:00.918Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/40/bb226f203caa22c1043c1ca79b36340156eca0f6a6742b46c3bb222a3a57/regex-2026.2.28-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b08a06976ff4fb0d83077022fde3eca06c55432bb997d8c0495b9a4e9872f4", size = 802037, upload-time = "2026-02-28T02:17:02.842Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/7c/c6d91d8911ac6803b45ca968e8e500c46934e58c0903cbc6d760ee817a0a/regex-2026.2.28-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:864cdd1a2ef5716b0ab468af40139e62ede1b3a53386b375ec0786bb6783fc05", size = 775113, upload-time = "2026-02-28T02:17:04.506Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/8d/4a9368d168d47abd4158580b8c848709667b1cd293ff0c0c277279543bd0/regex-2026.2.28-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:511f7419f7afab475fd4d639d4aedfc54205bcb0800066753ef68a59f0f330b5", size = 784194, upload-time = "2026-02-28T02:17:06.888Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cc/bf/2c72ab5d8b7be462cb1651b5cc333da1d0068740342f350fcca3bca31947/regex-2026.2.28-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b42f7466e32bf15a961cf09f35fa6323cc72e64d3d2c990b10de1274a5da0a59", size = 
856846, upload-time = "2026-02-28T02:17:09.11Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/f4/6b65c979bb6d09f51bb2d2a7bc85de73c01ec73335d7ddd202dcb8cd1c8f/regex-2026.2.28-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8710d61737b0c0ce6836b1da7109f20d495e49b3809f30e27e9560be67a257bf", size = 763516, upload-time = "2026-02-28T02:17:11.004Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/32/29ea5e27400ee86d2cc2b4e80aa059df04eaf78b4f0c18576ae077aeff68/regex-2026.2.28-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4390c365fd2d45278f45afd4673cb90f7285f5701607e3ad4274df08e36140ae", size = 849278, upload-time = "2026-02-28T02:17:12.693Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/91/3233d03b5f865111cd517e1c95ee8b43e8b428d61fa73764a80c9bb6f537/regex-2026.2.28-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb3b1db8ff6c7b8bf838ab05583ea15230cb2f678e569ab0e3a24d1e8320940b", size = 790068, upload-time = "2026-02-28T02:17:14.9Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/92/abc706c1fb03b4580a09645b206a3fc032f5a9f457bc1a8038ac555658ab/regex-2026.2.28-cp312-cp312-win32.whl", hash = "sha256:f8ed9a5d4612df9d4de15878f0bc6aa7a268afbe5af21a3fdd97fa19516e978c", size = 266416, upload-time = "2026-02-28T02:17:17.15Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/06/2a6f7dff190e5fa9df9fb4acf2fdf17a1aa0f7f54596cba8de608db56b3a/regex-2026.2.28-cp312-cp312-win_amd64.whl", hash = "sha256:01d65fd24206c8e1e97e2e31b286c59009636c022eb5d003f52760b0f42155d4", size = 277297, upload-time = "2026-02-28T02:17:18.723Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/f0/58a2484851fadf284458fdbd728f580d55c1abac059ae9f048c63b92f427/regex-2026.2.28-cp312-cp312-win_arm64.whl", hash = "sha256:c0b5ccbb8ffb433939d248707d4a8b31993cb76ab1a0187ca886bf50e96df952", size = 270408, upload-time = "2026-02-28T02:17:20.328Z" }, ] [[package]] @@ -3535,15 +3465,15 @@ wheels = [ [[package]] name = "rich" -version = 
"14.2.0" +version = "14.3.3" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] [[package]] @@ -3666,23 +3596,23 @@ wheels = [ [[package]] name = "scipy" -version = "1.17.0" +version = "1.17.1" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload-time = "2026-01-10T21:34:23.009Z" } +sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/11/7241a63e73ba5a516f1930ac8d5b44cbbfabd35ac73a2d08ca206df007c4/scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57", size = 31364580, upload-time = "2026-01-10T21:25:25.717Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/1d/5057f812d4f6adc91a20a2d6f2ebcdb517fdbc87ae3acc5633c9b97c8ba5/scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e", size = 27969012, upload-time = "2026-01-10T21:25:30.921Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/21/f6ec556c1e3b6ec4e088da667d9987bb77cc3ab3026511f427dc8451187d/scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8", size = 20140691, upload-time = "2026-01-10T21:25:34.802Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/fe/5e5ad04784964ba964a96f16c8d4676aa1b51357199014dce58ab7ec5670/scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306", size = 22463015, upload-time = "2026-01-10T21:25:39.277Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/69/7c347e857224fcaf32a34a05183b9d8a7aca25f8f2d10b8a698b8388561a/scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742", size = 32724197, upload-time = "2026-01-10T21:25:44.084Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d1/fe/66d73b76d378ba8cc2fe605920c0c75092e3a65ae746e1e767d9d020a75a/scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b", size = 35009148, upload-time = "2026-01-10T21:25:50.591Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/af/07/07dec27d9dc41c18d8c43c69e9e413431d20c53a0339c388bcf72f353c4b/scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d", size = 34798766, upload-time = "2026-01-10T21:25:59.41Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/61/0470810c8a093cdacd4ba7504b8a218fd49ca070d79eca23a615f5d9a0b0/scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e", size = 37405953, upload-time = "2026-01-10T21:26:07.75Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/ce/672ed546f96d5d41ae78c4b9b02006cedd0b3d6f2bf5bb76ea455c320c28/scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8", size = 36328121, upload-time = "2026-01-10T21:26:16.509Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/21/38165845392cae67b61843a52c6455d47d0cc2a40dd495c89f4362944654/scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b", size = 24314368, upload-time = "2026-01-10T21:26:23.087Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, ] [[package]] @@ -3714,11 +3644,11 @@ wheels = [ [[package]] name = "setuptools" -version = "80.10.1" +version = "80.10.2" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/ff/f75651350db3cf2ef767371307eb163f3cc1ac03e16fdf3ac347607f7edb/setuptools-80.10.1.tar.gz", hash = "sha256:bf2e513eb8144c3298a3bd28ab1a5edb739131ec5c22e045ff93cd7f5319703a", size = 1229650, upload-time = "2026-01-21T09:42:03.061Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/95/faf61eb8363f26aa7e1d762267a8d602a1b26d4f3a1e758e92cb3cb8b054/setuptools-80.10.2.tar.gz", hash = "sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70", size = 1200343, upload-time = "2026-01-25T22:38:17.252Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/76/f963c61683a39084aa575f98089253e1e852a4417cb8a3a8a422923a5246/setuptools-80.10.1-py3-none-any.whl", hash = "sha256:fc30c51cbcb8199a219c12cc9c281b5925a4978d212f84229c909636d9f6984e", size = 1099859, upload-time = 
"2026-01-21T09:42:00.688Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234, upload-time = "2026-01-25T22:38:15.216Z" }, ] [[package]] @@ -3740,15 +3670,6 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, ] -[[package]] -name = "shellingham" -version = "1.5.4" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, -] - [[package]] name = "simpleeval" version = "1.0.3" @@ -3769,14 +3690,14 @@ wheels = [ [[package]] name = "smart-open" -version = "7.5.0" +version = "7.5.1" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "wrapt" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/9a/0a7acb748b86e2922982366d780ca4b16c33f7246fa5860d26005c97e4f3/smart_open-7.5.0.tar.gz", hash = "sha256:f394b143851d8091011832ac8113ea4aba6b92e6c35f6e677ddaaccb169d7cb9", size = 53920, upload-time = "2025-11-08T21:38:40.698Z" } +sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e8/be/a66598b305763861a9ab15ff0f2fbc44e47b1ce7a776797337a4eef37c66/smart_open-7.5.1.tar.gz", hash = "sha256:3f08e16827c4733699e6b2cc40328a3568f900cb12ad9a3ad233ba6c872d9fe7", size = 54034, upload-time = "2026-02-23T11:01:28.979Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = "sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940, upload-time = "2025-11-08T21:38:39.024Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/ea/dcdecd68acebb49d3fd560473a43499b1635076f7f1ae8641c060fe7ce74/smart_open-7.5.1-py3-none-any.whl", hash = "sha256:3e07cbbd9c8a908bcb8e25d48becf1a5cbb4886fa975e9f34c672ed171df2318", size = 64108, upload-time = "2026-02-23T11:01:27.429Z" }, ] [[package]] @@ -3896,11 +3817,11 @@ wheels = [ [[package]] name = "tenacity" -version = "9.1.2" +version = "9.1.4" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" }, ] [[package]] @@ -4013,14 +3934,14 @@ wheels = [ [[package]] name = "tqdm" -version = "4.67.1" +version = "4.67.3" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] [[package]] @@ -4040,21 +3961,6 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/bf/945d527ff706233636c73880b22c7c953f3faeb9d6c7e2e85bfbfd0134a0/trio-0.32.0-py3-none-any.whl", hash = "sha256:4ab65984ef8370b79a76659ec87aa3a30c5c7c83ff250b4de88c29a8ab6123c5", 
size = 512030, upload-time = "2025-10-31T07:18:15.885Z" }, ] -[[package]] -name = "typer" -version = "0.21.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "click" }, - { name = "rich" }, - { name = "shellingham" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/bf/8825b5929afd84d0dabd606c67cd57b8388cb3ec385f7ef19c5cc2202069/typer-0.21.1.tar.gz", hash = "sha256:ea835607cd752343b6b2b7ce676893e5a0324082268b48f27aa058bdb7d2145d", size = 110371, upload-time = "2026-01-06T11:21:10.989Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0" @@ -4115,6 +4021,15 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/d2/fcf7192dd1cd8c090b6cfd53fa223c4fb2887a17c47e06bc356d44f40dfb/umap_learn-0.5.11-py3-none-any.whl", hash = "sha256:cb17adbde9d544ba79481b3ab4d81ac222e940f3d9219307bea6044f869af3cc", size = 90890, upload-time = "2026-01-12T20:44:46.511Z" }, ] +[[package]] +name = "uncalled-for" +version = "0.2.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/7c/b5b7d8136f872e3f13b0584e576886de0489d7213a12de6bebf29ff6ebfc/uncalled_for-0.2.0.tar.gz", hash = "sha256:b4f8fdbcec328c5a113807d653e041c5094473dd4afa7c34599ace69ccb7e69f", size = 49488, upload-time = "2026-02-27T17:40:58.137Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/7f/4320d9ce3be404e6310b915c3629fe27bf1e2f438a1a7a3cb0396e32e9a9/uncalled_for-0.2.0-py3-none-any.whl", hash = "sha256:2c0bd338faff5f930918f79e7eb9ff48290df2cb05fcc0b40a7f334e55d4d85f", size = 11351, upload-time = 
"2026-02-27T17:40:56.804Z" }, +] + [[package]] name = "urllib3" version = "2.6.3" @@ -4126,24 +4041,24 @@ wheels = [ [[package]] name = "uuid-utils" -version = "0.14.0" +version = "0.14.1" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/7c/3a926e847516e67bc6838634f2e54e24381105b4e80f9338dc35cca0086b/uuid_utils-0.14.0.tar.gz", hash = "sha256:fc5bac21e9933ea6c590433c11aa54aaca599f690c08069e364eb13a12f670b4", size = 22072, upload-time = "2026-01-20T20:37:15.729Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/d1/38a573f0c631c062cf42fa1f5d021d4dd3c31fb23e4376e4b56b0c9fbbed/uuid_utils-0.14.1.tar.gz", hash = "sha256:9bfc95f64af80ccf129c604fb6b8ca66c6f256451e32bc4570f760e4309c9b69", size = 22195, upload-time = "2026-02-20T22:50:38.833Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/42/42d003f4a99ddc901eef2fd41acb3694163835e037fb6dde79ad68a72342/uuid_utils-0.14.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:f6695c0bed8b18a904321e115afe73b34444bc8451d0ce3244a1ec3b84deb0e5", size = 601786, upload-time = "2026-01-20T20:37:09.843Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/e6/775dfb91f74b18f7207e3201eb31ee666d286579990dc69dd50db2d92813/uuid_utils-0.14.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4f0a730bbf2d8bb2c11b93e1005e91769f2f533fa1125ed1f00fd15b6fcc732b", size = 303943, upload-time = "2026-01-20T20:37:18.767Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/82/ea5f5e85560b08a1f30cdc65f75e76494dc7aba9773f679e7eaa27370229/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40ce3fd1a4fdedae618fc3edc8faf91897012469169d600133470f49fd699ed3", size = 340467, upload-time = "2026-01-20T20:37:11.794Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ca/33/54b06415767f4569882e99b6470c6c8eeb97422686a6d432464f9967fd91/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:09ae4a98416a440e78f7d9543d11b11cae4bab538b7ed94ec5da5221481748f2", size = 346333, upload-time = "2026-01-20T20:37:12.818Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/10/a6bce636b8f95e65dc84bf4a58ce8205b8e0a2a300a38cdbc83a3f763d27/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:971e8c26b90d8ae727e7f2ac3ee23e265971d448b3672882f2eb44828b2b8c3e", size = 470859, upload-time = "2026-01-20T20:37:01.512Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/27/84121c51ea72f013f0e03d0886bcdfa96b31c9b83c98300a7bd5cc4fa191/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5cde1fa82804a8f9d2907b7aec2009d440062c63f04abbdb825fce717a5e860", size = 341988, upload-time = "2026-01-20T20:37:22.881Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/a4/01c1c7af5e6a44f20b40183e8dac37d6ed83e7dc9e8df85370a15959b804/uuid_utils-0.14.0-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c7343862a2359e0bd48a7f3dfb5105877a1728677818bb694d9f40703264a2db", size = 365784, upload-time = "2026-01-20T20:37:10.808Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/f0/65ee43ec617b8b6b1bf2a5aecd56a069a08cca3d9340c1de86024331bde3/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c51e4818fdb08ccec12dc7083a01f49507b4608770a0ab22368001685d59381b", size = 523750, upload-time = "2026-01-20T20:37:06.152Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/d3/6bf503e3f135a5dfe705a65e6f89f19bccd55ac3fb16cb5d3ec5ba5388b8/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:181bbcccb6f93d80a8504b5bd47b311a1c31395139596edbc47b154b0685b533", size = 615818, upload-time = "2026-01-20T20:37:21.816Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/df/6c/99937dd78d07f73bba831c8dc9469dfe4696539eba2fc269ae1b92752f9e/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:5c8ae96101c3524ba8dbf762b6f05e9e9d896544786c503a727c5bf5cb9af1a7", size = 580831, upload-time = "2026-01-20T20:37:19.691Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/fa/bbc9e2c25abd09a293b9b097a0d8fc16acd6a92854f0ec080f1ea7ad8bb3/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00ac3c6edfdaff7e1eed041f4800ae09a3361287be780d7610a90fdcde9befdc", size = 546333, upload-time = "2026-01-20T20:37:03.117Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/9b/e5e99b324b1b5f0c62882230455786df0bc66f67eff3b452447e703f45d2/uuid_utils-0.14.0-cp39-abi3-win32.whl", hash = "sha256:ec2fd80adf8e0e6589d40699e6f6df94c93edcc16dd999be0438dd007c77b151", size = 177319, upload-time = "2026-01-20T20:37:04.208Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/28/2c7d417ea483b6ff7820c948678fdf2ac98899dc7e43bb15852faa95acaf/uuid_utils-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:efe881eb43a5504fad922644cb93d725fd8a6a6d949bd5a4b4b7d1a1587c7fd1", size = 182566, upload-time = "2026-01-20T20:37:16.868Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/86/49e4bdda28e962fbd7266684171ee29b3d92019116971d58783e51770745/uuid_utils-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:32b372b8fd4ebd44d3a219e093fe981af4afdeda2994ee7db208ab065cfcd080", size = 182809, upload-time = "2026-01-20T20:37:05.139Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/b7/add4363039a34506a58457d96d4aa2126061df3a143eb4d042aedd6a2e76/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:93a3b5dc798a54a1feb693f2d1cb4cf08258c32ff05ae4929b5f0a2ca624a4f0", size = 604679, upload-time = "2026-02-20T22:50:27.469Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/dd/84/d1d0bef50d9e66d31b2019997c741b42274d53dde2e001b7a83e9511c339/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ccd65a4b8e83af23eae5e56d88034b2fe7264f465d3e830845f10d1591b81741", size = 309346, upload-time = "2026-02-20T22:50:31.857Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/ed/b6d6fd52a6636d7c3eddf97d68da50910bf17cd5ac221992506fb56cf12e/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b56b0cacd81583834820588378e432b0696186683b813058b707aedc1e16c4b1", size = 344714, upload-time = "2026-02-20T22:50:42.642Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/a7/a19a1719fb626fe0b31882db36056d44fe904dc0cf15b06fdf56b2679cf7/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb3cf14de789097320a3c56bfdfdd51b1225d11d67298afbedee7e84e3837c96", size = 350914, upload-time = "2026-02-20T22:50:36.487Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/fc/f6690e667fdc3bb1a73f57951f97497771c56fe23e3d302d7404be394d4f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e0854a90d67f4b0cc6e54773deb8be618f4c9bad98d3326f081423b5d14fae", size = 482609, upload-time = "2026-02-20T22:50:37.511Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/6e/dcd3fa031320921a12ec7b4672dea3bd1dd90ddffa363a91831ba834d559/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce6743ba194de3910b5feb1a62590cd2587e33a73ab6af8a01b642ceb5055862", size = 345699, upload-time = "2026-02-20T22:50:46.87Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/28/e5220204b58b44ac0047226a9d016a113fde039280cc8732d9e6da43b39f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:043fb58fde6cf1620a6c066382f04f87a8e74feb0f95a585e4ed46f5d44af57b", size = 372205, upload-time = "2026-02-20T22:50:28.438Z" }, + { 
url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/d9/3d2eb98af94b8dfffc82b6a33b4dfc87b0a5de2c68a28f6dde0db1f8681b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c915d53f22945e55fe0d3d3b0b87fd965a57f5fd15666fd92d6593a73b1dd297", size = 521836, upload-time = "2026-02-20T22:50:23.057Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/15/0eb106cc6fe182f7577bc0ab6e2f0a40be247f35c5e297dbf7bbc460bd02/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:0972488e3f9b449e83f006ead5a0e0a33ad4a13e4462e865b7c286ab7d7566a3", size = 625260, upload-time = "2026-02-20T22:50:25.949Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/17/f539507091334b109e7496830af2f093d9fc8082411eafd3ece58af1f8ba/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:1c238812ae0c8ffe77d8d447a32c6dfd058ea4631246b08b5a71df586ff08531", size = 587824, upload-time = "2026-02-20T22:50:35.225Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2e/c2/d37a7b2e41f153519367d4db01f0526e0d4b06f1a4a87f1c5dfca5d70a8b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:bec8f8ef627af86abf8298e7ec50926627e29b34fa907fcfbedb45aaa72bca43", size = 551407, upload-time = "2026-02-20T22:50:44.915Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/36/2d24b2cbe78547c6532da33fb8613debd3126eccc33a6374ab788f5e46e9/uuid_utils-0.14.1-cp39-abi3-win32.whl", hash = "sha256:b54d6aa6252d96bac1fdbc80d26ba71bad9f220b2724d692ad2f2310c22ef523", size = 183476, upload-time = "2026-02-20T22:50:32.745Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/92/2d7e90df8b1a69ec4cff33243ce02b7a62f926ef9e2f0eca5a026889cd73/uuid_utils-0.14.1-cp39-abi3-win_amd64.whl", hash = "sha256:fc27638c2ce267a0ce3e06828aff786f91367f093c80625ee21dad0208e0f5ba", size = 187147, upload-time = "2026-02-20T22:50:45.807Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d9/26/529f4beee17e5248e37e0bc17a2761d34c0fa3b1e5729c88adb2065bae6e/uuid_utils-0.14.1-cp39-abi3-win_arm64.whl", hash = "sha256:b04cb49b42afbc4ff8dbc60cf054930afc479d6f4dd7f1ec3bbe5dbfdde06b7a", size = 188132, upload-time = "2026-02-20T22:50:41.718Z" }, ] [[package]] @@ -4264,14 +4179,14 @@ wheels = [ [[package]] name = "werkzeug" -version = "3.1.5" +version = "3.1.6" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "markupsafe" }, ] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/70/1469ef1d3542ae7c2c7b72bd5e3a4e6ee69d7978fa8a3af05a38eca5becf/werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67", size = 864754, upload-time = "2026-01-08T17:49:23.247Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/f1/ee81806690a87dab5f5653c1f146c92bc066d7f4cebc603ef88eb9e13957/werkzeug-3.1.6.tar.gz", hash = "sha256:210c6bede5a420a913956b4791a7f4d6843a43b6fcee4dfa08a65e93007d0d25", size = 864736, upload-time = "2026-02-19T15:17:18.884Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025, upload-time = "2026-01-08T17:49:21.859Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/ec/d58832f89ede95652fd01f4f24236af7d32b70cab2196dfcc2d2fd13c5c2/werkzeug-3.1.6-py3-none-any.whl", hash = "sha256:7ddf3357bb9564e407607f988f683d72038551200c704012bb9a4c523d42f131", size = 225166, upload-time = "2026-02-19T15:17:17.475Z" }, ] [[package]] @@ -4386,32 +4301,34 @@ wheels = [ [[package]] name = "yarl" -version = "1.22.0" +version = "1.23.0" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } dependencies = [ { name = "idna" }, { name = "multidict" }, { name = "propcache" }, ] -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload-time = "2025-10-06T14:12:55.963Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = "sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/ff/46736024fee3429b80a165a732e38e5d5a238721e634ab41b040d49f8738/yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f", size = 142000, upload-time = "2025-10-06T14:09:44.631Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/9a/b312ed670df903145598914770eb12de1bac44599549b3360acc96878df8/yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2", size = 94338, upload-time = "2025-10-06T14:09:46.372Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/f5/0601483296f09c3c65e303d60c070a5c19fcdbc72daa061e96170785bc7d/yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74", size = 94909, upload-time = "2025-10-06T14:09:48.648Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/41/9a1fe0b73dbcefce72e46cf149b0e0a67612d60bfc90fb59c2b2efdfbd86/yarl-1.22.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df", size = 372940, upload-time = "2025-10-06T14:09:50.089Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/17/7a/795cb6dfee561961c30b800f0ed616b923a2ec6258b5def2a00bf8231334/yarl-1.22.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb", size = 345825, upload-time = "2025-10-06T14:09:52.142Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/93/a58f4d596d2be2ae7bab1a5846c4d270b894958845753b2c606d666744d3/yarl-1.22.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2", size = 386705, upload-time = "2025-10-06T14:09:54.128Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/92/682279d0e099d0e14d7fd2e176bd04f48de1484f56546a3e1313cd6c8e7c/yarl-1.22.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82", size = 396518, upload-time = "2025-10-06T14:09:55.762Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/db/0f/0d52c98b8a885aeda831224b78f3be7ec2e1aa4a62091f9f9188c3c65b56/yarl-1.22.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a", size = 377267, upload-time = "2025-10-06T14:09:57.958Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/42/d2685e35908cbeaa6532c1fc73e89e7f2efb5d8a7df3959ea8e37177c5a3/yarl-1.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124", size = 365797, upload-time = "2025-10-06T14:09:59.527Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/83/cf8c7bcc6355631762f7d8bdab920ad09b82efa6b722999dfb05afa6cfac/yarl-1.22.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa", size = 
365535, upload-time = "2025-10-06T14:10:01.139Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/25/e1/5302ff9b28f0c59cac913b91fe3f16c59a033887e57ce9ca5d41a3a94737/yarl-1.22.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7", size = 382324, upload-time = "2025-10-06T14:10:02.756Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bf/cd/4617eb60f032f19ae3a688dc990d8f0d89ee0ea378b61cac81ede3e52fae/yarl-1.22.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d", size = 383803, upload-time = "2025-10-06T14:10:04.552Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/65/afc6e62bb506a319ea67b694551dab4a7e6fb7bf604e9bd9f3e11d575fec/yarl-1.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520", size = 374220, upload-time = "2025-10-06T14:10:06.489Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/3d/68bf18d50dc674b942daec86a9ba922d3113d8399b0e52b9897530442da2/yarl-1.22.0-cp312-cp312-win32.whl", hash = "sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8", size = 81589, upload-time = "2025-10-06T14:10:09.254Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/9a/6ad1a9b37c2f72874f93e691b2e7ecb6137fb2b899983125db4204e47575/yarl-1.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c", size = 87213, upload-time = "2025-10-06T14:10:11.369Z" }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/c5/c21b562d1680a77634d748e30c653c3ca918beb35555cff24986fff54598/yarl-1.22.0-cp312-cp312-win_arm64.whl", hash = "sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74", size = 81330, upload-time = "2025-10-06T14:10:13.112Z" }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/8a/94615bc31022f711add374097ad4144d569e95ff3c38d39215d07ac153a0/yarl-1.23.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1932b6b8bba8d0160a9d1078aae5838a66039e8832d41d2992daa9a3a08f7860", size = 124737, upload-time = "2026-03-01T22:05:12.897Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/6f/c6554045d59d64052698add01226bc867b52fe4a12373415d7991fdca95d/yarl-1.23.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:411225bae281f114067578891bc75534cfb3d92a3b4dfef7a6ca78ba354e6069", size = 87029, upload-time = "2026-03-01T22:05:14.376Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/2a/725ecc166d53438bc88f76822ed4b1e3b10756e790bafd7b523fe97c322d/yarl-1.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13a563739ae600a631c36ce096615fe307f131344588b0bc0daec108cdb47b25", size = 86310, upload-time = "2026-03-01T22:05:15.71Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/99/30/58260ed98e6ff7f90ba84442c1ddd758c9170d70327394a6227b310cd60f/yarl-1.23.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cbf44c5cb4a7633d078788e1b56387e3d3cf2b8139a3be38040b22d6c3221c8", size = 97587, upload-time = "2026-03-01T22:05:17.384Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/0a/8b08aac08b50682e65759f7f8dde98ae8168f72487e7357a5d684c581ef9/yarl-1.23.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53ad387048f6f09a8969631e4de3f1bf70c50e93545d64af4f751b2498755072", size = 92528, upload-time = "2026-03-01T22:05:18.804Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/52/07/0b7179101fe5f8385ec6c6bb5d0cb9f76bd9fb4a769591ab6fb5cdbfc69a/yarl-1.23.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a59ba56f340334766f3a4442e0efd0af895fae9e2b204741ef885c446b3a1a8", size = 105339, upload-time = "2026-03-01T22:05:20.235Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/8a/36d82869ab5ec829ca8574dfcb92b51286fcfb1e9c7a73659616362dc880/yarl-1.23.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:803a3c3ce4acc62eaf01eaca1208dcf0783025ef27572c3336502b9c232005e7", size = 105061, upload-time = "2026-03-01T22:05:22.268Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/3e/868e5c3364b6cee19ff3e1a122194fa4ce51def02c61023970442162859e/yarl-1.23.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3d2bff8f37f8d0f96c7ec554d16945050d54462d6e95414babaa18bfafc7f51", size = 100132, upload-time = "2026-03-01T22:05:23.638Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/26/9c89acf82f08a52cb52d6d39454f8d18af15f9d386a23795389d1d423823/yarl-1.23.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c75eb09e8d55bceb4367e83496ff8ef2bc7ea6960efb38e978e8073ea59ecb67", size = 99289, upload-time = "2026-03-01T22:05:25.749Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/54/5b0db00d2cb056922356104468019c0a132e89c8d3ab67d8ede9f4483d2a/yarl-1.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877b0738624280e34c55680d6054a307aa94f7d52fa0e3034a9cc6e790871da7", size = 96950, upload-time = "2026-03-01T22:05:27.318Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/40/10fa93811fd439341fad7e0718a86aca0de9548023bbb403668d6555acab/yarl-1.23.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b5405bb8f0e783a988172993cfc627e4d9d00432d6bbac65a923041edacf997d", size = 93960, upload-time = 
"2026-03-01T22:05:28.738Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/d2/8ae2e6cd77d0805f4526e30ec43b6f9a3dfc542d401ac4990d178e4bf0cf/yarl-1.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c3a3598a832590c5a3ce56ab5576361b5688c12cb1d39429cf5dba30b510760", size = 104703, upload-time = "2026-03-01T22:05:30.438Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/0c/b3ceacf82c3fe21183ce35fa2acf5320af003d52bc1fcf5915077681142e/yarl-1.23.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8419ebd326430d1cbb7efb5292330a2cf39114e82df5cc3d83c9a0d5ebeaf2f2", size = 98325, upload-time = "2026-03-01T22:05:31.835Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/e0/12900edd28bdab91a69bd2554b85ad7b151f64e8b521fe16f9ad2f56477a/yarl-1.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:be61f6fff406ca40e3b1d84716fde398fc08bc63dd96d15f3a14230a0973ed86", size = 105067, upload-time = "2026-03-01T22:05:33.358Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/61/74bb1182cf79c9bbe4eb6b1f14a57a22d7a0be5e9cedf8e2d5c2086474c3/yarl-1.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ceb13c5c858d01321b5d9bb65e4cf37a92169ea470b70fec6f236b2c9dd7e34", size = 100285, upload-time = "2026-03-01T22:05:35.4Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/7f/cd5ef733f2550de6241bd8bd8c3febc78158b9d75f197d9c7baa113436af/yarl-1.23.0-cp312-cp312-win32.whl", hash = "sha256:fffc45637bcd6538de8b85f51e3df3223e4ad89bccbfca0481c08c7fc8b7ed7d", size = 82359, upload-time = "2026-03-01T22:05:36.811Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/be/25216a49daeeb7af2bec0db22d5e7df08ed1d7c9f65d78b14f3b74fd72fc/yarl-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:f69f57305656a4852f2a7203efc661d8c042e6cc67f7acd97d8667fb448a426e", size = 87674, upload-time = "2026-03-01T22:05:38.171Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d2/35/aeab955d6c425b227d5b7247eafb24f2653fedc32f95373a001af5dfeb9e/yarl-1.23.0-cp312-cp312-win_arm64.whl", hash = "sha256:6e87a6e8735b44816e7db0b2fbc9686932df473c826b0d9743148432e10bb9b9", size = 81879, upload-time = "2026-03-01T22:05:40.006Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, ] [[package]] diff --git a/redbear-mem-benchmark b/redbear-mem-benchmark index 8494e824..e853d99f 160000 --- a/redbear-mem-benchmark +++ b/redbear-mem-benchmark @@ -1 +1 @@ -Subproject commit 8494e82498cb99c70ac67a64a544ff872432363a +Subproject commit e853d99ff0d42ee81333db0fe0b6927536e4aa0e diff --git a/web/package.json b/web/package.json index 937be97c..0284f397 100644 --- a/web/package.json +++ b/web/package.json @@ -36,6 +36,7 @@ "codemirror": "^6.0.2", "copy-to-clipboard": "^3.3.3", "crypto-js": "^4.2.0", + "d3": "^7.9.0", "dayjs": "^1.11.18", "echarts": "^5.6.0", "echarts-for-react": "^3.0.2", @@ -43,7 +44,9 @@ "i18next": "^25.6.0", "js-yaml": "^4.1.1", "lexical": "^0.39.0", + "mammoth": "^1.12.0", "mermaid": "^11.12.1", + "pdfjs-dist": "4.10.38", "react": "^18.2.0", "react-dom": "^18.2.0", "react-i18next": "^15.0.0", @@ -58,6 +61,7 @@ "remark-gfm": "^4.0.1", "remark-math": "^6.0.0", "tailwindcss": "^4.1.14", + "xlsx": "^0.18.5", "zustand": "^5.0.8" }, "devDependencies": { @@ -67,6 +71,7 @@ "@tailwindcss/vite": "^4.1.14", "@types/codemirror": "^5.60.17", "@types/crypto-js": "^4.2.2", + "@types/d3": "^7.4.3", "@types/js-yaml": "^4.0.9", "@types/node": "^24.6.0", "@types/react": "^18.2.0", diff --git a/web/src/api/application.ts b/web/src/api/application.ts index 2e598363..935f786a 100644 --- a/web/src/api/application.ts +++ b/web/src/api/application.ts @@ -2,11 +2,11 @@ * @Author: 
ZhaoYing * @Date: 2026-02-03 13:59:45 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-03 12:08:42 + * @Last Modified time: 2026-03-19 20:42:23 */ import { request } from '@/utils/request' import type { ApplicationModalData } from '@/views/ApplicationManagement/types' -import type { Config } from '@/views/ApplicationConfig/types' +import type { Config, AppSharingForm } from '@/views/ApplicationConfig/types' import { handleSSE, type SSEMessage } from '@/utils/stream' import type { QueryParams } from '@/views/Conversation/types' import type { WorkflowConfig } from '@/views/Workflow/types' @@ -113,8 +113,8 @@ export const getShareToken = (share_token: string, user_id: string) => { return request.post(`/public/share/${share_token}/token`, { user_id }) } // Copy application -export const copyApplication = (app_id: string, new_name: string) => { - return request.post(`/apps/${app_id}/copy?new_name=${new_name}`) +export const copyApplication = (app_id: string, new_name?: string) => { + return request.post(`/apps/${app_id}/copy`, { new_name }) } // Data statistics export const getAppStatistics = (app_id: string, data: { start_date: number; end_date: number; }) => { @@ -139,4 +139,34 @@ export const getExperienceConfig = (share_token: string) => { // Get workspace API call statistics export const getWorkspaceApiStatistics = (data: { start_date: number; end_date: number; }) => { return request.get(`/apps/workspace/api-statistics`, data) -} \ No newline at end of file +} +// Export application +export const appExport = (app_id: string, appName: string, data?: { release_id: string }) => { + return request.getDownloadFile(`/apps/${app_id}/export`, `${appName}.yml`, data) +} +// Import application +export const appImport = (formData: FormData) => { + return request.uploadFile(`/apps/import`, formData) +} + +// Share application +export const appSharing = (app_id: string, data: AppSharingForm) => { + return request.post(`/apps/${app_id}/share`, data) +} +// Get my 
shared application records +export const mySharedOutList = () => { + return request.get(`/apps/my-shared-out`) +} +// Get sharing records for a specific application +export const getAppShares = (app_id: string) => { + return request.get(`/apps/${app_id}/shares`) +} +// Cancel a single share (source side operation) +export const cancelShare = (app_id: string, target_workspace_id?: string) => { + return request.delete(`/apps/${app_id}/share/${target_workspace_id}`) +} +// Cancel all shares under a workspace (source side operation) +export const cancelSpaceShare = (target_workspace_id?: string) => { + return request.delete(`/apps/share/${target_workspace_id}`) +} + diff --git a/web/src/api/memory.ts b/web/src/api/memory.ts index 2c840c9a..823e3d78 100644 --- a/web/src/api/memory.ts +++ b/web/src/api/memory.ts @@ -2,9 +2,10 @@ * @Author: ZhaoYing * @Date: 2026-02-03 14:00:06 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-04 10:58:41 + * @Last Modified time: 2026-03-13 10:48:41 */ import { request } from '@/utils/request' +import type { AxiosRequestConfig } from 'axios' import type { MemoryFormData, } from '@/views/MemoryManagement/types' @@ -94,8 +95,12 @@ export const updatedEndUserProfile = (values: EndUser) => { return request.post(`/memory-storage/updated_end_user/profile`, values) } // User Memory - Relationship network -export const getMemorySearchEdges = (end_user_id: string) => { - return request.get(`/memory-storage/analytics/graph_data`, { end_user_id }) +export const getMemorySearchEdges = (end_user_id: string, config?: AxiosRequestConfig) => { + return request.get(`/memory-storage/analytics/graph_data`, { end_user_id }, config) +} +// User Memory - Community graph +export const getMemoryCommunityGraph = (end_user_id: string, config?: AxiosRequestConfig) => { + return request.get(`/memory-storage/analytics/community_graph`, { end_user_id }, config) } // User Memory - User interest distribution export const getInterestDistributionByUser = 
(end_user_id: string) => { @@ -118,8 +123,9 @@ export const getChunkInsight = (end_user_id: string) => { return request.get(`/dashboard/chunk_insight`, { end_user_id }) } // RAG User Memory - Storage content -export const getRagContent = (end_user_id: string) => { - return request.get(`/dashboard/rag_content`, { end_user_id, limit: 20 }) +export const getRagContentUrl = '/dashboard/rag_content' +export const getRagContent = (end_user_id: string, page = 1, pagesize = 20) => { + return request.get(getRagContentUrl, { end_user_id, page, pagesize }) } // Emotion distribution analysis export const getWordCloud = (end_user_id: string) => { @@ -224,6 +230,10 @@ export const getConversationDetail = (end_user_id: string, conversation_id: stri export const forgetTrigger = (data: { max_merge_batch_size: number; min_days_since_access: number; end_user_id: string;}) => { return request.post(`/memory/forget-memory/trigger`, data) } +// RAG type - Refresh RAG user summary and memory insight +export const generateRagProfile = (end_user_id: string) => { + return request.post(`/dashboard/generate_rag_profile`, { end_user_id }) +} /*************** end User Memory APIs ******************************/ /****************** Memory Management APIs *******************************/ diff --git a/web/src/api/models.ts b/web/src/api/models.ts index 2d590287..7af412f8 100644 --- a/web/src/api/models.ts +++ b/web/src/api/models.ts @@ -41,12 +41,12 @@ export const deleteCompositeModel = (model_id: string) => { return request.delete(`/models/composite/${model_id}`) } // Create API keys for all matching models by provider -export const updateProviderApiKeys = (data: KeyConfigModalForm) => { - return request.post('/models/provider/apikeys', data) +export const updateProviderApiKeys = (data: KeyConfigModalForm, signal?: AbortSignal) => { + return request.post('/models/provider/apikeys', data, { signal }) } // Create model API key -export const addModelApiKey = (model_id: string, data: MultiKeyForm) => 
{ - return request.post(`/models/${model_id}/apikeys`, data) +export const addModelApiKey = (model_id: string, data: MultiKeyForm, signal?: AbortSignal) => { + return request.post(`/models/${model_id}/apikeys`, data, { signal }) } // Delete model API key export const deleteModelApiKey = (api_key_id: string) => { @@ -65,10 +65,10 @@ export const addModelPlaza = (model_base_id: string) => { return request.post(`/models/model_plaza/${model_base_id}/add`) } // Create custom model -export const addCustomModel = (data: CustomModelForm) => { - return request.post('/models', data) +export const addCustomModel = (data: CustomModelForm, signal?: AbortSignal) => { + return request.post('/models', data, { signal }) } // Update custom model -export const updateCustomModel = (model_base_id: string, data: CustomModelForm) => { - return request.put(`/models/${model_base_id}`, data) +export const updateCustomModel = (model_base_id: string, data: CustomModelForm, signal?: AbortSignal) => { + return request.put(`/models/${model_base_id}`, data, { signal }) } \ No newline at end of file diff --git a/web/src/api/tools.ts b/web/src/api/tools.ts index b14905f8..612f924d 100644 --- a/web/src/api/tools.ts +++ b/web/src/api/tools.ts @@ -1,17 +1,17 @@ import { request } from '@/utils/request' -import type { Query, CustomToolItem, ExecuteData, MCPToolItem, InnerToolItem } from '@/views/ToolManagement/types' +import type { Query, MarketQuery, CustomToolItem, ExecuteData, MCPToolItem, InnerToolItem } from '@/views/ToolManagement/types' // 工具列表 export const getTools = (data: Query) => { return request.get('/tools', data) } // 创建MCP工具 -export const addTool = (values: MCPToolItem | CustomToolItem) => { - return request.post('/tools', values) +export const addTool = (values: MCPToolItem | CustomToolItem, config?: { signal?: AbortSignal }) => { + return request.post('/tools', values, config) } // 更新工具 -export const updateTool = (tool_id: string, data: MCPToolItem | InnerToolItem | CustomToolItem) => 
{ - return request.put(`/tools/${tool_id}`, data) +export const updateTool = (tool_id: string, data: MCPToolItem | InnerToolItem | CustomToolItem, config?: { signal?: AbortSignal }) => { + return request.put(`/tools/${tool_id}`, data, config) } // 删除工具 export const deleteTool = (tool_id: string) => { @@ -33,4 +33,44 @@ export const getToolDetail = (tool_id: string) => { } export const getToolMethods = (tool_id: string) => { return request.get(`/tools/${tool_id}/methods`) +} + +// MCP市场列表 +export const getMarketTools = (data?: Record) => { + return request.get('/mcp_markets/mcp_markets', data) +} +// 市场配置创建 +export const createMarketConfig = (values: { + mcp_market_id: string; + token: string; + status: number; +}) => { + return request.post('/mcp_market_configs/mcp_market_config', values) +} +// 市场配置更新 +export const updateMarketConfig = (values: { + mcp_market_config_id: string; + token: string; + status: number; +}) => { + return request.put(`/mcp_market_configs/${values.mcp_market_config_id}`, values) +} +// 市场根据id获取配置 +export const getMarketConfig = (mcp_market_id: string) => { + return request.get(`/mcp_market_configs/mcp_market_id/${mcp_market_id}`) +} +// 市场MCP列表 +export const getMarketMCPs = (data: MarketQuery) => { + return request.get('/mcp_market_configs/mcp_servers', data) +} +// 根据配置ID serverId 获取MCP服务详情 +export const getMarketMCPDetail = (data:{ + mcp_market_config_id: string; + server_id: string; +}) => { + return request.get(`/mcp_market_configs/mcp_server`,data) +} +// 市场已激活MCP列表 +export const getMarketMCPsActivated = (data: MarketQuery) => { + return request.get('/mcp_market_configs/operational_mcp_servers', data) } \ No newline at end of file diff --git a/web/src/api/workspaces.ts b/web/src/api/workspaces.ts index 01f3be72..5c62489d 100644 --- a/web/src/api/workspaces.ts +++ b/web/src/api/workspaces.ts @@ -1,16 +1,16 @@ /* * @Author: ZhaoYing * @Date: 2026-02-03 14:00:26 - * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-02-03 14:00:26 
+ * @Last Modified by: ZhaoYing + * @Last Modified time: 2026-03-13 15:29:03 */ import { request } from '@/utils/request' import type { SpaceModalData } from '@/views/SpaceManagement/types' import type { SpaceConfigData } from '@/views/SpaceConfig/types' // Workspace list -export const getWorkspaces = () => { - return request.get('/workspaces') +export const getWorkspaces = (data?: { include_current?: boolean }) => { + return request.get('/workspaces', data) } // Create workspace export const createWorkspace = (values: SpaceModalData) => { diff --git a/web/src/assets/images/file/audio.svg b/web/src/assets/images/file/audio.svg new file mode 100644 index 00000000..0826c7f8 --- /dev/null +++ b/web/src/assets/images/file/audio.svg @@ -0,0 +1,11 @@ + + + 音乐 + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/csv.svg b/web/src/assets/images/file/csv.svg new file mode 100644 index 00000000..1b8fc721 --- /dev/null +++ b/web/src/assets/images/file/csv.svg @@ -0,0 +1,16 @@ + + + 编组 57 + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/excel.svg b/web/src/assets/images/file/excel.svg new file mode 100644 index 00000000..cd09cc8c --- /dev/null +++ b/web/src/assets/images/file/excel.svg @@ -0,0 +1,15 @@ + + + Excel + + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/html.svg b/web/src/assets/images/file/html.svg new file mode 100644 index 00000000..641f97a2 --- /dev/null +++ b/web/src/assets/images/file/html.svg @@ -0,0 +1,15 @@ + + + Word + + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/image.svg b/web/src/assets/images/file/image.svg new file mode 100644 index 00000000..f81baa50 --- /dev/null +++ b/web/src/assets/images/file/image.svg @@ -0,0 +1,15 @@ + + + 编组 58 + + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/json.svg b/web/src/assets/images/file/json.svg new file mode 100644 
index 00000000..4ced0745 --- /dev/null +++ b/web/src/assets/images/file/json.svg @@ -0,0 +1,12 @@ + + + JSON + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/md.svg b/web/src/assets/images/file/md.svg new file mode 100644 index 00000000..c2cb9619 --- /dev/null +++ b/web/src/assets/images/file/md.svg @@ -0,0 +1,17 @@ + + + PDF + + + + + + + + + MD + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/pdf.svg b/web/src/assets/images/file/pdf.svg new file mode 100644 index 00000000..10c3020b --- /dev/null +++ b/web/src/assets/images/file/pdf.svg @@ -0,0 +1,18 @@ + + + PDF + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/ppt.svg b/web/src/assets/images/file/ppt.svg new file mode 100644 index 00000000..eb3d4d8d --- /dev/null +++ b/web/src/assets/images/file/ppt.svg @@ -0,0 +1,12 @@ + + + file-ppt-2-fill + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/txt.svg b/web/src/assets/images/file/txt.svg new file mode 100644 index 00000000..141d2bfb --- /dev/null +++ b/web/src/assets/images/file/txt.svg @@ -0,0 +1,12 @@ + + + txt + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/video.svg b/web/src/assets/images/file/video.svg new file mode 100644 index 00000000..08c0b262 --- /dev/null +++ b/web/src/assets/images/file/video.svg @@ -0,0 +1,14 @@ + + + 编组 59 + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/assets/images/file/word.svg b/web/src/assets/images/file/word.svg new file mode 100644 index 00000000..dc37637d --- /dev/null +++ b/web/src/assets/images/file/word.svg @@ -0,0 +1,15 @@ + + + Word + + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/components/AudioRecorder/index.tsx b/web/src/components/AudioRecorder/index.tsx index d31746f6..639a9109 100644 --- a/web/src/components/AudioRecorder/index.tsx +++ b/web/src/components/AudioRecorder/index.tsx @@ 
-1,26 +1,48 @@ +/* + * @Author: ZhaoYing + * @Date: 2026-02-06 21:11:51 + * @Last Modified by: ZhaoYing + * @Last Modified time: 2026-03-17 18:39:09 + */ import { type FC, useRef, useState } from 'react' import RecordRTC from 'recordrtc' +import { App } from 'antd' +import { useTranslation } from 'react-i18next'; import { fileUploadUrlWithoutApiPrefix } from '@/api/fileStorage' import { request } from '@/utils/request' +/** Props for the AudioRecorder component */ interface AudioRecorderProps { + /** Callback fired when recording is complete, receives uploaded file info and raw blob */ onRecordingComplete?: (file: { file_id: string; file_key: string; url: string; type?: string; }, blob?: Blob) => void className?: string; + /** Upload endpoint URL, defaults to fileUploadUrlWithoutApiPrefix */ action?: string; + /** Additional config passed to the upload request */ requestConfig?: Record; + disabled?: boolean; + maxSize?: number; } const AudioRecorder: FC = ({ onRecordingComplete, className = '', action = fileUploadUrlWithoutApiPrefix, - requestConfig = {} + requestConfig = {}, + disabled = false, + maxSize, }) => { + const { message } = App.useApp() + const { t } = useTranslation(); + // Whether the recorder is currently capturing audio const [isRecording, setIsRecording] = useState(false) + // Holds the RecordRTC instance across renders const recorderRef = useRef(null) + /** Request microphone access and start recording */ const startRecording = async () => { + if (disabled) return try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }) recorderRef.current = new RecordRTC(stream, { @@ -34,11 +56,19 @@ const AudioRecorder: FC = ({ } } + /** Stop recording, upload the audio blob, then invoke the completion callback */ const stopRecording = () => { + if (disabled) return if (recorderRef.current) { recorderRef.current.stopRecording(() => { const blob = recorderRef.current!.getBlob() const url = recorderRef.current!.toURL() + + if (maxSize && 
blob.size > maxSize * 1024 * 1024) { + message.error(t('common.fileSizeTip', { size: maxSize })); + recorderRef.current?.destroy(); recorderRef.current = null // oversized recording is discarded, so release the recorder here just as the upload path does + return + } + const formData = new FormData() formData.append('file', blob, `recording_${Date.now()}.webm`) request @@ -49,6 +79,7 @@ const AudioRecorder: FC = ({ type: blob.type, url }, blob) + // Release recorder resources after upload recorderRef.current?.destroy() recorderRef.current = null }) @@ -57,12 +88,14 @@ const AudioRecorder: FC = ({ } } + // Toggle between recording/idle states on click; + // swap background image to reflect current state return (
diff --git a/web/src/components/ButtonCheckbox/index.tsx b/web/src/components/ButtonCheckbox/index.tsx index 5053ec7c..8c52701b 100644 --- a/web/src/components/ButtonCheckbox/index.tsx +++ b/web/src/components/ButtonCheckbox/index.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-02 15:01:59 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-02-11 10:53:27 + * @Last Modified time: 2026-03-19 20:45:13 */ /** @@ -32,6 +32,7 @@ interface ButtonCheckboxProps extends Omit { checkedIcon?: string; /** Button content */ children?: ReactNode + cicle?: boolean; } const ButtonCheckbox: FC = ({ @@ -41,6 +42,8 @@ const ButtonCheckbox: FC = ({ icon, checkedIcon, children, + cicle = false, + disabled, }) => { // Listen to value changes and trigger side effects via onValueChange callback useEffect(() => { @@ -57,18 +60,21 @@ const ButtonCheckbox: FC = ({ } return ( - {/* Display unchecked icon when not checked */} - {icon && !checked && } + {icon && !checked && } {/* Display checked icon when checked */} {checkedIcon && checked && } {children} diff --git a/web/src/components/Chat/ChatContent.tsx b/web/src/components/Chat/ChatContent.tsx index 1dfed604..2824381e 100644 --- a/web/src/components/Chat/ChatContent.tsx +++ b/web/src/components/Chat/ChatContent.tsx @@ -2,13 +2,19 @@ * @Author: ZhaoYing * @Date: 2025-12-10 16:46:17 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-02-25 19:04:55 + * @Last Modified time: 2026-03-19 20:45:39 */ -import { type FC, useRef, useEffect } from 'react' +import { type FC, useRef, useEffect, useState } from 'react' import clsx from 'clsx' import Markdown from '@/components/Markdown' import type { ChatContentProps } from './types' -import { Spin } from 'antd' +import { Spin, Divider, Space, Image, Flex } from 'antd' +import { SoundOutlined } from '@ant-design/icons' + + +const getFileUrl = (file: any) => { + return file.thumbUrl || file.url || (file.originFileObj ? 
URL.createObjectURL(file.originFileObj) : undefined) +} /** * Chat Content Display Component @@ -28,15 +34,33 @@ const ChatContent: FC = ({ // Scroll container reference for controlling auto-scroll to bottom const scrollContainerRef = useRef<(HTMLDivElement | null)>(null) const prevDataLengthRef = useRef(data.length); - const isScrolledToBottomRef = useRef(true); // Track if user is scrolled to bottom + const isScrolledToBottomRef = useRef(true); + const audioRef = useRef(null) + const [playingIndex, setPlayingIndex] = useState(null) + // Toggle playback of a message's audio clip; starting a clip pauses whichever one was playing before + const handlePlay = (index: number, audio_url: string) => { + if (playingIndex === index) { + audioRef.current?.pause() + setPlayingIndex(null) + return + } + if (audioRef.current) { + audioRef.current.pause() + } + const audio = new Audio(audio_url) + audioRef.current = audio + audio.play().catch(() => { if (audioRef.current === audio) setPlayingIndex(null) }) // play() rejects when playback is blocked or fails; clear the playing state unless a newer clip already took over + setPlayingIndex(index) + audio.onended = () => setPlayingIndex(null) + } // Track scroll position to determine if user is at bottom useEffect(() => { const handleScroll = () => { if (scrollContainerRef.current) { const { scrollTop, scrollHeight, clientHeight } = scrollContainerRef.current; - // Consider user is at bottom if within 20px of the bottom - isScrolledToBottomRef.current = scrollHeight - scrollTop - clientHeight < 20; + // Consider user is at bottom if within 100px of the bottom + isScrolledToBottomRef.current = scrollHeight - scrollTop - clientHeight < 100; } }; @@ -64,11 +88,16 @@ const ChatContent: FC = ({ // Auto-scroll if data length changed OR user is currently at bottom if (data.length !== prevDataLengthRef.current || isScrolledToBottomRef.current) { scrollContainerRef.current.scrollTop = scrollContainerRef.current.scrollHeight; + isScrolledToBottomRef.current = true; } prevDataLengthRef.current = data.length; } }, 0); }, [data]) + // Open the file's URL in a new tab + const handleDownload = (file: any) => { + const url = getFileUrl(file); if (url) window.open(url, '_blank', 'noopener') // nothing to open when no URL resolves; noopener withholds window.opener from the opened page + } return (
{data.length === 0 @@ -89,6 +118,44 @@ const ChatContent: FC = ({ {labelFormat(item)}
} + {item.meta_data?.files && item.meta_data?.files.length > 0 && + {item.meta_data?.files?.map((file) => { + if (file.type.includes('image')) { + return ( +
+ {file.name} +
+ ) + } + if (file.type.includes('video')) { + return ( +
+
+ ) + } + if (file.type.includes('audio')) { + return ( +
+
+ ) + } + return ( +
handleDownload(file)}> + {(file.type.includes('doc') || file.type.includes('docx') || file.type.includes('word') || file.type.includes('wordprocessingml.document')) &&
} + {(file.type.includes('pdf')) &&
} + {(file.type.includes('excel') || file.type.includes('spreadsheetml.sheet') || file.type.includes('csv')) &&
} +
+ ) + })} +
} {/* Message bubble */}
= ({ {item.subContent && renderRuntime && renderRuntime(item, index)} {/* Render message content using Markdown component */} + + {item.meta_data?.audio_url && <> + + + {playingIndex !== index + ? handlePlay(index, item.meta_data?.audio_url!)} /> + :
handlePlay(index, item.meta_data?.audio_url!)} + /> + } + + }
{/* Bottom label (such as timestamp, username, etc.) */} {labelPosition === 'bottom' && diff --git a/web/src/components/Chat/ChatInput.tsx b/web/src/components/Chat/ChatInput.tsx index 8d265578..dddcc162 100644 --- a/web/src/components/Chat/ChatInput.tsx +++ b/web/src/components/Chat/ChatInput.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2025-12-10 16:46:14 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-19 17:35:14 + * @Last Modified time: 2026-03-19 20:46:45 */ import { type FC, useEffect, useMemo, useState } from 'react' import { Flex, Input, Form } from 'antd' @@ -122,15 +122,20 @@ const ChatInput: FC = ({ gap={10} className="rb:w-45 rb:text-[12px] rb:group rb:relative rb:rounded-lg rb:bg-[#F0F3F8] rb:py-2! rb:px-2.5!" > - {(file.type.includes('doc') || file.type.includes('docx') || file.type.includes('word') || file.type.includes('wordprocessingml.document')) &&
} - {(file.type.includes('pdf')) &&
} - {(file.type.includes('excel') || file.type.includes('spreadsheetml.sheet') || file.type.includes('csv')) &&
} + {file.type.includes('pdf') + ?
+ : (file.type.includes('excel') || file.type.includes('spreadsheetml.sheet') || file.type.includes('csv')) + ?
+ : (file.type.includes('doc') || file.type.includes('docx') || file.type.includes('word') || file.type.includes('wordprocessingml.document')) + ?
+ : null + }
{file.name}
{file.type} · {file.size}
diff --git a/web/src/components/Chat/ChatToolbar.tsx b/web/src/components/Chat/ChatToolbar.tsx new file mode 100644 index 00000000..883ac98a --- /dev/null +++ b/web/src/components/Chat/ChatToolbar.tsx @@ -0,0 +1,204 @@ +/* + * @Author: ZhaoYing + * @Date: 2026-03-17 14:22:25 + * @Last Modified by: ZhaoYing + * @Last Modified time: 2026-03-18 15:55:13 + */ +// Toolbar component for chat input area, supporting file upload, audio recording, and variable configuration +import { useRef, forwardRef, useImperativeHandle, type ReactNode, useEffect } from 'react' +import { Flex, Dropdown, Divider, App, Form, type MenuProps } from 'antd' +import { SettingOutlined } from '@ant-design/icons' +import { useTranslation } from 'react-i18next' +import clsx from 'clsx' + +import AudioRecorder from '@/components/AudioRecorder' +import UploadFiles from '@/views/Conversation/components/FileUpload' +import UploadFileListModal from '@/views/Conversation/components/UploadFileListModal' +import VariableConfigModal from '@/views/Workflow/components/Chat/VariableConfigModal' +import type { FeaturesConfigForm } from '@/views/ApplicationConfig/types' +import type { UploadFileListModalRef } from '@/views/Conversation/types' +import type { VariableConfigModalRef } from '@/views/Workflow/types' +import type { Variable } from '@/views/Workflow/components/Properties/VariableList/types' + +// Exposed methods via ref for parent components to access/set form state +export interface ChatToolbarRef { + getFiles: () => any[] + getVariables: () => Variable[] + setFiles: (files: any[]) => void + setVariables: (variables: Variable[]) => void +} + +// Props for configuring toolbar features, upload settings, and event callbacks +export interface ChatToolbarProps { + features: FeaturesConfigForm + extra?: ReactNode + uploadAction?: string + uploadRequestConfig?: { + data?: Record + headers?: Record + } + onFilesChange?: (files: any[]) => void + onVariablesChange?: (variables: Variable[]) => void + 
onRecordingComplete?: (file: any) => void; + defaultValue?: { memory: boolean } +} + +interface FormValues { + files: any[] + variables: Variable[]; + memory?: boolean; +} + +const ChatToolbar = forwardRef(({ + features, + extra, + uploadAction, + uploadRequestConfig, + onFilesChange, + onVariablesChange, + onRecordingComplete, + defaultValue, +}, ref) => { + const { t } = useTranslation() + const { message: messageApi } = App.useApp() + const uploadFileListModalRef = useRef(null) + const variableConfigModalRef = useRef(null) + const [form] = Form.useForm() + const queryValues = Form.useWatch([], form) + + useEffect(() => { + if (!defaultValue) return + form.setFieldsValue(defaultValue) + }, [defaultValue]) + + useImperativeHandle(ref, () => ({ + getFiles: () => form.getFieldValue('files') || [], + getVariables: () => form.getFieldValue('variables') || [], + setFiles: (files) => form.setFieldValue('files', files), + setVariables: (variables) => { + // Write the new variable values into the form store + form.setFieldValue('variables', variables) + }, + })) + + const { file_upload } = features || {} + + // Append a newly uploaded file once its upload is done. Reads the live form store via getFieldValue (as getFiles does) instead of the render-time useWatch snapshot, so completions that land before the next render do not overwrite each other + const fileChange = (file?: any) => { + if (file?.status !== 'done') return + const files = [...(form.getFieldValue('files') || []), file] + form.setFieldValue('files', files) + onFilesChange?.(files) + } + + // Append recorded audio file to the file list (read from the live form store) and notify parent + const handleRecordingComplete = (file: any) => { + const files = [...(form.getFieldValue('files') || []), file] + form.setFieldValue('files', files) + onFilesChange?.(files) + onRecordingComplete?.(file) + } + + // Merge a batch of files (e.g.
from remote URL modal) into the file list + const addFileList = (list?: any[]) => { + if (!list?.length) return + const files = [...(form.getFieldValue('files') || []), ...list] + form.setFieldValue('files', files) + onFilesChange?.(files) + } + + // Persist variable values from the config modal and notify parent + const handleVariablesSave = (values: Variable[]) => { + form.setFieldValue('variables', values) + onVariablesChange?.(values) + } + + // True when any required variable is missing a value, used to highlight the config button + const isNeedVariableConfig = queryValues?.variables?.some( + vo => vo.required && (vo.value === null || vo.value === undefined || vo.value === '') + ) + + // Build dropdown menu items based on allowed transfer methods + const fileMenus: MenuProps['items'] = [] + const enabledTypes = ['image', 'document', 'video', 'audio'].filter( + type => file_upload?.[`${type}_enabled` as keyof FeaturesConfigForm['file_upload']] + ) + if (file_upload?.allowed_transfer_methods?.includes('remote_url') && enabledTypes.length > 0) { + fileMenus.push({ + key: 'url', + label: t('memoryConversation.addRemoteFile'), + onClick: () => { + if ((queryValues?.files?.length || 0) >= file_upload.max_file_count) { + messageApi.warning(t('common.fileNumTip', { num: file_upload.max_file_count })) + return + } + uploadFileListModalRef.current?.handleOpen() + } + }) + } + if (file_upload?.allowed_transfer_methods?.includes('local_file') && enabledTypes.length > 0) { + fileMenus.push({ + key: 'upload', + label: ( + = file_upload.max_file_count} + /> + ) + }) + } + + return ( +
+ + +