Merge branch 'develop' into feature/multimodel_memory

# Conflicts: # api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/embedding_generation.py # api/app/repositories/neo4j/add_nodes.py # api/app/repositories/neo4j/cypher_queries.py # api/app/repositories/neo4j/graph_saver.py # api/app/services/memory_agent_service.py # api/app/services/multimodal_service.py
2026-03-24 14:15:18 +08:00
parent 6bba574ca6 5987eee0a8
commit 89d188fbf3
61 changed files with 1707 additions and 694 deletions
--- a/api/app/controllers/init.py
+++ b/api/app/controllers/init.py
@@ -8,6 +8,7 @@ from fastapi import APIRouter
 from . import (
    api_key_controller,
    app_controller,
+    app_log_controller,
    auth_controller,
    chunk_controller,
    document_controller,
@@ -70,6 +71,7 @@ manager_router.include_router(chunk_controller.router)
 manager_router.include_router(test_controller.router)
 manager_router.include_router(knowledgeshare_controller.router)
 manager_router.include_router(app_controller.router)
+manager_router.include_router(app_log_controller.router)
 manager_router.include_router(upload_controller.router)
 manager_router.include_router(memory_agent_controller.router)
 manager_router.include_router(memory_dashboard_controller.router)
--- a/api/app/controllers/app_controller.py
+++ b/api/app/controllers/app_controller.py
@@ -57,6 +57,7 @@ def list_apps(
        page: int = 1,
        pagesize: int = 10,
        ids: Optional[str] = None,
+        api_key: Optional[str] = None,
        db: Session = Depends(get_db),
        current_user=Depends(get_current_user),
 ):
@@ -65,10 +66,25 @@ def list_apps(
    - 默认包含本工作空间的应用和分享给本工作空间的应用
    - 设置 include_shared=false 可以只查看本工作空间的应用
    - 当提供 ids 参数时，按逗号分割获取指定应用，不分页
+    - 当提供 api_key 参数时，查找该 API Key 关联的应用
    """
+    from sqlalchemy import select as sa_select
+    from app.models.api_key_model import ApiKey
+
    workspace_id = current_user.current_workspace_id
    service = app_service.AppService(db)

+    # 通过 API Key 搜索：精确匹配，将 resource_id 注入 ids 走统一分页流程
+    if api_key:
+        matched_id = db.execute(
+            sa_select(ApiKey.resource_id).where(
+                ApiKey.workspace_id == workspace_id,
+                ApiKey.api_key == api_key,
+                ApiKey.resource_id.isnot(None),
+            )
+        ).scalar_one_or_none()
+        ids = str(matched_id) if matched_id else ""
+
    # 当 ids 存在且不为 None 时，根据 ids 获取应用
    if ids is not None:
        app_ids = [app_id.strip() for app_id in ids.split(',') if app_id.strip()]
--- a/api/app/controllers/app_log_controller.py
+++ b/api/app/controllers/app_log_controller.py
@@ -0,0 +1,129 @@
+"""应用日志（消息记录）接口"""
+import uuid
+from typing import Optional
+
+from fastapi import APIRouter, Depends, Query
+from sqlalchemy import select, desc, func
+from sqlalchemy.orm import Session
+
+from app.core.logging_config import get_business_logger
+from app.core.response_utils import success
+from app.db import get_db
+from app.dependencies import get_current_user, cur_workspace_access_guard
+from app.models.conversation_model import Conversation, Message
+from app.schemas.app_log_schema import AppLogConversation, AppLogConversationDetail, AppLogMessage
+from app.schemas.response_schema import PageData, PageMeta
+from app.services.app_service import AppService
+
+router = APIRouter(prefix="/apps", tags=["App Logs"])
+logger = get_business_logger()
+
+
+@router.get("/{app_id}/logs", summary="应用日志 - 会话列表")
+@cur_workspace_access_guard()
+def list_app_logs(
+        app_id: uuid.UUID,
+        page: int = Query(1, ge=1),
+        pagesize: int = Query(20, ge=1, le=100),
+        user_id: Optional[str] = None,
+        is_draft: Optional[bool] = None,
+        db: Session = Depends(get_db),
+        current_user=Depends(get_current_user),
+):
+    """List the conversations recorded for an application (paginated).
+
+    - Can be filtered by user_id.
+    - Can be filtered by is_draft (draft conversations / published conversations).
+    - Sorted by last update time, newest first.
+
+    Only active conversations that belong to the caller's current workspace are
+    selected. The response data is a PageData holding the PageMeta and the items.
+    """
+    workspace_id = current_user.current_workspace_id
+
+    # Application access check: get_app is called only for its validation effect
+    # (the return value is discarded) - presumably it raises when the app is not
+    # accessible from this workspace; confirm against AppService.
+    service = AppService(db)
+    service.get_app(app_id, workspace_id)
+
+    stmt = select(Conversation).where(
+        Conversation.app_id == app_id,
+        Conversation.workspace_id == workspace_id,
+        Conversation.is_active.is_(True),
+    )
+
+    # Truthiness test: an empty user_id string applies no filter
+    if user_id:
+        stmt = stmt.where(Conversation.user_id == user_id)
+
+    # Explicit None test so that is_draft=False still filters
+    if is_draft is not None:
+        stmt = stmt.where(Conversation.is_draft == is_draft)
+
+    # Count the filtered rows before ordering and pagination are applied
+    total = int(db.execute(
+        select(func.count()).select_from(stmt.subquery())
+    ).scalar_one())
+
+    stmt = stmt.order_by(desc(Conversation.updated_at))
+    stmt = stmt.offset((page - 1) * pagesize).limit(pagesize)
+
+    conversations = list(db.scalars(stmt).all())
+
+    items = [AppLogConversation.model_validate(c) for c in conversations]
+    # hasnext is true while rows remain beyond the end of the current page
+    meta = PageMeta(page=page, pagesize=pagesize, total=total, hasnext=(page * pagesize) < total)
+
+    logger.info(
+        "查询应用日志会话列表",
+        extra={"app_id": str(app_id), "total": total, "page": page}
+    )
+
+    return success(data=PageData(page=meta, items=items))
+
+
+@router.get("/{app_id}/logs/{conversation_id}", summary="应用日志 - 会话消息详情")
+@cur_workspace_access_guard()
+def get_app_log_detail(
+        app_id: uuid.UUID,
+        conversation_id: uuid.UUID,
+        db: Session = Depends(get_db),
+        current_user=Depends(get_current_user),
+):
+    """Return the full message history of one conversation.
+
+    - Returns the conversation's basic information plus all of its messages,
+      ordered by creation time (oldest first).
+    - Per the original author's note, message meta_data carries the model name,
+      token usage, etc.; that content is not visible in this function.
+
+    Raises ResourceNotFoundException when no active conversation with this id
+    exists for the given application and the caller's current workspace.
+    """
+    workspace_id = current_user.current_workspace_id
+
+    # Application access check: get_app is called only for its validation effect
+    # (the return value is discarded) - presumably it raises when the app is not
+    # accessible from this workspace; confirm against AppService.
+    service = AppService(db)
+    service.get_app(app_id, workspace_id)
+
+    # Look up the conversation, making sure it belongs to this app and workspace
+    conversation = db.scalars(
+        select(Conversation).where(
+            Conversation.id == conversation_id,
+            Conversation.app_id == app_id,
+            Conversation.workspace_id == workspace_id,
+            Conversation.is_active.is_(True),
+        )
+    ).first()
+
+    if not conversation:
+        from app.core.exceptions import ResourceNotFoundException
+        raise ResourceNotFoundException("会话", str(conversation_id))
+
+    # Load the messages in chronological order
+    messages = list(db.scalars(
+        select(Message)
+        .where(Message.conversation_id == conversation_id)
+        .order_by(Message.created_at)
+    ).all())
+
+    # Build the detail from the conversation row, then attach the validated messages
+    detail = AppLogConversationDetail.model_validate(conversation)
+    detail.messages = [AppLogMessage.model_validate(m) for m in messages]
+
+    logger.info(
+        "查询应用日志会话详情",
+        extra={
+            "app_id": str(app_id),
+            "conversation_id": str(conversation_id),
+            "message_count": len(messages)
+        }
+    )
+
+    return success(data=detail)
--- a/api/app/controllers/file_storage_controller.py
+++ b/api/app/controllers/file_storage_controller.py
@@ -14,6 +14,9 @@ Routes:
 import os
 import uuid
 from typing import Any
+import httpx
+import mimetypes
+from urllib.parse import urlparse, unquote

 from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status
 from fastapi.responses import FileResponse, RedirectResponse
@@ -91,7 +94,7 @@ async def upload_file(

    if file_size > settings.MAX_FILE_SIZE:
        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
+            status_code=status.HTTP_413_CONTENT_TOO_LARGE,
            detail=f"The file size exceeds the {settings.MAX_FILE_SIZE} byte limit"
        )

@@ -172,7 +175,6 @@ async def upload_file_with_share_token(
    
    # Get share and release info from share_token
    service = ReleaseShareService(db)
-    share_info = service.get_shared_release_info(share_token=share_data.share_token)
    
    # Get share object to access app_id
    share = service.repo.get_by_share_token(share_data.share_token)
@@ -291,6 +293,101 @@ async def upload_file_with_share_token(
    )


+@router.get("/files/info-by-url", response_model=ApiResponse)
+async def get_file_info_by_url(
+        url: str,
+):
+    """
+    Get file information by network URL (no authentication required).
+
+    Sends an HTTP HEAD request to the URL and, when that does not answer 200,
+    falls back to a GET request (some servers don't support HEAD). Only the
+    response headers are inspected: the GET response is streamed and closed
+    without downloading the body.
+
+    Args:
+        url: The network URL of the file. Must be an absolute http(s) URL.
+
+    Returns:
+        ApiResponse whose data holds url, file_name, file_ext, file_size and
+        content_type. file_size is None when the server sends no usable
+        Content-Length header.
+
+    Raises:
+        HTTPException: 400 when the URL is not an absolute http(s) URL or the
+            remote server does not answer with HTTP 200; 500 on any other
+            failure (network error, timeout, ...).
+    """
+    # Function-scope import: stdlib header parser used for Content-Disposition
+    from email.message import Message
+
+    api_logger.info(f"File info by URL request: url={url}")
+
+    # Reject anything that is not a plain http(s) URL before issuing a request.
+    # NOTE(review): this endpoint is unauthenticated and fetches caller-supplied
+    # URLs, so it can still be pointed at internal hosts (SSRF). Consider an
+    # allow-list or blocking private address ranges.
+    try:
+        parsed_url = urlparse(url)
+    except ValueError:
+        # urlparse raises ValueError on malformed input such as a broken IPv6 host
+        parsed_url = None
+    if parsed_url is None or parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Only absolute http(s) URLs are supported"
+        )
+
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            # Try HEAD request first
+            response = await client.head(url, follow_redirects=True)
+            status_code, headers = response.status_code, response.headers
+
+            # If HEAD fails, try GET request (some servers don't support HEAD)
+            if status_code != 200:
+                api_logger.info(f"HEAD request failed with {status_code}, trying GET request")
+                # Stream the GET so only the headers are read; leaving the
+                # context closes the connection without fetching the body.
+                async with client.stream("GET", url, follow_redirects=True) as get_response:
+                    status_code, headers = get_response.status_code, get_response.headers
+
+                if status_code != 200:
+                    api_logger.error(f"Failed to fetch file info: HTTP {status_code}")
+                    raise HTTPException(
+                        status_code=status.HTTP_400_BAD_REQUEST,
+                        detail=f"Unable to access file: HTTP {status_code}"
+                    )
+
+        # File size comes from Content-Length only; a missing, malformed or
+        # negative value is reported as unknown (None) instead of failing.
+        raw_length = headers.get("Content-Length")
+        try:
+            file_size = int(raw_length) if raw_length else None
+        except ValueError:
+            file_size = None
+        if file_size is not None and file_size < 0:
+            file_size = None
+
+        # Get content type from Content-Type header
+        content_type = headers.get("Content-Type", "application/octet-stream")
+        # Remove charset and other parameters from content type
+        content_type = content_type.split(';')[0].strip()
+
+        # Extract filename from Content-Disposition or URL
+        file_name = None
+        content_disposition = headers.get("Content-Disposition")
+        if content_disposition:
+            # The header parser copes with quoting, trailing parameters and
+            # RFC 2231 "filename*=" values, which a plain split() does not.
+            disposition = Message()
+            disposition["Content-Disposition"] = content_disposition
+            declared_name = disposition.get_filename() or ""
+            # Keep only the last path component of a server-supplied name
+            file_name = os.path.basename(declared_name.replace("\\", "/")) or None
+
+        if not file_name:
+            file_name = unquote(os.path.basename(parsed_url.path)) or "unknown"
+
+        # Extract file extension from filename
+        _, file_ext = os.path.splitext(file_name)
+
+        # If no extension found, infer from content type
+        if not file_ext:
+            ext = mimetypes.guess_extension(content_type)
+            if ext:
+                file_ext = ext
+                file_name = f"{file_name}{file_ext}"
+
+        api_logger.info(f"File info retrieved: name={file_name}, size={file_size}, type={content_type}")
+
+        return success(
+            data={
+                "url": url,
+                "file_name": file_name,
+                "file_ext": file_ext.lower() if file_ext else "",
+                "file_size": file_size,
+                "content_type": content_type,
+            },
+            msg="File information retrieved successfully"
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        api_logger.error(f"Unexpected error: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to retrieve file information: {str(e)}"
+        ) from e
+
+
@router.get("/files/{file_id}", response_model=Any)
 async def download_file(
    request: Request,
@@ -499,6 +596,51 @@ async def get_file_url(
        )


+@router.get("/files/{file_id}/public-url", response_model=ApiResponse)
+async def get_permanent_file_url(
+    file_id: uuid.UUID,
+    db: Session = Depends(get_db),
+    storage_service: FileStorageService = Depends(get_file_storage_service),
+):
+    """
+    Get a permanent public URL for a file (no expiry time).
+
+    - Local storage: returns a permanent API address built from the
+      FILE_LOCAL_SERVER_URL setting.
+    - Remote storage (OSS/S3): returns whatever storage.get_permanent_url yields;
+      per the original author's note this is the bucket's public-read address
+      and requires the bucket to allow public reads (not verifiable here).
+
+    Raises HTTPException 404 when the file is unknown, 400 when its status is
+    not "completed", 501 when the storage backend returns no URL, and 500 on
+    any other error.
+    """
+    file_metadata = db.query(FileMetadata).filter(FileMetadata.id == file_id).first()
+    if not file_metadata:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="The file does not exist")
+
+    if file_metadata.status != "completed":
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST,
+                            detail=f"File upload not completed, status: {file_metadata.status}")
+
+    file_key = file_metadata.file_key
+    storage = storage_service.storage
+
+    try:
+        if isinstance(storage, LocalStorage):
+            # Local files are addressed by file_id through this API, not by file_key
+            url = f"{settings.FILE_LOCAL_SERVER_URL}/storage/permanent/{file_id}"
+        else:
+            url = await storage.get_permanent_url(file_key)
+            # A falsy result is treated as "backend cannot produce a permanent URL"
+            if not url:
+                raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED,
+                                    detail="Permanent URL not supported for current storage backend")
+
+        api_logger.info(f"Generated permanent URL: file_id={file_id}")
+        return success(
+            data={"url": url, "expires_in": None, "permanent": True, "file_name": file_metadata.file_name},
+            msg="Permanent file URL generated successfully"
+        )
+    except HTTPException:
+        # Let the 501 raised above pass through instead of becoming a 500
+        raise
+    except Exception as e:
+        api_logger.error(f"Failed to generate permanent URL: {e}")
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                            detail=f"Failed to generate permanent URL: {str(e)}")
+
+
@router.get("/public/{file_id}", response_model=Any)
 async def public_download_file(
    request: Request,
@@ -653,3 +795,44 @@ async def permanent_download_file(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Failed to retrieve file: {str(e)}"
            )
+
+
+@router.get("/files/{file_id}/status", response_model=ApiResponse)
+async def get_file_status(
+    file_id: uuid.UUID,
+    db: Session = Depends(get_db),
+):
+    """
+    Get file upload/processing status (no authentication required).
+    
+    This endpoint is used to check if a file (e.g., TTS audio) is ready.
+    The status is returned exactly as stored on the FileMetadata row
+    (presumably pending, completed, or failed - confirm against the model).
+    
+    Args:
+        file_id: The UUID of the file.
+        db: Database session.
+    
+    Returns:
+        ApiResponse whose data holds file_id, status, file_name, file_size
+        and content_type.
+
+    Raises:
+        HTTPException: 404 when no FileMetadata row matches file_id.
+    """
+    api_logger.info(f"File status request: file_id={file_id}")
+    
+    # Query file metadata from database
+    file_metadata = db.query(FileMetadata).filter(FileMetadata.id == file_id).first()
+    if not file_metadata:
+        api_logger.warning(f"File not found in database: file_id={file_id}")
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="The file does not exist"
+        )
+    
+    return success(
+        data={
+            "file_id": str(file_id),
+            "status": file_metadata.status,
+            "file_name": file_metadata.file_name,
+            "file_size": file_metadata.file_size,
+            "content_type": file_metadata.content_type,
+        },
+        msg="File status retrieved successfully"
+    )
--- a/api/app/controllers/memory_dashboard_controller.py
+++ b/api/app/controllers/memory_dashboard_controller.py
@@ -195,10 +195,9 @@ async def get_workspace_end_users(
        api_logger.warning(f"Redis 缓存写入失败: {str(e)}")

    # 触发社区聚类补全任务（异步，不阻塞接口响应）
-    # 对有 ExtractedEntity 但无 Community 节点的存量用户自动补跑全量聚类
    try:
        from app.tasks import init_community_clustering_for_users
-        init_community_clustering_for_users.delay(end_user_ids=end_user_ids)
+        init_community_clustering_for_users.delay(end_user_ids=end_user_ids, workspace_id=str(workspace_id))
        api_logger.info(f"已触发社区聚类补全任务，候选用户数: {len(end_user_ids)}")
    except Exception as e:
        api_logger.warning(f"触发社区聚类补全任务失败（不影响主流程）: {str(e)}")
--- a/api/app/controllers/memory_working_controller.py
+++ b/api/app/controllers/memory_working_controller.py
@@ -33,35 +33,47 @@ def get_memory_count(
@router.get("/{end_user_id}/conversations", response_model=ApiResponse)
 def get_conversations(
        end_user_id: uuid.UUID,
+        page: int = 1,
+        pagesize: int = 20,
        current_user: User = Depends(get_current_user),
        db: Session = Depends(get_db)
 ):
    """
-    Retrieve all conversations for the current user in a specific group.
+    Retrieve conversations for the current user in a specific group with pagination.

    Args:
        end_user_id (UUID): The group identifier.
+        page (int): Page number (1-based). Defaults to 1; values below 1 are treated as 1.
+        pagesize (int): Number of items per page. Defaults to 20; clamped to the range 1-100.
        current_user (User, optional): The authenticated user.
        db (Session, optional): SQLAlchemy session.

    Returns:
-        ApiResponse: Contains a list of conversation IDs.
-
-    Notes:
-        - Initializes the ConversationService with the current DB session.
-        - Returns only conversation IDs for lightweight response.
-        - Logs can be added to trace requests in production.
+        ApiResponse: data holds "items" (id and title of each conversation), "total",
+        and a "page" block (page, pagesize, total, hasnext).
    """
+    page = max(1, page)
+    page_size = max(1, min(pagesize, 100))  # Limit page size between 1 and 100
    conversation_service = ConversationService(db)
-    conversations = conversation_service.get_user_conversations(
-        end_user_id
+    conversations, total = conversation_service.get_user_conversations(
+        end_user_id,
+        page=page,
+        page_size=page_size
    )
-    return success(data=[
-        {
-            "id": conversation.id,
-            "title": conversation.title
-        } for conversation in conversations
-    ], msg="get conversations success")
+    return success(data={
+        "items": [
+            {
+                "id": conversation.id,
+                "title": conversation.title
+            } for conversation in conversations
+        ],
+        "total": total,
+        "page": {
+            "page": page,
+            "pagesize": page_size,
+            "total": total,
+            "hasnext": (page * page_size) < total
+        },
+    }, msg="get conversations success")


@router.get("/{end_user_id}/messages", response_model=ApiResponse)
--- a/api/app/controllers/tool_controller.py
+++ b/api/app/controllers/tool_controller.py
@@ -76,6 +76,8 @@ async def get_tool_methods(
        if methods is None:
            raise HTTPException(status_code=404, detail="工具不存在")
        return success(data=methods, msg="获取工具方法成功")
+    except HTTPException:
+        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@@ -121,6 +123,8 @@ async def create_tool(
        raise HTTPException(status_code=400, detail=e.message)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
+    except HTTPException:
+        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@@ -149,6 +153,8 @@ async def update_tool(
        return success(msg="工具更新成功")
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
+    except HTTPException:
+        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@@ -191,6 +197,8 @@ async def set_tool_active(
        return success(msg=f"工具已{action}")
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
+    except HTTPException:
+        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@@ -223,6 +231,8 @@ async def execute_tool(
            },
            msg="工具执行完成"
        )
+    except HTTPException:
+        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))