From 70568657262b892fa22617f30b5f09ad17852eb2 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 19 Mar 2026 17:25:44 +0800 Subject: [PATCH 01/43] fix(agetn features): 1. Historical multimodal message writing is incorporated into the conversation context; 2. Resolve the issues where csv, json, and txt files cannot be recognized due to encoding problems; 3. File quantity limit; 4. Error details --- .../controllers/file_storage_controller.py | 2 +- api/app/core/config.py | 1 + api/app/main.py | 5 +- api/app/schemas/app_schema.py | 12 ++- api/app/services/app_chat_service.py | 11 +-- api/app/services/conversation_service.py | 2 +- api/app/services/draft_run_service.py | 12 +-- api/app/services/multimodal_service.py | 84 ++++++++++++++++--- api/app/services/shared_chat_service.py | 4 +- 9 files changed, 99 insertions(+), 34 deletions(-) diff --git a/api/app/controllers/file_storage_controller.py b/api/app/controllers/file_storage_controller.py index ff284f39..cb6dd1be 100644 --- a/api/app/controllers/file_storage_controller.py +++ b/api/app/controllers/file_storage_controller.py @@ -91,7 +91,7 @@ async def upload_file( if file_size > settings.MAX_FILE_SIZE: raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, + status_code=status.HTTP_413_CONTENT_TOO_LARGE, detail=f"The file size exceeds the {settings.MAX_FILE_SIZE} byte limit" ) diff --git a/api/app/core/config.py b/api/app/core/config.py index cdaa13cc..4a944557 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -97,6 +97,7 @@ class Settings: # File Upload MAX_FILE_SIZE: int = int(os.getenv("MAX_FILE_SIZE", "52428800")) + MAX_FILE_COUNT: int = int(os.getenv("MAX_FILE_COUNT", "20")) FILE_PATH: str = os.getenv("FILE_PATH", "/files") FILE_URL_EXPIRES: int = int(os.getenv("FILE_URL_EXPIRES", "3600")) diff --git a/api/app/main.py b/api/app/main.py index c6256e3c..f4c23ca8 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -506,10 +506,13 @@ async def http_exception_handler(request: Request, exc: HTTPException): 404: "errors.common.not_found", 405: "errors.common.method_not_allowed", 409: "errors.common.conflict", + 413: "errors.common.payload_too_large", 422: "errors.common.validation_failed", 429: "errors.common.too_many_requests", 500: "errors.common.internal_error", + 502: "errors.common.bad_gateway", 503: "errors.common.service_unavailable", + 504: "errors.common.gateway_timeout", } # 如果有对应的翻译键,使用翻译 @@ -534,7 +537,7 @@ async def http_exception_handler(request: Request, exc: HTTPException): return JSONResponse( status_code=exc.status_code, - content=fail(code=exc.status_code, msg=translated_message, error=translated_message) + content=fail(code=exc.status_code, msg=translated_message, error=exc.detail) ) diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index 5238b978..8ca43a76 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -151,7 +151,7 @@ class FileUploadConfig(BaseModel): document_enabled: bool = Field(default=False) document_max_size_mb: int = Field(default=100) document_allowed_extensions: List[str] = Field( - default=["pdf", "docx", "xlsx", "txt", "csv", "json", "md"] + default=["pdf", "docx", "doc", "xlsx", "xls", "txt", "csv", "json", "md"] ) # 视频文件:MP4/MOV/AVI/WebM,最大 500MB video_enabled: bool = Field(default=False) @@ -160,7 +160,15 @@ class FileUploadConfig(BaseModel): default=["mp4", "mov"] ) # 最大文件数量 - max_file_count: int = Field(default=5, ge=1, le=20) + max_file_count: int = Field(default=5, ge=1) + + @field_validator("max_file_count") + @classmethod + def validate_max_file_count(cls, v: int) -> int: + from app.core.config import settings + if v > settings.MAX_FILE_COUNT: + raise ValueError(f"max_file_count 不能超过 {settings.MAX_FILE_COUNT}") + return v class OpeningStatementConfig(BaseModel): diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index 604514b4..b0d90643 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -124,7 +124,7 @@ class AppChatService: limit=10 ) history = [ - {"role": msg.role, "content": msg.content} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} for msg in messages ] @@ -188,12 +188,7 @@ class AppChatService: "audio_url": None } if files: - for f in files: - # url = await MultimodalService(self.db).get_file_url(f) - human_meta["files"].append({ - "type": f.type, - "url": f.url - }) + human_meta["files"].extend(processed_files) # 保存消息 if audio_url: @@ -322,7 +317,7 @@ class AppChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": msg.content} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} for msg in messages ] diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index aff5f533..b2166032 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -291,7 +291,7 @@ class ConversationService: history = [ { "role": msg.role, - "content": msg.content + "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", []) } for msg in messages ] diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index ba41d323..10ec25d9 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -658,7 +658,7 @@ class AgentRunService: "total_tokens": 0 }) }, - files=files, + files=processed_files, audio_url=audio_url ) @@ -820,6 +820,7 @@ class AgentRunService: conversation_id=conversation_id, max_history=memory_config.get("max_history", 10) ) + print(history) # 6. 处理多模态文件 processed_files = None @@ -904,7 +905,7 @@ class AgentRunService: meta_data={ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens} }, - files=files, + files=processed_files, audio_url=stream_audio_url ) @@ -1182,12 +1183,7 @@ class AgentRunService: "files": [] } if files: - for f in files: - # url = await MultimodalService(self.db).get_file_url(f) - human_meta["files"].append({ - "type": f.type, - "url": f.url - }) + human_meta["files"].extend(files) # 保存用户消息 conversation_service.add_message( conversation_id=conv_uuid, diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index f0c7cee2..e11a8d2b 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -11,6 +11,8 @@ import base64 import io import uuid +import zipfile +import chardet from abc import ABC, abstractmethod from typing import List, Dict, Any, Optional @@ -42,12 +44,10 @@ PDF_MIME = ['application/pdf'] DOC_MIME = [ 'application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'application/zip' ] XLSX_MIME = [ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'application/vnd.ms-excel', - 'application/zip' ] CSV_MIME = ['text/csv', 'application/csv'] JSON_MIME = ['application/json'] @@ -588,12 +588,12 @@ class MultimodalService: file.set_content(file_content) file_mime_type = magic.from_buffer(file_content, mime=True) if file_mime_type in TEXT_MIME: - return file_content.decode("utf-8") + return self._decode_text_safe(file_content) elif file_mime_type in PDF_MIME: return await self._extract_pdf_text(file_content) - elif file_mime_type in DOC_MIME and file.file_type.endswith(('docx', 'doc')): + elif self._is_word_file(file_content, file_mime_type): return await self._extract_word_text(file_content) - elif file_mime_type in XLSX_MIME and file.file_type.endswith(("xlsx", "xls")): + elif self._is_excel_file(file_content, file_mime_type): return await self._extract_xlsx_text(file_content) elif file_mime_type in CSV_MIME: return await self._extract_csv_text(file_content) @@ -647,27 +647,89 @@ class MultimodalService: logger.error(f"提取 Excel 文本失败: {e}") return f"[Excel 提取失败: {str(e)}]" - @staticmethod - async def _extract_csv_text(file_content: bytes) -> str: + async def _extract_csv_text(self, file_content: bytes) -> str: """提取 CSV 文本""" try: - text = file_content.decode('utf-8-sig') + text = self._decode_text_safe(file_content) reader = csv.reader(io.StringIO(text)) return '\n'.join('\t'.join(row) for row in reader) except Exception as e: logger.error(f"提取 CSV 文本失败: {e}") return f"[CSV 提取失败: {str(e)}]" - @staticmethod - async def _extract_json_text(file_content: bytes) -> str: + async def _extract_json_text(self, file_content: bytes) -> str: """提取 JSON 文本""" try: - data = json.loads(file_content.decode('utf-8')) + text = self._decode_text_safe(file_content) + data = json.loads(text) return json.dumps(data, ensure_ascii=False, indent=2) except Exception as e: logger.error(f"提取 JSON 文本失败: {e}") return f"[JSON 提取失败: {str(e)}]" + def _is_word_file(self, file_content: bytes, mime_type: str) -> bool: + """判断是不是 Word 文件(doc / docx),不依赖后缀""" + # 旧版 .doc + if mime_type == 'application/msword': + return True + + # 新版 .docx(ZIP 内部包含 word/document.xml) + header = file_content[:4] + if header == b'PK\x03\x04': + try: + with zipfile.ZipFile(io.BytesIO(file_content)) as zf: + return "word/document.xml" in zf.namelist() + except: + pass + + return False + + def _is_excel_file(self, file_content: bytes, mime_type: str) -> bool: + """判断是不是 Excel 文件(xls / xlsx),不依赖后缀""" + # 旧版 .xls + if mime_type == 'application/vnd.ms-excel': + return True + + # 新版 .xlsx(ZIP 内部包含 xl/workbook.xml) + header = file_content[:4] + if header == b'PK\x03\x04': + try: + with zipfile.ZipFile(io.BytesIO(file_content)) as zf: + return "xl/workbook.xml" in zf.namelist() + except: + pass + + return False + + @staticmethod + def _decode_text_safe(file_content: bytes) -> str: + """ + 【万能文本解码】 + 自动检测编码,支持 utf-8 / gbk / gb2312 / utf-8-sig / ascii 等 + 永远不报错,永远不乱码 + """ + if not file_content: + return "" + + # 1. 自动检测文件编码 + detect = chardet.detect(file_content) + encoding = detect.get("encoding", "utf-8").lower() + + # 2. 兼容常见中文编码 + compatible_encodings = ["utf-8", "gbk", "gb18030", "gb2312", "ascii", "latin-1"] + + # 3. 按优先级尝试解码 + for enc in [encoding] + compatible_encodings: + if not enc: + continue + try: + return file_content.decode(enc.strip()) + except (UnicodeDecodeError, LookupError): + continue + + # 终极兜底 + return file_content.decode("utf-8", errors="replace") + def get_multimodal_service(db: Session) -> MultimodalService: """获取多模态服务实例(依赖注入)""" diff --git a/api/app/services/shared_chat_service.py b/api/app/services/shared_chat_service.py index 0d659832..a8a31997 100644 --- a/api/app/services/shared_chat_service.py +++ b/api/app/services/shared_chat_service.py @@ -264,7 +264,7 @@ class SharedChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": msg.content} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} for msg in messages ] @@ -472,7 +472,7 @@ class SharedChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": msg.content} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} for msg in messages ] From e8947ad0bb2e707f15c92e4da4dcde5a4fc26557 Mon Sep 17 00:00:00 2001 From: wxy Date: Thu, 19 Mar 2026 17:48:46 +0800 Subject: [PATCH 02/43] feat: add permanent public URL support for remote storage (OSS/S3) --- .../controllers/file_storage_controller.py | 55 ++++++++++++++++++- api/app/core/storage/base.py | 15 +++++ api/app/core/storage/oss.py | 10 ++++ api/app/core/storage/s3.py | 9 +++ 4 files changed, 86 insertions(+), 3 deletions(-) diff --git a/api/app/controllers/file_storage_controller.py b/api/app/controllers/file_storage_controller.py index ff284f39..d9406090 100644 --- a/api/app/controllers/file_storage_controller.py +++ b/api/app/controllers/file_storage_controller.py @@ -499,6 +499,51 @@ async def get_file_url( ) +@router.get("/files/{file_id}/permanent-url", response_model=ApiResponse) +async def get_permanent_file_url( + file_id: uuid.UUID, + db: Session = Depends(get_db), + storage_service: FileStorageService = Depends(get_file_storage_service), +): + """ + 获取文件的永久公开 URL(无过期时间)。 + + - 本地存储:返回 API 永久访问地址(基于 FILE_LOCAL_SERVER_URL 配置) + - 远程存储(OSS/S3):返回 bucket 公读地址(需 bucket 已配置公共读权限) + """ + file_metadata = db.query(FileMetadata).filter(FileMetadata.id == file_id).first() + if not file_metadata: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="The file does not exist") + + if file_metadata.status != "completed": + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, + detail=f"File upload not completed, status: {file_metadata.status}") + + file_key = file_metadata.file_key + storage = storage_service.storage + + try: + if isinstance(storage, LocalStorage): + url = f"{settings.FILE_LOCAL_SERVER_URL}/storage/permanent/{file_id}" + else: + url = await storage.get_permanent_url(file_key) + if not url: + raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="Permanent URL not supported for current storage backend") + + api_logger.info(f"Generated permanent URL: file_id={file_id}") + return success( + data={"url": url, "expires_in": None, "permanent": True, "file_name": file_metadata.file_name}, + msg="Permanent file URL generated successfully" + ) + except HTTPException: + raise + except Exception as e: + api_logger.error(f"Failed to generate permanent URL: {e}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to generate permanent URL: {str(e)}") + + @router.get("/public/{file_id}", response_model=Any) async def public_download_file( request: Request, @@ -641,14 +686,18 @@ async def permanent_download_file( media_type=file_metadata.content_type or "application/octet-stream" ) else: - # For remote storage, redirect to presigned URL with long expiration + # For remote storage, use permanent public URL (requires bucket public read) try: - # Use a very long expiration (7 days max for most cloud providers) + permanent_url = await storage.get_permanent_url(file_key) + if permanent_url: + api_logger.info(f"Redirecting to permanent public URL: file_key={file_key}") + return RedirectResponse(url=permanent_url, status_code=status.HTTP_302_FOUND) + # Fallback: long-lived presigned URL presigned_url = await storage_service.get_file_url(file_key, expires=604800) presigned_url = _match_scheme(request, presigned_url) return RedirectResponse(url=presigned_url, status_code=status.HTTP_302_FOUND) except Exception as e: - api_logger.error(f"Failed to get presigned URL: {e}") + api_logger.error(f"Failed to get permanent URL: {e}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to retrieve file: {str(e)}" diff --git a/api/app/core/storage/base.py b/api/app/core/storage/base.py index 6653d04a..8ab0fcde 100644 --- a/api/app/core/storage/base.py +++ b/api/app/core/storage/base.py @@ -121,3 +121,18 @@ class StorageBackend(ABC): URL for accessing the file. """ pass + + async def get_permanent_url(self, file_key: str) -> Optional[str]: + """ + Get a permanent public URL for the file (no expiration). + + Returns None by default; remote storage backends should override this + if the bucket is configured for public read access. + + Args: + file_key: Unique identifier for the file in the storage system. + + Returns: + A permanent public URL, or None if not supported. + """ + return None diff --git a/api/app/core/storage/oss.py b/api/app/core/storage/oss.py index 81bedce1..27669ffa 100644 --- a/api/app/core/storage/oss.py +++ b/api/app/core/storage/oss.py @@ -261,3 +261,13 @@ class OSSStorage(StorageBackend): logger.error(f"Failed to generate presigned URL for {file_key}: {e}") # Return a basic URL format as fallback return f"https://{self.bucket_name}.{self.endpoint.replace('https://', '').replace('http://', '')}/{file_key}" + + async def get_permanent_url(self, file_key: str) -> str: + """ + Get a permanent public URL for the file (requires bucket public read). + + Returns: + A permanent URL in the format: https://{bucket}.{endpoint}/{file_key} + """ + host = self.endpoint.replace("https://", "").replace("http://", "") + return f"https://{self.bucket_name}.{host}/{file_key}" diff --git a/api/app/core/storage/s3.py b/api/app/core/storage/s3.py index 37ad4184..c7b33ffe 100644 --- a/api/app/core/storage/s3.py +++ b/api/app/core/storage/s3.py @@ -378,3 +378,12 @@ class S3Storage(StorageBackend): logger.error(f"Failed to generate presigned URL for {file_key}: {e}") # Return a basic URL format as fallback return f"https://{self.bucket_name}.s3.{self.region}.amazonaws.com/{file_key}" + + async def get_permanent_url(self, file_key: str) -> str: + """ + Get a permanent public URL for the file (requires bucket public read). + + Returns: + A permanent URL in the format: https://{bucket}.s3.{region}.amazonaws.com/{file_key} + """ + return f"https://{self.bucket_name}.s3.{self.region}.amazonaws.com/{file_key}" From 13abb03d87a943055911a6dade6e3f4bc66e9d8c Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Thu, 19 Mar 2026 17:44:40 +0800 Subject: [PATCH 03/43] feat(memory): add pagination support for conversation list in working memory --- .../controllers/memory_working_controller.py | 42 ++++++++++++------- .../repositories/conversation_repository.py | 35 +++++++++------- api/app/services/conversation_service.py | 22 +++++----- api/app/services/user_memory_service.py | 5 +-- 4 files changed, 62 insertions(+), 42 deletions(-) diff --git a/api/app/controllers/memory_working_controller.py b/api/app/controllers/memory_working_controller.py index 8aab039a..473adedb 100644 --- a/api/app/controllers/memory_working_controller.py +++ b/api/app/controllers/memory_working_controller.py @@ -33,35 +33,47 @@ def get_memory_count( @router.get("/{end_user_id}/conversations", response_model=ApiResponse) def get_conversations( end_user_id: uuid.UUID, + page: int = 1, + page_size: int = 20, current_user: User = Depends(get_current_user), db: Session = Depends(get_db) ): """ - Retrieve all conversations for the current user in a specific group. + Retrieve conversations for the current user in a specific group with pagination. Args: end_user_id (UUID): The group identifier. + page (int): Page number (1-based). Defaults to 1. + page_size (int): Number of items per page. Defaults to 20. current_user (User, optional): The authenticated user. db (Session, optional): SQLAlchemy session. Returns: - ApiResponse: Contains a list of conversation IDs. - - Notes: - - Initializes the ConversationService with the current DB session. - - Returns only conversation IDs for lightweight response. - - Logs can be added to trace requests in production. + ApiResponse: Contains a paginated list of conversations. """ + page = max(1, page) + page_size = max(1, min(page_size, 100)) # Limit page size between 1 and 100 conversation_service = ConversationService(db) - conversations = conversation_service.get_user_conversations( - end_user_id + conversations, total = conversation_service.get_user_conversations( + end_user_id, + page=page, + page_size=page_size ) - return success(data=[ - { - "id": conversation.id, - "title": conversation.title - } for conversation in conversations - ], msg="get conversations success") + return success(data={ + "items": [ + { + "id": conversation.id, + "title": conversation.title + } for conversation in conversations + ], + "total": total, + "page": { + "page": page, + "pagesize": page_size, + "total": total, + "hasnext": (page * page_size) < total + }, + }, msg="get conversations success") @router.get("/{end_user_id}/messages", response_model=ApiResponse) diff --git a/api/app/repositories/conversation_repository.py b/api/app/repositories/conversation_repository.py index eb5d3c61..90f2d6ec 100644 --- a/api/app/repositories/conversation_repository.py +++ b/api/app/repositories/conversation_repository.py @@ -90,27 +90,27 @@ class ConversationRepository: self, user_id: uuid.UUID, workspace_id: uuid.UUID = None, - limit: int = 10, - is_activate: bool = True - ) -> list[Conversation]: + is_activate: bool = True, + page: int = 1, + page_size: int = 20 + ) -> tuple[list[Conversation], int]: """ - Retrieve recent conversations for a specific user. + Retrieve recent conversations for a specific user with pagination. This method queries conversations associated with the given user ID, optionally scoped to a specific workspace. Results are ordered by the - most recently updated conversations and limited to a fixed number. + most recently updated conversations. Args: user_id (uuid.UUID): Unique identifier of the user. workspace_id (uuid.UUID, optional): Workspace scope for the query. If provided, only conversations under this workspace will be returned. - limit (int): Maximum number of conversations to return. - Defaults to 10. - is_activate (bool): Convsersation State limit + is_activate (bool): Conversation State limit. + page (int): Page number (1-based). Defaults to 1. + page_size (int): Number of items per page. Defaults to 20. Returns: - list[Conversation]: A list of conversation entities ordered by - last updated time (descending). + tuple[list[Conversation], int]: A list of conversation entities and total count. """ logger.info(f"Fetching conversation by user_id: {user_id}") @@ -122,18 +122,25 @@ class ConversationRepository: if workspace_id: stmt = stmt.where(Conversation.workspace_id == workspace_id) - stmt = stmt.order_by(desc(Conversation.updated_at)) - stmt = stmt.limit(limit) + # Calculate total count + total = int(self.db.execute( + select(func.count()).select_from(stmt.subquery()) + ).scalar_one()) - convsersations = list(self.db.scalars(stmt).all()) + # Apply ordering and pagination + stmt = stmt.order_by(desc(Conversation.updated_at)) + stmt = stmt.offset((page - 1) * page_size).limit(page_size) + + conversations = list(self.db.scalars(stmt).all()) logger.info( "Conversation fetched successfully", extra={ "user_id": str(user_id), "workspace_id": str(workspace_id), + "total": total, } ) - return convsersations + return conversations, total def list_conversations( self, diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index aff5f533..87568b06 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -119,25 +119,27 @@ class ConversationService: def get_user_conversations( self, - user_id: uuid.UUID - ) -> list[Conversation]: + user_id: uuid.UUID, + page: int = 1, + page_size: int = 20 + ) -> tuple[list[Conversation], int]: """ - Retrieve recent conversations for a specific user - - This method delegates persistence logic to the repository layer and - applies service-level defaults (e.g. recent conversation limit). + Retrieve recent conversations for a specific user with pagination. Args: user_id (uuid.UUID): Unique identifier of the user. + page (int): Page number (1-based). Defaults to 1. + page_size (int): Number of items per page. Defaults to 20. Returns: - list[Conversation]: A list of recent conversation entities. + tuple[list[Conversation], int]: A list of recent conversation entities and total count. """ - conversations = self.conversation_repo.get_conversation_by_user_id( + conversations, total = self.conversation_repo.get_conversation_by_user_id( user_id, - limit=10 + page=page, + page_size=page_size ) - return conversations + return conversations, total def list_conversations( self, diff --git a/api/app/services/user_memory_service.py b/api/app/services/user_memory_service.py index d5d19e0d..12e0c324 100644 --- a/api/app/services/user_memory_service.py +++ b/api/app/services/user_memory_service.py @@ -1408,12 +1408,11 @@ async def analytics_memory_types( if end_user_id: try: conversation_repo = ConversationRepository(db) - conversations = conversation_repo.get_conversation_by_user_id( + conversations, total = conversation_repo.get_conversation_by_user_id( user_id=uuid.UUID(end_user_id), - limit=100, # 获取更多会话以准确统计 is_activate=True ) - work_count = len(conversations) + work_count = total logger.debug(f"工作记忆数量(会话数): {work_count} (end_user_id={end_user_id})") except Exception as e: logger.warning(f"获取会话数量失败,工作记忆数量设为0: {str(e)}") From 7aec15785967084d2bb7994d412324fc55d870bc Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Thu, 19 Mar 2026 17:53:01 +0800 Subject: [PATCH 04/43] [changes] keep two decimals --- api/app/services/memory_forget_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/app/services/memory_forget_service.py b/api/app/services/memory_forget_service.py index 84c4aff6..a0bcc1a1 100644 --- a/api/app/services/memory_forget_service.py +++ b/api/app/services/memory_forget_service.py @@ -619,7 +619,7 @@ class MemoryForgetService: recent_trends.append({ 'date': date_str, 'merged_count': record.merged_count, - 'average_activation': record.average_activation_value, + 'average_activation': round(record.average_activation_value, 2) if record.average_activation_value is not None else None, 'total_nodes': record.total_nodes, 'execution_time': int(record.execution_time.timestamp() * 1000) }) From 6105d461988e32f1683201572866c2db8f85026b Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 19 Mar 2026 17:54:32 +0800 Subject: [PATCH 05/43] fix(bug): bug fix --- api/app/services/app_chat_service.py | 11 +++-------- api/app/services/draft_run_service.py | 1 - api/app/services/multimodal_service.py | 3 ++- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index b0d90643..3f4bc007 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -187,7 +187,7 @@ class AppChatService: "usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}), "audio_url": None } - if files: + if processed_files: human_meta["files"].extend(processed_files) # 保存消息 @@ -395,13 +395,8 @@ class AppChatService: "audio_url": None } - if files: - for f in files: - # url = await MultimodalService(self.db).get_file_url(f) - human_meta["files"].append({ - "type": f.type, - "url": f.url - }) + if processed_files: + human_meta["files"].extend(processed_files) if stream_audio_url: assistant_meta["audio_url"] = stream_audio_url diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 10ec25d9..0af33357 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -820,7 +820,6 @@ class AgentRunService: conversation_id=conversation_id, max_history=memory_config.get("max_history", 10) ) - print(history) # 6. 处理多模态文件 processed_files = None diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index e11a8d2b..e7fb3de3 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -713,7 +713,8 @@ class MultimodalService: # 1. 自动检测文件编码 detect = chardet.detect(file_content) - encoding = detect.get("encoding", "utf-8").lower() + encoding = detect.get("encoding") or "utf-8" + encoding = encoding.lower() # 2. 兼容常见中文编码 compatible_encodings = ["utf-8", "gbk", "gb18030", "gb2312", "ascii", "latin-1"] From 37a22fbfa9fa09d9f450d04055212039ef8ba0dc Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Thu, 19 Mar 2026 18:23:09 +0800 Subject: [PATCH 06/43] feat(memory): add pagination support for conversation list in working memory --- api/app/controllers/memory_working_controller.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/app/controllers/memory_working_controller.py b/api/app/controllers/memory_working_controller.py index 473adedb..c06fd432 100644 --- a/api/app/controllers/memory_working_controller.py +++ b/api/app/controllers/memory_working_controller.py @@ -34,7 +34,7 @@ def get_memory_count( def get_conversations( end_user_id: uuid.UUID, page: int = 1, - page_size: int = 20, + pagesize: int = 20, current_user: User = Depends(get_current_user), db: Session = Depends(get_db) ): @@ -44,7 +44,7 @@ def get_conversations( Args: end_user_id (UUID): The group identifier. page (int): Page number (1-based). Defaults to 1. - page_size (int): Number of items per page. Defaults to 20. + pagesize (int): Number of items per page. Defaults to 20. current_user (User, optional): The authenticated user. db (Session, optional): SQLAlchemy session. @@ -52,7 +52,7 @@ def get_conversations( ApiResponse: Contains a paginated list of conversations. """ page = max(1, page) - page_size = max(1, min(page_size, 100)) # Limit page size between 1 and 100 + page_size = max(1, min(pagesize, 100)) # Limit page size between 1 and 100 conversation_service = ConversationService(db) conversations, total = conversation_service.get_user_conversations( end_user_id, From 27b782e12a1fa47c36febf440f7e77a38bad7553 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Thu, 19 Mar 2026 18:41:33 +0800 Subject: [PATCH 07/43] feat(web): work memory support page --- web/src/api/memory.ts | 6 +- .../UserMemoryDetail/pages/WorkingDetail.tsx | 66 +++++++++++++------ 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/web/src/api/memory.ts b/web/src/api/memory.ts index 823e3d78..9a464893 100644 --- a/web/src/api/memory.ts +++ b/web/src/api/memory.ts @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 14:00:06 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-13 10:48:41 + * @Last Modified time: 2026-03-19 18:35:10 */ import { request } from '@/utils/request' import type { AxiosRequestConfig } from 'axios' @@ -218,8 +218,8 @@ export const getExplicitMemory = (end_user_id: string) => { export const getExplicitMemoryDetails = (data: { end_user_id: string, memory_id: string; }) => { return request.post(`/memory/explicit-memory/details`, data) } -export const getConversations = (end_user_id: string) => { - return request.get(`/memory/work/${end_user_id}/conversations`) +export const getConversations = (end_user_id: string, page = 1, pagesize = 20) => { + return request.get(`/memory/work/${end_user_id}/conversations`, { page, pagesize }) } export const getConversationMessages = (end_user_id: string, conversation_id: string) => { return request.get(`/memory/work/${end_user_id}/messages`, { conversation_id }) diff --git a/web/src/views/UserMemoryDetail/pages/WorkingDetail.tsx b/web/src/views/UserMemoryDetail/pages/WorkingDetail.tsx index 843a3b23..2320ee52 100644 --- a/web/src/views/UserMemoryDetail/pages/WorkingDetail.tsx +++ b/web/src/views/UserMemoryDetail/pages/WorkingDetail.tsx @@ -1,8 +1,10 @@ -import { type FC, useEffect, useState, useMemo } from 'react' +import { type FC, useEffect, useState, useMemo, useRef } from 'react' import clsx from 'clsx' import { useTranslation } from 'react-i18next' import { useParams } from 'react-router-dom' import { Row, Col, Skeleton, Button, Divider, Tooltip } from 'antd' + +import InfiniteScroll from 'react-infinite-scroll-component' import RbCard from '@/components/RbCard/Card' import { getConversations, @@ -34,6 +36,8 @@ const WorkingDetail: FC = () => { const { id } = useParams() const [loading, setLoading] = useState(false) const [data, setData] = useState([]) + const [hasMore, setHasMore] = useState(true) + const pageRef = useRef(1) const [messagesLoading, setMessagesLoading] = useState(false) const [messages, setMessages] = useState([]) const [detailLoading, setDetailLoading] = useState(false) @@ -51,16 +55,30 @@ const WorkingDetail: FC = () => { setSelected(null) setDetail(null) setData([]) - getConversations(id).then((res) => { - const response = res as Conversation[] - setData(response) - setSelected(response[0] || null) + setHasMore(true) + pageRef.current = 1 + getConversations(id, 1).then((res) => { + const response = res as { items: Conversation[], page: { hasnext: boolean } } + setData(response.items) + setSelected(response.items[0] || null) + setHasMore(response.page.hasnext) }) .finally(() => { setLoading(false) }) } + const loadMore = () => { + if (!id) return + const nextPage = pageRef.current + 1 + getConversations(id, nextPage).then((res) => { + const response = res as {items: Conversation[], page: { hasnext: boolean }} + setData(prev => [...prev, ...response.items]) + pageRef.current = nextPage + setHasMore(response.page.hasnext) + }) + } + useEffect(() => { if (!id || !selected || !selected.id) return getDetail(selected.id) @@ -103,22 +121,30 @@ const WorkingDetail: FC = () => { : data.length === 0 ? :( - + -
- {data.map(item => ( -
- -
setSelected(item)} - > - {item.title} -
-
-
- ))} +
+ + {data.map(item => ( +
+ +
setSelected(item)} + > + {item.title} +
+
+
+ ))} +
{selected && <> From a322ec4fd5864110f62d9f77f8efd4c060913495 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 19 Mar 2026 18:58:37 +0800 Subject: [PATCH 08/43] fix(bug): tool exception display --- api/app/controllers/tool_controller.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/api/app/controllers/tool_controller.py b/api/app/controllers/tool_controller.py index 5563b9d7..74b8d88e 100644 --- a/api/app/controllers/tool_controller.py +++ b/api/app/controllers/tool_controller.py @@ -76,6 +76,8 @@ async def get_tool_methods( if methods is None: raise HTTPException(status_code=404, detail="工具不存在") return success(data=methods, msg="获取工具方法成功") + except HTTPException: + raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -121,6 +123,8 @@ async def create_tool( raise HTTPException(status_code=400, detail=e.message) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) + except HTTPException: + raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -149,6 +153,8 @@ async def update_tool( return success(msg="工具更新成功") except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) + except HTTPException: + raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -191,6 +197,8 @@ async def set_tool_active( return success(msg=f"工具已{action}") except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) + except HTTPException: + raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -223,6 +231,8 @@ async def execute_tool( }, msg="工具执行完成" ) + except HTTPException: + raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) From b84aba71e73a1c00d5c404d8c6fe177c8567434d Mon Sep 17 00:00:00 2001 From: zhaoying Date: Thu, 19 Mar 2026 19:00:31 +0800 Subject: [PATCH 09/43] feat(web): file add status --- web/src/components/Chat/ChatInput.tsx | 111 ++++++++++-------- web/src/components/Chat/ChatToolbar.tsx | 18 ++- .../ApplicationConfig/TestChat/index.tsx | 4 +- .../ApplicationConfig/components/Chat.tsx | 4 +- .../Conversation/components/FileUpload.tsx | 36 ++++-- web/src/views/Conversation/index.tsx | 2 +- .../views/Workflow/components/Chat/Chat.tsx | 4 +- 7 files changed, 109 insertions(+), 70 deletions(-) diff --git a/web/src/components/Chat/ChatInput.tsx b/web/src/components/Chat/ChatInput.tsx index 7daf935c..aa0dd2f6 100644 --- a/web/src/components/Chat/ChatInput.tsx +++ b/web/src/components/Chat/ChatInput.tsx @@ -2,10 +2,11 @@ * @Author: ZhaoYing * @Date: 2025-12-10 16:46:14 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-19 16:05:56 + * @Last Modified time: 2026-03-19 18:44:51 */ import { type FC, useEffect, useMemo } from 'react' -import { Flex, Input, Form } from 'antd' +import { Flex, Input, Form, Spin } from 'antd' +import clsx from 'clsx' import SendIcon from '@/assets/images/conversation/send.svg' import SendDisabledIcon from '@/assets/images/conversation/sendDisabled.svg' @@ -69,6 +70,8 @@ const ChatInput: FC = ({ onSend(values.message) } + console.log('previewFileList', previewFileList) + return (
@@ -76,62 +79,78 @@ const ChatInput: FC = ({ {previewFileList.map((file) => { if (file.type.includes('image')) { return ( -
- {file.name} -
handleDelete(file)} - >
-
+ +
+ {file.name} +
handleDelete(file)} + >
+
+
) } if (file.type.includes('video')) { return ( -
-
+ +
+
+
) } if (file.type.includes('audio')) { return ( -
-
+ ) })}
} diff --git a/web/src/components/Chat/ChatToolbar.tsx b/web/src/components/Chat/ChatToolbar.tsx index 883ac98a..dda56977 100644 --- a/web/src/components/Chat/ChatToolbar.tsx +++ b/web/src/components/Chat/ChatToolbar.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-03-17 14:22:25 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-18 15:55:13 + * @Last Modified time: 2026-03-19 18:59:37 */ // Toolbar component for chat input area, supporting file upload, audio recording, and variable configuration import { useRef, forwardRef, useImperativeHandle, type ReactNode, useEffect } from 'react' @@ -85,10 +85,18 @@ const ChatToolbar = forwardRef(({ // Append newly uploaded file to the file list when upload is complete const fileChange = (file?: any) => { - if (file?.status !== 'done') return - const files = [...(queryValues?.files || []), file] - form.setFieldValue('files', files) - onFilesChange?.(files) + console.log('file', file) + const lastFiles = form.getFieldValue('files') || []; + const index = lastFiles.findIndex((item: any) => item.uid === file.uid) + if (index > -1) { + lastFiles[index] = file + } else { + lastFiles.push(file) + } + form.setFieldValue('files', [...lastFiles]) + onFilesChange?.([...lastFiles]) + + console.log('lastFiles', lastFiles) } // Append recorded audio file to the file list and notify parent diff --git a/web/src/views/ApplicationConfig/TestChat/index.tsx b/web/src/views/ApplicationConfig/TestChat/index.tsx index ad7931e2..c324622d 100644 --- a/web/src/views/ApplicationConfig/TestChat/index.tsx +++ b/web/src/views/ApplicationConfig/TestChat/index.tsx @@ -183,7 +183,7 @@ const TestChat: FC = ({ const handleSend = () => { if (loading || !application || !message || !message?.trim()) return - const files = toolbarRef.current?.getFiles() || [] + const files = (toolbarRef.current?.getFiles() || []).filter(item => !['uploading', 'error'].includes(item.status)) const variables = toolbarRef.current?.getVariables() || [] const { isCanSend, params } = buildVariableParams(variables) if (!isCanSend) return @@ -235,7 +235,7 @@ const TestChat: FC = ({ const handleWorkflowSend = () => { if (loading || !application || !message || !message?.trim()) return - const files = toolbarRef.current?.getFiles() || [] + const files = (toolbarRef.current?.getFiles() || []).filter(item => !['uploading', 'error'].includes(item.status)) const variables = toolbarRef.current?.getVariables() || [] const { isCanSend, params } = buildVariableParams(variables) if (!isCanSend) return diff --git a/web/src/views/ApplicationConfig/components/Chat.tsx b/web/src/views/ApplicationConfig/components/Chat.tsx index ee5bc53a..052ad872 100644 --- a/web/src/views/ApplicationConfig/components/Chat.tsx +++ b/web/src/views/ApplicationConfig/components/Chat.tsx @@ -191,7 +191,7 @@ const Chat: FC = ({ .then(() => { const message = msg if (!message?.trim()) return - const files = toolbarRef.current?.getFiles() || [] + const files = (toolbarRef.current?.getFiles() || []).filter(item => !['uploading', 'error'].includes(item.status)) // Validate required variables before sending let isCanSend = true const params: Record = {} @@ -352,7 +352,7 @@ const Chat: FC = ({ .then(() => { const message = msg if (!message || message.trim() === '') return - const files = toolbarRef.current?.getFiles() || [] + const files = (toolbarRef.current?.getFiles() || []).filter(item => !['uploading', 'error'].includes(item.status)) addUserMessage(message, files) setMessage(undefined) toolbarRef.current?.setFiles([]) diff --git a/web/src/views/Conversation/components/FileUpload.tsx b/web/src/views/Conversation/components/FileUpload.tsx index 48370fcc..23b62b14 100644 --- a/web/src/views/Conversation/components/FileUpload.tsx +++ b/web/src/views/Conversation/components/FileUpload.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-06 21:09:42 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-18 20:32:54 + * @Last Modified time: 2026-03-19 18:38:41 */ /** * File Upload Component @@ -23,7 +23,7 @@ import { useState, useEffect, forwardRef, useImperativeHandle, useMemo } from 'react'; import { Upload, Progress, App } from 'antd'; import type { UploadProps, UploadFile } from 'antd'; -import type { UploadProps as RcUploadProps } from 'antd/es/upload/interface'; +import type { UploadProps as RcUploadProps, RcFile, UploadFileStatus } from 'antd/es/upload/interface'; import { useTranslation } from 'react-i18next'; import { request } from '@/utils/request' @@ -221,17 +221,29 @@ const UploadFiles = forwardRef(({ */ const handleCustomRequest: RcUploadProps['customRequest'] = async (options) => { const { file, onSuccess, onError } = options; - - try { - const formData = new FormData(); - formData.append('file', file); - - const response = await request.uploadFile(action, formData, requestConfig); - - onSuccess?.({data: response}); - } catch (error) { - onError?.(error as Error); + if (typeof file === 'string') return; + const rcFile = file as RcFile; + const formData = new FormData(); + formData.append('file', rcFile); + const fileVo: UploadFile = { + uid: rcFile.uid, + name: rcFile.name, + status: 'uploading' as UploadFileStatus, + percent: 0, + type: rcFile.type, + originFileObj: rcFile, + thumbUrl: URL.createObjectURL(rcFile) } + onChange?.(fileVo) + request.uploadFile(action, formData, requestConfig) + .then(res => { + onSuccess?.({ data: res }); + }) + .catch((error) => { + onError?.(error as Error); + fileVo.status = 'error' + onChange?.(fileVo) + }) }; /** diff --git a/web/src/views/Conversation/index.tsx b/web/src/views/Conversation/index.tsx index 3a726746..e6047efd 100644 --- a/web/src/views/Conversation/index.tsx +++ b/web/src/views/Conversation/index.tsx @@ -200,7 +200,7 @@ const Conversation: FC = () => { /** Send message and handle streaming response */ const handleSend = () => { if (!token || !shareToken) return - const files = toolbarRef.current?.getFiles() || [] + const files = (toolbarRef.current?.getFiles() || []).filter(item => !['uploading', 'error'].includes(item.status)) const variables = toolbarRef.current?.getVariables() || [] let isCanSend = true const params: Record = {} diff --git a/web/src/views/Workflow/components/Chat/Chat.tsx b/web/src/views/Workflow/components/Chat/Chat.tsx index ad46d2dc..7d3e1313 100644 --- a/web/src/views/Workflow/components/Chat/Chat.tsx +++ b/web/src/views/Workflow/components/Chat/Chat.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-06 21:10:56 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-18 20:46:35 + * @Last Modified time: 2026-03-19 18:41:07 */ /** * Workflow Chat Component @@ -151,7 +151,7 @@ const Chat = forwardRef !['uploading', 'error'].includes(item.status)) setChatList(prev => [...prev, { role: 'user', content: message, From f644c84fbbf170fd31fa4d2c991e9ae521c66ba3 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Thu, 19 Mar 2026 19:24:37 +0800 Subject: [PATCH 10/43] [changes]Community node attribute check --- .../memory_dashboard_controller.py | 11 +++- .../clustering_engine/label_propagation.py | 18 ++++++ .../neo4j/community_repository.py | 21 ++++++- api/app/repositories/neo4j/cypher_queries.py | 24 +++++-- api/app/tasks.py | 63 ++++++++++++++++--- 5 files changed, 123 insertions(+), 14 deletions(-) diff --git a/api/app/controllers/memory_dashboard_controller.py b/api/app/controllers/memory_dashboard_controller.py index 2c979435..552b3483 100644 --- a/api/app/controllers/memory_dashboard_controller.py +++ b/api/app/controllers/memory_dashboard_controller.py @@ -198,8 +198,15 @@ async def get_workspace_end_users( # 对有 ExtractedEntity 但无 Community 节点的存量用户自动补跑全量聚类 try: from app.tasks import init_community_clustering_for_users - init_community_clustering_for_users.delay(end_user_ids=end_user_ids) - api_logger.info(f"已触发社区聚类补全任务,候选用户数: {len(end_user_ids)}") + from app.aioRedis import aio_redis_get + + done_key = f"community_cluster:done:workspace:{workspace_id}" + already_done = await aio_redis_get(done_key) + if already_done: + api_logger.info(f"工作空间 {workspace_id} 社区数据已完整,跳过本次聚类任务投递") + else: + init_community_clustering_for_users.delay(end_user_ids=end_user_ids, workspace_id=str(workspace_id)) + api_logger.info(f"已触发社区聚类补全任务,候选用户数: {len(end_user_ids)}") except Exception as e: api_logger.warning(f"触发社区聚类补全任务失败(不影响主流程): {str(e)}") diff --git a/api/app/core/memory/storage_services/clustering_engine/label_propagation.py b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py index cbc303b1..23ce3901 100644 --- a/api/app/core/memory/storage_services/clustering_engine/label_propagation.py +++ b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py @@ -69,11 +69,13 @@ class LabelPropagationEngine: connector: Neo4jConnector, config_id: Optional[str] = None, llm_model_id: Optional[str] = None, + embedding_model_id: Optional[str] = None, ): self.connector = connector self.repo = CommunityRepository(connector) self.config_id = config_id self.llm_model_id = llm_model_id + self.embedding_model_id = embedding_model_id # ────────────────────────────────────────────────────────────────────────── # 公开接口 @@ -468,12 +470,28 @@ class LabelPropagationEngine: except Exception as e: logger.warning(f"[Clustering] LLM 生成社区元数据失败,使用兜底值: {e}") + # 生成 summary_embedding + summary_embedding: Optional[List[float]] = None + if self.embedding_model_id and summary: + try: + from app.db import get_db_context + from app.core.memory.utils.llm.llm_utils import MemoryClientFactory + + with get_db_context() as db: + embedder = MemoryClientFactory(db).get_embedder_client(self.embedding_model_id) + vectors = await embedder.response([summary]) + if vectors: + summary_embedding = vectors[0] + except Exception as e: + logger.warning(f"[Clustering] 社区 {community_id} 生成 summary_embedding 失败: {e}") + await self.repo.update_community_metadata( community_id=community_id, end_user_id=end_user_id, name=name, summary=summary, core_entities=core_entities, + summary_embedding=summary_embedding, ) logger.debug(f"[Clustering] 社区 {community_id} 元数据已更新: name={name}") except Exception as e: diff --git a/api/app/repositories/neo4j/community_repository.py b/api/app/repositories/neo4j/community_repository.py index f2f11f76..f0febf24 100644 --- a/api/app/repositories/neo4j/community_repository.py +++ b/api/app/repositories/neo4j/community_repository.py @@ -19,6 +19,8 @@ from app.repositories.neo4j.cypher_queries import ( CHECK_USER_HAS_COMMUNITIES, UPDATE_COMMUNITY_MEMBER_COUNT, UPDATE_COMMUNITY_METADATA, + GET_INCOMPLETE_COMMUNITIES, + GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING, ) logger = logging.getLogger(__name__) @@ -170,6 +172,21 @@ class CommunityRepository: logger.error(f"refresh_member_count failed: {e}") return 0 + async def get_incomplete_communities(self, end_user_id: str, check_embedding: bool = False) -> List[str]: + """查询该用户下属性不完整的 Community 节点 ID 列表。 + + Args: + end_user_id: 用户 ID + check_embedding: 为 True 时额外检查 summary_embedding 是否缺失(仅当用户有 embedding 模型配置时传 True) + """ + try: + query = GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING if check_embedding else GET_INCOMPLETE_COMMUNITIES + result = await self.connector.execute_query(query, end_user_id=end_user_id) + return [row["community_id"] for row in result] + except Exception as e: + logger.error(f"get_incomplete_communities failed: {e}") + return [] + async def update_community_metadata( self, community_id: str, @@ -177,8 +194,9 @@ class CommunityRepository: name: str, summary: str, core_entities: List[str], + summary_embedding: Optional[List[float]] = None, ) -> bool: - """更新社区的名称、摘要和核心实体列表。""" + """更新社区的名称、摘要、核心实体列表及 summary_embedding。""" try: result = await self.connector.execute_query( UPDATE_COMMUNITY_METADATA, @@ -187,6 +205,7 @@ class CommunityRepository: name=name, summary=summary, core_entities=core_entities, + summary_embedding=summary_embedding, ) return bool(result) except Exception as e: diff --git a/api/app/repositories/neo4j/cypher_queries.py b/api/app/repositories/neo4j/cypher_queries.py index 48a5ac87..1ec5eaba 100644 --- a/api/app/repositories/neo4j/cypher_queries.py +++ b/api/app/repositories/neo4j/cypher_queries.py @@ -1153,10 +1153,11 @@ RETURN c.community_id AS community_id, cnt AS member_count UPDATE_COMMUNITY_METADATA = """ MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) -SET c.name = $name, - c.summary = $summary, - c.core_entities = $core_entities, - c.updated_at = datetime() +SET c.name = $name, + c.summary = $summary, + c.core_entities = $core_entities, + c.summary_embedding = $summary_embedding, + c.updated_at = datetime() RETURN c.community_id AS community_id """ @@ -1202,3 +1203,18 @@ RETURN properties(r) AS r_props, startNode(r) = e AS r_from_e """ + +GET_INCOMPLETE_COMMUNITIES = """ +MATCH (c:Community {end_user_id: $end_user_id}) +WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL + OR c.name = '' OR c.summary = '' +RETURN c.community_id AS community_id +""" + +GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING = """ +MATCH (c:Community {end_user_id: $end_user_id}) +WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL + OR c.name = '' OR c.summary = '' + OR c.summary_embedding IS NULL +RETURN c.community_id AS community_id +""" diff --git a/api/app/tasks.py b/api/app/tasks.py index f5258330..3d8a2456 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -2675,13 +2675,15 @@ def write_perceptual_memory( time_limit=7200, # 2小时硬超时 soft_time_limit=6900, ) -def init_community_clustering_for_users(self, end_user_ids: List[str]) -> Dict[str, Any]: +def init_community_clustering_for_users(self, end_user_ids: List[str], workspace_id: Optional[str] = None) -> Dict[str, Any]: """触发型任务:检查指定用户列表,对有 ExtractedEntity 但无 Community 节点的用户执行全量聚类。 由 /dashboard/end_users 接口触发,已有社区节点的用户直接跳过。 + 任务完成且所有用户数据均完整时,写入 Redis 标记,避免下次重复投递。 Args: end_user_ids: 需要检查的用户 ID 列表 + workspace_id: 工作空间 ID,用于完成标记 Returns: 包含任务执行结果的字典 @@ -2707,6 +2709,7 @@ def init_community_clustering_for_users(self, end_user_ids: List[str]) -> Dict[s # 批量预取所有用户的配置(内置兜底:用户配置不可用时自动回退到工作空间默认配置) user_llm_map: Dict[str, Optional[str]] = {} + user_embedding_map: Dict[str, Optional[str]] = {} try: with get_db_context() as db: from app.services.memory_agent_service import get_end_users_connected_configs_batch @@ -2718,21 +2721,54 @@ def init_community_clustering_for_users(self, end_user_ids: List[str]) -> Dict[s try: cfg = MemoryConfigService(db).load_memory_config(config_id=config_id) user_llm_map[uid] = str(cfg.llm_model_id) if cfg.llm_model_id else None + user_embedding_map[uid] = str(cfg.embedding_model_id) if cfg.embedding_model_id else None except Exception as e: - logger.warning(f"[CommunityCluster] 用户 {uid} 加载 LLM 配置失败,将使用 None: {e}") + logger.warning(f"[CommunityCluster] 用户 {uid} 加载配置失败,将使用 None: {e}") user_llm_map[uid] = None + user_embedding_map[uid] = None else: user_llm_map[uid] = None + user_embedding_map[uid] = None except Exception as e: - logger.warning(f"[CommunityCluster] 批量获取 LLM 配置失败,所有用户将使用 None: {e}") + logger.warning(f"[CommunityCluster] 批量获取配置失败,所有用户将使用 None: {e}") for end_user_id in end_user_ids: try: - # 已有社区节点则跳过 + # 已有社区节点时,检查是否存在属性不完整的节点 has_communities = await repo.has_communities(end_user_id) if has_communities: - skipped += 1 - logger.debug(f"[CommunityCluster] 用户 {end_user_id} 已有社区节点,跳过") + llm_model_id = user_llm_map.get(end_user_id) + embedding_model_id = user_embedding_map.get(end_user_id) + incomplete_ids = await repo.get_incomplete_communities( + end_user_id, check_embedding=bool(embedding_model_id) + ) + if not incomplete_ids: + skipped += 1 + logger.debug(f"[CommunityCluster] 用户 {end_user_id} 社区节点均完整,跳过") + continue + + # 对不完整的社区节点逐一补全元数据 + engine = LabelPropagationEngine( + connector=connector, + llm_model_id=llm_model_id, + embedding_model_id=embedding_model_id, + ) + logger.info( + f"[CommunityCluster] 用户 {end_user_id} 发现 {len(incomplete_ids)} 个属性不完整的社区,开始补全" + ) + patch_ok = 0 + patch_fail = 0 + for cid in incomplete_ids: + try: + await engine._generate_community_metadata(cid, end_user_id) + patch_ok += 1 + except Exception as patch_err: + patch_fail += 1 + logger.error(f"[CommunityCluster] 社区 {cid} 元数据补全失败: {patch_err}") + logger.info( + f"[CommunityCluster] 用户 {end_user_id} 社区补全完成: 成功={patch_ok}, 失败={patch_fail}" + ) + initialized += 1 continue # 检查是否有 ExtractedEntity 节点 @@ -2742,11 +2778,13 @@ def init_community_clustering_for_users(self, end_user_ids: List[str]) -> Dict[s logger.debug(f"[CommunityCluster] 用户 {end_user_id} 无实体节点,跳过") continue - # 每个用户使用自己的 llm_model_id + # 每个用户使用自己的 llm_model_id / embedding_model_id llm_model_id = user_llm_map.get(end_user_id) + embedding_model_id = user_embedding_map.get(end_user_id) engine = LabelPropagationEngine( connector=connector, llm_model_id=llm_model_id, + embedding_model_id=embedding_model_id, ) logger.info(f"[CommunityCluster] 用户 {end_user_id} 有 {len(entities)} 个实体,开始全量聚类,llm_model_id={llm_model_id}") @@ -2782,6 +2820,17 @@ def init_community_clustering_for_users(self, end_user_ids: List[str]) -> Dict[s result = loop.run_until_complete(_run()) result["elapsed_time"] = time.time() - start_time result["task_id"] = self.request.id + + # 所有用户均完整(无需初始化也无失败),写入 Redis 标记,1小时内不再重复投递 + if workspace_id and result.get("initialized", 0) == 0 and result.get("failed", 0) == 0: + try: + _r = get_sync_redis_client() + if _r: + _r.set(f"community_cluster:done:workspace:{workspace_id}", "1", ex=3600) + logger.info(f"[CommunityCluster] 工作空间 {workspace_id} 数据完整,已写入完成标记(1小时有效)") + except Exception as e: + logger.warning(f"[CommunityCluster] 写入完成标记失败: {e}") + return result except Exception as e: From c3f8dbd4bc6e7f2e7c93151531ce8caf32eb0f6d Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Thu, 19 Mar 2026 19:21:39 +0800 Subject: [PATCH 11/43] fix(conversation): handle None meta_data in msg to prevent exceptions --- api/app/services/app_chat_service.py | 4 ++-- api/app/services/conversation_service.py | 2 +- api/app/services/shared_chat_service.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index 3f4bc007..d81ba7b7 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -124,7 +124,7 @@ class AppChatService: limit=10 ) history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} for msg in messages ] @@ -317,7 +317,7 @@ class AppChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} for msg in messages ] diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index 2ca50257..30c3feba 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -293,7 +293,7 @@ class ConversationService: history = [ { "role": msg.role, - "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", []) + "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else []) } for msg in messages ] diff --git a/api/app/services/shared_chat_service.py b/api/app/services/shared_chat_service.py index a8a31997..5e18ee42 100644 --- a/api/app/services/shared_chat_service.py +++ b/api/app/services/shared_chat_service.py @@ -264,7 +264,7 @@ class SharedChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} for msg in messages ] @@ -472,7 +472,7 @@ class SharedChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + msg.meta_data.get("files", [])} + {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} for msg in messages ] From e2411e0bdd03f01da521d56ae942a4f941d514b3 Mon Sep 17 00:00:00 2001 From: wxy Date: Thu, 19 Mar 2026 19:43:48 +0800 Subject: [PATCH 12/43] fix: remove unused share_info variable in upload_file_with_share_token --- api/app/controllers/file_storage_controller.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/app/controllers/file_storage_controller.py b/api/app/controllers/file_storage_controller.py index d9406090..9af48692 100644 --- a/api/app/controllers/file_storage_controller.py +++ b/api/app/controllers/file_storage_controller.py @@ -172,7 +172,6 @@ async def upload_file_with_share_token( # Get share and release info from share_token service = ReleaseShareService(db) - share_info = service.get_shared_release_info(share_token=share_data.share_token) # Get share object to access app_id share = service.repo.get_by_share_token(share_data.share_token) @@ -499,7 +498,7 @@ async def get_file_url( ) -@router.get("/files/{file_id}/permanent-url", response_model=ApiResponse) +@router.get("/files/{file_id}/public-url", response_model=ApiResponse) async def get_permanent_file_url( file_id: uuid.UUID, db: Session = Depends(get_db), From 1c3029f3600e41b4e6257d4909a9e8c15f25fe1f Mon Sep 17 00:00:00 2001 From: zhaoying Date: Thu, 19 Mar 2026 19:45:58 +0800 Subject: [PATCH 13/43] fix(web): ui update --- web/src/components/Chat/ChatContent.tsx | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/web/src/components/Chat/ChatContent.tsx b/web/src/components/Chat/ChatContent.tsx index c77e8384..c7d3cffb 100644 --- a/web/src/components/Chat/ChatContent.tsx +++ b/web/src/components/Chat/ChatContent.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2025-12-10 16:46:17 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-19 13:38:20 + * @Last Modified time: 2026-03-19 19:45:40 */ import { type FC, useRef, useEffect, useState } from 'react' import clsx from 'clsx' @@ -143,15 +143,20 @@ const ChatContent: FC = ({ } return (
handleDownload(file)}> - {(file.type.includes('doc') || file.type.includes('docx') || file.type.includes('word') || file.type.includes('wordprocessingml.document')) &&
} - {(file.type.includes('pdf')) &&
} - {(file.type.includes('excel') || file.type.includes('spreadsheetml.sheet') || file.type.includes('csv')) &&
} + {(file.type.includes('excel') || file.type.includes('spreadsheetml.sheet') || file.type.includes('csv')) + ?
+ :(file.type.includes('pdf')) + ?
+ : (file.type.includes('doc') || file.type.includes('docx') || file.type.includes('word') || file.type.includes('wordprocessingml.document')) + ?
+ : null + }
) })} From e849fed5c174d9b1d16a5335cc08288a7e1db821 Mon Sep 17 00:00:00 2001 From: Eternity <1533512157@qq.com> Date: Thu, 19 Mar 2026 19:46:20 +0800 Subject: [PATCH 14/43] fix(workflow): enable nested search in knowledge base retrieval node --- api/app/core/workflow/nodes/knowledge/node.py | 90 ++++++++++--------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/api/app/core/workflow/nodes/knowledge/node.py b/api/app/core/workflow/nodes/knowledge/node.py index 14f789a9..d3e9efd9 100644 --- a/api/app/core/workflow/nodes/knowledge/node.py +++ b/api/app/core/workflow/nodes/knowledge/node.py @@ -5,7 +5,7 @@ from typing import Any from app.core.error_codes import BizCode from app.core.exceptions import BusinessException from app.core.models import RedBearRerank, RedBearModelConfig -from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory +from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory, ElasticSearchVector from app.core.workflow.engine.state_manager import WorkflowState from app.core.workflow.engine.variable_pool import VariablePool from app.core.workflow.nodes.base_node import BaseNode @@ -24,6 +24,7 @@ class KnowledgeRetrievalNode(BaseNode): def __init__(self, node_config: dict[str, Any], workflow_config: dict[str, Any]): super().__init__(node_config, workflow_config) self.typed_config: KnowledgeRetrievalNodeConfig | None = None + self.vector_service: ElasticSearchVector | None = None def _output_types(self) -> dict[str, VariableType]: return { @@ -163,6 +164,50 @@ class KnowledgeRetrievalNode(BaseNode): ) return reranker + def knowledge_retrieval(self, db, query, rs, db_knowledge, kb_config): + if db_knowledge.type == knowledge_model.KnowledgeType.FOLDER: + children = knowledge_repository.get_knowledges_by_parent_id(db=db, parent_id=db_knowledge.id) + for child in children: + if not (child and child.chunk_num > 0 and child.status == 1): + continue + kb_config.kb_id = child.id + self.knowledge_retrieval(db, query, rs, child, kb_config) + return + self.vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge) + indices = f"Vector_index_{kb_config.kb_id}_Node".lower() + match kb_config.retrieve_type: + case RetrieveType.PARTICIPLE: + rs.extend(self.vector_service.search_by_full_text(query=query, top_k=kb_config.top_k, + indices=indices, + score_threshold=kb_config.similarity_threshold)) + case RetrieveType.SEMANTIC: + rs.extend(self.vector_service.search_by_vector(query=query, top_k=kb_config.top_k, + indices=indices, + score_threshold=kb_config.vector_similarity_weight)) + case RetrieveType.HYBRID: + rs1 = self.vector_service.search_by_vector(query=query, top_k=kb_config.top_k, + indices=indices, + score_threshold=kb_config.vector_similarity_weight) + rs2 = self.vector_service.search_by_full_text(query=query, top_k=kb_config.top_k, + indices=indices, + score_threshold=kb_config.similarity_threshold) + + # Deduplicate hybrid retrieval results + unique_rs = self._deduplicate_docs(rs1, rs2) + if not unique_rs: + return + if self.typed_config.reranker_id: + self.vector_service.reranker = self.get_reranker_model() + rs.extend(self.vector_service.rerank(query=query, docs=unique_rs, top_k=kb_config.top_k)) + else: + rs.extend(sorted( + unique_rs, + key=lambda d: d.metadata.get("score", 0), + reverse=True + )[:kb_config.top_k]) + case _: + raise RuntimeError("Unknown retrieval type") + async def execute(self, state: WorkflowState, variable_pool: VariablePool) -> Any: """ Execute the knowledge retrieval workflow node. @@ -191,56 +236,19 @@ class KnowledgeRetrievalNode(BaseNode): query = self._render_template(self.typed_config.query, variable_pool) with get_db_read() as db: knowledge_bases = self.typed_config.knowledge_bases - existing_ids = self._get_existing_kb_ids(db, [kb.kb_id for kb in knowledge_bases]) - - if not existing_ids: - raise RuntimeError("Knowledge base retrieval failed: the knowledge base does not exist.") rs = [] for kb_config in knowledge_bases: db_knowledge = knowledge_repository.get_knowledge_by_id(db=db, knowledge_id=kb_config.kb_id) if not db_knowledge: raise RuntimeError("The knowledge base does not exist or access is denied.") + self.knowledge_retrieval(db, query, rs, db_knowledge, kb_config) - vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge) - indices = f"Vector_index_{kb_config.kb_id}_Node".lower() - match kb_config.retrieve_type: - case RetrieveType.PARTICIPLE: - rs.extend(vector_service.search_by_full_text(query=query, top_k=kb_config.top_k, - indices=indices, - score_threshold=kb_config.similarity_threshold)) - case RetrieveType.SEMANTIC: - rs.extend(vector_service.search_by_vector(query=query, top_k=kb_config.top_k, - indices=indices, - score_threshold=kb_config.vector_similarity_weight)) - case RetrieveType.HYBRID: - rs1 = vector_service.search_by_vector(query=query, top_k=kb_config.top_k, - indices=indices, - score_threshold=kb_config.vector_similarity_weight) - rs2 = vector_service.search_by_full_text(query=query, top_k=kb_config.top_k, - indices=indices, - score_threshold=kb_config.similarity_threshold) - - # Deduplicate hy brid retrieval results - unique_rs = self._deduplicate_docs(rs1, rs2) - if not unique_rs: - continue - if self.typed_config.reranker_id: - vector_service.reranker = self.get_reranker_model() - rs.extend(vector_service.rerank(query=query, docs=unique_rs, top_k=kb_config.top_k)) - else: - rs.extend(sorted( - unique_rs, - key=lambda d: d.metadata.get("score", 0), - reverse=True - )[:kb_config.top_k]) - case _: - raise RuntimeError("Unknown retrieval type") if not rs: return [] if self.typed_config.reranker_id: - vector_service.reranker = self.get_reranker_model() - final_rs = vector_service.rerank(query=query, docs=rs, top_k=self.typed_config.reranker_top_k) + self.vector_service.reranker = self.get_reranker_model() + final_rs = self.vector_service.rerank(query=query, docs=rs, top_k=self.typed_config.reranker_top_k) else: final_rs = sorted( rs, From b4615bacdcd8799275dcd317c2328f4ff830ac70 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Thu, 19 Mar 2026 20:17:43 +0800 Subject: [PATCH 15/43] [changes] Modify the execution conditions of the task --- .../memory_dashboard_controller.py | 12 ++------- .../clustering_engine/label_propagation.py | 6 +++++ .../neo4j/community_repository.py | 12 +++++++++ api/app/repositories/neo4j/cypher_queries.py | 26 ++++++++++++++++--- api/app/tasks.py | 11 -------- 5 files changed, 43 insertions(+), 24 deletions(-) diff --git a/api/app/controllers/memory_dashboard_controller.py b/api/app/controllers/memory_dashboard_controller.py index 552b3483..cc0efab3 100644 --- a/api/app/controllers/memory_dashboard_controller.py +++ b/api/app/controllers/memory_dashboard_controller.py @@ -195,18 +195,10 @@ async def get_workspace_end_users( api_logger.warning(f"Redis 缓存写入失败: {str(e)}") # 触发社区聚类补全任务(异步,不阻塞接口响应) - # 对有 ExtractedEntity 但无 Community 节点的存量用户自动补跑全量聚类 try: from app.tasks import init_community_clustering_for_users - from app.aioRedis import aio_redis_get - - done_key = f"community_cluster:done:workspace:{workspace_id}" - already_done = await aio_redis_get(done_key) - if already_done: - api_logger.info(f"工作空间 {workspace_id} 社区数据已完整,跳过本次聚类任务投递") - else: - init_community_clustering_for_users.delay(end_user_ids=end_user_ids, workspace_id=str(workspace_id)) - api_logger.info(f"已触发社区聚类补全任务,候选用户数: {len(end_user_ids)}") + init_community_clustering_for_users.delay(end_user_ids=end_user_ids, workspace_id=str(workspace_id)) + api_logger.info(f"已触发社区聚类补全任务,候选用户数: {len(end_user_ids)}") except Exception as e: api_logger.warning(f"触发社区聚类补全任务失败(不影响主流程): {str(e)}") diff --git a/api/app/core/memory/storage_services/clustering_engine/label_propagation.py b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py index 23ce3901..58fd7f86 100644 --- a/api/app/core/memory/storage_services/clustering_engine/label_propagation.py +++ b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py @@ -425,6 +425,12 @@ class LabelPropagationEngine: - name / summary:若有 llm_model_id 则调用 LLM 生成,否则用实体名称拼接兜底 """ try: + # 先检查属性是否已完整,完整则跳过,避免重复生成 + check_embedding = bool(self.embedding_model_id) + if await self.repo.is_community_complete(community_id, end_user_id, check_embedding=check_embedding): + logger.debug(f"[Clustering] 社区 {community_id} 属性已完整,跳过生成") + return + members = await self.repo.get_community_members(community_id, end_user_id) if not members: return diff --git a/api/app/repositories/neo4j/community_repository.py b/api/app/repositories/neo4j/community_repository.py index f0febf24..e89ee451 100644 --- a/api/app/repositories/neo4j/community_repository.py +++ b/api/app/repositories/neo4j/community_repository.py @@ -21,6 +21,8 @@ from app.repositories.neo4j.cypher_queries import ( UPDATE_COMMUNITY_METADATA, GET_INCOMPLETE_COMMUNITIES, GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING, + CHECK_COMMUNITY_IS_COMPLETE, + CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING, ) logger = logging.getLogger(__name__) @@ -187,6 +189,16 @@ class CommunityRepository: logger.error(f"get_incomplete_communities failed: {e}") return [] + async def is_community_complete(self, community_id: str, end_user_id: str, check_embedding: bool = False) -> bool: + """检查单个社区节点的属性是否完整。""" + try: + query = CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING if check_embedding else CHECK_COMMUNITY_IS_COMPLETE + result = await self.connector.execute_query(query, community_id=community_id, end_user_id=end_user_id) + return result[0]["is_complete"] if result else False + except Exception as e: + logger.error(f"is_community_complete failed: {e}") + return False + async def update_community_metadata( self, community_id: str, diff --git a/api/app/repositories/neo4j/cypher_queries.py b/api/app/repositories/neo4j/cypher_queries.py index 1ec5eaba..66d24fab 100644 --- a/api/app/repositories/neo4j/cypher_queries.py +++ b/api/app/repositories/neo4j/cypher_queries.py @@ -1204,6 +1204,25 @@ RETURN startNode(r) = e AS r_from_e """ +CHECK_COMMUNITY_IS_COMPLETE = """ +MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) +RETURN ( + c.name IS NOT NULL AND c.name <> '' AND + c.summary IS NOT NULL AND c.summary <> '' AND + c.core_entities IS NOT NULL +) AS is_complete +""" + +CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING = """ +MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) +RETURN ( + c.name IS NOT NULL AND c.name <> '' AND + c.summary IS NOT NULL AND c.summary <> '' AND + c.core_entities IS NOT NULL AND + c.summary_embedding IS NOT NULL +) AS is_complete +""" + GET_INCOMPLETE_COMMUNITIES = """ MATCH (c:Community {end_user_id: $end_user_id}) WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL @@ -1213,8 +1232,9 @@ RETURN c.community_id AS community_id GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING = """ MATCH (c:Community {end_user_id: $end_user_id}) -WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL - OR c.name = '' OR c.summary = '' - OR c.summary_embedding IS NULL +WHERE c.name IS NULL OR c.name = '' + OR c.summary IS NULL OR c.summary = '' + OR c.core_entities IS NULL + OR (c.summary_embedding IS NULL AND c.summary IS NOT NULL AND c.summary <> '(empty)') RETURN c.community_id AS community_id """ diff --git a/api/app/tasks.py b/api/app/tasks.py index 3d8a2456..3a237d82 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -2820,17 +2820,6 @@ def init_community_clustering_for_users(self, end_user_ids: List[str], workspace result = loop.run_until_complete(_run()) result["elapsed_time"] = time.time() - start_time result["task_id"] = self.request.id - - # 所有用户均完整(无需初始化也无失败),写入 Redis 标记,1小时内不再重复投递 - if workspace_id and result.get("initialized", 0) == 0 and result.get("failed", 0) == 0: - try: - _r = get_sync_redis_client() - if _r: - _r.set(f"community_cluster:done:workspace:{workspace_id}", "1", ex=3600) - logger.info(f"[CommunityCluster] 工作空间 {workspace_id} 数据完整,已写入完成标记(1小时有效)") - except Exception as e: - logger.warning(f"[CommunityCluster] 写入完成标记失败: {e}") - return result except Exception as e: From 7ec947189c796dbfa2e995b17f62a7e129a6f643 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Thu, 19 Mar 2026 20:19:30 +0800 Subject: [PATCH 16/43] fix(web): update file type --- .../FeaturesConfig/FileUploadSettingModal.tsx | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx b/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx index 3fb05a0e..14e7ce3f 100644 --- a/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx +++ b/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-03-05 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-19 15:18:20 + * @Last Modified time: 2026-03-19 20:19:14 */ import { forwardRef, useImperativeHandle, useState } from 'react'; import { Form, InputNumber, Flex, Switch, Row, Col, Radio } from 'antd'; @@ -82,26 +82,25 @@ const defaultValues: FileUpload = { "mp3", "wav", "m4a", - "ogg", - "flac" ], document_enabled: false, document_max_size_mb: 100, document_allowed_extensions: [ "pdf", "docx", + "doc", "xlsx", + "xls", "txt", "csv", - "json" + "json", + "md", ], video_enabled: false, video_max_size_mb: 100, video_allowed_extensions: [ "mp4", "mov", - "avi", - "webm" ], max_file_count: 5, allowed_transfer_methods: 'both' From 1f531f1ace0d3de5802affc2e51eb58c809bc3e6 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Thu, 19 Mar 2026 20:24:16 +0800 Subject: [PATCH 17/43] fix(web): community node validate key --- .../components/RelationshipNetwork.tsx | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/web/src/views/UserMemoryDetail/components/RelationshipNetwork.tsx b/web/src/views/UserMemoryDetail/components/RelationshipNetwork.tsx index 66e37a45..01d4da4f 100644 --- a/web/src/views/UserMemoryDetail/components/RelationshipNetwork.tsx +++ b/web/src/views/UserMemoryDetail/components/RelationshipNetwork.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-03 18:32:00 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-13 14:51:17 + * @Last Modified time: 2026-03-19 20:23:42 */ /** * Relationship Network Component @@ -287,22 +287,26 @@ const RelationshipNetwork:FC = () => { : (selectedNode as RawCommunityNode).properties.community_id ?
- {(selectedNode as RawCommunityNode).properties.name} -
-
{t('userMemory.summary')}
-
- {(selectedNode as RawCommunityNode).properties.summary} + {(selectedNode as RawCommunityNode).properties.name || selectedNode.id}
+ {(selectedNode as RawCommunityNode).properties.summary && <> +
{t('userMemory.summary')}
+
+ {(selectedNode as RawCommunityNode).properties.summary} +
+ } {t('userMemory.member_count')} {(selectedNode as RawCommunityNode).properties.member_count}{t('userMemory.member_count_desc')} - -
{t('userMemory.core_entities')}
-
    - {(selectedNode as RawCommunityNode).properties.core_entities.map((entity, index) =>
  • {entity}
  • )} -
+ {(selectedNode as RawCommunityNode).properties.core_entities && <> + +
{t('userMemory.core_entities')}
+
    + {(selectedNode as RawCommunityNode).properties.core_entities?.map((entity, index) =>
  • {entity}
  • )} +
+ }
: <> {(selectedNode as Node).name && ( From 08f0be17ce08abe09a4e729b8f852eef3734cfa7 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Thu, 19 Mar 2026 20:28:22 +0800 Subject: [PATCH 18/43] fix(web): file type required --- .../views/Conversation/components/UploadFileListModal.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/web/src/views/Conversation/components/UploadFileListModal.tsx b/web/src/views/Conversation/components/UploadFileListModal.tsx index ce71066d..1bed48c7 100644 --- a/web/src/views/Conversation/components/UploadFileListModal.tsx +++ b/web/src/views/Conversation/components/UploadFileListModal.tsx @@ -2,7 +2,7 @@ * @Author: ZhaoYing * @Date: 2026-02-06 21:09:47 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-18 21:10:01 + * @Last Modified time: 2026-03-19 20:27:40 */ /** * Upload File List Modal Component @@ -116,6 +116,9 @@ const UploadFileListModal = forwardRef From a51e34852c3201da1d8c134e58adface43606507 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 19 Mar 2026 21:41:45 +0800 Subject: [PATCH 20/43] fix(app features): Support for xls and doc files --- api/app/schemas/app_schema.py | 6 +-- api/app/services/multimodal_service.py | 71 ++++++++++++++++++++------ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index 8ca43a76..1582d862 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -149,15 +149,15 @@ class FileUploadConfig(BaseModel): ) # 通用文件:PDF/DOCX/XLSX/TXT/CSV/JSON,最大 100MB document_enabled: bool = Field(default=False) - document_max_size_mb: int = Field(default=100) + document_max_size_mb: int = Field(default=50) document_allowed_extensions: List[str] = Field( default=["pdf", "docx", "doc", "xlsx", "xls", "txt", "csv", "json", "md"] ) # 视频文件:MP4/MOV/AVI/WebM,最大 500MB video_enabled: bool = Field(default=False) - video_max_size_mb: int = Field(default=500) + video_max_size_mb: int = Field(default=50) video_allowed_extensions: List[str] = Field( - default=["mp4", "mov"] + default=["mp4"] ) # 最大文件数量 max_file_count: int = Field(default=5, ge=1) diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index e7fb3de3..d8cca67e 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -622,30 +622,71 @@ class MultimodalService: @staticmethod async def _extract_word_text(file_content: bytes) -> str: - """提取 Word 文档文本""" + """提取 Word 文档文本(支持 .docx 和旧版 .doc)""" + # 先尝试 docx(ZIP 格式) + if file_content[:2] == b'PK': + try: + word_file = io.BytesIO(file_content) + doc = Document(word_file) + return '\n'.join(p.text for p in doc.paragraphs) + except Exception as e: + logger.error(f"提取 docx 文本失败: {e}") + return f"[docx 提取失败: {str(e)}]" + + # 旧版 .doc(OLE2 格式) try: - word_file = io.BytesIO(file_content) - doc = Document(word_file) - text_parts = [paragraph.text for paragraph in doc.paragraphs] - return '\n'.join(text_parts) + import olefile + ole = olefile.OleFileIO(io.BytesIO(file_content)) + if not ole.exists('WordDocument'): + return "[doc 提取失败: 未找到 WordDocument 流]" + # 读取 WordDocument 流,提取可见 ASCII/Unicode 文本 + stream = ole.openstream('WordDocument').read() + # Word Binary Format: 文本在流中以 UTF-16-LE 编码存储 + # 简单提取:过滤出可打印字符段 + try: + text = stream.decode('utf-16-le', errors='ignore') + except Exception: + text = stream.decode('latin-1', errors='ignore') + # 过滤控制字符,保留可打印内容 + import re + text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text) + text = re.sub(r' +', ' ', text).strip() + ole.close() + return text except Exception as e: - logger.error(f"提取 Word 文本失败: {e}") - return f"[Word 提取失败: {str(e)}]" + logger.error(f"提取 doc 文本失败: {e}") + return f"[doc 提取失败: {str(e)}]" @staticmethod async def _extract_xlsx_text(file_content: bytes) -> str: - """提取 Excel 文本""" + """提取 Excel 文本(支持 .xlsx 和旧版 .xls)""" + # xlsx(ZIP 格式) + if file_content[:2] == b'PK': + try: + wb = openpyxl.load_workbook(io.BytesIO(file_content), read_only=True, data_only=True) + parts = [] + for sheet in wb.worksheets: + parts.append(f"[Sheet: {sheet.title}]") + for row in sheet.iter_rows(values_only=True): + parts.append('\t'.join('' if v is None else str(v) for v in row)) + return '\n'.join(parts) + except Exception as e: + logger.error(f"提取 xlsx 文本失败: {e}") + return f"[xlsx 提取失败: {str(e)}]" + + # xls(OLE2/BIFF 格式) try: - wb = openpyxl.load_workbook(io.BytesIO(file_content), read_only=True, data_only=True) + import xlrd + wb = xlrd.open_workbook(file_contents=file_content) parts = [] - for sheet in wb.worksheets: - parts.append(f"[Sheet: {sheet.title}]") - for row in sheet.iter_rows(values_only=True): - parts.append('\t'.join('' if v is None else str(v) for v in row)) + for sheet in wb.sheets(): + parts.append(f"[Sheet: {sheet.name}]") + for row_idx in range(sheet.nrows): + parts.append('\t'.join(str(sheet.cell_value(row_idx, col)) for col in range(sheet.ncols))) return '\n'.join(parts) except Exception as e: - logger.error(f"提取 Excel 文本失败: {e}") - return f"[Excel 提取失败: {str(e)}]" + logger.error(f"提取 xls 文本失败: {e}") + return f"[xls 提取失败: {str(e)}]" async def _extract_csv_text(self, file_content: bytes) -> str: """提取 CSV 文本""" From 0157c9d262355f2d525c9e1b444acf9945f3b5cf Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Thu, 19 Mar 2026 21:59:00 +0800 Subject: [PATCH 21/43] fix(file): Routing repair --- api/app/controllers/file_storage_controller.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/api/app/controllers/file_storage_controller.py b/api/app/controllers/file_storage_controller.py index 0d18151f..55149cce 100644 --- a/api/app/controllers/file_storage_controller.py +++ b/api/app/controllers/file_storage_controller.py @@ -685,18 +685,14 @@ async def permanent_download_file( media_type=file_metadata.content_type or "application/octet-stream" ) else: - # For remote storage, use permanent public URL (requires bucket public read) + # For remote storage, redirect to presigned URL with long expiration try: - permanent_url = await storage.get_permanent_url(file_key) - if permanent_url: - api_logger.info(f"Redirecting to permanent public URL: file_key={file_key}") - return RedirectResponse(url=permanent_url, status_code=status.HTTP_302_FOUND) - # Fallback: long-lived presigned URL + # Use a very long expiration (7 days max for most cloud providers) presigned_url = await storage_service.get_file_url(file_key, expires=604800) presigned_url = _match_scheme(request, presigned_url) return RedirectResponse(url=presigned_url, status_code=status.HTTP_302_FOUND) except Exception as e: - api_logger.error(f"Failed to get permanent URL: {e}") + api_logger.error(f"Failed to get presigned URL: {e}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to retrieve file: {str(e)}" From 52fdfc77443e253c67e4da8420aa42c589465375 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 20 Mar 2026 10:49:04 +0800 Subject: [PATCH 22/43] fix(web): max_file_count limit 1 --- web/src/components/Chat/ChatToolbar.tsx | 9 ++++---- web/src/i18n/en.ts | 1 + web/src/i18n/zh.ts | 1 + .../FeaturesConfig/FeaturesConfigModal.tsx | 4 ++-- .../FeaturesConfig/FileUploadSettingModal.tsx | 6 +++--- .../components/UploadFileListModal.tsx | 21 ++++++++++++------- 6 files changed, 25 insertions(+), 17 deletions(-) diff --git a/web/src/components/Chat/ChatToolbar.tsx b/web/src/components/Chat/ChatToolbar.tsx index dda56977..936e7e63 100644 --- a/web/src/components/Chat/ChatToolbar.tsx +++ b/web/src/components/Chat/ChatToolbar.tsx @@ -49,6 +49,7 @@ interface FormValues { memory?: boolean; } +const max_file_count = 1; const ChatToolbar = forwardRef(({ features, extra, @@ -136,8 +137,8 @@ const ChatToolbar = forwardRef(({ key: 'url', label: t('memoryConversation.addRemoteFile'), onClick: () => { - if ((queryValues?.files?.length || 0) >= file_upload.max_file_count) { - messageApi.warning(t('common.fileNumTip', { num: file_upload.max_file_count })) + if ((queryValues?.files?.length || 0) >= max_file_count) { + messageApi.warning(t('common.fileNumTip', { num: max_file_count })) return } uploadFileListModalRef.current?.handleOpen() @@ -153,7 +154,7 @@ const ChatToolbar = forwardRef(({ onChange={fileChange} requestConfig={uploadRequestConfig} featureConfig={file_upload} - disabled={(queryValues?.files?.length || 0) >= file_upload.max_file_count} + disabled={(queryValues?.files?.length || 0) >= max_file_count} /> ) }) @@ -185,7 +186,7 @@ const ChatToolbar = forwardRef(({ {file_upload?.audio_enabled && file_upload?.allowed_transfer_methods?.includes('local_file') && ( = file_upload.max_file_count} + disabled={(queryValues?.files?.length || 0) >= max_file_count} action={uploadAction} requestConfig={uploadRequestConfig} onRecordingComplete={handleRecordingComplete} diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts index 7ff1f30a..2007005f 100644 --- a/web/src/i18n/en.ts +++ b/web/src/i18n/en.ts @@ -460,6 +460,7 @@ export const en = { nameInvalid: 'Name cannot start or end with a space', notAllSpaces: 'Cannot be all spaces', view: 'View', + callbackUrlInvalid: 'Please enter a valid URL', }, model: { searchPlaceholder: 'search model…', diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts index 49a1b803..bce9b5cc 100644 --- a/web/src/i18n/zh.ts +++ b/web/src/i18n/zh.ts @@ -1093,6 +1093,7 @@ export const zh = { nameInvalid: '不能是空格开头或结尾', notAllSpaces: '不能是纯空格', view: '查看', + callbackUrlInvalid: '请输入有效的 URL', }, model: { searchPlaceholder: '搜索模型…', diff --git a/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx b/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx index 5fcb752d..d712720f 100644 --- a/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx +++ b/web/src/views/ApplicationConfig/components/FeaturesConfig/FeaturesConfigModal.tsx @@ -24,7 +24,7 @@ interface FeaturesConfigModalProps { refresh: (value: FeaturesConfigForm) => void; source?: Application['type']; } - +const max_file_count = 1; /** * Modal for copying applications */ @@ -133,7 +133,7 @@ const FeaturesConfigModal = forwardRef
{t('application.maxCount')}
- {fu.max_file_count} {t('application.unix')} + {max_file_count} {t('application.unix')}
diff --git a/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx b/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx index 14e7ce3f..f33b313b 100644 --- a/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx +++ b/web/src/views/ApplicationConfig/components/FeaturesConfig/FileUploadSettingModal.tsx @@ -102,7 +102,7 @@ const defaultValues: FileUpload = { "mp4", "mov", ], - max_file_count: 5, + max_file_count: 1, allowed_transfer_methods: 'both' } @@ -167,8 +167,8 @@ const FileUploadSettingModal = forwardRef -
{t('application.maxCount')}
- + {/*
{t('application.maxCount')}
*/} + diff --git a/web/src/views/Conversation/components/UploadFileListModal.tsx b/web/src/views/Conversation/components/UploadFileListModal.tsx index 9061c4e3..4d2e83ee 100644 --- a/web/src/views/Conversation/components/UploadFileListModal.tsx +++ b/web/src/views/Conversation/components/UploadFileListModal.tsx @@ -19,7 +19,10 @@ * @component */ import { forwardRef, useImperativeHandle, useState, useMemo } from 'react'; -import { Form, Input, Select, Button, Flex } from 'antd'; +import { Form, Input, Select, + // Button, + Flex +} from 'antd'; import { useTranslation } from 'react-i18next'; import type { UploadFileListModalRef } from '../types' @@ -105,9 +108,11 @@ const UploadFileListModal = forwardRef -
+ - {(fields, { add, remove }) => ( + {(fields, + // { add, remove } + ) => ( <> {/* Render each file entry with type selector and URL input */} {fields.map(({ key, name, ...restField }) => ( @@ -131,23 +136,23 @@ const UploadFileListModal = forwardRef -
remove(name)} - >
+ >
*/} ))} - + {/* - + */} )} From 9a765ac71e3d5ab95c496077363de950f07350a1 Mon Sep 17 00:00:00 2001 From: zhaoying Date: Fri, 20 Mar 2026 10:58:58 +0800 Subject: [PATCH 23/43] fix(web): editor bug --- .../components/Editor/plugin/InitialValuePlugin.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/src/views/Workflow/components/Editor/plugin/InitialValuePlugin.tsx b/web/src/views/Workflow/components/Editor/plugin/InitialValuePlugin.tsx index b263120a..8fe29d19 100644 --- a/web/src/views/Workflow/components/Editor/plugin/InitialValuePlugin.tsx +++ b/web/src/views/Workflow/components/Editor/plugin/InitialValuePlugin.tsx @@ -18,8 +18,8 @@ const InitialValuePlugin: React.FC = ({ value, options const isUserInputRef = useRef(false); useEffect(() => { - // 监听编辑器变化,标记是否为用户输入 - const removeListener = editor.registerUpdateListener(({ editorState }) => { + const removeListener = editor.registerUpdateListener(({ editorState, tags }) => { + if (tags.has('programmatic')) return; editorState.read(() => { const root = $getRoot(); const textContent = root.getTextContent(); @@ -107,7 +107,7 @@ const InitialValuePlugin: React.FC = ({ value, options }); root.append(paragraph); } - }, { discrete: true }); + }, { discrete: true, tag: 'programmatic' }); }); } From 3929f811b867ccee6f75d723ac8fab3cbf5721ce Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Fri, 20 Mar 2026 12:05:35 +0800 Subject: [PATCH 24/43] fix(app): The import and export processing features of the application --- api/app/services/app_dsl_service.py | 67 +++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/api/app/services/app_dsl_service.py b/api/app/services/app_dsl_service.py index a10aa70a..13363667 100644 --- a/api/app/services/app_dsl_service.py +++ b/api/app/services/app_dsl_service.py @@ -16,6 +16,7 @@ from app.models.app_release_model import AppRelease from app.models.knowledge_model import Knowledge from app.models.models_model import ModelConfig from app.models.tool_model import ToolConfig as ToolConfigModel +from app.models.skill_model import Skill from app.models.workflow_model import WorkflowConfig from app.services.workflow_service import WorkflowService from app.core.workflow.adapters.memory_bear.memory_bear_adapter import MemoryBearAdapter @@ -84,7 +85,9 @@ class AppDslService: if "knowledge_retrieval" in cfg: enriched["knowledge_retrieval"] = self._enrich_knowledge_retrieval(cfg["knowledge_retrieval"]) if "tools" in cfg: - enriched["tools"] = self._enrich_tools(cfg["tools"]) + enriched["tools"] = self._enrich_tools(cfg.get("tools")) + if "skills" in cfg: + enriched["skills"] = self._enrich_skills(cfg.get("skills")) return enriched if app_type == AppType.MULTI_AGENT: enriched = {**cfg} @@ -108,6 +111,7 @@ class AppDslService: "variables": config.variables if config else [], "edges": config.edges if config else [], "nodes": config.nodes if config else [], + "features": config.features if config else {}, "execution_config": config.execution_config if config else {}, "triggers": config.triggers if config else [], } if config else {} @@ -123,7 +127,8 @@ class AppDslService: "memory": config.memory if config else None, "variables": config.variables if config else [], "tools": self._enrich_tools(config.tools) if config else [], - "skills": config.skills if config else {}, + "skills": self._enrich_skills(config.skills) if config else {}, + "features": config.features if config else {} } if config else {} dsl = {**meta, "app": app_meta, "agent_config": config_data} @@ -185,6 +190,22 @@ class AppDslService: def _enrich_tools(self, tools: list) -> list: return [{**t, "_ref": self._tool_ref(t.get("tool_id"))} for t in (tools or [])] + def _skill_ref(self, skill_id) -> Optional[dict]: + if not skill_id: + return None + s = self.db.query(Skill).filter(Skill.id == skill_id).first() + return {"id": str(skill_id), "name": s.name} if s else {"id": str(skill_id)} + + def _enrich_skills(self, skills: Optional[dict]) -> Optional[dict]: + if not skills: + return skills + skill_ids = skills.get("skill_ids", []) + enriched_ids = [ + {"id": sid, "_ref": self._skill_ref(sid)} + for sid in (skill_ids or []) + ] + return {**skills, "skill_ids": enriched_ids} + def _agent_ref(self, agent_id) -> Optional[dict]: if not agent_id: return None @@ -249,7 +270,7 @@ class AppDslService: memory=self._resolve_memory(cfg.get("memory"), workspace_id, warnings), variables=cfg.get("variables", []), tools=self._resolve_tools(cfg.get("tools", []), tenant_id, warnings), - skills=cfg.get("skills", {}), + skills=self._resolve_skills(cfg.get("skills", {}), tenant_id, warnings), is_active=True, created_at=now, updated_at=now, @@ -444,6 +465,46 @@ class AppDslService: return {**memory, "memory_config_id": None, "enabled": False} return memory + def _resolve_skills(self, skills: Optional[dict], tenant_id: uuid.UUID, warnings: list) -> dict: + if not skills: + return skills or {} + resolved_ids = [] + for entry in (skills.get("skill_ids") or []): + # entry 可能是 {"id": "...", "_ref": {...}} 或直接是字符串 + if isinstance(entry, dict): + ref = entry.get("_ref") or ({"name": None, "id": entry.get("id")} if entry.get("id") else None) + skill_id = self._resolve_skill(ref, tenant_id, warnings) + else: + skill_id = self._resolve_skill({"id": str(entry)}, tenant_id, warnings) + if skill_id: + resolved_ids.append(str(skill_id)) + return {**{k: v for k, v in skills.items() if k != "skill_ids"}, "skill_ids": resolved_ids} + + def _resolve_skill(self, ref: Optional[dict], tenant_id: uuid.UUID, warnings: list) -> Optional[str]: + if not ref: + return None + # 先按 id 匹配 + if ref.get("id"): + try: + s = self.db.query(Skill).filter( + Skill.id == uuid.UUID(str(ref["id"])), + Skill.tenant_id == tenant_id + ).first() + if s: + return str(s.id) + except Exception: + pass + # 再按名称匹配 + if ref.get("name"): + s = self.db.query(Skill).filter( + Skill.name == ref["name"], + Skill.tenant_id == tenant_id + ).first() + if s: + return str(s.id) + warnings.append(f"未找到技能: {ref}") + return None + def _resolve_tools(self, tools: list, tenant_id: uuid.UUID, warnings: list) -> list: result = [] for t in (tools or []): From 0b1ea33b41e4b42b27a8f4f503067979d0f24f13 Mon Sep 17 00:00:00 2001 From: yujiangping Date: Fri, 20 Mar 2026 12:13:04 +0800 Subject: [PATCH 25/43] fix:office view --- web/src/api/knowledgeBase.ts | 4 + web/src/components/DocumentPreview/index.tsx | 83 ++++++++++++++++--- .../[knowledgeBaseId]/DocumentDetails.tsx | 4 +- 3 files changed, 79 insertions(+), 12 deletions(-) diff --git a/web/src/api/knowledgeBase.ts b/web/src/api/knowledgeBase.ts index 60ed2403..63ec80ae 100644 --- a/web/src/api/knowledgeBase.ts +++ b/web/src/api/knowledgeBase.ts @@ -52,6 +52,10 @@ export const getKnowledgeBaseTypeList = async (): Promise => { // 如果不是数组,返回空数组 return []; }; +// 获取文件地址 +export const getFileUrl = (fileId: string) => { + return `${apiPrefix}/files/${fileId}`; +}; // 知识库文档解析类型 export const getKnowledgeBaseDocumentParseTypeList = async () => { const response = await request.get(`${apiPrefix}/knowledges/parsertype`); diff --git a/web/src/components/DocumentPreview/index.tsx b/web/src/components/DocumentPreview/index.tsx index 247f713e..f659c53e 100644 --- a/web/src/components/DocumentPreview/index.tsx +++ b/web/src/components/DocumentPreview/index.tsx @@ -4,7 +4,7 @@ * @Author: yujiangping * @Date: 2026-03-16 19:01:12 * @LastEditors: yujiangping - * @LastEditTime: 2026-03-18 18:35:53 + * @LastEditTime: 2026-03-20 12:12:20 */ import { useState, useEffect, useRef, useCallback, type FC } from 'react'; import { Spin, Alert, Button, Table, InputNumber, Image } from 'antd'; @@ -309,23 +309,64 @@ const DocumentPreview: FC = ({ } }; + const [csvTruncated, setCsvTruncated] = useState(false); + const isCsvFile = () => getFileExtension() === '.csv'; + // CSV 预览大小限制:1MB + const CSV_PREVIEW_SIZE = 1 * 1024 * 1024; + // 最大预览行数 + const MAX_PREVIEW_ROWS = 500; + + const fetchFileBufferWithLimit = async (url: string, maxBytes?: number): Promise => { + const requestUrl = getRequestUrl(url); + const headers: Record = { + 'Authorization': `Bearer ${cookieUtils.get('authToken') || ''}`, + }; + if (maxBytes) { + headers['Range'] = `bytes=0-${maxBytes - 1}`; + } + const response = await fetch(requestUrl, { + credentials: 'include', + headers, + }); + if (!response.ok && response.status !== 206) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + return response.arrayBuffer(); + }; + const loadExcelFile = async () => { setLoading(true); setError(false); setErrorMessage(''); + setCsvTruncated(false); try { - const arrayBuffer = await fetchFileBuffer(fileUrl); - - // CSV 文件需要处理编码问题(可能是 GBK/GB2312) + // CSV 文件需要处理编码问题(可能是 GBK/GB2312),且大文件只取前 1MB if (isCsvFile()) { + let arrayBuffer: ArrayBuffer; + let truncated = false; + try { + // 先尝试 Range 请求只取前 1MB + arrayBuffer = await fetchFileBufferWithLimit(fileUrl, CSV_PREVIEW_SIZE); + // 如果返回的数据刚好等于限制大小,说明可能被截断了 + if (arrayBuffer.byteLength >= CSV_PREVIEW_SIZE) { + truncated = true; + } + } catch { + // Range 请求不支持时,全量获取后截断 + const fullBuffer = await fetchFileBuffer(fileUrl); + if (fullBuffer.byteLength > CSV_PREVIEW_SIZE) { + arrayBuffer = fullBuffer.slice(0, CSV_PREVIEW_SIZE); + truncated = true; + } else { + arrayBuffer = fullBuffer; + } + } + let csvText: string; - // 先尝试 UTF-8 解码 const utf8Text = new TextDecoder('utf-8').decode(arrayBuffer); - // 检测是否有乱码特征(常见的 GBK 被错误解析为 UTF-8 的替换字符) if (utf8Text.includes('\uFFFD') || /[\x80-\xff]/.test(utf8Text.slice(0, 200))) { - // 尝试 GBK 解码 try { csvText = new TextDecoder('gbk').decode(arrayBuffer); } catch { @@ -334,19 +375,35 @@ const DocumentPreview: FC = ({ } else { csvText = utf8Text; } + + // 如果被截断,去掉最后一行不完整的数据 + if (truncated) { + const lastNewline = csvText.lastIndexOf('\n'); + if (lastNewline > 0) { + csvText = csvText.substring(0, lastNewline); + } + } + const workbook = XLSX.read(csvText, { type: 'string' }); const sheets = workbook.SheetNames.map(sheetName => { const worksheet = workbook.Sheets[sheetName]; - const data = XLSX.utils.sheet_to_json(worksheet, { header: 1 }) as any[][]; + let data = XLSX.utils.sheet_to_json(worksheet, { header: 1 }) as any[][]; + // 限制最大行数 + if (data.length > MAX_PREVIEW_ROWS + 1) { + data = data.slice(0, MAX_PREVIEW_ROWS + 1); // +1 保留表头 + truncated = true; + } return { sheetName, data }; }); + setCsvTruncated(truncated); setExcelData(sheets); setLoading(false); return; } + const arrayBuffer = await fetchFileBuffer(fileUrl); const workbook = XLSX.read(arrayBuffer, { type: 'array' }); - const sheets = workbook.SheetNames.map(sheetName => { + const sheets = workbook.SheetNames.map((sheetName: string) => { const worksheet = workbook.Sheets[sheetName]; const data = XLSX.utils.sheet_to_json(worksheet, { header: 1 }) as any[][]; return { sheetName, data }; @@ -522,9 +579,14 @@ const DocumentPreview: FC = ({ ) )} - {/* Excel 预览 */} + {/* Excel/CSV 预览 */} {isExcelFile() && !error && !loading && (
+ {csvTruncated && ( +
+ 文件较大,仅预览前 {MAX_PREVIEW_ROWS} 行数据 +
+ )} {excelData.map((sheet, index) => (

{sheet.sheetName}

@@ -541,6 +603,7 @@ const DocumentPreview: FC = ({ scroll={{ x: 'max-content' }} size="small" bordered + virtual /> )}
diff --git a/web/src/views/KnowledgeBase/[knowledgeBaseId]/DocumentDetails.tsx b/web/src/views/KnowledgeBase/[knowledgeBaseId]/DocumentDetails.tsx index 08df86cd..dcee69dd 100644 --- a/web/src/views/KnowledgeBase/[knowledgeBaseId]/DocumentDetails.tsx +++ b/web/src/views/KnowledgeBase/[knowledgeBaseId]/DocumentDetails.tsx @@ -11,7 +11,7 @@ import { useNavigate, useParams, useLocation } from 'react-router-dom'; import { useTranslation } from 'react-i18next'; import { useBreadcrumbManager, type BreadcrumbPath } from '@/hooks/useBreadcrumbManager'; import { Button, Spin, message, Switch } from 'antd'; -import { getDocumentDetail, getDocumentChunkList, downloadFile, updateDocument, updateDocumentChunk, createDocumentChunk } from '@/api/knowledgeBase'; +import { getDocumentDetail, getDocumentChunkList, downloadFile, updateDocument, updateDocumentChunk, createDocumentChunk, getFileUrl } from '@/api/knowledgeBase'; import type { KnowledgeBaseDocumentData, RecallTestData } from '@/views/KnowledgeBase/types'; import { formatDateTime } from '@/utils/format'; import InfoPanel, { type InfoItem } from '../components/InfoPanel'; @@ -138,7 +138,7 @@ const DocumentDetails: FC = () => { const response = await getDocumentDetail(documentId); setDocument(response); setInfoItems(formatDocumentInfo(response)); - const url = `${imagePath}/api/files/${response.file_id}` + const url = `${window.location.origin}/api/files/${response.file_id}`; setFileUrl(url); setParserMode(response?.parser_config?.auto_questions || 0) // ChunkList will be called automatically in useEffect based on document.progress From fb6ca3909af46952a292e9110a8a7b91222216f6 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Fri, 20 Mar 2026 14:20:23 +0800 Subject: [PATCH 26/43] fix(app): The copy processing features of the application --- api/app/services/app_service.py | 242 +++++++++----------------------- 1 file changed, 63 insertions(+), 179 deletions(-) diff --git a/api/app/services/app_service.py b/api/app/services/app_service.py index 68d255f8..155ef617 100644 --- a/api/app/services/app_service.py +++ b/api/app/services/app_service.py @@ -833,8 +833,6 @@ class AppService: # 跨工作空间时,获取目标工作空间的 tenant_id 用于判断模型配置是否可用 target_tenant_id = None - available_model_ids: set = set() - available_kb_ids: set = set() if is_cross_workspace: target_ws = self.db.get(Workspace, target_workspace_id) if not target_ws: @@ -849,28 +847,29 @@ class AppService: if source_config: if is_cross_workspace: - # Batch-collect and preload all referenced resources - model_ids, kb_ids = self._collect_resource_ids_from_config( - source_config.default_model_config_id, - source_config.knowledge_retrieval, - source_config.tools + # 跨工作空间:model/tools/skills 属于 tenant 级别直接保留, + # knowledge_bases 属于 workspace 级别需过滤,memory_config 需清空 + _, kb_ids = self._collect_resource_ids_from_config( + None, source_config.knowledge_retrieval ) - available_model_ids, available_kb_ids = self._preload_cross_workspace_resources( - target_tenant_id, target_workspace_id, model_ids, kb_ids - ) - new_model_config_id = self._is_model_available( - source_config.default_model_config_id, available_model_ids + _, available_kb_ids = self._preload_cross_workspace_resources( + target_tenant_id, target_workspace_id, set(), kb_ids ) + new_model_config_id = source_config.default_model_config_id new_knowledge_retrieval = self._clean_knowledge_retrieval( source_config.knowledge_retrieval, available_kb_ids ) - new_tools = self._clean_tools( - source_config.tools, available_kb_ids + new_tools = copy.deepcopy(source_config.tools) if source_config.tools else [] + new_memory = self._clean_memory_cross_workspace( + source_config.memory, target_workspace_id ) + new_skills = copy.deepcopy(source_config.skills) if source_config.skills else {} else: new_model_config_id = source_config.default_model_config_id new_knowledge_retrieval = copy.deepcopy(source_config.knowledge_retrieval) if source_config.knowledge_retrieval else None new_tools = copy.deepcopy(source_config.tools) if source_config.tools else [] + new_memory = copy.deepcopy(source_config.memory) if source_config.memory else None + new_skills = copy.deepcopy(source_config.skills) if source_config.skills else {} new_config = AgentConfig( id=uuid.uuid4(), @@ -879,9 +878,11 @@ class AppService: default_model_config_id=new_model_config_id, model_parameters=copy.deepcopy(source_config.model_parameters) if source_config.model_parameters else None, knowledge_retrieval=new_knowledge_retrieval, - memory=copy.deepcopy(source_config.memory) if source_config.memory else None, + memory=new_memory, variables=copy.deepcopy(source_config.variables) if source_config.variables else [], tools=new_tools, + skills=new_skills, + features=copy.deepcopy(source_config.features) if source_config.features else {}, is_active=True, created_at=now, updated_at=now, @@ -894,25 +895,10 @@ class AppService: ).first() if source_config: - if is_cross_workspace: - model_ids, kb_ids = self._collect_resource_ids_from_workflow_nodes( - source_config.nodes - ) - available_model_ids, available_kb_ids = self._preload_cross_workspace_resources( - target_tenant_id, target_workspace_id, model_ids, kb_ids - ) - new_nodes = self._clean_workflow_nodes_for_cross_workspace( - source_config.nodes or [], - available_model_ids, - available_kb_ids - ) - else: - new_nodes = copy.deepcopy(source_config.nodes) if source_config.nodes else [] - new_config = WorkflowConfig( id=uuid.uuid4(), app_id=new_app.id, - nodes=new_nodes, + nodes=copy.deepcopy(source_config.nodes) if source_config.nodes else [], edges=copy.deepcopy(source_config.edges) if source_config.edges else [], variables=copy.deepcopy(source_config.variables) if source_config.variables else [], execution_config=copy.deepcopy(source_config.execution_config) if source_config.execution_config else {}, @@ -929,24 +915,15 @@ class AppService: ).first() if source_config: - if is_cross_workspace: - model_ids = {source_config.default_model_config_id} if source_config.default_model_config_id else set() - available_model_ids, _ = self._preload_cross_workspace_resources( - target_tenant_id, target_workspace_id, model_ids, set() - ) - new_model_config_id = self._is_model_available( - source_config.default_model_config_id, available_model_ids - ) - else: - new_model_config_id = source_config.default_model_config_id - + # multi_agent 的 model_config_id/sub_agents/routing_rules 均属于 tenant 级别直接保留 + # 跨空间时 master_agent_id(AppRelease)属于源空间,需清空 new_config = MultiAgentConfig( id=uuid.uuid4(), app_id=new_app.id, master_agent_id=source_config.master_agent_id if not is_cross_workspace else None, master_agent_name=source_config.master_agent_name, - default_model_config_id=new_model_config_id, - model_parameters=source_config.model_parameters, + default_model_config_id=source_config.default_model_config_id, + model_parameters=copy.deepcopy(source_config.model_parameters) if source_config.model_parameters else None, orchestration_mode=source_config.orchestration_mode, sub_agents=copy.deepcopy(source_config.sub_agents) if source_config.sub_agents else [], routing_rules=copy.deepcopy(source_config.routing_rules) if source_config.routing_rules else None, @@ -1037,8 +1014,7 @@ class AppService: @staticmethod def _collect_resource_ids_from_config( model_config_id: Optional[uuid.UUID], - knowledge_retrieval: Optional[dict], - tools: Optional[list] + knowledge_retrieval: Optional[dict] ) -> tuple: """Extract all model config IDs and knowledge base IDs from an app config.""" model_ids: set = set() @@ -1048,62 +1024,12 @@ class AppService: model_ids.add(model_config_id) if knowledge_retrieval and isinstance(knowledge_retrieval, dict): - if "kb_ids" in knowledge_retrieval: - for kid in knowledge_retrieval.get("kb_ids", []): - if kid: - kb_ids.add(str(kid)) - if knowledge_retrieval.get("knowledge_id"): - kb_ids.add(str(knowledge_retrieval["knowledge_id"])) - - if tools: - for tool in tools: - if isinstance(tool, dict): - kid = tool.get("knowledge_id") or tool.get("kb_id") - if kid: - kb_ids.add(str(kid)) + if "knowledge_bases" in knowledge_retrieval: + for kid in knowledge_retrieval.get("knowledge_bases", []): + kb_ids.add(str(kid.get("kb_id"))) return model_ids, kb_ids - @staticmethod - def _collect_resource_ids_from_workflow_nodes(nodes: list) -> tuple: - """Extract all model config IDs and knowledge base IDs from workflow nodes.""" - model_ids: set = set() - kb_ids: set = set() - - for node in (nodes or []): - if not isinstance(node, dict): - continue - data = node.get("data", {}) - if not isinstance(data, dict): - continue - for key in ("model_config_id", "default_model_config_id"): - val = data.get(key) - if val: - try: - model_ids.add(uuid.UUID(str(val))) - except (ValueError, AttributeError): - pass - kr = data.get("knowledge_retrieval") - if isinstance(kr, dict): - for kid in kr.get("kb_ids", []): - if kid: - kb_ids.add(str(kid)) - if kr.get("knowledge_id"): - kb_ids.add(str(kr["knowledge_id"])) - if data.get("knowledge_id"): - kb_ids.add(str(data["knowledge_id"])) - for kid in data.get("kb_ids", []): - if kid: - kb_ids.add(str(kid)) - - return model_ids, kb_ids - - @staticmethod - def _is_model_available(model_config_id: Optional[uuid.UUID], available_model_ids: set) -> Optional[uuid.UUID]: - if not model_config_id: - return None - return model_config_id if model_config_id in available_model_ids else None - @staticmethod def _is_kb_available(kb_id: Optional[str], available_kb_ids: set) -> Optional[str]: if not kb_id: @@ -1124,95 +1050,53 @@ class AppService: cleaned = copy.deepcopy(knowledge_retrieval) - if "kb_ids" in cleaned and isinstance(cleaned["kb_ids"], list): - cleaned["kb_ids"] = [ - kid for kid in cleaned["kb_ids"] - if self._is_kb_available(kid, available_kb_ids) + if "knowledge_bases" in cleaned and isinstance(cleaned["knowledge_bases"], list): + cleaned["knowledge_bases"] = [ + kb for kb in cleaned["knowledge_bases"] + if self._is_kb_available(kb.get("kb_id"), available_kb_ids) ] - if "knowledge_id" in cleaned: - cleaned["knowledge_id"] = self._is_kb_available( - cleaned.get("knowledge_id"), available_kb_ids - ) - return cleaned - def _clean_tools( + def _clean_memory_cross_workspace( self, - tools: Optional[list], - available_kb_ids: set - ) -> list: - """Clean tools config, keeping built-in tools and tools with available KBs.""" - if not tools: - return [] + memory: Optional[dict], + target_workspace_id: uuid.UUID + ) -> Optional[dict]: + """Clear memory_config_id/memory_content if it doesn't belong to target workspace.""" + if not memory: + return None - cleaned = [] - for tool in tools: - if not isinstance(tool, dict): - cleaned.append(tool) - continue + from app.models.memory_config_model import MemoryConfig - tool_type = tool.get("type", "") - if tool_type in ("builtin", "built_in", "system"): - cleaned.append(copy.deepcopy(tool)) - continue + cleaned = copy.deepcopy(memory) + # 兼容旧字段 memory_content 和新字段 memory_config_id + mid = cleaned.get("memory_config_id") or cleaned.get("memory_content") + if mid: + try: + mid_uuid = uuid.UUID(str(mid)) + except (ValueError, AttributeError): + exists = self.db.query(MemoryConfig).filter( + MemoryConfig.config_id_old == int(mid), + MemoryConfig.workspace_id == target_workspace_id + ).first() + if not exists: + cleaned["memory_config_id"] = None + cleaned.pop("memory_content", None) + cleaned["enabled"] = False + return cleaned - kb_id = tool.get("knowledge_id") or tool.get("kb_id") - if kb_id: - if self._is_kb_available(kb_id, available_kb_ids): - cleaned.append(copy.deepcopy(tool)) - continue + exists = self.db.query( + self.db.query(MemoryConfig).filter( + MemoryConfig.config_id == mid_uuid, + MemoryConfig.workspace_id == target_workspace_id + ).exists() + ).scalar() + if not exists: + cleaned["memory_config_id"] = None + cleaned.pop("memory_content", None) + cleaned["enabled"] = False - cleaned.append(copy.deepcopy(tool)) - - return cleaned - - def _clean_workflow_nodes_for_cross_workspace( - self, - nodes: list, - available_model_ids: set, - available_kb_ids: set - ) -> list: - """Clean workflow nodes, using pre-loaded resource sets. Uses deepcopy to avoid mutating source.""" - if not nodes: - return [] - - cleaned = [] - for node in nodes: - if not isinstance(node, dict): - cleaned.append(node) - continue - - node_copy = copy.deepcopy(node) - data = node_copy.get("data") - if not isinstance(data, dict): - cleaned.append(node_copy) - continue - - for key in ("model_config_id", "default_model_config_id"): - if key in data and data[key]: - try: - mid = uuid.UUID(str(data[key])) - except (ValueError, AttributeError): - data[key] = None - continue - data[key] = str(mid) if mid in available_model_ids else None - - if "knowledge_retrieval" in data and data["knowledge_retrieval"]: - data["knowledge_retrieval"] = self._clean_knowledge_retrieval( - data["knowledge_retrieval"], available_kb_ids - ) - if "knowledge_id" in data: - data["knowledge_id"] = self._is_kb_available( - data.get("knowledge_id"), available_kb_ids - ) - if "kb_ids" in data and isinstance(data["kb_ids"], list): - data["kb_ids"] = [ - kid for kid in data["kb_ids"] - if self._is_kb_available(kid, available_kb_ids) - ] - - cleaned.append(node_copy) return cleaned def list_apps( From bd1f649bd0942fbf8217814376bb25c2bb68ec87 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Fri, 20 Mar 2026 15:14:50 +0800 Subject: [PATCH 27/43] fix(app): The processing features of the application --- api/app/repositories/workflow_repository.py | 3 +++ api/app/services/app_dsl_service.py | 2 ++ api/app/services/app_service.py | 1 + api/app/services/workflow_service.py | 4 ++++ 4 files changed, 10 insertions(+) diff --git a/api/app/repositories/workflow_repository.py b/api/app/repositories/workflow_repository.py index b22673e6..2674dd68 100644 --- a/api/app/repositories/workflow_repository.py +++ b/api/app/repositories/workflow_repository.py @@ -43,6 +43,7 @@ class WorkflowConfigRepository: edges: list[dict[str, Any]], variables: list[dict[str, Any]] | None = None, execution_config: dict[str, Any] | None = None, + features: dict[str, Any] | None = None, triggers: list[dict[str, Any]] | None = None ) -> WorkflowConfig: """创建或更新工作流配置 @@ -53,6 +54,7 @@ class WorkflowConfigRepository: edges: 边列表 variables: 变量列表 execution_config: 执行配置 + features: 功能特性 triggers: 触发器列表 Returns: @@ -82,6 +84,7 @@ class WorkflowConfigRepository: edges=edges, variables=variables or [], execution_config=execution_config or {}, + feaeatures=features or {}, triggers=triggers or [] ) self.db.add(config) diff --git a/api/app/services/app_dsl_service.py b/api/app/services/app_dsl_service.py index 13363667..8c198be4 100644 --- a/api/app/services/app_dsl_service.py +++ b/api/app/services/app_dsl_service.py @@ -271,6 +271,7 @@ class AppDslService: variables=cfg.get("variables", []), tools=self._resolve_tools(cfg.get("tools", []), tenant_id, warnings), skills=self._resolve_skills(cfg.get("skills", {}), tenant_id, warnings), + features=cfg.get("features", {}), is_active=True, created_at=now, updated_at=now, @@ -311,6 +312,7 @@ class AppDslService: edges=[e.model_dump() for e in result.edges], variables=[v.model_dump() for v in result.variables], execution_config=wf.get("execution_config", {}), + features=wf.get("features", {}), triggers=wf.get("triggers", []), validate=False, ) diff --git a/api/app/services/app_service.py b/api/app/services/app_service.py index 155ef617..19aaac42 100644 --- a/api/app/services/app_service.py +++ b/api/app/services/app_service.py @@ -902,6 +902,7 @@ class AppService: edges=copy.deepcopy(source_config.edges) if source_config.edges else [], variables=copy.deepcopy(source_config.variables) if source_config.variables else [], execution_config=copy.deepcopy(source_config.execution_config) if source_config.execution_config else {}, + features=copy.deepcopy(source_config.features) if source_config.features else {}, triggers=copy.deepcopy(source_config.triggers) if source_config.triggers else [], is_active=True, created_at=now, diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py index 04a778a1..0e1306b7 100644 --- a/api/app/services/workflow_service.py +++ b/api/app/services/workflow_service.py @@ -55,6 +55,7 @@ class WorkflowService: edges: list[dict[str, Any]], variables: list[dict[str, Any]] | None = None, execution_config: dict[str, Any] | None = None, + features: dict[str, Any] | None = None, triggers: list[dict[str, Any]] | None = None, validate: bool = True ) -> WorkflowConfig: @@ -66,6 +67,7 @@ class WorkflowService: edges: 边列表 variables: 变量列表 execution_config: 执行配置 + features: 功能特性 triggers: 触发器列表 validate: 是否验证配置 @@ -81,6 +83,7 @@ class WorkflowService: "edges": edges, "variables": variables or [], "execution_config": execution_config or {}, + "features": features or {}, "triggers": triggers or [] } @@ -101,6 +104,7 @@ class WorkflowService: edges=edges, variables=variables, execution_config=execution_config, + features=features, triggers=triggers ) From 0f1b1d7d108bf2e7dabaa30f21e6d2c24dd0feca Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Fri, 20 Mar 2026 15:36:04 +0800 Subject: [PATCH 28/43] fix(app): The processing features of the application --- api/app/repositories/workflow_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/app/repositories/workflow_repository.py b/api/app/repositories/workflow_repository.py index 2674dd68..4e24faa0 100644 --- a/api/app/repositories/workflow_repository.py +++ b/api/app/repositories/workflow_repository.py @@ -84,7 +84,7 @@ class WorkflowConfigRepository: edges=edges, variables=variables or [], execution_config=execution_config or {}, - feaeatures=features or {}, + features=features or {}, triggers=triggers or [] ) self.db.add(config) From bd31aa5abf44778fc3e224392a1f661410ad1f2f Mon Sep 17 00:00:00 2001 From: wxy Date: Fri, 20 Mar 2026 16:08:40 +0800 Subject: [PATCH 29/43] feat: remove username uniqueness constraint for community edition - Remove unique=True from username column in User model - Remove username duplicate check in create_user and create_superuser - Add migration to drop unique index on username, keep email unique --- api/app/models/user_model.py | 2 +- api/app/services/user_service.py | 30 ++---------------------------- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/api/app/models/user_model.py b/api/app/models/user_model.py index b6de28ec..81319789 100644 --- a/api/app/models/user_model.py +++ b/api/app/models/user_model.py @@ -9,7 +9,7 @@ class User(Base): __tablename__ = "users" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True) - username = Column(String, unique=True, index=True, nullable=False) + username = Column(String, index=True, nullable=False) # 社区版:用户名不唯一,仅邮箱唯一 email = Column(String, unique=True, index=True, nullable=False) hashed_password = Column(String, nullable=False) is_active = Column(Boolean, default=True, nullable=False) diff --git a/api/app/services/user_service.py b/api/app/services/user_service.py index e23b1ac3..b5522b74 100644 --- a/api/app/services/user_service.py +++ b/api/app/services/user_service.py @@ -78,18 +78,7 @@ def create_user(db: Session, user: UserCreate) -> User: business_logger.info(f"创建用户: {user.username}, email: {user.email}") try: - # 检查用户名是否已存在 - business_logger.debug(f"检查用户名是否已存在: {user.username}") - db_user_by_username = user_repository.get_user_by_username(db, username=user.username) - if db_user_by_username: - business_logger.warning(f"用户名已存在: {user.username}") - raise BusinessException( - "用户名已存在", - code=BizCode.DUPLICATE_NAME, - context={"username": user.username, "email": user.email} - ) - - # 检查邮箱是否已注册 + # 检查邮箱是否已注册(邮箱保持唯一) business_logger.debug(f"检查邮箱是否已注册: {user.email}") db_user_by_email = user_repository.get_user_by_email(db, email=user.email) if db_user_by_email: @@ -164,22 +153,7 @@ def create_superuser(db: Session, user: UserCreate, current_user: User) -> User: ) try: - # 检查用户名是否已存在 - business_logger.debug(f"检查用户名是否已存在: {user.username}") - db_user_by_username = user_repository.get_user_by_username(db, username=user.username) - if db_user_by_username: - business_logger.warning(f"用户名已存在: {user.username}") - raise BusinessException( - "用户名已存在", - code=BizCode.DUPLICATE_NAME, - context={ - "username": user.username, - "email": user.email, - "created_by": str(current_user.id) - } - ) - - # 检查邮箱是否已注册 + # 检查邮箱是否已注册(邮箱保持唯一) business_logger.debug(f"检查邮箱是否已注册: {user.email}") db_user_by_email = user_repository.get_user_by_email(db, email=user.email) if db_user_by_email: From 911d5e0b34f7c183787aea78c7067955196e10bb Mon Sep 17 00:00:00 2001 From: wxy Date: Fri, 20 Mar 2026 17:07:23 +0800 Subject: [PATCH 30/43] feat(app): support searching application list by API Key --- api/app/controllers/app_controller.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/api/app/controllers/app_controller.py b/api/app/controllers/app_controller.py index e9b539df..1eb45b89 100644 --- a/api/app/controllers/app_controller.py +++ b/api/app/controllers/app_controller.py @@ -57,6 +57,7 @@ def list_apps( page: int = 1, pagesize: int = 10, ids: Optional[str] = None, + api_key: Optional[str] = None, db: Session = Depends(get_db), current_user=Depends(get_current_user), ): @@ -65,10 +66,28 @@ def list_apps( - 默认包含本工作空间的应用和分享给本工作空间的应用 - 设置 include_shared=false 可以只查看本工作空间的应用 - 当提供 ids 参数时,按逗号分割获取指定应用,不分页 + - 当提供 api_key 参数时,查找该 API Key 关联的应用 """ + from sqlalchemy import select as sa_select + from app.models.api_key_model import ApiKey + workspace_id = current_user.current_workspace_id service = app_service.AppService(db) + # 通过 API Key 搜索:查出关联的 app id,复用 ids 分支返回 + if api_key: + matched = db.execute( + sa_select(ApiKey.resource_id).where( + ApiKey.workspace_id == workspace_id, + ApiKey.api_key.like(f"%{api_key}%"), + ApiKey.resource_id.isnot(None), + ) + ).scalars().all() + app_ids = [str(rid) for rid in matched] + items_orm = app_service.get_apps_by_ids(db, app_ids, workspace_id) if app_ids else [] + items = [service._convert_to_schema(app, workspace_id) for app in items_orm] + return success(data=items) + # 当 ids 存在且不为 None 时,根据 ids 获取应用 if ids is not None: app_ids = [app_id.strip() for app_id in ids.split(',') if app_id.strip()] From bf5c4628c35541434be0b228141c4dae417e30e7 Mon Sep 17 00:00:00 2001 From: wxy Date: Fri, 20 Mar 2026 18:02:03 +0800 Subject: [PATCH 31/43] fix: use exact match instead of LIKE for api_key lookup, reuse ids branch flow --- api/app/controllers/app_controller.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/api/app/controllers/app_controller.py b/api/app/controllers/app_controller.py index 1eb45b89..3ba9c3a9 100644 --- a/api/app/controllers/app_controller.py +++ b/api/app/controllers/app_controller.py @@ -74,19 +74,16 @@ def list_apps( workspace_id = current_user.current_workspace_id service = app_service.AppService(db) - # 通过 API Key 搜索:查出关联的 app id,复用 ids 分支返回 + # 通过 API Key 搜索:精确匹配,将 resource_id 注入 ids 走统一分页流程 if api_key: - matched = db.execute( + matched_id = db.execute( sa_select(ApiKey.resource_id).where( ApiKey.workspace_id == workspace_id, - ApiKey.api_key.like(f"%{api_key}%"), + ApiKey.api_key == api_key, ApiKey.resource_id.isnot(None), ) - ).scalars().all() - app_ids = [str(rid) for rid in matched] - items_orm = app_service.get_apps_by_ids(db, app_ids, workspace_id) if app_ids else [] - items = [service._convert_to_schema(app, workspace_id) for app in items_orm] - return success(data=items) + ).scalar_one_or_none() + ids = str(matched_id) if matched_id else "" # 当 ids 存在且不为 None 时,根据 ids 获取应用 if ids is not None: From 240f1d431b2b8f5389a747c1edd3065f9974b3f5 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Fri, 20 Mar 2026 19:45:41 +0800 Subject: [PATCH 32/43] fix(app): Multimodal file storage --- api/app/services/app_chat_service.py | 139 +++++++++++++++++------ api/app/services/conversation_service.py | 56 +++++++-- api/app/services/draft_run_service.py | 58 ++++++---- api/app/services/multimodal_service.py | 65 +++++++++++ api/app/services/shared_chat_service.py | 4 +- 5 files changed, 250 insertions(+), 72 deletions(-) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index d81ba7b7..a1002919 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -118,28 +118,54 @@ class AppChatService: ) + model_info = ModelInfo( + model_name=api_key_obj.model_name, + provider=api_key_obj.provider, + api_key=api_key_obj.api_key, + api_base=api_key_obj.api_base, + capability=api_key_obj.capability, + is_omni=api_key_obj.is_omni, + model_type=ModelType.LLM + ) + # 加载历史消息 messages = self.conversation_service.get_messages( conversation_id=conversation_id, limit=10 ) - history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} - for msg in messages - ] + history = [] + for msg in messages: + content = [{"type": "text", "text": msg.content}] + + # 处理 meta_data 中的 files + if msg.meta_data and msg.meta_data.get("files"): + files = msg.meta_data.get("files", []) + # 使用 MultimodalService 处理文件 + multimodal_service = MultimodalService(self.db, api_config=model_info) + + # 将 files 转换为 FileInput 格式 + file_inputs = [] + for file in files: + from app.schemas.app_schema import FileInput, TransferMethod + file_input = FileInput( + type=file.get("type"), + transfer_method=TransferMethod.REMOTE_URL, + url=file.get("url") + ) + file_inputs.append(file_input) + + history_processed_files = await multimodal_service.history_process_files(files=file_inputs) + + content.extend(history_processed_files) + + history.append({ + "role": msg.role, + "content": content + }) # 处理多模态文件 processed_files = None if files: - model_info = ModelInfo( - model_name=api_key_obj.model_name, - provider=api_key_obj.provider, - api_key=api_key_obj.api_key, - api_base=api_key_obj.api_base, - capability=api_key_obj.capability, - is_omni=api_key_obj.is_omni, - model_type=ModelType.LLM - ) multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(user_id, files) logger.info(f"处理了 {len(processed_files)} 个文件") @@ -187,8 +213,13 @@ class AppChatService: "usage": result.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}), "audio_url": None } - if processed_files: - human_meta["files"].extend(processed_files) + if files: + for f in files: + # url = await MultimodalService(self.db).get_file_url(f) + human_meta["files"].append({ + "type": f.type, + "url": f.url + }) # 保存消息 if audio_url: @@ -308,31 +339,54 @@ class AppChatService: streaming=True ) + model_info = ModelInfo( + model_name=api_key_obj.model_name, + provider=api_key_obj.provider, + api_key=api_key_obj.api_key, + api_base=api_key_obj.api_base, + capability=api_key_obj.capability, + is_omni=api_key_obj.is_omni, + model_type=ModelType.LLM + ) + # 加载历史消息 + messages = self.conversation_service.get_messages( + conversation_id=conversation_id, + limit=10 + ) history = [] - memory_config = {"enabled": True, 'max_history': 10} - if memory_config.get("enabled"): - messages = self.conversation_service.get_messages( - conversation_id=conversation_id, - limit=memory_config.get("max_history", 10) - ) - history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} - for msg in messages - ] + for msg in messages: + content = [{"type": "text", "text": msg.content}] + + # 处理 meta_data 中的 files + if msg.meta_data and msg.meta_data.get("files"): + files = msg.meta_data.get("files", []) + # 使用 MultimodalService 处理文件 + multimodal_service = MultimodalService(self.db, api_config=model_info) + + # 将 files 转换为 FileInput 格式 + file_inputs = [] + for file in files: + from app.schemas.app_schema import FileInput, TransferMethod + file_input = FileInput( + type=file.get("type"), + transfer_method=TransferMethod.REMOTE_URL, + url=file.get("url") + ) + file_inputs.append(file_input) + + history_processed_files = await multimodal_service.history_process_files(files=file_inputs) + + content.extend(history_processed_files) + + history.append({ + "role": msg.role, + "content": content + }) # 处理多模态文件 processed_files = None if files: - model_info = ModelInfo( - model_name=api_key_obj.model_name, - provider=api_key_obj.provider, - api_key=api_key_obj.api_key, - api_base=api_key_obj.api_base, - capability=api_key_obj.capability, - is_omni=api_key_obj.is_omni, - model_type=ModelType.LLM - ) multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(user_id, files) logger.info(f"处理了 {len(processed_files)} 个文件") @@ -342,8 +396,14 @@ class AppChatService: total_tokens = 0 text_queue: asyncio.Queue = asyncio.Queue() + api_key_config = { + "model_name": api_key_obj.model_name, + "api_key": api_key_obj.api_key, + "api_base": api_key_obj.api_base, + "provider": api_key_obj.provider, + } stream_audio_url, tts_task = await self.agent_service._generate_tts_streaming( - features_config, api_key_obj, + features_config, api_key_config, text_queue=text_queue, tenant_id=tenant_id, workspace_id=workspace_id ) @@ -395,8 +455,13 @@ class AppChatService: "audio_url": None } - if processed_files: - human_meta["files"].extend(processed_files) + if files: + for f in files: + # url = await MultimodalService(self.db).get_file_url(f) + human_meta["files"].append({ + "type": f.type, + "url": f.url + }) if stream_audio_url: assistant_meta["audio_url"] = stream_audio_url diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index 30c3feba..f8a01a40 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -21,6 +21,7 @@ from app.models.conversation_model import ConversationDetail from app.models.prompt_optimizer_model import RoleType from app.repositories.conversation_repository import ConversationRepository, MessageRepository from app.schemas.conversation_schema import ConversationOut +from app.schemas.model_schema import ModelInfo from app.services import workspace_service from app.services.model_service import ModelConfigService @@ -269,10 +270,11 @@ class ConversationService: return messages - def get_conversation_history( + async def get_conversation_history( self, conversation_id: uuid.UUID, - max_history: Optional[int] = None + max_history: Optional[int] = None, + api_config: Optional[ModelInfo] = None ) -> List[dict]: """ Retrieve historical conversation messages formatted as dictionaries. @@ -280,6 +282,7 @@ class ConversationService: Args: conversation_id (uuid.UUID): Conversation UUID. max_history (Optional[int]): Maximum number of messages to retrieve. + api_config (Optional[ModelInfo]): Model API configuration for multimodal processing. Returns: List[dict]: List of message dictionaries with keys 'role' and 'content'. @@ -290,13 +293,37 @@ class ConversationService: ) # 转换为字典格式 - history = [ - { + history = [] + for msg in messages: + content = [{"type": "text", "text": msg.content}] + + # 处理 meta_data 中的 files + if msg.meta_data and msg.meta_data.get("files"): + files = msg.meta_data.get("files", []) + if api_config: + # 使用 MultimodalService 处理文件 + from app.services.multimodal_service import MultimodalService + multimodal_service = MultimodalService(self.db, api_config=api_config) + + # 将 files 转换为 FileInput 格式 + file_inputs = [] + for file in files: + from app.schemas.app_schema import FileInput, TransferMethod + file_input = FileInput( + type=file.get("type"), + transfer_method=TransferMethod.REMOTE_URL, + url=file.get("url") + ) + file_inputs.append(file_input) + + processed_files = await multimodal_service.history_process_files(files=file_inputs) + + content.extend(processed_files) + + history.append({ "role": msg.role, - "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else []) - } - for msg in messages - ] + "content": content + }) return history @@ -524,9 +551,18 @@ class ConversationService: type=ModelType(model_type) ) - conversation_messages = self.get_conversation_history( + conversation_messages = await self.get_conversation_history( conversation_id=conversation_id, - max_history=20 + max_history=20, + api_config=ModelInfo( + model_name=model_name, + provider=provider, + api_key=api_key, + api_base=api_base, + capability=api_config.capability, + is_omni=api_config.is_omni, + model_type=model_type + ) ) if len(conversation_messages) == 0: return ConversationOut( diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index 0af33357..5989f0f8 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -579,9 +579,20 @@ class AgentRunService: user_id=user_id ) + model_info = ModelInfo( + model_name=api_key_config["model_name"], + provider=api_key_config["provider"], + api_key=api_key_config["api_key"], + api_base=api_key_config["api_base"], + capability=api_key_config["capability"], + is_omni=api_key_config["is_omni"], + model_type=model_config.type + ) + # 6. 加载历史消息 history = await self._load_conversation_history( conversation_id=conversation_id, + api_config=model_info, max_history=10 ) @@ -589,15 +600,6 @@ class AgentRunService: processed_files = None if files: # 获取 provider 信息 - model_info = ModelInfo( - model_name=api_key_config["model_name"], - provider=api_key_config["provider"], - api_key=api_key_config["api_key"], - api_base=api_key_config["api_base"], - capability=api_key_config["capability"], - is_omni=api_key_config["is_omni"], - model_type=ModelType.LLM - ) provider = api_key_config.get("provider", "openai") multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(user_id, files) @@ -658,7 +660,7 @@ class AgentRunService: "total_tokens": 0 }) }, - files=processed_files, + files=files, audio_url=audio_url ) @@ -815,9 +817,20 @@ class AgentRunService: sub_agent=sub_agent ) + model_info = ModelInfo( + model_name=api_key_config["model_name"], + provider=api_key_config["provider"], + api_key=api_key_config["api_key"], + api_base=api_key_config["api_base"], + capability=api_key_config["capability"], + is_omni=api_key_config["is_omni"], + model_type=model_config.type + ) + # 6. 加载历史消息 history = await self._load_conversation_history( conversation_id=conversation_id, + api_config=model_info, max_history=memory_config.get("max_history", 10) ) @@ -825,15 +838,6 @@ class AgentRunService: processed_files = None if files: # 获取 provider 信息 - model_info = ModelInfo( - model_name=api_key_config["model_name"], - provider=api_key_config["provider"], - api_key=api_key_config["api_key"], - api_base=api_key_config["api_base"], - capability=api_key_config["capability"], - is_omni=api_key_config["is_omni"], - model_type=ModelType.LLM - ) provider = api_key_config.get("provider", "openai") multimodal_service = MultimodalService(self.db, model_info) processed_files = await multimodal_service.process_files(user_id, files) @@ -904,7 +908,7 @@ class AgentRunService: meta_data={ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens} }, - files=processed_files, + files=files, audio_url=stream_audio_url ) @@ -1115,6 +1119,7 @@ class AgentRunService: async def _load_conversation_history( self, conversation_id: str, + api_config: ModelInfo | None = None, max_history: int = 10 ) -> List[Dict[str, str]]: """加载会话历史消息 @@ -1129,9 +1134,11 @@ class AgentRunService: try: conversation_service = ConversationService(self.db) - history = conversation_service.get_conversation_history( + # 获取 API 配置用于多模态处理 + history = await conversation_service.get_conversation_history( conversation_id=uuid.UUID(conversation_id), - max_history=max_history + max_history=max_history, + api_config=api_config ) logger.debug( @@ -1182,7 +1189,12 @@ class AgentRunService: "files": [] } if files: - human_meta["files"].extend(files) + for f in files: + # url = await MultimodalService(self.db).get_file_url(f) + human_meta["files"].append({ + "type": f.type, + "url": f.url + }) # 保存用户消息 conversation_service.add_message( conversation_id=conv_uuid, diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index d8cca67e..6cb0a7f0 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -418,6 +418,71 @@ class MultimodalService: logger.info(f"成功处理 {len(result)}/{len(files)} 个文件,provider={self.provider}") return result + async def history_process_files( + self, + files: Optional[List[FileInput]], + ) -> List[Dict[str, Any]]: + """ + 处理文件列表,返回 LLM 可用的格式 + + Args: + files: 文件输入列表 + + Returns: + List[Dict]: LLM 可用的内容格式列表(根据 provider 返回不同格式) + """ + if not files: + return [] + + # 获取对应的策略 + # dashscope 的 omni 模型使用 OpenAI 兼容格式 + if self.provider == "dashscope" and self.is_omni: + strategy_class = OpenAIFormatStrategy + else: + strategy_class = PROVIDER_STRATEGIES.get(self.provider) + if not strategy_class: + logger.warning(f"未找到 provider '{self.provider}' 的策略,使用默认策略") + strategy_class = DashScopeFormatStrategy + + result = [] + for idx, file in enumerate(files): + strategy = strategy_class(file) + if not file.url: + file.url = await self.get_file_url(file) + try: + if file.type == FileType.IMAGE and "vision" in self.capability: + is_support, content = await self._process_image(file, strategy) + result.append(content) + elif file.type == FileType.DOCUMENT: + is_support, content = await self._process_document(file, strategy) + result.append(content) + elif file.type == FileType.AUDIO and "audio" in self.capability: + is_support, content = await self._process_audio(file, strategy) + result.append(content) + elif file.type == FileType.VIDEO and "video" in self.capability: + is_support, content = await self._process_video(file, strategy) + result.append(content) + else: + logger.warning(f"不支持的文件类型: {file.type}") + except Exception as e: + logger.error( + f"处理文件失败", + extra={ + "file_index": idx, + "file_type": file.type, + "error": str(e) + }, + exc_info=True + ) + # 继续处理其他文件,不中断整个流程 + result.append({ + "type": "text", + "text": f"[文件处理失败: {str(e)}]" + }) + + logger.info(f"成功处理 {len(result)}/{len(files)} 个文件,provider={self.provider}") + return result + def write_perceptual_memory( self, end_user_id: str, diff --git a/api/app/services/shared_chat_service.py b/api/app/services/shared_chat_service.py index 5e18ee42..0d659832 100644 --- a/api/app/services/shared_chat_service.py +++ b/api/app/services/shared_chat_service.py @@ -264,7 +264,7 @@ class SharedChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} + {"role": msg.role, "content": msg.content} for msg in messages ] @@ -472,7 +472,7 @@ class SharedChatService: limit=memory_config.get("max_history", 10) ) history = [ - {"role": msg.role, "content": [{"type": "text", "text": msg.content}] + (msg.meta_data.get("files", []) if msg.meta_data else [])} + {"role": msg.role, "content": msg.content} for msg in messages ] From fa29a399201b396d31a138b959f1220e19779cc4 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Fri, 20 Mar 2026 19:52:28 +0800 Subject: [PATCH 33/43] fix(app): release notes --- api/app/version_info.json | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/api/app/version_info.json b/api/app/version_info.json index 12793cb5..b4f6976f 100644 --- a/api/app/version_info.json +++ b/api/app/version_info.json @@ -1,4 +1,38 @@ { + "v0.2.8": { + "introduction": { + "codeName": "景玉", + "releaseDate": "2026-3-20", + "upgradePosition": "🐻 MemoryBear v0.2.8 社区版全面升级应用共享、多模态交互与平台基础设施,引入语音交互、感知记忆和云端存储,打造更强大的开放 AI 记忆平台", + "coreUpgrades": [ + "1. 应用共享与发布
* 应用共享(Agent、工作流、Agent 集群):全类型应用共享至其他空间
* 分享应用默认开启记忆功能:发布分享后记忆默认开启,关闭时提醒
* 工作流记忆分享规则:按记忆配置自动控制分享页记忆开关
* 分享会话联网搜索修复:恢复分享应用的联网搜索能力", + "2. 多模态与交互 💬
* 语音输入:模型接口和应用支持语音输入
* 语音回复:应用支持语音回复模态
* 多模态感知记忆:记忆系统支持视觉、音频、图片和文件的感知记忆
* 对话框文件展示:试运行和体验分享中正确展示上传文件", + "3. 平台与基础设施 ⚙️
* i18n 国际化:全面多语言多地区支持
* 云端文件存储(OSS + S3):支持阿里云 OSS 和 S3 云端上传
* Flower 容器监控:Celery 异步任务监控与管理", + "4. EndUser 身份迁移 🔐
* EndUser 从 app_id 迁移至 workspace_id:身份从应用级迁移至工作空间级", + "5. 情景记忆 🧠
* 情景记忆聚类算法:基于社区图谱的聚类算法,支持老用户图谱生成", + "6. 稳健性与缺陷修复 🔧
* MCP 服务删除后工具 404:修复删除 MCP 服务后接口报错
* 应用导出配置不一致:导出已保存配置而非画布状态
* 工作流节点 ID 重复:修复复制节点后 ID 冲突
* 条件分支连线错误:修复保存刷新后连线错乱
* 回复节点内容丢失:修复点击画布后内容消失
* 连接桩规则优化:禁止非法连接方向
* 知识库状态列宽度:锁定或自适应宽度
* 等待中文档预览:支持未完成解析文档预览
* 知识库关联修复:统一修复关联问题
* 多模态对话连续性:修复多模态内容后无法继续对话
* 时区统一:环境变量统一控制存储和任务时区
* 遗忘强度精度:修复小数显示过长", + "
", + "v0.2.8 社区版在应用共享和多模态交互方面实现重大升级,感知记忆扩展了平台的认知维度。后续将深化多智能体协作、情景记忆聚类,并持续优化平台稳定性与开放生态。", + "MemoryBear —— 让 AI 拥有记忆 🐻✨" + ] + }, + "introduction_en": { + "codeName": "JingYu", + "releaseDate": "2026-3-20", + "upgradePosition": "🐻 MemoryBear v0.2.8 Community delivers multimodal interaction, perceptual memory, cloud storage, and workspace-level identity for a more capable open AI memory platform", + "coreUpgrades": [ + "1. Application Sharing & Publishing
* Application Sharing (Agent, Workflow, Agent Cluster): Full sharing across all app types
* Memory Enabled by Default: Memory auto-enabled on shared apps with disable reminder
* Workflow Memory Sharing Rules: Auto-controlled based on memory configuration
* Shared Session Web Search Fix: Restored web search for shared apps", + "2. Multimodal & Interaction 💬
* Voice Input: Model interfaces and apps support voice input
* Voice Reply: Apps support voice reply modality
* Multimodal Perceptual Memory: Memory system supports visual, audio, image, and file perception
* File Display in Chat: Uploaded files display correctly in dry-run and sharing", + "3. Platform & Infrastructure ⚙️
* i18n Internationalization: Full multi-language multi-region support
* Cloud File Storage (OSS + S3): Alibaba Cloud OSS and S3 cloud uploads
* Flower Container Monitoring: Celery async task monitoring and management", + "4. EndUser Identity Migration 🔐
* EndUser Migration from app_id to workspace_id: Identity migrated to workspace level", + "5. Episodic Memory 🧠
* Episodic Memory Clustering: Community-graph-based clustering with legacy user support", + "6. Robustness & Bug Fixes 🔧
* MCP Service Deletion 404: Fixed tool endpoint error after MCP removal
* App Export Config Mismatch: Exports saved config instead of canvas state
* Workflow Duplicate Node ID: Fixed ID conflict on node duplication
* Conditional Branch Wiring: Fixed wiring reset after save/refresh
* Reply Node Content Loss: Fixed content disappearing on canvas click
* Port Connection Rules: Prohibited invalid connection directions
* Knowledge Base Status Width: Locked or adaptive column width
* Pending Document Preview: Preview support for unparsed documents
* Knowledge Base Association Fixes: Consolidated association fixes
* Multimodal Conversation Continuity: Fixed single-round limit after multimodal input
* Timezone Unification: Env-var controlled unified timezone
* Forgetting Strength Precision: Fixed excessive decimal display", + "
", + "v0.2.8 Community delivers major upgrades in application sharing and multimodal interaction, with perceptual memory expanding the platform's cognitive dimensions. Multi-agent collaboration, episodic clustering, and continued platform stability improvements are ahead.", + "MemoryBear — Give AI Memory 🐻✨" + ] + } + }, "v0.2.7": { "introduction": { "codeName": "武陵", From 8e7603bcc47b3776c7cae0ebc399f408645a7647 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Fri, 20 Mar 2026 20:17:42 +0800 Subject: [PATCH 34/43] fix(app): Multimodal file processing --- api/app/services/app_chat_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index a1002919..6fcf680b 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -360,13 +360,13 @@ class AppChatService: # 处理 meta_data 中的 files if msg.meta_data and msg.meta_data.get("files"): - files = msg.meta_data.get("files", []) + history_files = msg.meta_data.get("files", []) # 使用 MultimodalService 处理文件 multimodal_service = MultimodalService(self.db, api_config=model_info) # 将 files 转换为 FileInput 格式 file_inputs = [] - for file in files: + for file in history_files: from app.schemas.app_schema import FileInput, TransferMethod file_input = FileInput( type=file.get("type"), From 27672cfaa09aa6a53280d42bf145b6e0f0886b29 Mon Sep 17 00:00:00 2001 From: wxy Date: Mon, 23 Mar 2026 12:05:18 +0800 Subject: [PATCH 35/43] feat(app): add app message log query API --- api/app/controllers/__init__.py | 2 + api/app/controllers/app_log_controller.py | 129 ++++++++++++++++++++++ api/app/schemas/app_log_schema.py | 53 +++++++++ 3 files changed, 184 insertions(+) create mode 100644 api/app/controllers/app_log_controller.py create mode 100644 api/app/schemas/app_log_schema.py diff --git a/api/app/controllers/__init__.py b/api/app/controllers/__init__.py index 585de2ed..50e9e0b0 100644 --- a/api/app/controllers/__init__.py +++ b/api/app/controllers/__init__.py @@ -8,6 +8,7 @@ from fastapi import APIRouter from . import ( api_key_controller, app_controller, + app_log_controller, auth_controller, chunk_controller, document_controller, @@ -69,6 +70,7 @@ manager_router.include_router(chunk_controller.router) manager_router.include_router(test_controller.router) manager_router.include_router(knowledgeshare_controller.router) manager_router.include_router(app_controller.router) +manager_router.include_router(app_log_controller.router) manager_router.include_router(upload_controller.router) manager_router.include_router(memory_agent_controller.router) manager_router.include_router(memory_dashboard_controller.router) diff --git a/api/app/controllers/app_log_controller.py b/api/app/controllers/app_log_controller.py new file mode 100644 index 00000000..a8f6d532 --- /dev/null +++ b/api/app/controllers/app_log_controller.py @@ -0,0 +1,129 @@ +"""应用日志(消息记录)接口""" +import uuid +from typing import Optional + +from fastapi import APIRouter, Depends +from sqlalchemy import select, desc, func +from sqlalchemy.orm import Session + +from app.core.logging_config import get_business_logger +from app.core.response_utils import success +from app.db import get_db +from app.dependencies import get_current_user, cur_workspace_access_guard +from app.models.conversation_model import Conversation, Message +from app.schemas.app_log_schema import AppLogConversation, AppLogConversationDetail, AppLogMessage +from app.schemas.response_schema import PageData, PageMeta +from app.services.app_service import AppService + +router = APIRouter(prefix="/apps", tags=["App Logs"]) +logger = get_business_logger() + + +@router.get("/{app_id}/logs", summary="应用日志 - 会话列表") +@cur_workspace_access_guard() +def list_app_logs( + app_id: uuid.UUID, + page: int = 1, + pagesize: int = 20, + user_id: Optional[str] = None, + is_draft: Optional[bool] = None, + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """查看应用下所有会话记录(分页) + + - 支持按 user_id 筛选 + - 支持按 is_draft 筛选(草稿会话 / 发布会话) + - 按最新更新时间倒序排列 + """ + workspace_id = current_user.current_workspace_id + + # 验证应用访问权限 + service = AppService(db) + service.get_app(app_id, workspace_id) + + stmt = select(Conversation).where( + Conversation.app_id == app_id, + Conversation.workspace_id == workspace_id, + Conversation.is_active.is_(True), + ) + + if user_id: + stmt = stmt.where(Conversation.user_id == user_id) + + if is_draft is not None: + stmt = stmt.where(Conversation.is_draft == is_draft) + + total = int(db.execute( + select(func.count()).select_from(stmt.subquery()) + ).scalar_one()) + + stmt = stmt.order_by(desc(Conversation.updated_at)) + stmt = stmt.offset((page - 1) * pagesize).limit(pagesize) + + conversations = list(db.scalars(stmt).all()) + + items = [AppLogConversation.model_validate(c) for c in conversations] + meta = PageMeta(page=page, pagesize=pagesize, total=total, hasnext=(page * pagesize) < total) + + logger.info( + "查询应用日志会话列表", + extra={"app_id": str(app_id), "total": total, "page": page} + ) + + return success(data=PageData(page=meta, items=items)) + + +@router.get("/{app_id}/logs/{conversation_id}", summary="应用日志 - 会话消息详情") +@cur_workspace_access_guard() +def get_app_log_detail( + app_id: uuid.UUID, + conversation_id: uuid.UUID, + db: Session = Depends(get_db), + current_user=Depends(get_current_user), +): + """查看某会话的完整消息记录 + + - 返回会话基本信息 + 所有消息(按时间正序) + - 消息 meta_data 包含模型名、token 用量等信息 + """ + workspace_id = current_user.current_workspace_id + + # 验证应用访问权限 + service = AppService(db) + service.get_app(app_id, workspace_id) + + # 查询会话(确保属于该应用和工作空间) + conversation = db.scalars( + select(Conversation).where( + Conversation.id == conversation_id, + Conversation.app_id == app_id, + Conversation.workspace_id == workspace_id, + Conversation.is_active.is_(True), + ) + ).first() + + if not conversation: + from app.core.exceptions import ResourceNotFoundException + raise ResourceNotFoundException("会话", str(conversation_id)) + + # 查询消息(按时间正序) + messages = list(db.scalars( + select(Message) + .where(Message.conversation_id == conversation_id) + .order_by(Message.created_at) + ).all()) + + detail = AppLogConversationDetail.model_validate(conversation) + detail.messages = [AppLogMessage.model_validate(m) for m in messages] + + logger.info( + "查询应用日志会话详情", + extra={ + "app_id": str(app_id), + "conversation_id": str(conversation_id), + "message_count": len(messages) + } + ) + + return success(data=detail) diff --git a/api/app/schemas/app_log_schema.py b/api/app/schemas/app_log_schema.py new file mode 100644 index 00000000..e386b5e9 --- /dev/null +++ b/api/app/schemas/app_log_schema.py @@ -0,0 +1,53 @@ +"""应用日志(消息记录)Schema""" +import uuid +import datetime +from typing import Optional, Dict, Any, List + +from pydantic import BaseModel, Field, ConfigDict, field_serializer + + +class AppLogMessage(BaseModel): + """单条消息记录""" + model_config = ConfigDict(from_attributes=True) + + id: uuid.UUID + conversation_id: uuid.UUID + role: str = Field(description="角色: user / assistant / system") + content: str + meta_data: Optional[Dict[str, Any]] = None + created_at: datetime.datetime + + @field_serializer("created_at", when_used="json") + def _serialize_created_at(self, dt: datetime.datetime): + return int(dt.timestamp() * 1000) if dt else None + + @field_serializer("meta_data", when_used="json") + def _serialize_meta_data(self, data: Optional[Dict[str, Any]]): + return data or {} + + +class AppLogConversation(BaseModel): + """会话摘要(用于列表)""" + model_config = ConfigDict(from_attributes=True) + + id: uuid.UUID + app_id: uuid.UUID + user_id: Optional[str] = None + title: Optional[str] = None + message_count: int = 0 + is_draft: bool + created_at: datetime.datetime + updated_at: datetime.datetime + + @field_serializer("created_at", when_used="json") + def _serialize_created_at(self, dt: datetime.datetime): + return int(dt.timestamp() * 1000) if dt else None + + @field_serializer("updated_at", when_used="json") + def _serialize_updated_at(self, dt: datetime.datetime): + return int(dt.timestamp() * 1000) if dt else None + + +class AppLogConversationDetail(AppLogConversation): + """会话详情(包含消息列表)""" + messages: List[AppLogMessage] = [] From c70ac1339e766e4d4fdd478e233b6263fa23fc82 Mon Sep 17 00:00:00 2001 From: wxy Date: Mon, 23 Mar 2026 13:45:56 +0800 Subject: [PATCH 36/43] fix(app): validate pagination params and fix mutable default in schema --- api/app/controllers/app_log_controller.py | 6 +++--- api/app/schemas/app_log_schema.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/app/controllers/app_log_controller.py b/api/app/controllers/app_log_controller.py index a8f6d532..dfd10644 100644 --- a/api/app/controllers/app_log_controller.py +++ b/api/app/controllers/app_log_controller.py @@ -2,7 +2,7 @@ import uuid from typing import Optional -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Query from sqlalchemy import select, desc, func from sqlalchemy.orm import Session @@ -23,8 +23,8 @@ logger = get_business_logger() @cur_workspace_access_guard() def list_app_logs( app_id: uuid.UUID, - page: int = 1, - pagesize: int = 20, + page: int = Query(1, ge=1), + pagesize: int = Query(20, ge=1, le=100), user_id: Optional[str] = None, is_draft: Optional[bool] = None, db: Session = Depends(get_db), diff --git a/api/app/schemas/app_log_schema.py b/api/app/schemas/app_log_schema.py index e386b5e9..bda78138 100644 --- a/api/app/schemas/app_log_schema.py +++ b/api/app/schemas/app_log_schema.py @@ -50,4 +50,4 @@ class AppLogConversation(BaseModel): class AppLogConversationDetail(AppLogConversation): """会话详情(包含消息列表)""" - messages: List[AppLogMessage] = [] + messages: List[AppLogMessage] = Field(default_factory=list) From 93deb286a3097641bc55bb9d813e6d03ff6fd019 Mon Sep 17 00:00:00 2001 From: Mark Date: Mon, 23 Mar 2026 16:14:46 +0800 Subject: [PATCH 37/43] [add] migration script --- .../versions/05a681a6ca93_202603231611.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 api/migrations/versions/05a681a6ca93_202603231611.py diff --git a/api/migrations/versions/05a681a6ca93_202603231611.py b/api/migrations/versions/05a681a6ca93_202603231611.py new file mode 100644 index 00000000..5ab9c4de --- /dev/null +++ b/api/migrations/versions/05a681a6ca93_202603231611.py @@ -0,0 +1,32 @@ +"""202603231611 + +Revision ID: 05a681a6ca93 +Revises: 74b51dfece29 +Create Date: 2026-03-23 16:12:44.110292 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '05a681a6ca93' +down_revision: Union[str, None] = '74b51dfece29' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_users_username'), table_name='users') + op.create_index(op.f('ix_users_username'), 'users', ['username'], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_users_username'), table_name='users') + op.create_index(op.f('ix_users_username'), 'users', ['username'], unique=True) + # ### end Alembic commands ### From 31b8a3764eff5d7675c5bd1a244bfa82dd5f5c66 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 23 Mar 2026 16:38:47 +0800 Subject: [PATCH 38/43] =?UTF-8?q?=E3=80=90change=E3=80=91=201.Standardize?= =?UTF-8?q?=20log=20specifications=EF=BC=9B2.Cluster=20settings=20trigger?= =?UTF-8?q?=20explicitly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/celery_app.py | 1 + api/app/core/logging_config.py | 5 +- .../core/memory/agent/utils/write_tools.py | 6 +- .../core/memory/llm_tools/openai_client.py | 18 +- .../clustering_engine/label_propagation.py | 166 ++++++++++-------- .../data_preprocessing/data_pruning.py | 10 +- .../embedding_generation.py | 33 ++-- api/app/repositories/neo4j/add_nodes.py | 7 +- .../neo4j/community_repository.py | 2 +- api/app/repositories/neo4j/cypher_queries.py | 78 ++++---- api/app/repositories/neo4j/graph_saver.py | 13 +- api/app/services/memory_agent_service.py | 3 - api/app/tasks.py | 2 +- 13 files changed, 186 insertions(+), 158 deletions(-) diff --git a/api/app/celery_app.py b/api/app/celery_app.py index 807c59f4..864bee4a 100644 --- a/api/app/celery_app.py +++ b/api/app/celery_app.py @@ -77,6 +77,7 @@ celery_app.conf.update( # Worker 设置 (per-worker settings are in docker-compose command line) worker_prefetch_multiplier=1, # Don't hoard tasks, fairer distribution + worker_redirect_stdouts_level='INFO', # stdout/print → INFO instead of WARNING # 结果过期时间 result_expires=3600, # 结果保存1小时 diff --git a/api/app/core/logging_config.py b/api/app/core/logging_config.py index 28a98a46..d0dda84b 100644 --- a/api/app/core/logging_config.py +++ b/api/app/core/logging_config.py @@ -529,8 +529,9 @@ def log_time(step_name: str, duration: float, log_file: str = "logs/time.log") - # Fallback to console only if file write fails print(f"Warning: Could not write to timing log: {e}") - # Always print to console (backward compatible behavior) - print(f"✓ {step_name}: {duration:.2f}s") + # Always log at INFO level (avoids Celery treating stdout as WARNING) + _timing_logger = logging.getLogger(__name__) + _timing_logger.info(f"✓ {step_name}: {duration:.2f}s") def get_agent_logger(name: str = "agent_service", diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index b62eb50a..f782d44b 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -19,7 +19,7 @@ from app.core.memory.utils.log.logging_utils import log_time from app.db import get_db_context from app.repositories.neo4j.add_edges import add_memory_summary_statement_edges from app.repositories.neo4j.add_nodes import add_memory_summary_nodes -from app.repositories.neo4j.graph_saver import save_dialog_and_statements_to_neo4j, schedule_clustering_after_write +from app.repositories.neo4j.graph_saver import save_dialog_and_statements_to_neo4j, _trigger_clustering_sync from app.repositories.neo4j.neo4j_connector import Neo4jConnector from app.schemas.memory_config_schema import MemoryConfig @@ -169,8 +169,8 @@ async def write( ) if success: logger.info("Successfully saved all data to Neo4j") - # 写入成功后,异步触发聚类(不阻塞写入响应) - schedule_clustering_after_write( + # 写入成功后,同步等待聚类完成(避免与 Memory Summary 并发冲突) + await _trigger_clustering_sync( all_entity_nodes, llm_model_id=str(memory_config.llm_model_id) if memory_config.llm_model_id else None, embedding_model_id=str(memory_config.embedding_model_id) if memory_config.embedding_model_id else None, diff --git a/api/app/core/memory/llm_tools/openai_client.py b/api/app/core/memory/llm_tools/openai_client.py index 43c2b445..4536f62d 100644 --- a/api/app/core/memory/llm_tools/openai_client.py +++ b/api/app/core/memory/llm_tools/openai_client.py @@ -82,16 +82,26 @@ class OpenAIClient(LLMClient): LLMClientException: LLM 调用失败 """ try: - template = """{messages}""" - prompt = ChatPromptTemplate.from_template(template) - chain = prompt | self.client + from langchain_core.messages import HumanMessage, SystemMessage, AIMessage + + # 将 dict 消息列表转换为 LangChain 消息对象 + lc_messages = [] + for m in messages: + role = m.get("role", "user") + content = m.get("content", "") + if role == "system": + lc_messages.append(SystemMessage(content=content)) + elif role == "assistant": + lc_messages.append(AIMessage(content=content)) + else: + lc_messages.append(HumanMessage(content=content)) # 添加 Langfuse 回调(如果可用) config = {} if self.langfuse_handler: config["callbacks"] = [self.langfuse_handler] - response = await chain.ainvoke({"messages": messages}, config=config) + response = await self.client.ainvoke(lc_messages, config=config) return response except Exception as e: diff --git a/api/app/core/memory/storage_services/clustering_engine/label_propagation.py b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py index d9c04f8b..0fa6a833 100644 --- a/api/app/core/memory/storage_services/clustering_engine/label_propagation.py +++ b/api/app/core/memory/storage_services/clustering_engine/label_propagation.py @@ -71,13 +71,11 @@ class LabelPropagationEngine: connector: Neo4jConnector, llm_model_id: Optional[str] = None, embedding_model_id: Optional[str] = None, - embedding_model_id: Optional[str] = None, ): self.connector = connector self.repo = CommunityRepository(connector) self.llm_model_id = llm_model_id self.embedding_model_id = embedding_model_id - self.embedding_model_id = embedding_model_id # ────────────────────────────────────────────────────────────────────────── # 公开接口 @@ -239,6 +237,7 @@ class LabelPropagationEngine: await self.repo.upsert_community(new_cid, end_user_id, member_count=1) await self.repo.assign_entity_to_community(entity_id, new_cid, end_user_id) logger.debug(f"[Clustering] 孤立实体 {entity_id} → 新社区 {new_cid}") + await self._generate_community_metadata([new_cid], end_user_id) return # 统计邻居社区分布 @@ -273,7 +272,8 @@ class LabelPropagationEngine: await self._evaluate_merge( list(community_ids_in_neighbors), end_user_id ) - await self._generate_community_metadata([target_cid], end_user_id) + # 新实体加入后成员变化,强制重新生成元数据 + await self._generate_community_metadata([target_cid], end_user_id, force=True) async def _evaluate_merge( self, community_ids: List[str], end_user_id: str @@ -453,7 +453,7 @@ class LabelPropagationEngine: return lines async def _generate_community_metadata( - self, community_ids: List[str], end_user_id: str + self, community_ids: List[str], end_user_id: str, force: bool = False ) -> None: """ 为一个或多个社区生成并写入元数据。 @@ -462,69 +462,82 @@ class LabelPropagationEngine: 1. 逐个社区调 LLM 生成 name / summary(串行) 2. 收集所有 summary,一次性批量 embed 3. 单个社区用 update_community_metadata,多个用 batch_update_community_metadata - """ - if not community_ids: - return + Args: + force: 为 True 时跳过完整性检查,强制重新生成(用于增量更新成员变化后) + """ from app.db import get_db_context from app.core.memory.utils.llm.llm_utils import MemoryClientFactory - # --- 阶段1:并发调 LLM 生成每个社区的 name / summary --- - async def _build_one(cid: str): - members = await self.repo.get_community_members(cid, end_user_id) - if not members: + async def _build_one(cid: str) -> Optional[Dict]: + try: + if not force: + check_embedding = bool(self.embedding_model_id) + if await self.repo.is_community_complete(cid, end_user_id, check_embedding=check_embedding): + return None + + members = await self.repo.get_community_members(cid, end_user_id) + if not members: + logger.warning(f"[Clustering] 社区 {cid} 无成员,跳过元数据生成") + return None + + sorted_members = sorted( + members, + key=lambda m: m.get("activation_value") or 0, + reverse=True, + ) + core_entities = [m["name"] for m in sorted_members[:CORE_ENTITY_LIMIT] if m.get("name")] + all_names = [m["name"] for m in members if m.get("name")] + + name = "、".join(core_entities[:3]) if core_entities else cid[:8] + summary = f"包含实体:{', '.join(all_names)}" + + if self.llm_model_id: + try: + entity_list_str = "\n".join(self._build_entity_lines(members)) + relationships = await self.repo.get_community_relationships(cid, end_user_id) + rel_lines = [ + f"- {r['subject']} → {r['predicate']} → {r['object']}" + for r in relationships + if r.get("subject") and r.get("predicate") and r.get("object") + ] + rel_section = ( + f"\n实体间关系:\n" + "\n".join(rel_lines) + if rel_lines else "" + ) + prompt = ( + f"以下是一组语义相关的实体:\n{entity_list_str}{rel_section}\n\n" + f"请为这组实体所代表的主题:\n" + f"1. 起一个简洁的中文名称(不超过10个字)\n" + f"2. 写一句话摘要(不超过80个字)\n\n" + f"严格按以下格式输出,不要有其他内容:\n" + f"名称:<名称>\n摘要:<摘要>" + ) + with get_db_context() as db: + llm_client = MemoryClientFactory(db).get_llm_client(self.llm_model_id) + response = await llm_client.chat([{"role": "user", "content": prompt}]) + text = response.content if hasattr(response, "content") else str(response) + + for line in text.strip().splitlines(): + if line.startswith("名称:"): + name = line[3:].strip() + elif line.startswith("摘要:"): + summary = line[3:].strip() + except Exception as e: + logger.warning(f"[Clustering] 社区 {cid} LLM 生成失败,使用兜底值: {e}") + + return { + "community_id": cid, + "end_user_id": end_user_id, + "name": name, + "summary": summary, + "core_entities": core_entities, + "summary_embedding": None, + } + except Exception as e: + logger.error(f"[Clustering] 社区 {cid} 元数据准备失败: {e}", exc_info=True) return None - sorted_members = sorted( - members, - key=lambda m: m.get("activation_value") or 0, - reverse=True, - ) - core_entities = [m["name"] for m in sorted_members[:CORE_ENTITY_LIMIT] if m.get("name")] - - entity_list_str = "\n".join(self._build_entity_lines(members)) - - # 方案四:注入社区内实体间关系三元组 - relationships = await self.repo.get_community_relationships(cid, end_user_id) - rel_lines = [ - f"- {r['subject']} → {r['predicate']} → {r['object']}" - for r in relationships - if r.get("subject") and r.get("predicate") and r.get("object") - ] - rel_section = ( - f"\n实体间关系:\n" + "\n".join(rel_lines) - if rel_lines else "" - ) - - prompt = ( - f"以下是一组语义相关的实体:\n{entity_list_str}{rel_section}\n\n" - f"请为这组实体所代表的主题:\n" - f"1. 起一个简洁的中文名称(不超过10个字)\n" - f"2. 写一句话摘要(不超过80个字)\n\n" - f"严格按以下格式输出,不要有其他内容:\n" - f"名称:<名称>\n摘要:<摘要>" - ) - with get_db_context() as db: - llm_client = MemoryClientFactory(db).get_llm_client(self.llm_model_id) - response = await llm_client.chat([{"role": "user", "content": prompt}]) - text = response.content if hasattr(response, "content") else str(response) - - name, summary = "", "" - for line in text.strip().splitlines(): - if line.startswith("名称:"): - name = line[3:].strip() - elif line.startswith("摘要:"): - summary = line[3:].strip() - - return { - "community_id": cid, - "end_user_id": end_user_id, - "name": name, - "summary": summary, - "core_entities": core_entities, - "summary_embedding": None, - } - results = await asyncio.gather( *[_build_one(cid) for cid in community_ids], return_exceptions=True, @@ -537,15 +550,20 @@ class LabelPropagationEngine: metadata_list.append(res) if not metadata_list: + logger.warning(f"[Clustering] 无有效元数据可写入,community_ids={community_ids}") return # --- 阶段2:批量生成 summary_embedding --- - summaries = [m["summary"] for m in metadata_list] - with get_db_context() as db: - embedder = MemoryClientFactory(db).get_embedder_client(self.embedding_model_id) - embeddings = await embedder.response(summaries) - for i, meta in enumerate(metadata_list): - meta["summary_embedding"] = embeddings[i] if i < len(embeddings) else None + if self.embedding_model_id: + try: + summaries = [m["summary"] for m in metadata_list] + with get_db_context() as db: + embedder = MemoryClientFactory(db).get_embedder_client(self.embedding_model_id) + embeddings = await embedder.response(summaries) + for i, meta in enumerate(metadata_list): + meta["summary_embedding"] = embeddings[i] if i < len(embeddings) else None + except Exception as e: + logger.error(f"[Clustering] 批量生成 summary_embedding 失败: {e}", exc_info=True) # --- 阶段3:写入(单个 or 批量)--- if len(metadata_list) == 1: @@ -558,17 +576,13 @@ class LabelPropagationEngine: core_entities=m["core_entities"], summary_embedding=m["summary_embedding"], ) - if result: - logger.info(f"[Clustering] 社区 {m['community_id']} 元数据写入成功: name={m['name']}, summary={m['summary'][:30]}...") - else: - logger.warning(f"[Clustering] 社区 {m['community_id']} 元数据写入返回 False") + if not result: + logger.error(f"[Clustering] 社区 {m['community_id']} 元数据写入失败") else: ok = await self.repo.batch_update_community_metadata(metadata_list) - if ok: - logger.info(f"[Clustering] 批量写入 {len(metadata_list)} 个社区元数据成功") - else: - logger.warning(f"[Clustering] 批量写入社区元数据失败") + if not ok: + logger.error(f"[Clustering] 批量写入 {len(metadata_list)} 个社区元数据失败") @staticmethod def _new_community_id() -> str: - return str(uuid.uuid4()) + return str(uuid.uuid4()) \ No newline at end of file diff --git a/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py b/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py index 248067e7..967f529e 100644 --- a/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py +++ b/api/app/core/memory/storage_services/extraction_engine/data_preprocessing/data_pruning.py @@ -9,6 +9,7 @@ """ import asyncio +import logging import os import hashlib import json @@ -26,6 +27,8 @@ from app.core.memory.storage_services.extraction_engine.data_preprocessing.scene ScenePatterns ) +logger = logging.getLogger(__name__) + class DialogExtractionResponse(BaseModel): """对话级一次性抽取的结构化返回,用于加速剪枝。 @@ -706,7 +709,7 @@ class SemanticPruner: # 阈值保护:最高0.9 proportion = float(self.config.pruning_threshold) if proportion > 0.9: - print(f"[剪枝-数据集] 阈值{proportion}超过上限0.9,已自动调整为0.9") + logger.warning(f"[剪枝-数据集] 阈值{proportion}超过上限0.9,已自动调整为0.9") proportion = 0.9 if proportion < 0.0: proportion = 0.0 @@ -905,7 +908,7 @@ class SemanticPruner: # Safety: avoid empty dataset if not result: - print("警告: 语义剪枝后数据集为空,已回退为未剪枝数据以避免流程中断") + logger.warning("语义剪枝后数据集为空,已回退为未剪枝数据以避免流程中断") return dialogs return result @@ -915,8 +918,7 @@ class SemanticPruner: try: self.run_logs.append(msg) except Exception: - # 任何异常都不影响打印 pass - print(msg) + logger.debug(msg) diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/embedding_generation.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/embedding_generation.py index 72f3641e..33838061 100644 --- a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/embedding_generation.py +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/embedding_generation.py @@ -5,8 +5,11 @@ """ import asyncio +import logging from typing import Any, Dict, List, Tuple +logger = logging.getLogger(__name__) + from app.core.memory.llm_tools.openai_embedder import OpenAIEmbedderClient from app.core.memory.models.message_models import DialogData from app.core.models.base import RedBearModelConfig @@ -48,9 +51,9 @@ class EmbeddingGenerator: return await self.embedder_client.response(texts) # 分批并行处理 - print(f"文本数量 {len(texts)} 超过批次大小 {batch_size},分批并行处理") + logger.info(f"文本数量 {len(texts)} 超过批次大小 {batch_size},分批并行处理") batches = [texts[i:i+batch_size] for i in range(0, len(texts), batch_size)] - print(f"分成 {len(batches)} 批,每批最多 {batch_size} 个文本") + logger.info(f"分成 {len(batches)} 批,每批最多 {batch_size} 个文本") # 并行发送所有批次 batch_results = await asyncio.gather(*[ @@ -62,7 +65,7 @@ class EmbeddingGenerator: for batch_result in batch_results: embeddings.extend(batch_result) - print(f"分批并行处理完成,共生成 {len(embeddings)} 个嵌入向量") + logger.info(f"分批并行处理完成,共生成 {len(embeddings)} 个嵌入向量") return embeddings async def generate_statement_embeddings( @@ -77,7 +80,7 @@ class EmbeddingGenerator: Returns: 每个对话的陈述句嵌入向量映射列表 """ - print("\n=== 生成陈述句嵌入向量 ===") + logger.debug("=== 生成陈述句嵌入向量 ===") # 收集所有陈述句 all_statements = [] @@ -102,7 +105,7 @@ class EmbeddingGenerator: stmt_id = chunked_dialogs[d_idx].chunks[c_idx].statements[s_idx].id stmt_embedding_maps[d_idx][stmt_id] = embedding - print(f"为 {len(all_statements)} 个陈述句生成了嵌入向量") + logger.info(f"为 {len(all_statements)} 个陈述句生成了嵌入向量") return stmt_embedding_maps async def generate_chunk_embeddings( @@ -117,7 +120,7 @@ class EmbeddingGenerator: Returns: 每个对话的分块嵌入向量映射列表 """ - print("\n=== 生成分块嵌入向量 ===") + logger.debug("=== 生成分块嵌入向量 ===") # 收集所有分块 all_chunks = [] @@ -138,7 +141,7 @@ class EmbeddingGenerator: chunk_id = chunked_dialogs[d_idx].chunks[c_idx].id chunk_embedding_maps[d_idx][chunk_id] = embedding - print(f"为 {len(all_chunks)} 个分块生成了嵌入向量") + logger.info(f"为 {len(all_chunks)} 个分块生成了嵌入向量") return chunk_embedding_maps async def generate_dialog_embeddings( @@ -172,7 +175,7 @@ class EmbeddingGenerator: Returns: (陈述句嵌入映射列表, 分块嵌入映射列表, 对话嵌入列表) """ - print("\n=== 生成所有嵌入向量 ===") + logger.debug("=== 生成所有嵌入向量 ===") # 并发生成陈述句和分块嵌入向量 stmt_embedding_maps, chunk_embedding_maps = await asyncio.gather( @@ -183,9 +186,7 @@ class EmbeddingGenerator: # 对话嵌入向量(当前跳过) dialog_embeddings = await self.generate_dialog_embeddings(chunked_dialogs) - print( - f"生成完成:{len(chunked_dialogs)} 个对话的嵌入向量" - ) + logger.info(f"生成完成:{len(chunked_dialogs)} 个对话的嵌入向量") return stmt_embedding_maps, chunk_embedding_maps, dialog_embeddings @@ -201,7 +202,7 @@ class EmbeddingGenerator: Returns: 更新后的三元组映射列表(实体包含嵌入向量) """ - print("\n=== 生成实体嵌入向量 ===") + logger.debug("=== 生成实体嵌入向量 ===") entity_texts: List[str] = [] entity_refs: List[Any] = [] @@ -219,7 +220,7 @@ class EmbeddingGenerator: entity_refs.append(ent) if not entity_texts: - print("没有找到需要生成嵌入向量的实体") + logger.debug("没有找到需要生成嵌入向量的实体") return triplet_maps # 批量生成嵌入向量 @@ -227,13 +228,13 @@ class EmbeddingGenerator: # 打印前几个嵌入向量的维度 for i in range(min(5, len(embeddings))): - print(f"实体 '{entity_texts[i]}' 嵌入向量维度: {len(embeddings[i])}") + logger.debug(f"实体 '{entity_texts[i]}' 嵌入向量维度: {len(embeddings[i])}") # 将嵌入向量赋值给实体 for ent, emb in zip(entity_refs, embeddings): setattr(ent, "name_embedding", emb) - print(f"为 {len(entity_refs)} 个实体生成了嵌入向量") + logger.info(f"为 {len(entity_refs)} 个实体生成了嵌入向量") return triplet_maps @@ -296,7 +297,7 @@ async def embedding_generation_all( Returns: (陈述句嵌入映射列表, 分块嵌入映射列表, 对话嵌入列表, 更新后的三元组映射列表) """ - print("\n=== 综合嵌入向量生成(陈述句/分块/对话 + 实体)===") + logger.debug("=== 综合嵌入向量生成(陈述句/分块/对话 + 实体)===") generator = EmbeddingGenerator(embedding_id) diff --git a/api/app/repositories/neo4j/add_nodes.py b/api/app/repositories/neo4j/add_nodes.py index 42c178b3..786f7bbe 100644 --- a/api/app/repositories/neo4j/add_nodes.py +++ b/api/app/repositories/neo4j/add_nodes.py @@ -1,10 +1,13 @@ from typing import List, Optional +import logging from app.repositories.neo4j.cypher_queries import DIALOGUE_NODE_SAVE, STATEMENT_NODE_SAVE, CHUNK_NODE_SAVE,MEMORY_SUMMARY_NODE_SAVE from app.core.memory.models.graph_models import DialogueNode, StatementNode, ChunkNode, MemorySummaryNode # 使用新的仓储层 from app.repositories.neo4j.neo4j_connector import Neo4jConnector +logger = logging.getLogger(__name__) + async def delete_all_nodes(end_user_id: str, connector: Neo4jConnector): """Delete all nodes in the database.""" @@ -217,10 +220,10 @@ async def add_memory_summary_nodes(summaries: List[MemorySummaryNode], connector summaries=flattened ) created_ids = [record.get("uuid") for record in result] - print(f"Successfully saved {len(created_ids)} MemorySummary nodes to Neo4j") + logger.info(f"Successfully saved {len(created_ids)} MemorySummary nodes to Neo4j") return created_ids except Exception as e: - print(f"Failed to save MemorySummary nodes to Neo4j: {e}") + logger.error(f"Failed to save MemorySummary nodes to Neo4j: {e}") return None diff --git a/api/app/repositories/neo4j/community_repository.py b/api/app/repositories/neo4j/community_repository.py index 7273340e..bd448c99 100644 --- a/api/app/repositories/neo4j/community_repository.py +++ b/api/app/repositories/neo4j/community_repository.py @@ -300,7 +300,7 @@ class CommunityRepository: ) return bool(result) except Exception as e: - logger.error(f"update_community_metadata failed: {e}") + logger.error(f"update_community_metadata failed: {e}", exc_info=True) return False async def batch_update_community_metadata( diff --git a/api/app/repositories/neo4j/cypher_queries.py b/api/app/repositories/neo4j/cypher_queries.py index 0cdaeb59..fe1cb252 100644 --- a/api/app/repositories/neo4j/cypher_queries.py +++ b/api/app/repositories/neo4j/cypher_queries.py @@ -1069,6 +1069,7 @@ Graph_Node_query = """ COMMUNITY_NODE_UPSERT = """ MERGE (c:Community {community_id: $community_id}) +ON CREATE SET c.id = $community_id SET c.end_user_id = $end_user_id, c.member_count = $member_count, c.updated_at = datetime() @@ -1175,7 +1176,8 @@ RETURN c.community_id AS community_id, cnt AS member_count UPDATE_COMMUNITY_METADATA = """ MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) -SET c.name = $name, +SET c.id = coalesce(c.id, $community_id), + c.name = $name, c.summary = $summary, c.core_entities = $core_entities, c.summary_embedding = $summary_embedding, @@ -1186,7 +1188,8 @@ RETURN c.community_id AS community_id BATCH_UPDATE_COMMUNITY_METADATA = """ UNWIND $communities AS row MATCH (c:Community {community_id: row.community_id, end_user_id: row.end_user_id}) -SET c.name = row.name, +SET c.id = coalesce(c.id, row.community_id), + c.name = row.name, c.summary = row.summary, c.core_entities = row.core_entities, c.summary_embedding = row.summary_embedding, @@ -1270,6 +1273,40 @@ RETURN startNode(r) = e AS r_from_e """ +CHECK_COMMUNITY_IS_COMPLETE = """ +MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) +RETURN ( + c.name IS NOT NULL AND c.name <> '' AND + c.summary IS NOT NULL AND c.summary <> '' AND + c.core_entities IS NOT NULL +) AS is_complete +""" + +CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING = """ +MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) +RETURN ( + c.name IS NOT NULL AND c.name <> '' AND + c.summary IS NOT NULL AND c.summary <> '' AND + c.core_entities IS NOT NULL AND + c.summary_embedding IS NOT NULL +) AS is_complete +""" + +GET_INCOMPLETE_COMMUNITIES = """ +MATCH (c:Community {end_user_id: $end_user_id}) +WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL + OR c.name = '' OR c.summary = '' +RETURN c.community_id AS community_id +""" + +GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING = """ +MATCH (c:Community {end_user_id: $end_user_id}) +WHERE c.name IS NULL OR c.name = '' + OR c.summary IS NULL OR c.summary = '' + OR c.core_entities IS NULL + OR (c.summary_embedding IS NULL AND c.summary IS NOT NULL AND c.summary <> '(empty)') +RETURN c.community_id AS community_id +""" # Community keyword search: matches name or summary via fulltext index SEARCH_COMMUNITIES_BY_KEYWORD = """ @@ -1325,39 +1362,4 @@ RETURN s.statement AS statement, c.name AS community_name ORDER BY COALESCE(s.activation_value, 0) DESC LIMIT $limit -""" - -CHECK_COMMUNITY_IS_COMPLETE = """ -MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) -RETURN ( - c.name IS NOT NULL AND c.name <> '' AND - c.summary IS NOT NULL AND c.summary <> '' AND - c.core_entities IS NOT NULL -) AS is_complete -""" - -CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING = """ -MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id}) -RETURN ( - c.name IS NOT NULL AND c.name <> '' AND - c.summary IS NOT NULL AND c.summary <> '' AND - c.core_entities IS NOT NULL AND - c.summary_embedding IS NOT NULL -) AS is_complete -""" - -GET_INCOMPLETE_COMMUNITIES = """ -MATCH (c:Community {end_user_id: $end_user_id}) -WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL - OR c.name = '' OR c.summary = '' -RETURN c.community_id AS community_id -""" - -GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING = """ -MATCH (c:Community {end_user_id: $end_user_id}) -WHERE c.name IS NULL OR c.name = '' - OR c.summary IS NULL OR c.summary = '' - OR c.core_entities IS NULL - OR (c.summary_embedding IS NULL AND c.summary IS NOT NULL AND c.summary <> '(empty)') -RETURN c.community_id AS community_id -""" +""" \ No newline at end of file diff --git a/api/app/repositories/neo4j/graph_saver.py b/api/app/repositories/neo4j/graph_saver.py index 34497d5b..d2c4b9bd 100644 --- a/api/app/repositories/neo4j/graph_saver.py +++ b/api/app/repositories/neo4j/graph_saver.py @@ -162,7 +162,7 @@ async def save_dialog_and_statements_to_neo4j( """Save dialogue nodes, chunk nodes, statement nodes, entities, and all relationships to Neo4j using graph models. 只负责数据写入,不触发聚类。聚类由调用方在写入成功后通过 - schedule_clustering_after_write() 显式触发。 + _trigger_clustering_sync() 显式触发。 Args: dialogue_nodes: List of DialogueNode objects to save @@ -303,16 +303,13 @@ async def save_dialog_and_statements_to_neo4j( return False -def schedule_clustering_after_write( +async def _trigger_clustering_sync( entity_nodes: List, llm_model_id: Optional[str] = None, embedding_model_id: Optional[str] = None, ) -> None: """ - 写入 Neo4j 成功后,调度后台聚类任务。 - - 可通过环境变量 CLUSTERING_ENABLED=false 禁用(用于基准测试对比)。 - 使用 asyncio.create_task 异步触发,不阻塞写入响应。 + 同步等待聚类完成,避免与其他 LLM 任务并发冲突。 """ if not entity_nodes: return @@ -324,8 +321,8 @@ def schedule_clustering_after_write( end_user_id = entity_nodes[0].end_user_id new_entity_ids = [e.id for e in entity_nodes] - logger.info(f"[Clustering] 准备触发聚类,实体数: {len(new_entity_ids)}, end_user_id: {end_user_id}") - asyncio.create_task(_trigger_clustering(new_entity_ids, end_user_id, llm_model_id=llm_model_id, embedding_model_id=embedding_model_id)) + logger.info(f"[Clustering] 准备触发聚类(同步),实体数: {len(new_entity_ids)}, end_user_id: {end_user_id}") + await _trigger_clustering(new_entity_ids, end_user_id, llm_model_id=llm_model_id, embedding_model_id=embedding_model_id) async def _trigger_clustering( diff --git a/api/app/services/memory_agent_service.py b/api/app/services/memory_agent_service.py index af9a04e2..dc064540 100644 --- a/api/app/services/memory_agent_service.py +++ b/api/app/services/memory_agent_service.py @@ -350,9 +350,6 @@ class MemoryAgentService: langchain_messages.append(HumanMessage(content=msg['content'])) elif msg['role'] == 'assistant': langchain_messages.append(AIMessage(content=msg['content'])) - print(100 * '-') - print(langchain_messages) - print(100 * '-') # 初始状态 - 包含所有必要字段 initial_state = { "messages": langchain_messages, diff --git a/api/app/tasks.py b/api/app/tasks.py index 3a237d82..354951c6 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -2760,7 +2760,7 @@ def init_community_clustering_for_users(self, end_user_ids: List[str], workspace patch_fail = 0 for cid in incomplete_ids: try: - await engine._generate_community_metadata(cid, end_user_id) + await engine._generate_community_metadata([cid], end_user_id) patch_ok += 1 except Exception as patch_err: patch_fail += 1 From a3428c2435271c0ffab64aa2d38f7465a92c9432 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Mon, 23 Mar 2026 17:04:30 +0800 Subject: [PATCH 39/43] feat(app): 1. Handling the storage of multimodal messages and adapting to the loading of historical messages for multi-round conversations; 2. Obtain the interface for retrieving the voice status of the reply; 3. File Information Retrieval Interface --- .../controllers/file_storage_controller.py | 187 +++++++++++++++++- api/app/services/app_chat_service.py | 69 ++++--- api/app/services/conversation_service.py | 64 ++++-- api/app/services/draft_run_service.py | 77 ++++++-- 4 files changed, 342 insertions(+), 55 deletions(-) diff --git a/api/app/controllers/file_storage_controller.py b/api/app/controllers/file_storage_controller.py index ff284f39..14962a72 100644 --- a/api/app/controllers/file_storage_controller.py +++ b/api/app/controllers/file_storage_controller.py @@ -14,6 +14,9 @@ Routes: import os import uuid from typing import Any +import httpx +import mimetypes +from urllib.parse import urlparse, unquote from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status from fastapi.responses import FileResponse, RedirectResponse @@ -91,7 +94,7 @@ async def upload_file( if file_size > settings.MAX_FILE_SIZE: raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, + status_code=status.HTTP_413_CONTENT_TOO_LARGE, detail=f"The file size exceeds the {settings.MAX_FILE_SIZE} byte limit" ) @@ -172,7 +175,6 @@ async def upload_file_with_share_token( # Get share and release info from share_token service = ReleaseShareService(db) - share_info = service.get_shared_release_info(share_token=share_data.share_token) # Get share object to access app_id share = service.repo.get_by_share_token(share_data.share_token) @@ -291,6 +293,101 @@ async def upload_file_with_share_token( ) +@router.get("/files/info-by-url", response_model=ApiResponse) +async def get_file_info_by_url( + url: str, +): + """ + Get file information by network URL (no authentication required). + + Fetches file metadata from a remote URL via HTTP HEAD request. + Falls back to GET request if HEAD is not supported. + Returns file type, name, and size. + + Args: + url: The network URL of the file. + + Returns: + ApiResponse with file information. + """ + api_logger.info(f"File info by URL request: url={url}") + + try: + async with httpx.AsyncClient(timeout=10.0) as client: + # Try HEAD request first + response = await client.head(url, follow_redirects=True) + + # If HEAD fails, try GET request (some servers don't support HEAD) + if response.status_code != 200: + api_logger.info(f"HEAD request failed with {response.status_code}, trying GET request") + response = await client.get(url, follow_redirects=True) + + if response.status_code != 200: + api_logger.error(f"Failed to fetch file info: HTTP {response.status_code}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Unable to access file: HTTP {response.status_code}" + ) + + # Get file size from Content-Length header or actual content + file_size = response.headers.get("Content-Length") + if file_size: + file_size = int(file_size) + elif hasattr(response, 'content'): + file_size = len(response.content) + else: + file_size = None + + # Get content type from Content-Type header + content_type = response.headers.get("Content-Type", "application/octet-stream") + # Remove charset and other parameters from content type + content_type = content_type.split(';')[0].strip() + + # Extract filename from Content-Disposition or URL + file_name = None + content_disposition = response.headers.get("Content-Disposition") + if content_disposition and "filename=" in content_disposition: + parts = content_disposition.split("filename=") + if len(parts) > 1: + file_name = parts[1].strip('"').strip("'") + + if not file_name: + parsed_url = urlparse(url) + file_name = unquote(os.path.basename(parsed_url.path)) or "unknown" + + # Extract file extension from filename + _, file_ext = os.path.splitext(file_name) + + # If no extension found, infer from content type + if not file_ext: + ext = mimetypes.guess_extension(content_type) + if ext: + file_ext = ext + file_name = f"{file_name}{file_ext}" + + api_logger.info(f"File info retrieved: name={file_name}, size={file_size}, type={content_type}") + + return success( + data={ + "url": url, + "file_name": file_name, + "file_ext": file_ext.lower() if file_ext else "", + "file_size": file_size, + "content_type": content_type, + }, + msg="File information retrieved successfully" + ) + + except HTTPException: + raise + except Exception as e: + api_logger.error(f"Unexpected error: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to retrieve file information: {str(e)}" + ) + + @router.get("/files/{file_id}", response_model=Any) async def download_file( request: Request, @@ -499,6 +596,51 @@ async def get_file_url( ) +@router.get("/files/{file_id}/public-url", response_model=ApiResponse) +async def get_permanent_file_url( + file_id: uuid.UUID, + db: Session = Depends(get_db), + storage_service: FileStorageService = Depends(get_file_storage_service), +): + """ + 获取文件的永久公开 URL(无过期时间)。 + + - 本地存储:返回 API 永久访问地址(基于 FILE_LOCAL_SERVER_URL 配置) + - 远程存储(OSS/S3):返回 bucket 公读地址(需 bucket 已配置公共读权限) + """ + file_metadata = db.query(FileMetadata).filter(FileMetadata.id == file_id).first() + if not file_metadata: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="The file does not exist") + + if file_metadata.status != "completed": + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, + detail=f"File upload not completed, status: {file_metadata.status}") + + file_key = file_metadata.file_key + storage = storage_service.storage + + try: + if isinstance(storage, LocalStorage): + url = f"{settings.FILE_LOCAL_SERVER_URL}/storage/permanent/{file_id}" + else: + url = await storage.get_permanent_url(file_key) + if not url: + raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="Permanent URL not supported for current storage backend") + + api_logger.info(f"Generated permanent URL: file_id={file_id}") + return success( + data={"url": url, "expires_in": None, "permanent": True, "file_name": file_metadata.file_name}, + msg="Permanent file URL generated successfully" + ) + except HTTPException: + raise + except Exception as e: + api_logger.error(f"Failed to generate permanent URL: {e}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to generate permanent URL: {str(e)}") + + @router.get("/public/{file_id}", response_model=Any) async def public_download_file( request: Request, @@ -653,3 +795,44 @@ async def permanent_download_file( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to retrieve file: {str(e)}" ) + + +@router.get("/files/{file_id}/status", response_model=ApiResponse) +async def get_file_status( + file_id: uuid.UUID, + db: Session = Depends(get_db), +): + """ + Get file upload/processing status (no authentication required). + + This endpoint is used to check if a file (e.g., TTS audio) is ready. + Returns status: pending, completed, or failed. + + Args: + file_id: The UUID of the file. + db: Database session. + + Returns: + ApiResponse with file status and metadata. + """ + api_logger.info(f"File status request: file_id={file_id}") + + # Query file metadata from database + file_metadata = db.query(FileMetadata).filter(FileMetadata.id == file_id).first() + if not file_metadata: + api_logger.warning(f"File not found in database: file_id={file_id}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The file does not exist" + ) + + return success( + data={ + "file_id": str(file_id), + "status": file_metadata.status, + "file_name": file_metadata.file_name, + "file_size": file_metadata.file_size, + "content_type": file_metadata.content_type, + }, + msg="File status retrieved successfully" + ) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index 604514b4..94a6dddb 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -119,14 +119,12 @@ class AppChatService: ) # 加载历史消息 - messages = self.conversation_service.get_messages( + history = self.conversation_service.get_conversation_history( conversation_id=conversation_id, - limit=10 + max_history=10, + current_provider=api_key_obj.provider, + current_is_omni=api_key_obj.is_omni ) - history = [ - {"role": msg.role, "content": msg.content} - for msg in messages - ] # 处理多模态文件 processed_files = None @@ -180,7 +178,8 @@ class AppChatService: # 构建用户消息内容(含多模态文件) human_meta = { - "files": [] + "files": [], + "history_files": {} } assistant_meta = { "model": api_key_obj.model_name, @@ -195,6 +194,13 @@ class AppChatService: "url": f.url }) + if processed_files: + human_meta["history_files"] = { + "content": processed_files, + "provider": api_key_obj.provider, + "is_omni": api_key_obj.is_omni + } + # 保存消息 if audio_url: assistant_meta["audio_url"] = audio_url @@ -225,6 +231,7 @@ class AppChatService: "suggested_questions": suggested_questions, "citations": self.agent_service._filter_citations(features_config, result.get("citations", [])), "audio_url": audio_url, + "audio_status": "pending" } async def agnet_chat_stream( @@ -314,17 +321,12 @@ class AppChatService: ) # 加载历史消息 - history = [] - memory_config = {"enabled": True, 'max_history': 10} - if memory_config.get("enabled"): - messages = self.conversation_service.get_messages( - conversation_id=conversation_id, - limit=memory_config.get("max_history", 10) - ) - history = [ - {"role": msg.role, "content": msg.content} - for msg in messages - ] + history = self.conversation_service.get_conversation_history( + conversation_id=conversation_id, + max_history=10, + current_provider=api_key_obj.provider, + current_is_omni=api_key_obj.is_omni + ) # 处理多模态文件 processed_files = None @@ -347,8 +349,14 @@ class AppChatService: total_tokens = 0 text_queue: asyncio.Queue = asyncio.Queue() + api_key_config = { + "model_name": api_key_obj.model_name, + "api_key": api_key_obj.api_key, + "api_base": api_key_obj.api_base, + "provider": api_key_obj.provider, + } stream_audio_url, tts_task = await self.agent_service._generate_tts_streaming( - features_config, api_key_obj, + features_config, api_key_config, text_queue=text_queue, tenant_id=tenant_id, workspace_id=workspace_id ) @@ -378,7 +386,7 @@ class AppChatService: elapsed_time = time.time() - start_time ModelApiKeyService.record_api_key_usage(self.db, api_key_obj.id) - # 发送结束事件(包含 suggested_questions、tts、citations) + # 发送结束事件(包含 suggested_questions、tts、audio_status、citations) end_data: dict = {"elapsed_time": elapsed_time, "message_length": len(full_content), "error": None} sq_config = features_config.get("suggested_questions_after_answer", {}) if isinstance(sq_config, dict) and sq_config.get("enabled"): @@ -388,11 +396,23 @@ class AppChatService: "api_base": api_key_obj.api_base}, {} ) end_data["audio_url"] = stream_audio_url + # 检查TTS是否已完成(非阻塞,不取消任务) + audio_status = "pending" + if tts_task is not None and tts_task.done(): + # 任务已完成,检查是否有异常 + try: + tts_task.result() + audio_status = "completed" + except Exception as e: + logger.warning(f"TTS任务异常: {e}") + audio_status = "failed" + end_data["audio_status"] = audio_status if stream_audio_url else None end_data["citations"] = self.agent_service._filter_citations(features_config, []) # 保存消息 human_meta = { - "files":[] + "files":[], + "history_files": {} } assistant_meta = { "model": api_key_obj.model_name, @@ -402,11 +422,16 @@ class AppChatService: if files: for f in files: - # url = await MultimodalService(self.db).get_file_url(f) human_meta["files"].append({ "type": f.type, "url": f.url }) + if processed_files: + human_meta["history_files"] = { + "content": processed_files, + "provider": api_key_obj.provider, + "is_omni": api_key_obj.is_omni + } if stream_audio_url: assistant_meta["audio_url"] = stream_audio_url diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index aff5f533..d7bb3595 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -119,25 +119,27 @@ class ConversationService: def get_user_conversations( self, - user_id: uuid.UUID - ) -> list[Conversation]: + user_id: uuid.UUID, + page: int = 1, + page_size: int = 20 + ) -> tuple[list[Conversation], int]: """ - Retrieve recent conversations for a specific user - - This method delegates persistence logic to the repository layer and - applies service-level defaults (e.g. recent conversation limit). + Retrieve recent conversations for a specific user with pagination. Args: user_id (uuid.UUID): Unique identifier of the user. + page (int): Page number (1-based). Defaults to 1. + page_size (int): Number of items per page. Defaults to 20. Returns: - list[Conversation]: A list of recent conversation entities. + tuple[list[Conversation], int]: A list of recent conversation entities and total count. """ - conversations = self.conversation_repo.get_conversation_by_user_id( + conversations, total = self.conversation_repo.get_conversation_by_user_id( user_id, - limit=10 + page=page, + page_size=page_size ) - return conversations + return conversations, total def list_conversations( self, @@ -270,7 +272,9 @@ class ConversationService: def get_conversation_history( self, conversation_id: uuid.UUID, - max_history: Optional[int] = None + max_history: Optional[int] = None, + current_provider: Optional[str] = None, + current_is_omni: Optional[bool] = None ) -> List[dict]: """ Retrieve historical conversation messages formatted as dictionaries. @@ -278,6 +282,8 @@ class ConversationService: Args: conversation_id (uuid.UUID): Conversation UUID. max_history (Optional[int]): Maximum number of messages to retrieve. + current_provider (Optional[str]): Current provider for file handling. + current_is_omni (Optional[bool]): Current omni flag for file handling. Returns: List[dict]: List of message dictionaries with keys 'role' and 'content'. @@ -287,14 +293,30 @@ class ConversationService: limit=max_history ) - # 转换为字典格式 - history = [ - { + history = [] + for msg in messages: + msg_dict = { "role": msg.role, - "content": msg.content + "content": [{"type": "text", "text": msg.content}] } - for msg in messages - ] + + # 处理用户消息中的多模态文件 + if msg.role == "user" and msg.meta_data: + history_files = msg.meta_data.get("history_files", {}) + + if history_files and current_provider and current_is_omni is not None: + # 检查是否需要重新处理文件 + stored_provider = history_files.get("provider") + stored_is_omni = history_files.get("is_omni") + + # 如果provider或is_omni不匹配,需要重新处理 + if stored_provider != current_provider or stored_is_omni != current_is_omni: + continue + + # provider和is_omni匹配,直接使用存储的内容 + msg_dict["content"].extend(history_files.get("content")) + + history.append(msg_dict) return history @@ -510,6 +532,7 @@ class ConversationService: provider = api_config.provider api_key = api_config.api_key api_base = api_config.api_base + is_omni = api_config.is_omni model_type = config.type llm = RedBearLLM( @@ -517,14 +540,17 @@ class ConversationService: model_name=model_name, provider=provider, api_key=api_key, - base_url=api_base + base_url=api_base, + is_omni=is_omni ), type=ModelType(model_type) ) conversation_messages = self.get_conversation_history( conversation_id=conversation_id, - max_history=20 + max_history=20, + current_provider=provider, + current_is_omni=is_omni ) if len(conversation_messages) == 0: return ConversationOut( diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index ba41d323..13243a92 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -582,7 +582,9 @@ class AgentRunService: # 6. 加载历史消息 history = await self._load_conversation_history( conversation_id=conversation_id, - max_history=10 + max_history=10, + current_provider=api_key_config.get("provider"), + current_is_omni=api_key_config.get("is_omni", False) ) # 6. 处理多模态文件 @@ -659,7 +661,10 @@ class AgentRunService: }) }, files=files, - audio_url=audio_url + processed_files=processed_files, + audio_url=audio_url, + provider=api_key_config.get("provider"), + is_omni=api_key_config.get("is_omni", False) ) response = { @@ -676,6 +681,7 @@ class AgentRunService: ) if not sub_agent else [], "citations": self._filter_citations(features_config, result.get("citations", [])), "audio_url": audio_url, + "audio_status": "pending" } logger.info( @@ -818,7 +824,9 @@ class AgentRunService: # 6. 加载历史消息 history = await self._load_conversation_history( conversation_id=conversation_id, - max_history=memory_config.get("max_history", 10) + max_history=memory_config.get("max_history", 10), + current_provider=api_key_config.get("provider"), + current_is_omni=api_key_config.get("is_omni", False) ) # 6. 处理多模态文件 @@ -905,10 +913,13 @@ class AgentRunService: "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens} }, files=files, - audio_url=stream_audio_url + processed_files=processed_files, + audio_url=stream_audio_url, + provider=api_key_config.get("provider"), + is_omni=api_key_config.get("is_omni", False) ) - # 12. 发送结束事件(包含 suggested_questions 和 tts) + # 12. 发送结束事件(包含 suggested_questions、audio_url 和 audio_status) end_data: Dict[str, Any] = { "conversation_id": conversation_id, "elapsed_time": elapsed_time, @@ -919,6 +930,17 @@ class AgentRunService: features_config, full_content, api_key_config, effective_params ) end_data["audio_url"] = stream_audio_url + # 检查TTS是否已完成(非阻塞,不取消任务) + audio_status = "pending" + if tts_task is not None and tts_task.done(): + # 任务已完成,检查是否有异常 + try: + tts_task.result() + audio_status = "completed" + except Exception as e: + logger.warning(f"TTS任务异常: {e}") + audio_status = "failed" + end_data["audio_status"] = audio_status if stream_audio_url else None end_data["citations"] = self._filter_citations(features_config, []) yield self._format_sse_event("end", end_data) @@ -1115,13 +1137,17 @@ class AgentRunService: async def _load_conversation_history( self, conversation_id: str, - max_history: int = 10 + max_history: int = 10, + current_provider: Optional[str] = None, + current_is_omni: Optional[bool] = None ) -> List[Dict[str, str]]: - """加载会话历史消息 + """加载会话历史消息,并根据当前模型配置处理多模态文件 Args: conversation_id: 会话ID max_history: 最大历史消息数量 + current_provider: 当前模型的provider + current_is_omni: 当前模型的is_omni Returns: List[Dict]: 历史消息列表 @@ -1131,7 +1157,9 @@ class AgentRunService: conversation_service = ConversationService(self.db) history = conversation_service.get_conversation_history( conversation_id=uuid.UUID(conversation_id), - max_history=max_history + max_history=max_history, + current_provider=current_provider, + current_is_omni=current_is_omni ) logger.debug( @@ -1159,7 +1187,10 @@ class AgentRunService: app_id: Optional[uuid.UUID] = None, user_id: Optional[str] = None, files: Optional[List[FileInput]] = None, - audio_url: Optional[str] = None + processed_files: Optional[List[Dict[str, Any]]] = None, + audio_url: Optional[str] = None, + provider: Optional[str] = None, + is_omni: Optional[bool] = None ) -> None: """保存会话消息(会话已通过 _ensure_conversation 确保存在) @@ -1170,6 +1201,11 @@ class AgentRunService: app_id: 应用ID(未使用,保留用于兼容性) user_id: 用户ID(未使用,保留用于兼容性) meta_data: token消耗 + files: 原始文件输入 + processed_files: 处理后的文件 + audio_url: 音频URL + provider: 模型供应商 + is_omni: 是否为全模态模型 """ try: from app.services.conversation_service import ConversationService @@ -1179,15 +1215,24 @@ class AgentRunService: # 保存消息(会话已经存在) human_meta = { - "files": [] + "files": [], + "history_files": {} } if files: for f in files: - # url = await MultimodalService(self.db).get_file_url(f) human_meta["files"].append({ "type": f.type, "url": f.url }) + + # 保存 history_files,包含 provider 和 is_omni 信息 + if processed_files: + human_meta["history_files"] = { + "content": processed_files, + "provider": provider, + "is_omni": is_omni + } + # 保存用户消息 conversation_service.add_message( conversation_id=conv_uuid, @@ -1413,8 +1458,9 @@ class AgentRunService: workspace_id: Optional[uuid.UUID] = None, ) -> tuple[Optional[str], Optional[asyncio.Task]]: """文本流式输入并行合成音频。 - 返回 (audio_url, task),audio_url 立即可用,task 完成后文件内容就绪。 + 返回 (audio_url, task),audio_url 立即可用(pending状态),task 完成后文件内容就绪。 调用方向 text_queue put 文本 chunk,结束时 put None。 + 前端可通过 GET /storage/files/{file_id}/status 轮询检查音频是否就绪。 """ tts_config = features_config.get("text_to_speech", {}) if not isinstance(tts_config, dict) or not tts_config.get("enabled"): @@ -1801,6 +1847,7 @@ class AgentRunService: ), "cost_estimate": self._estimate_cost(usage, model_info["model_config"]), "audio_url": result.get("audio_url"), + "audio_status": result.get("audio_status"), "citations": result.get("citations", []), "suggested_questions": result.get("suggested_questions", []), "error": None @@ -1878,6 +1925,7 @@ class AgentRunService: "results": [{ **r, "audio_url": r.get("audio_url"), + "audio_status": r.get("audio_status"), "citations": r.get("citations", []), "suggested_questions": r.get("suggested_questions", []), } for r in results], @@ -2009,6 +2057,7 @@ class AgentRunService: full_content = "" returned_conversation_id = model_conversation_id audio_url = None + audio_status = None citations = [] suggested_questions = [] @@ -2067,6 +2116,7 @@ class AgentRunService: # 从 end 事件中提取 features 输出字段 if event_type == "end" and event_data: audio_url = event_data.get("audio_url") + audio_status = event_data.get("audio_status") citations = event_data.get("citations", []) suggested_questions = event_data.get("suggested_questions", []) @@ -2096,6 +2146,7 @@ class AgentRunService: "message": full_content, "elapsed_time": elapsed, "audio_url": audio_url, + "audio_status": audio_status, "citations": citations, "suggested_questions": suggested_questions, "error": None @@ -2110,6 +2161,7 @@ class AgentRunService: "elapsed_time": elapsed, "message_length": len(full_content), "audio_url": audio_url, + "audio_status": audio_status, "citations": citations, "suggested_questions": suggested_questions, "timestamp": time.time() @@ -2246,6 +2298,7 @@ class AgentRunService: "message": r.get("message"), "elapsed_time": r.get("elapsed_time", 0), "audio_url": r.get("audio_url"), + "audio_status": r.get("audio_status"), "citations": r.get("citations", []), "suggested_questions": r.get("suggested_questions", []), "error": r.get("error") From efeead41b22eff25a78e8831c1fbb0fa86a996d6 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Mon, 23 Mar 2026 17:10:49 +0800 Subject: [PATCH 40/43] feat(app): 1. Handling the storage of multimodal messages and adapting to the loading of historical messages for multi-round conversations; 2. Obtain the interface for retrieving the voice status of the reply; 3. File Information Retrieval Interface --- .../controllers/file_storage_controller.py | 187 +++++++++++++++++- api/app/services/app_chat_service.py | 69 ++++--- api/app/services/conversation_service.py | 64 ++++-- api/app/services/draft_run_service.py | 77 ++++++-- 4 files changed, 342 insertions(+), 55 deletions(-) diff --git a/api/app/controllers/file_storage_controller.py b/api/app/controllers/file_storage_controller.py index ff284f39..14962a72 100644 --- a/api/app/controllers/file_storage_controller.py +++ b/api/app/controllers/file_storage_controller.py @@ -14,6 +14,9 @@ Routes: import os import uuid from typing import Any +import httpx +import mimetypes +from urllib.parse import urlparse, unquote from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status from fastapi.responses import FileResponse, RedirectResponse @@ -91,7 +94,7 @@ async def upload_file( if file_size > settings.MAX_FILE_SIZE: raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, + status_code=status.HTTP_413_CONTENT_TOO_LARGE, detail=f"The file size exceeds the {settings.MAX_FILE_SIZE} byte limit" ) @@ -172,7 +175,6 @@ async def upload_file_with_share_token( # Get share and release info from share_token service = ReleaseShareService(db) - share_info = service.get_shared_release_info(share_token=share_data.share_token) # Get share object to access app_id share = service.repo.get_by_share_token(share_data.share_token) @@ -291,6 +293,101 @@ async def upload_file_with_share_token( ) +@router.get("/files/info-by-url", response_model=ApiResponse) +async def get_file_info_by_url( + url: str, +): + """ + Get file information by network URL (no authentication required). + + Fetches file metadata from a remote URL via HTTP HEAD request. + Falls back to GET request if HEAD is not supported. + Returns file type, name, and size. + + Args: + url: The network URL of the file. + + Returns: + ApiResponse with file information. + """ + api_logger.info(f"File info by URL request: url={url}") + + try: + async with httpx.AsyncClient(timeout=10.0) as client: + # Try HEAD request first + response = await client.head(url, follow_redirects=True) + + # If HEAD fails, try GET request (some servers don't support HEAD) + if response.status_code != 200: + api_logger.info(f"HEAD request failed with {response.status_code}, trying GET request") + response = await client.get(url, follow_redirects=True) + + if response.status_code != 200: + api_logger.error(f"Failed to fetch file info: HTTP {response.status_code}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Unable to access file: HTTP {response.status_code}" + ) + + # Get file size from Content-Length header or actual content + file_size = response.headers.get("Content-Length") + if file_size: + file_size = int(file_size) + elif hasattr(response, 'content'): + file_size = len(response.content) + else: + file_size = None + + # Get content type from Content-Type header + content_type = response.headers.get("Content-Type", "application/octet-stream") + # Remove charset and other parameters from content type + content_type = content_type.split(';')[0].strip() + + # Extract filename from Content-Disposition or URL + file_name = None + content_disposition = response.headers.get("Content-Disposition") + if content_disposition and "filename=" in content_disposition: + parts = content_disposition.split("filename=") + if len(parts) > 1: + file_name = parts[1].strip('"').strip("'") + + if not file_name: + parsed_url = urlparse(url) + file_name = unquote(os.path.basename(parsed_url.path)) or "unknown" + + # Extract file extension from filename + _, file_ext = os.path.splitext(file_name) + + # If no extension found, infer from content type + if not file_ext: + ext = mimetypes.guess_extension(content_type) + if ext: + file_ext = ext + file_name = f"{file_name}{file_ext}" + + api_logger.info(f"File info retrieved: name={file_name}, size={file_size}, type={content_type}") + + return success( + data={ + "url": url, + "file_name": file_name, + "file_ext": file_ext.lower() if file_ext else "", + "file_size": file_size, + "content_type": content_type, + }, + msg="File information retrieved successfully" + ) + + except HTTPException: + raise + except Exception as e: + api_logger.error(f"Unexpected error: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to retrieve file information: {str(e)}" + ) + + @router.get("/files/{file_id}", response_model=Any) async def download_file( request: Request, @@ -499,6 +596,51 @@ async def get_file_url( ) +@router.get("/files/{file_id}/public-url", response_model=ApiResponse) +async def get_permanent_file_url( + file_id: uuid.UUID, + db: Session = Depends(get_db), + storage_service: FileStorageService = Depends(get_file_storage_service), +): + """ + 获取文件的永久公开 URL(无过期时间)。 + + - 本地存储:返回 API 永久访问地址(基于 FILE_LOCAL_SERVER_URL 配置) + - 远程存储(OSS/S3):返回 bucket 公读地址(需 bucket 已配置公共读权限) + """ + file_metadata = db.query(FileMetadata).filter(FileMetadata.id == file_id).first() + if not file_metadata: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="The file does not exist") + + if file_metadata.status != "completed": + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, + detail=f"File upload not completed, status: {file_metadata.status}") + + file_key = file_metadata.file_key + storage = storage_service.storage + + try: + if isinstance(storage, LocalStorage): + url = f"{settings.FILE_LOCAL_SERVER_URL}/storage/permanent/{file_id}" + else: + url = await storage.get_permanent_url(file_key) + if not url: + raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="Permanent URL not supported for current storage backend") + + api_logger.info(f"Generated permanent URL: file_id={file_id}") + return success( + data={"url": url, "expires_in": None, "permanent": True, "file_name": file_metadata.file_name}, + msg="Permanent file URL generated successfully" + ) + except HTTPException: + raise + except Exception as e: + api_logger.error(f"Failed to generate permanent URL: {e}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to generate permanent URL: {str(e)}") + + @router.get("/public/{file_id}", response_model=Any) async def public_download_file( request: Request, @@ -653,3 +795,44 @@ async def permanent_download_file( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to retrieve file: {str(e)}" ) + + +@router.get("/files/{file_id}/status", response_model=ApiResponse) +async def get_file_status( + file_id: uuid.UUID, + db: Session = Depends(get_db), +): + """ + Get file upload/processing status (no authentication required). + + This endpoint is used to check if a file (e.g., TTS audio) is ready. + Returns status: pending, completed, or failed. + + Args: + file_id: The UUID of the file. + db: Database session. + + Returns: + ApiResponse with file status and metadata. + """ + api_logger.info(f"File status request: file_id={file_id}") + + # Query file metadata from database + file_metadata = db.query(FileMetadata).filter(FileMetadata.id == file_id).first() + if not file_metadata: + api_logger.warning(f"File not found in database: file_id={file_id}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The file does not exist" + ) + + return success( + data={ + "file_id": str(file_id), + "status": file_metadata.status, + "file_name": file_metadata.file_name, + "file_size": file_metadata.file_size, + "content_type": file_metadata.content_type, + }, + msg="File status retrieved successfully" + ) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index 604514b4..94a6dddb 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -119,14 +119,12 @@ class AppChatService: ) # 加载历史消息 - messages = self.conversation_service.get_messages( + history = self.conversation_service.get_conversation_history( conversation_id=conversation_id, - limit=10 + max_history=10, + current_provider=api_key_obj.provider, + current_is_omni=api_key_obj.is_omni ) - history = [ - {"role": msg.role, "content": msg.content} - for msg in messages - ] # 处理多模态文件 processed_files = None @@ -180,7 +178,8 @@ class AppChatService: # 构建用户消息内容(含多模态文件) human_meta = { - "files": [] + "files": [], + "history_files": {} } assistant_meta = { "model": api_key_obj.model_name, @@ -195,6 +194,13 @@ class AppChatService: "url": f.url }) + if processed_files: + human_meta["history_files"] = { + "content": processed_files, + "provider": api_key_obj.provider, + "is_omni": api_key_obj.is_omni + } + # 保存消息 if audio_url: assistant_meta["audio_url"] = audio_url @@ -225,6 +231,7 @@ class AppChatService: "suggested_questions": suggested_questions, "citations": self.agent_service._filter_citations(features_config, result.get("citations", [])), "audio_url": audio_url, + "audio_status": "pending" } async def agnet_chat_stream( @@ -314,17 +321,12 @@ class AppChatService: ) # 加载历史消息 - history = [] - memory_config = {"enabled": True, 'max_history': 10} - if memory_config.get("enabled"): - messages = self.conversation_service.get_messages( - conversation_id=conversation_id, - limit=memory_config.get("max_history", 10) - ) - history = [ - {"role": msg.role, "content": msg.content} - for msg in messages - ] + history = self.conversation_service.get_conversation_history( + conversation_id=conversation_id, + max_history=10, + current_provider=api_key_obj.provider, + current_is_omni=api_key_obj.is_omni + ) # 处理多模态文件 processed_files = None @@ -347,8 +349,14 @@ class AppChatService: total_tokens = 0 text_queue: asyncio.Queue = asyncio.Queue() + api_key_config = { + "model_name": api_key_obj.model_name, + "api_key": api_key_obj.api_key, + "api_base": api_key_obj.api_base, + "provider": api_key_obj.provider, + } stream_audio_url, tts_task = await self.agent_service._generate_tts_streaming( - features_config, api_key_obj, + features_config, api_key_config, text_queue=text_queue, tenant_id=tenant_id, workspace_id=workspace_id ) @@ -378,7 +386,7 @@ class AppChatService: elapsed_time = time.time() - start_time ModelApiKeyService.record_api_key_usage(self.db, api_key_obj.id) - # 发送结束事件(包含 suggested_questions、tts、citations) + # 发送结束事件(包含 suggested_questions、tts、audio_status、citations) end_data: dict = {"elapsed_time": elapsed_time, "message_length": len(full_content), "error": None} sq_config = features_config.get("suggested_questions_after_answer", {}) if isinstance(sq_config, dict) and sq_config.get("enabled"): @@ -388,11 +396,23 @@ class AppChatService: "api_base": api_key_obj.api_base}, {} ) end_data["audio_url"] = stream_audio_url + # 检查TTS是否已完成(非阻塞,不取消任务) + audio_status = "pending" + if tts_task is not None and tts_task.done(): + # 任务已完成,检查是否有异常 + try: + tts_task.result() + audio_status = "completed" + except Exception as e: + logger.warning(f"TTS任务异常: {e}") + audio_status = "failed" + end_data["audio_status"] = audio_status if stream_audio_url else None end_data["citations"] = self.agent_service._filter_citations(features_config, []) # 保存消息 human_meta = { - "files":[] + "files":[], + "history_files": {} } assistant_meta = { "model": api_key_obj.model_name, @@ -402,11 +422,16 @@ class AppChatService: if files: for f in files: - # url = await MultimodalService(self.db).get_file_url(f) human_meta["files"].append({ "type": f.type, "url": f.url }) + if processed_files: + human_meta["history_files"] = { + "content": processed_files, + "provider": api_key_obj.provider, + "is_omni": api_key_obj.is_omni + } if stream_audio_url: assistant_meta["audio_url"] = stream_audio_url diff --git a/api/app/services/conversation_service.py b/api/app/services/conversation_service.py index aff5f533..d7bb3595 100644 --- a/api/app/services/conversation_service.py +++ b/api/app/services/conversation_service.py @@ -119,25 +119,27 @@ class ConversationService: def get_user_conversations( self, - user_id: uuid.UUID - ) -> list[Conversation]: + user_id: uuid.UUID, + page: int = 1, + page_size: int = 20 + ) -> tuple[list[Conversation], int]: """ - Retrieve recent conversations for a specific user - - This method delegates persistence logic to the repository layer and - applies service-level defaults (e.g. recent conversation limit). + Retrieve recent conversations for a specific user with pagination. Args: user_id (uuid.UUID): Unique identifier of the user. + page (int): Page number (1-based). Defaults to 1. + page_size (int): Number of items per page. Defaults to 20. Returns: - list[Conversation]: A list of recent conversation entities. + tuple[list[Conversation], int]: A list of recent conversation entities and total count. """ - conversations = self.conversation_repo.get_conversation_by_user_id( + conversations, total = self.conversation_repo.get_conversation_by_user_id( user_id, - limit=10 + page=page, + page_size=page_size ) - return conversations + return conversations, total def list_conversations( self, @@ -270,7 +272,9 @@ class ConversationService: def get_conversation_history( self, conversation_id: uuid.UUID, - max_history: Optional[int] = None + max_history: Optional[int] = None, + current_provider: Optional[str] = None, + current_is_omni: Optional[bool] = None ) -> List[dict]: """ Retrieve historical conversation messages formatted as dictionaries. @@ -278,6 +282,8 @@ class ConversationService: Args: conversation_id (uuid.UUID): Conversation UUID. max_history (Optional[int]): Maximum number of messages to retrieve. + current_provider (Optional[str]): Current provider for file handling. + current_is_omni (Optional[bool]): Current omni flag for file handling. Returns: List[dict]: List of message dictionaries with keys 'role' and 'content'. @@ -287,14 +293,30 @@ class ConversationService: limit=max_history ) - # 转换为字典格式 - history = [ - { + history = [] + for msg in messages: + msg_dict = { "role": msg.role, - "content": msg.content + "content": [{"type": "text", "text": msg.content}] } - for msg in messages - ] + + # 处理用户消息中的多模态文件 + if msg.role == "user" and msg.meta_data: + history_files = msg.meta_data.get("history_files", {}) + + if history_files and current_provider and current_is_omni is not None: + # 检查是否需要重新处理文件 + stored_provider = history_files.get("provider") + stored_is_omni = history_files.get("is_omni") + + # 如果provider或is_omni不匹配,需要重新处理 + if stored_provider != current_provider or stored_is_omni != current_is_omni: + continue + + # provider和is_omni匹配,直接使用存储的内容 + msg_dict["content"].extend(history_files.get("content")) + + history.append(msg_dict) return history @@ -510,6 +532,7 @@ class ConversationService: provider = api_config.provider api_key = api_config.api_key api_base = api_config.api_base + is_omni = api_config.is_omni model_type = config.type llm = RedBearLLM( @@ -517,14 +540,17 @@ class ConversationService: model_name=model_name, provider=provider, api_key=api_key, - base_url=api_base + base_url=api_base, + is_omni=is_omni ), type=ModelType(model_type) ) conversation_messages = self.get_conversation_history( conversation_id=conversation_id, - max_history=20 + max_history=20, + current_provider=provider, + current_is_omni=is_omni ) if len(conversation_messages) == 0: return ConversationOut( diff --git a/api/app/services/draft_run_service.py b/api/app/services/draft_run_service.py index ba41d323..13243a92 100644 --- a/api/app/services/draft_run_service.py +++ b/api/app/services/draft_run_service.py @@ -582,7 +582,9 @@ class AgentRunService: # 6. 加载历史消息 history = await self._load_conversation_history( conversation_id=conversation_id, - max_history=10 + max_history=10, + current_provider=api_key_config.get("provider"), + current_is_omni=api_key_config.get("is_omni", False) ) # 6. 处理多模态文件 @@ -659,7 +661,10 @@ class AgentRunService: }) }, files=files, - audio_url=audio_url + processed_files=processed_files, + audio_url=audio_url, + provider=api_key_config.get("provider"), + is_omni=api_key_config.get("is_omni", False) ) response = { @@ -676,6 +681,7 @@ class AgentRunService: ) if not sub_agent else [], "citations": self._filter_citations(features_config, result.get("citations", [])), "audio_url": audio_url, + "audio_status": "pending" } logger.info( @@ -818,7 +824,9 @@ class AgentRunService: # 6. 加载历史消息 history = await self._load_conversation_history( conversation_id=conversation_id, - max_history=memory_config.get("max_history", 10) + max_history=memory_config.get("max_history", 10), + current_provider=api_key_config.get("provider"), + current_is_omni=api_key_config.get("is_omni", False) ) # 6. 处理多模态文件 @@ -905,10 +913,13 @@ class AgentRunService: "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": total_tokens} }, files=files, - audio_url=stream_audio_url + processed_files=processed_files, + audio_url=stream_audio_url, + provider=api_key_config.get("provider"), + is_omni=api_key_config.get("is_omni", False) ) - # 12. 发送结束事件(包含 suggested_questions 和 tts) + # 12. 发送结束事件(包含 suggested_questions、audio_url 和 audio_status) end_data: Dict[str, Any] = { "conversation_id": conversation_id, "elapsed_time": elapsed_time, @@ -919,6 +930,17 @@ class AgentRunService: features_config, full_content, api_key_config, effective_params ) end_data["audio_url"] = stream_audio_url + # 检查TTS是否已完成(非阻塞,不取消任务) + audio_status = "pending" + if tts_task is not None and tts_task.done(): + # 任务已完成,检查是否有异常 + try: + tts_task.result() + audio_status = "completed" + except Exception as e: + logger.warning(f"TTS任务异常: {e}") + audio_status = "failed" + end_data["audio_status"] = audio_status if stream_audio_url else None end_data["citations"] = self._filter_citations(features_config, []) yield self._format_sse_event("end", end_data) @@ -1115,13 +1137,17 @@ class AgentRunService: async def _load_conversation_history( self, conversation_id: str, - max_history: int = 10 + max_history: int = 10, + current_provider: Optional[str] = None, + current_is_omni: Optional[bool] = None ) -> List[Dict[str, str]]: - """加载会话历史消息 + """加载会话历史消息,并根据当前模型配置处理多模态文件 Args: conversation_id: 会话ID max_history: 最大历史消息数量 + current_provider: 当前模型的provider + current_is_omni: 当前模型的is_omni Returns: List[Dict]: 历史消息列表 @@ -1131,7 +1157,9 @@ class AgentRunService: conversation_service = ConversationService(self.db) history = conversation_service.get_conversation_history( conversation_id=uuid.UUID(conversation_id), - max_history=max_history + max_history=max_history, + current_provider=current_provider, + current_is_omni=current_is_omni ) logger.debug( @@ -1159,7 +1187,10 @@ class AgentRunService: app_id: Optional[uuid.UUID] = None, user_id: Optional[str] = None, files: Optional[List[FileInput]] = None, - audio_url: Optional[str] = None + processed_files: Optional[List[Dict[str, Any]]] = None, + audio_url: Optional[str] = None, + provider: Optional[str] = None, + is_omni: Optional[bool] = None ) -> None: """保存会话消息(会话已通过 _ensure_conversation 确保存在) @@ -1170,6 +1201,11 @@ class AgentRunService: app_id: 应用ID(未使用,保留用于兼容性) user_id: 用户ID(未使用,保留用于兼容性) meta_data: token消耗 + files: 原始文件输入 + processed_files: 处理后的文件 + audio_url: 音频URL + provider: 模型供应商 + is_omni: 是否为全模态模型 """ try: from app.services.conversation_service import ConversationService @@ -1179,15 +1215,24 @@ class AgentRunService: # 保存消息(会话已经存在) human_meta = { - "files": [] + "files": [], + "history_files": {} } if files: for f in files: - # url = await MultimodalService(self.db).get_file_url(f) human_meta["files"].append({ "type": f.type, "url": f.url }) + + # 保存 history_files,包含 provider 和 is_omni 信息 + if processed_files: + human_meta["history_files"] = { + "content": processed_files, + "provider": provider, + "is_omni": is_omni + } + # 保存用户消息 conversation_service.add_message( conversation_id=conv_uuid, @@ -1413,8 +1458,9 @@ class AgentRunService: workspace_id: Optional[uuid.UUID] = None, ) -> tuple[Optional[str], Optional[asyncio.Task]]: """文本流式输入并行合成音频。 - 返回 (audio_url, task),audio_url 立即可用,task 完成后文件内容就绪。 + 返回 (audio_url, task),audio_url 立即可用(pending状态),task 完成后文件内容就绪。 调用方向 text_queue put 文本 chunk,结束时 put None。 + 前端可通过 GET /storage/files/{file_id}/status 轮询检查音频是否就绪。 """ tts_config = features_config.get("text_to_speech", {}) if not isinstance(tts_config, dict) or not tts_config.get("enabled"): @@ -1801,6 +1847,7 @@ class AgentRunService: ), "cost_estimate": self._estimate_cost(usage, model_info["model_config"]), "audio_url": result.get("audio_url"), + "audio_status": result.get("audio_status"), "citations": result.get("citations", []), "suggested_questions": result.get("suggested_questions", []), "error": None @@ -1878,6 +1925,7 @@ class AgentRunService: "results": [{ **r, "audio_url": r.get("audio_url"), + "audio_status": r.get("audio_status"), "citations": r.get("citations", []), "suggested_questions": r.get("suggested_questions", []), } for r in results], @@ -2009,6 +2057,7 @@ class AgentRunService: full_content = "" returned_conversation_id = model_conversation_id audio_url = None + audio_status = None citations = [] suggested_questions = [] @@ -2067,6 +2116,7 @@ class AgentRunService: # 从 end 事件中提取 features 输出字段 if event_type == "end" and event_data: audio_url = event_data.get("audio_url") + audio_status = event_data.get("audio_status") citations = event_data.get("citations", []) suggested_questions = event_data.get("suggested_questions", []) @@ -2096,6 +2146,7 @@ class AgentRunService: "message": full_content, "elapsed_time": elapsed, "audio_url": audio_url, + "audio_status": audio_status, "citations": citations, "suggested_questions": suggested_questions, "error": None @@ -2110,6 +2161,7 @@ class AgentRunService: "elapsed_time": elapsed, "message_length": len(full_content), "audio_url": audio_url, + "audio_status": audio_status, "citations": citations, "suggested_questions": suggested_questions, "timestamp": time.time() @@ -2246,6 +2298,7 @@ class AgentRunService: "message": r.get("message"), "elapsed_time": r.get("elapsed_time", 0), "audio_url": r.get("audio_url"), + "audio_status": r.get("audio_status"), "citations": r.get("citations", []), "suggested_questions": r.get("suggested_questions", []), "error": r.get("error") From 48e2e613bbb322650f78c26dc497d26eacd83bdc Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 23 Mar 2026 17:34:54 +0800 Subject: [PATCH 41/43] =?UTF-8?q?=E3=80=90change=E3=80=91Restore=20chat=20?= =?UTF-8?q?mode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/memory/llm_tools/openai_client.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/api/app/core/memory/llm_tools/openai_client.py b/api/app/core/memory/llm_tools/openai_client.py index 4536f62d..0b75de3a 100644 --- a/api/app/core/memory/llm_tools/openai_client.py +++ b/api/app/core/memory/llm_tools/openai_client.py @@ -82,26 +82,17 @@ class OpenAIClient(LLMClient): LLMClientException: LLM 调用失败 """ try: - from langchain_core.messages import HumanMessage, SystemMessage, AIMessage - - # 将 dict 消息列表转换为 LangChain 消息对象 - lc_messages = [] - for m in messages: - role = m.get("role", "user") - content = m.get("content", "") - if role == "system": - lc_messages.append(SystemMessage(content=content)) - elif role == "assistant": - lc_messages.append(AIMessage(content=content)) - else: - lc_messages.append(HumanMessage(content=content)) + # 使用 Langfuse 回调(如果可用) + template = """{messages}""" + prompt = ChatPromptTemplate.from_template(template) + chain = prompt | self.client # 添加 Langfuse 回调(如果可用) config = {} if self.langfuse_handler: config["callbacks"] = [self.langfuse_handler] - response = await self.client.ainvoke(lc_messages, config=config) + response = await chain.ainvoke({"messages": messages}, config=config) return response except Exception as e: From f86448f4bfd6f45e976230088dec9721835a55ba Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Mon, 23 Mar 2026 17:39:17 +0800 Subject: [PATCH 42/43] =?UTF-8?q?=E3=80=90change=E3=80=91=20Restore=20chat?= =?UTF-8?q?=20mode=201?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/memory/llm_tools/openai_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/api/app/core/memory/llm_tools/openai_client.py b/api/app/core/memory/llm_tools/openai_client.py index 0b75de3a..43c2b445 100644 --- a/api/app/core/memory/llm_tools/openai_client.py +++ b/api/app/core/memory/llm_tools/openai_client.py @@ -82,7 +82,6 @@ class OpenAIClient(LLMClient): LLMClientException: LLM 调用失败 """ try: - # 使用 Langfuse 回调(如果可用) template = """{messages}""" prompt = ChatPromptTemplate.from_template(template) chain = prompt | self.client From 6348304b7dc9dbf125156de3dd47c37231ef9d73 Mon Sep 17 00:00:00 2001 From: Timebomb2018 <18868801967@163.com> Date: Mon, 23 Mar 2026 18:52:23 +0800 Subject: [PATCH 43/43] fix(app): Error occurred while processing the experience sharing and loading the historical messages. --- api/app/services/app_chat_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/app/services/app_chat_service.py b/api/app/services/app_chat_service.py index 645de979..f87e5f5a 100644 --- a/api/app/services/app_chat_service.py +++ b/api/app/services/app_chat_service.py @@ -129,7 +129,7 @@ class AppChatService: ) # 加载历史消息 - history = self.conversation_service.get_conversation_history( + history = await self.conversation_service.get_conversation_history( conversation_id=conversation_id, max_history=10, current_provider=api_key_obj.provider, @@ -332,7 +332,7 @@ class AppChatService: ) # 加载历史消息 - history = self.conversation_service.get_conversation_history( + history = await self.conversation_service.get_conversation_history( conversation_id=conversation_id, max_history=10, current_provider=api_key_obj.provider,