diff --git a/api/app/celery_app.py b/api/app/celery_app.py index e77ed683..807c59f4 100644 --- a/api/app/celery_app.py +++ b/api/app/celery_app.py @@ -62,10 +62,10 @@ celery_app.conf.update( task_serializer='json', accept_content=['json'], result_serializer='json', - - # 时区 - timezone='Asia/Shanghai', - enable_utc=False, + + # # 时区 + # timezone='Asia/Shanghai', + # enable_utc=False, # 任务追踪 task_track_started=True, diff --git a/api/app/controllers/mcp_market_config_controller.py b/api/app/controllers/mcp_market_config_controller.py index 7958fa1a..0f2da3b0 100644 --- a/api/app/controllers/mcp_market_config_controller.py +++ b/api/app/controllers/mcp_market_config_controller.py @@ -55,6 +55,12 @@ async def get_mcp_servers( status_code=status.HTTP_400_BAD_REQUEST, detail="The paging parameter must be greater than 0" ) + if page * pagesize > 100: + api_logger.warning(f"Paging parameters exceed ModelScope limit: page={page}, pagesize={pagesize}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"The maximum number of MCP services can view is 100. Please visit the ModelScope MCP Plaza." + ) # 2. Query mcp market config information from the database api_logger.debug(f"Query mcp market config: {mcp_market_config_id}") @@ -64,23 +70,26 @@ async def get_mcp_servers( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') # 3. Execute paged query - try: - api = MCPApi() - token = db_mcp_market_config.token - api.login(token) + token = db_mcp_market_config.token + if not token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="MCP market config token is not configured" + ) + api = MCPApi() + api.login(token) - body = { - 'filter': {}, - 'page_number': page, - 'page_size': pagesize, - 'search': keywords - } + body = { + 'filter': {}, + 'page_number': page, + 'page_size': pagesize, + 'search': keywords + } + + try: cookies = api.get_cookies(token) r = api.session.put( url=api.mcp_base_url, @@ -150,14 +159,16 @@ async def get_operational_mcp_servers( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') # 2. Execute paged query - api = MCPApi() token = db_mcp_market_config.token + if not token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="MCP market config token is not configured" + ) + api = MCPApi() api.login(token) url = f'{api.mcp_base_url}/operational' @@ -208,14 +219,16 @@ async def get_mcp_server( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') # 2. Get detailed information for a specific MCP Server - api = MCPApi() token = db_mcp_market_config.token + if not token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="MCP market config token is not configured" + ) + api = MCPApi() api.login(token) result = api.get_mcp_server(server_id=server_id) @@ -236,7 +249,26 @@ async def create_mcp_market_config( try: api_logger.debug(f"Start creating the mcp market config: {create_data.mcp_market_id}") - # 1. Check if the mcp market name already exists + # 1. Validate token can access ModelScope MCP market + if not create_data.token: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Token is required to access ModelScope MCP market" + ) + try: + api = MCPApi() + api.login(create_data.token) + body = {'filter': {}, 'page_number': 1, 'page_size': 1, 'search': None} + cookies = api.get_cookies(create_data.token) + r = api.session.put(url=api.mcp_base_url, headers=api.builder_headers(api.headers), json=body, cookies=cookies) + raise_for_http_status(r) + except Exception as e: + api_logger.warning(f"Token validation failed for ModelScope MCP market: {str(e)}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Unable to access ModelScope MCP market with the provided token: {str(e)}" + ) + # 2. Check if the mcp market name already exists db_mcp_market_config_exist = mcp_market_config_service.get_mcp_market_config_by_mcp_market_id(db, mcp_market_id=create_data.mcp_market_id, current_user=current_user) if db_mcp_market_config_exist: api_logger.warning(f"The mcp market id already exists: {create_data.mcp_market_id}") @@ -296,10 +328,7 @@ async def get_mcp_market_config( db_mcp_market_config = mcp_market_config_service.get_mcp_market_config_by_id(db, mcp_market_config_id=mcp_market_config_id, current_user=current_user) if not db_mcp_market_config: api_logger.warning(f"The mcp market config does not exist or access is denied: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') api_logger.info(f"mcp market config query successful: (ID: {db_mcp_market_config.id})") return success(data=jsonable_encoder(mcp_market_config_schema.McpMarketConfig.model_validate(db_mcp_market_config)), @@ -329,10 +358,7 @@ async def get_mcp_market_config_by_mcp_market_id( db_mcp_market_config = mcp_market_config_service.get_mcp_market_config_by_mcp_market_id(db, mcp_market_id=mcp_market_id, current_user=current_user) if not db_mcp_market_config: api_logger.warning(f"The mcp market config does not exist or access is denied: mcp_market_id={mcp_market_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or access is denied" - ) + return success(msg='The mcp market config does not exist or access is denied') api_logger.info(f"mcp market config query successful: (ID: {db_mcp_market_config.id})") return success(data=jsonable_encoder(mcp_market_config_schema.McpMarketConfig.model_validate(db_mcp_market_config)), @@ -358,12 +384,25 @@ async def update_mcp_market_config( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or you do not have permission to access it: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or you do not have permission to access it" - ) + return success(msg='The mcp market config does not exist or access is denied') - # 2. Update fields (only update non-null fields) + # 2. Validate new token if provided + if update_data.token is not None: + try: + api = MCPApi() + api.login(update_data.token) + body = {'filter': {}, 'page_number': 1, 'page_size': 1, 'search': None} + cookies = api.get_cookies(update_data.token) + r = api.session.put(url=api.mcp_base_url, headers=api.builder_headers(api.headers), json=body, cookies=cookies) + raise_for_http_status(r) + except Exception as e: + api_logger.warning(f"Token validation failed for ModelScope MCP market: {str(e)}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Unable to access ModelScope MCP market with the provided token: {str(e)}" + ) + + # 3. Update fields (only update non-null fields) api_logger.debug(f"Start updating the mcp market config fields: {mcp_market_config_id}") update_dict = update_data.dict(exclude_unset=True) updated_fields = [] @@ -378,30 +417,6 @@ async def update_mcp_market_config( if updated_fields: api_logger.debug(f"updated fields: {', '.join(updated_fields)}") - # 3. verify token - db_mcp_market_config.status = 1 - try: - api = MCPApi() - token = update_data.token - api.login(token) - - body = { - 'filter': {}, - 'page_number': 1, - 'page_size': 20, - 'search': "" - } - cookies = api.get_cookies(token) - r = api.session.put( - url=api.mcp_base_url, - headers=api.builder_headers(api.headers), - json=body, - cookies=cookies) - raise_for_http_status(r) - except requests.exceptions.RequestException as e: - api_logger.error(f"Failed to get MCP servers: {str(e)}") - db_mcp_market_config.status = 0 - # 4. Save to database try: db.commit() @@ -439,10 +454,7 @@ async def delete_mcp_market_config( if not db_mcp_market_config: api_logger.warning( f"The mcp market config does not exist or you do not have permission to access it: mcp_market_config_id={mcp_market_config_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The mcp market config does not exist or you do not have permission to access it" - ) + return success(msg='The mcp market config does not exist or access is denied') # 2. Deleting mcp market config mcp_market_config_service.delete_mcp_market_config_by_id(db, mcp_market_config_id=mcp_market_config_id, current_user=current_user) diff --git a/api/app/controllers/memory_dashboard_controller.py b/api/app/controllers/memory_dashboard_controller.py index bad706d4..22fd2c6c 100644 --- a/api/app/controllers/memory_dashboard_controller.py +++ b/api/app/controllers/memory_dashboard_controller.py @@ -412,14 +412,15 @@ def get_current_user_rag_total_num( @router.get("/rag_content", response_model=ApiResponse) def get_rag_content( end_user_id: str = Query(..., description="宿主ID"), - limit: int = Query(15, description="返回记录数"), + page: int = Query(1, gt=0, description="页码,从1开始"), + pagesize: int = Query(15, gt=0, le=100, description="每页返回记录数"), db: Session = Depends(get_db), current_user: User = Depends(get_current_user), ): """ - 获取当前宿主知识库中的chunk内容 + 获取当前宿主知识库中的chunk内容(分页) """ - data = memory_dashboard_service.get_rag_content(end_user_id, limit, db, current_user) + data = memory_dashboard_service.get_rag_content(end_user_id, page, pagesize, db, current_user) return success(data=data, msg="宿主RAGchunk数据获取成功") diff --git a/api/app/core/tools/mcp/client.py b/api/app/core/tools/mcp/client.py index c082b314..f19902a2 100644 --- a/api/app/core/tools/mcp/client.py +++ b/api/app/core/tools/mcp/client.py @@ -53,6 +53,7 @@ class SimpleMCPClient: else: await self._connect_http() except Exception as e: + await self.disconnect() logger.error(f"MCP连接失败: {self.server_url}, 错误: {e}") raise MCPConnectionError(f"连接失败: {e}") diff --git a/api/app/repositories/end_user_repository.py b/api/app/repositories/end_user_repository.py index 0b828a8b..61faf6d4 100644 --- a/api/app/repositories/end_user_repository.py +++ b/api/app/repositories/end_user_repository.py @@ -247,7 +247,6 @@ class EndUserRepository: EndUser.user_summary: user_summary, EndUser.rag_tags: rag_tags, EndUser.rag_personas: rag_personas, - EndUser.storage_type: "rag", EndUser.rag_summary_updated_at: datetime.datetime.now(), }, synchronize_session=False @@ -286,7 +285,6 @@ class EndUserRepository: .update( { EndUser.memory_insight: memory_insight, - EndUser.storage_type: "rag", EndUser.memory_insight_updated_at: datetime.datetime.now(), }, synchronize_session=False diff --git a/api/app/services/app_dsl_service.py b/api/app/services/app_dsl_service.py index fc071177..0c778b81 100644 --- a/api/app/services/app_dsl_service.py +++ b/api/app/services/app_dsl_service.py @@ -18,6 +18,7 @@ from app.models.tool_model import ToolConfig as ToolConfigModel from app.models.workflow_model import WorkflowConfig from app.services.workflow_service import WorkflowService from app.core.workflow.adapters.memory_bear.memory_bear_adapter import MemoryBearAdapter +from app.models.memory_config_model import MemoryConfig as MemoryConfigModel class AppDslService: @@ -220,7 +221,7 @@ class AppDslService: id=uuid.uuid4(), workspace_id=workspace_id, created_by=user_id, - name=app_meta.get("name", "导入应用"), + name=self._unique_app_name(app_meta.get("name", "导入应用"), workspace_id, app_type), description=app_meta.get("description"), icon=app_meta.get("icon"), icon_type=app_meta.get("icon_type"), @@ -296,6 +297,19 @@ class AppDslService: self.db.refresh(new_app) return new_app, warnings + def _unique_app_name(self, name: str, workspace_id: uuid.UUID, app_type: AppType) -> str: + existing = {r[0] for r in self.db.query(App.name).filter( + App.workspace_id == workspace_id, + App.type == app_type, + App.is_active.is_(True) + ).all()} + if name not in existing: + return name + counter = 1 + while f"{name}({counter})" in existing: + counter += 1 + return f"{name}({counter})" + def _resolve_model(self, ref: Optional[dict], tenant_id: uuid.UUID, warnings: list) -> Optional[uuid.UUID]: if not ref: return None @@ -398,9 +412,19 @@ class AppDslService: config_id = memory.get("memory_config_id") or memory.get("memory_content") if not config_id: return memory - from app.models.memory_config_model import MemoryConfig as MemoryConfigModel + try: + config_uuid = uuid.UUID(str(config_id)) + except (ValueError, AttributeError): + exists = self.db.query(MemoryConfigModel).filter( + MemoryConfigModel.config_id_old == int(config_id), + MemoryConfigModel.workspace_id == workspace_id + ).first() + if not exists: + warnings.append(f"记忆配置 '{config_id}' 未匹配,已置空,请导入后手动配置") + return {**memory, "memory_config_id": None, "enabled": False} + return memory exists = self.db.query(MemoryConfigModel).filter( - MemoryConfigModel.config_id == config_id, + MemoryConfigModel.config_id == config_uuid, MemoryConfigModel.workspace_id == workspace_id ).first() if not exists: diff --git a/api/app/services/memory_dashboard_service.py b/api/app/services/memory_dashboard_service.py index db49c50a..be656acb 100644 --- a/api/app/services/memory_dashboard_service.py +++ b/api/app/services/memory_dashboard_service.py @@ -535,7 +535,8 @@ def get_users_total_chunk_batch( def get_rag_content( end_user_id: str, - limit: int, + page: int, + pagesize: int, db: Session, current_user: User ) -> dict: @@ -543,9 +544,9 @@ def get_rag_content( 先在documents表中查询file_name=='end_user_id'+'.txt'的id和kb_id, 然后调用/chunks/{kb_id}/{document_id}/chunks接口的相关代码获取所有内容, 接着对获取的内容进行提取,只要page_content的内容, - 最后返回数据 + 最后返回分页数据 """ - business_logger.info(f"获取RAG内容: end_user_id={end_user_id}, limit={limit}, 操作者: {current_user.username}") + business_logger.info(f"获取RAG内容: end_user_id={end_user_id}, page={page}, pagesize={pagesize}, 操作者: {current_user.username}") try: from app.models.document_model import Document @@ -562,63 +563,76 @@ def get_rag_content( if not documents: business_logger.warning(f"未找到文件: {file_name}") return { - "total": 0, - "contents": [] + "page": { + "page": page, + "pagesize": pagesize, + "total": 0, + "hasnext": False, + }, + "items": [] } business_logger.info(f"找到 {len(documents)} 个文档记录") - # 3. 获取所有chunks的page_content - all_contents = [] - total_chunks = 0 + # 3. 按全局偏移量计算当前页数据 + # 全局偏移范围:[offset_start, offset_end) + offset_start = (page - 1) * pagesize + offset_end = offset_start + pagesize + + global_total = 0 # 所有文档的 chunk 总数 + page_contents = [] # 当前页的内容 for document in documents: try: - # 获取知识库信息 kb = knowledge_repository.get_knowledge_by_id(db, document.kb_id) if not kb: business_logger.warning(f"知识库不存在: kb_id={document.kb_id}") continue - # 初始化向量服务 vector_service = ElasticSearchVectorFactory().init_vector(knowledge=kb) - # 获取该文档的所有chunks(分页获取) - page = 1 - pagesize = 100 # 每页100条 + # 先用 pagesize=1 获取该文档的 chunk 总数 + doc_total, _ = vector_service.search_by_segment( + document_id=str(document.id), + query=None, + pagesize=1, + page=1, + asc=True + ) - while True: - total, items = vector_service.search_by_segment( + doc_offset_start = global_total # 该文档在全局中的起始偏移 + doc_offset_end = global_total + doc_total # 该文档在全局中的结束偏移 + global_total += doc_total + + # 当前页与该文档无交集,跳过 + if doc_offset_end <= offset_start or doc_offset_start >= offset_end: + continue + + # 计算需要从该文档取的局部范围 + local_start = max(offset_start - doc_offset_start, 0) + local_end = min(offset_end - doc_offset_start, doc_total) + need_count = local_end - local_start + + # 换算成 ES 分页参数(ES page 从1开始) + es_page = (local_start // pagesize) + 1 + es_offset_in_page = local_start % pagesize + + fetched = [] + while len(fetched) < es_offset_in_page + need_count: + _, items = vector_service.search_by_segment( document_id=str(document.id), query=None, pagesize=pagesize, - page=page, + page=es_page, asc=True ) - if not items: break - - # 提取page_content - for item in items: - all_contents.append(item.page_content) - total_chunks += 1 - - # # 如果达到limit限制,直接返回 - # if limit > 0 and total_chunks >= limit: - # business_logger.info(f"已达到limit限制: {limit}") - # return { - # "total": total_chunks, - # "contents": all_contents[:limit] - # } - - # 检查是否还有下一页 - if page * pagesize >= total: - break - - page += 1 + fetched.extend(items) + es_page += 1 - business_logger.info(f"文档 {document.id} 获取了 {len(items)} 个chunks") + slice_items = fetched[es_offset_in_page: es_offset_in_page + need_count] + page_contents.extend([item.page_content for item in slice_items]) except Exception as e: business_logger.error(f"获取文档 {document.id} 的chunks失败: {str(e)}") @@ -626,11 +640,16 @@ def get_rag_content( # 4. 返回结果 result = { - "total": total_chunks, - "contents": all_contents[:limit] if limit > 0 else all_contents + "page": { + "page": page, + "pagesize": pagesize, + "total": global_total, + "hasnext": offset_end < global_total, + }, + "items": page_contents } - business_logger.info(f"成功获取RAG内容: total={total_chunks}, 返回={len(result['contents'])} 条") + business_logger.info(f"成功获取RAG内容: total={global_total}, page={page}, 返回={len(page_contents)} 条") return result except Exception as e: @@ -730,8 +749,8 @@ async def generate_rag_profile( if not end_user: raise ValueError(f"end_user {end_user_id} 不存在") - rag_content = get_rag_content(end_user_id, limit, db, current_user) - chunks = rag_content.get("contents", []) + rag_content = get_rag_content(end_user_id, page=1, pagesize=limit, db=db, current_user=current_user) + chunks = rag_content.get("items", []) if not chunks: business_logger.warning(f"未找到chunk内容,无法生产RAG画像: end_user_id={end_user_id}") diff --git a/api/app/version_info.json b/api/app/version_info.json index bbaffc17..12793cb5 100644 --- a/api/app/version_info.json +++ b/api/app/version_info.json @@ -1,4 +1,38 @@ { + "v0.2.7": { + "introduction": { + "codeName": "武陵", + "releaseDate": "2026-3-13", + "upgradePosition": "🐻 应用可移植性、工具生态扩展与记忆智能精细化", + "coreUpgrades": [ + "1. 应用管理与可移植性
* 应用导入/导出:全面支持 Agent 配置和工作流定义的导入导出,实现跨环境无缝迁移、备份和共享", + "2. 工具生态扩展 🔌
* MCP 广场集成:工具管理接入 MCP 广场,提供集中式工具发现、浏览和集成枢纽", + "3. 工作流增强 📝
* 备注节点:新增备注节点类型,支持工作流图中的内联文档和上下文说明,提升协作效率", + "4. 记忆智能精细化 🧠
* 隐性记忆与情绪记忆生成逻辑优化:含数据存在性校验、时间轴筛选和兴趣分布缓存校验
* 兴趣分布生成逻辑改进:优化算法产生更准确的用户兴趣画像", + "5. 用户体验改进 🎨
* 知识库分享加载状态:增加加载指示器,改善感知响应速度", + "6. 稳健性与缺陷修复 🔧
* 应用调试终端用户管理:修复调试会话错误创建 end_user 记录问题
* 知识库数据集创建流程:解决创建数据集后无法进入下一步的缺陷
* RAG 空间记忆生成失败:修复记忆生成失败和存储中断的关键问题
* 应用字符限制强制执行:增加条件校验防止过长输入
* 语义剪枝情绪/兴趣保留:优化剪枝逻辑防止误删情绪和兴趣片段
* 语义剪枝效果优化:增强算法平衡记忆压缩与信息保留", + "
", + "v0.2.8 及更远的未来将引入多模态记忆能力,实现知识库和模型的分服务部署,为应用增加语音输入支持,并扩展应用能力至语音回复、BI 可视化、PPT 生成和直接生图。应用会话分享和联网搜索功能将得到修复和增强。记忆检索基准测试和情景记忆聚类算法将增强上下文召回和时序推理能力。通往真正智能、多模态、上下文感知应用的旅程仍在继续。", + "记忆熊,智慧致远 🐻✨" + ] + }, + "introduction_en": { + "codeName": "WuLing", + "releaseDate": "2026-3-13", + "upgradePosition": "🐻 Application portability, tool ecosystem expansion, and memory intelligence refinement", + "coreUpgrades": [ + "1. Application Management & Portability
* Application Import/Export: Full support for importing and exporting agent configurations and workflow definitions, enabling seamless cross-environment migration, backup, and sharing", + "2. Tool Ecosystem Expansion 🔌
* MCP Marketplace Integration: Tool management now includes MCP Marketplace access for centralized tool discovery, browsing, and integration", + "3. Workflow Enhancements 📝
* Annotation Node: Introduced annotation node type for inline documentation and contextual notes within workflow graphs, improving collaboration", + "4. Memory Intelligence Refinement 🧠
* Implicit & Emotional Memory Generation Logic: Comprehensive optimization including data existence validation, timeline filtering, and interest distribution cache validation
* Interest Distribution Generation Logic: Refined algorithm for more accurate user interest profiles", + "5. User Experience Improvements 🎨
* Knowledge Base Sharing Loading State: Added loading indicators to improve perceived responsiveness", + "6. Robustness & Bug Fixes 🔧
* End User Management in App Debugging: Fixed incorrect end_user record creation during debugging sessions
* Knowledge Base Dataset Creation Flow: Resolved bug preventing next step after dataset creation
* RAG Space Memory Generation Failure: Fixed critical memory generation and storage interruption issue
* Application Character Limit Enforcement: Added conditional validation to prevent excessively long input
* Semantic Pruning Emotion/Interest Preservation: Optimized pruning logic to prevent incorrect deletion of emotional and interest fragments
* Semantic Pruning Effectiveness: Enhanced algorithm balance between memory compression and information retention", + "
", + "Looking forward to v0.2.8 and beyond, we will introduce multimodal memory capabilities with distributed service deployment for knowledge bases and models, enabling voice input for applications and expanding application capabilities with voice responses, BI visualizations, PPT generation, and direct image creation. Application conversation sharing and web search functionality will be restored and enhanced. Memory retrieval benchmarking and episodic memory clustering algorithms will enhance contextual recall and temporal reasoning. The journey toward truly intelligent, multimodal, context-aware applications continues.", + "MemoryBear, Wisdom Reaching Far 🐻✨" + ] + } + }, "v0.2.6": { "introduction": { "codeName": "听剑", diff --git a/web/package.json b/web/package.json index 2799a631..b9e3709e 100644 --- a/web/package.json +++ b/web/package.json @@ -44,6 +44,7 @@ "i18next": "^25.6.0", "js-yaml": "^4.1.1", "lexical": "^0.39.0", + "mammoth": "^1.12.0", "mermaid": "^11.12.1", "react": "^18.2.0", "react-dom": "^18.2.0", @@ -59,6 +60,7 @@ "remark-gfm": "^4.0.1", "remark-math": "^6.0.0", "tailwindcss": "^4.1.14", + "xlsx": "^0.18.5", "zustand": "^5.0.8" }, "devDependencies": { diff --git a/web/src/api/memory.ts b/web/src/api/memory.ts index b8bfac32..823e3d78 100644 --- a/web/src/api/memory.ts +++ b/web/src/api/memory.ts @@ -123,8 +123,9 @@ export const getChunkInsight = (end_user_id: string) => { return request.get(`/dashboard/chunk_insight`, { end_user_id }) } // RAG User Memory - Storage content -export const getRagContent = (end_user_id: string) => { - return request.get(`/dashboard/rag_content`, { end_user_id, limit: 20 }) +export const getRagContentUrl = '/dashboard/rag_content' +export const getRagContent = (end_user_id: string, page = 1, pagesize = 20) => { + return request.get(getRagContentUrl, { end_user_id, page, pagesize }) } // Emotion distribution analysis export const getWordCloud = (end_user_id: string) => { diff --git a/web/src/api/tools.ts b/web/src/api/tools.ts index 7c7a0e3d..612f924d 100644 --- a/web/src/api/tools.ts +++ b/web/src/api/tools.ts @@ -6,12 +6,12 @@ export const getTools = (data: Query) => { return request.get('/tools', data) } // 创建MCP工具 -export const addTool = (values: MCPToolItem | CustomToolItem) => { - return request.post('/tools', values) +export const addTool = (values: MCPToolItem | CustomToolItem, config?: { signal?: AbortSignal }) => { + return request.post('/tools', values, config) } // 更新工具 -export const updateTool = (tool_id: string, data: MCPToolItem | InnerToolItem | CustomToolItem) => { - return request.put(`/tools/${tool_id}`, data) +export const updateTool = (tool_id: string, data: MCPToolItem | InnerToolItem | CustomToolItem, config?: { signal?: AbortSignal }) => { + return request.put(`/tools/${tool_id}`, data, config) } // 删除工具 export const deleteTool = (tool_id: string) => { diff --git a/web/src/components/DocumentPreview/index.tsx b/web/src/components/DocumentPreview/index.tsx index 404d6e50..c57080fb 100644 --- a/web/src/components/DocumentPreview/index.tsx +++ b/web/src/components/DocumentPreview/index.tsx @@ -1,20 +1,18 @@ import { useState, useEffect, type FC } from 'react'; -import { Spin, Alert, Button } from 'antd'; -import { ReloadOutlined } from '@ant-design/icons'; +import { Spin, Alert, Button, Table } from 'antd'; +import { ReloadOutlined, DownloadOutlined } from '@ant-design/icons'; import RbMarkdown from '../Markdown'; -import { cookieUtils } from '@/utils/request' - -type PreviewMode = 'office' | 'google'; +import { cookieUtils } from '@/utils/request'; +import mammoth from 'mammoth'; +import * as XLSX from 'xlsx'; interface DocumentPreviewProps { fileUrl: string; fileName?: string; - fileExt?: string; // 文件扩展名(优先使用) + fileExt?: string; width?: string | number; height?: string | number; className?: string; - mode?: PreviewMode; // 预览模式 - showModeSwitch?: boolean; // 是否显示模式切换按钮 } const DocumentPreview: FC = ({ @@ -24,18 +22,19 @@ const DocumentPreview: FC = ({ width = '100%', height = '600px', className = '', - mode = 'office', - showModeSwitch = true, }) => { const [loading, setLoading] = useState(true); const [error, setError] = useState(false); - const [currentMode, setCurrentMode] = useState(mode); + const [errorMessage, setErrorMessage] = useState(''); const [textContent, setTextContent] = useState(''); + const [htmlContent, setHtmlContent] = useState(''); + const [excelData, setExcelData] = useState<{ sheetName: string; data: any[][] }[]>([]); - // 支持的文件类型 - const supportedTypes = ['.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.pdf', '.txt', '.md', '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp']; + // 支持预览的文件类型 + const previewableTypes = ['.pdf', '.txt', '.md', '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp', '.doc', '.docx', '.xls', '.xlsx']; + // PPT 暂不支持 + const downloadOnlyTypes = ['.ppt', '.pptx']; - // 获取文件扩展名(优先使用 fileExt prop) const getFileExtension = () => { if (fileExt) { return fileExt.toLowerCase().startsWith('.') ? fileExt.toLowerCase() : `.${fileExt.toLowerCase()}`; @@ -45,67 +44,25 @@ const DocumentPreview: FC = ({ return match ? `.${match[1].toLowerCase()}` : ''; }; - // 检查是否为文本文件 - const isTextFile = () => { - const ext = getFileExtension(); - return ext === '.txt'; - }; - - // 检查是否为 Markdown 文件 - const isMarkdownFile = () => { - const ext = getFileExtension(); - return ext === '.md'; - }; - - // 检查是否为图片文件 + const isTextFile = () => getFileExtension() === '.txt'; + const isMarkdownFile = () => getFileExtension() === '.md'; const isImageFile = () => { - const ext = getFileExtension(); const imageExts = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp']; - return imageExts.includes(ext); - }; - - // 检查文件类型是否支持 - const isSupportedFile = () => { - const ext = getFileExtension(); - return ext && supportedTypes.includes(ext); + return imageExts.includes(getFileExtension()); }; + const isPdfFile = () => getFileExtension() === '.pdf'; + const isWordFile = () => ['.doc', '.docx'].includes(getFileExtension()); + const isExcelFile = () => ['.xls', '.xlsx'].includes(getFileExtension()); + const isPreviewable = () => previewableTypes.includes(getFileExtension()); + const isDownloadOnly = () => downloadOnlyTypes.includes(getFileExtension()); - // 检查是否为 PDF 文件 - const isPdfFile = () => { - const ext = getFileExtension(); - return ext === '.pdf'; - }; - - // 构建预览 URL - const getPreviewUrl = () => { - // 处理文件 URL,如果是完整的 URL,转换为代理路径 - let requestUrl = fileUrl; - - // 如果是完整的 https://devapi.mem.redbearai.com 开头的 URL,提取路径部分 - // 这样可以通过代理访问,避免 CORS 问题 - if (fileUrl.includes('devapi.mem.redbearai.com')) { - const url = new URL(fileUrl); - requestUrl = url.pathname; // 只取路径部分,例如 /api/files/xxx - } - - // 对于 PDF 文件,直接使用浏览器内置预览 - if (isPdfFile()) { - return requestUrl; - } - - // 确保 fileUrl 是完整的 URL(用于第三方预览服务) - let fullUrl = fileUrl; - if (!fileUrl.startsWith('http')) { - fullUrl = `${window.location.origin}${fileUrl.startsWith('/') ? '' : '/'}${fileUrl}`; - } - console.log('预览 URL:', fullUrl); - // 根据模式选择预览服务 - if (currentMode === 'google') { - return `https://docs.google.com/viewer?url=${encodeURIComponent(fullUrl)}&embedded=true`; - } - - // 默认使用 Microsoft Office Online Viewer - return `https://view.officeapps.live.com/op/embed.aspx?src=${encodeURIComponent(fullUrl)}`; + const handleDownload = () => { + const link = document.createElement('a'); + link.href = fileUrl; + link.download = fileName || 'document'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); }; const handleLoad = () => { @@ -113,20 +70,24 @@ const DocumentPreview: FC = ({ setError(false); }; - const handleError = () => { + const handleError = (msg?: string) => { setLoading(false); setError(true); + if (msg) setErrorMessage(msg); }; const handleRetry = () => { setLoading(true); setError(false); + setErrorMessage(''); if (isTextFile() || isMarkdownFile()) { - // 重新加载文本文件 loadTextFile(); + } else if (isWordFile()) { + loadWordFile(); + } else if (isExcelFile()) { + loadExcelFile(); } else { - // 强制重新加载 iframe const iframe = document.querySelector(`iframe[title="${fileName || '文档预览'}"]`) as HTMLIFrameElement; if (iframe) { iframe.src = iframe.src; @@ -134,82 +95,164 @@ const DocumentPreview: FC = ({ } }; - const handleSwitchMode = () => { - setCurrentMode(prev => prev === 'office' ? 'google' : 'office'); - setLoading(true); - setError(false); - }; - - // 加载文本文件内容 const loadTextFile = async () => { setLoading(true); setError(false); + setErrorMessage(''); try { - // 处理文件 URL,如果是完整的 URL,转换为代理路径 let requestUrl = fileUrl; - // 如果是完整的 https://devapi.mem.redbearai.com 开头的 URL,提取路径部分 if (fileUrl.includes('devapi.mem.redbearai.com')) { const url = new URL(fileUrl); - requestUrl = url.pathname; // 只取路径部分,例如 /api/files/xxx + requestUrl = url.pathname; } const response = await fetch(requestUrl, { - credentials: 'include', // 包含认证信息 + credentials: 'include', headers: { 'Authorization': `Bearer ${cookieUtils.get('authToken') || ''}`, }, }); if (!response.ok) { - throw new Error('Failed to load file'); + throw new Error(`HTTP ${response.status}: ${response.statusText}`); } - // 检查响应的 Content-Type const contentType = response.headers.get('Content-Type') || ''; - console.log('文件 Content-Type:', contentType); - // 如果是图片类型,显示错误提示 if (contentType.startsWith('image/')) { - setError(true); - setTextContent(''); - setLoading(false); - console.error('文件实际是图片类型,但被标记为 txt'); + handleError('文件实际是图片类型,但被标记为文本文件'); return; } const text = await response.text(); - // 检查是否是二进制数据(如 PNG 文件头) if (text.startsWith('\x89PNG') || text.startsWith('�PNG')) { - setError(true); - setTextContent(''); - setLoading(false); - console.error('文件内容是 PNG 图片,但扩展名是 txt'); + handleError('文件内容是图片,但扩展名是文本'); return; } setTextContent(text); setLoading(false); - } catch (err) { + } catch (err: any) { console.error('加载文本文件失败:', err); - setError(true); - setLoading(false); + handleError(err.message || '加载文本文件失败'); + } + }; + + const loadWordFile = async () => { + setLoading(true); + setError(false); + setErrorMessage(''); + try { + let requestUrl = fileUrl; + + if (fileUrl.includes('devapi.mem.redbearai.com')) { + const url = new URL(fileUrl); + requestUrl = url.pathname; + } + + const response = await fetch(requestUrl, { + credentials: 'include', + headers: { + 'Authorization': `Bearer ${cookieUtils.get('authToken') || ''}`, + }, + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const arrayBuffer = await response.arrayBuffer(); + const result = await mammoth.convertToHtml({ arrayBuffer }); + setHtmlContent(result.value); + setLoading(false); + } catch (err: any) { + console.error('加载 Word 文件失败:', err); + handleError(err.message || '加载 Word 文件失败,文件可能已损坏'); + } + }; + + const loadExcelFile = async () => { + setLoading(true); + setError(false); + setErrorMessage(''); + try { + let requestUrl = fileUrl; + + if (fileUrl.includes('devapi.mem.redbearai.com')) { + const url = new URL(fileUrl); + requestUrl = url.pathname; + } + + const response = await fetch(requestUrl, { + credentials: 'include', + headers: { + 'Authorization': `Bearer ${cookieUtils.get('authToken') || ''}`, + }, + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const arrayBuffer = await response.arrayBuffer(); + const workbook = XLSX.read(arrayBuffer, { type: 'array' }); + + const sheets = workbook.SheetNames.map(sheetName => { + const worksheet = workbook.Sheets[sheetName]; + const data = XLSX.utils.sheet_to_json(worksheet, { header: 1 }) as any[][]; + return { sheetName, data }; + }); + + setExcelData(sheets); + setLoading(false); + } catch (err: any) { + console.error('加载 Excel 文件失败:', err); + handleError(err.message || '加载 Excel 文件失败,文件可能已损坏'); } }; - // 当文件是 txt 或 md 时,加载文本内容 useEffect(() => { if (isTextFile() || isMarkdownFile()) { loadTextFile(); + } else if (isWordFile()) { + loadWordFile(); + } else if (isExcelFile()) { + loadExcelFile(); } }, [fileUrl]); - if (!isSupportedFile()) { + // PPT 文件只提供下载 + if (isDownloadOnly()) { + return ( +
+ +

PPT 文件暂不支持在线预览,请下载后查看

+ +
+ } + type="info" + showIcon + /> + + ); + } + + if (!isPreviewable()) { return ( @@ -230,23 +273,26 @@ const DocumentPreview: FC = ({ message="预览失败" description={
-

无法加载文档预览,可能的原因:

-
    -
  • 文件需要认证访问,Office 预览服务无法访问
  • -
  • 文件 URL 无法公开访问(需要配置公开访问或临时签名 URL)
  • -
  • 文件大小超过限制(Office 预览通常限制 10MB)
  • -
  • 预览服务暂时不可用
  • +

    无法加载文档预览

    + {errorMessage && ( +

    + 错误详情:{errorMessage} +

    + )} +

    可能的原因:

    +
      +
    • 文件 URL 无法访问(401/403/404)
    • +
    • 认证 token 已过期
    • +
    • 文件格式损坏或不匹配
    • +
    • 网络连接问题
    -

    建议:请下载文件到本地查看

    - {showModeSwitch && !isPdfFile() && ( - - )} +
} @@ -256,26 +302,23 @@ const DocumentPreview: FC = ({ )} - {/* 图片文件预览 */} {isImageFile() && !error && !loading && (
{fileName setError(true)} + onError={() => handleError('图片加载失败')} />
)} - {/* Markdown 文件预览 */} {isMarkdownFile() && !error && !loading && (
)} - {/* 文本文件预览 */} {isTextFile() && !error && !loading && (
@@ -284,44 +327,52 @@ const DocumentPreview: FC = ({
         
)} - {/* PDF 文件预览(使用浏览器内置预览) */} + {isWordFile() && !error && !loading && ( +
+
+
+ )} + + {isExcelFile() && !error && !loading && ( +
+ {excelData.map((sheet, index) => ( +
+

{sheet.sheetName}

+ {sheet.data.length > 0 && ( + ({ key: idx, ...row }))} + columns={sheet.data[0]?.map((header: any, colIdx: number) => ({ + title: header || `列 ${colIdx + 1}`, + dataIndex: colIdx, + key: colIdx, + width: 150, + })) || []} + pagination={false} + scroll={{ x: 'max-content' }} + size="small" + bordered + /> + )} + + ))} + + )} + {isPdfFile() && !error && !loading && (