diff --git a/api/app/controllers/public_share_controller.py b/api/app/controllers/public_share_controller.py index 0e666898..34572964 100644 --- a/api/app/controllers/public_share_controller.py +++ b/api/app/controllers/public_share_controller.py @@ -663,6 +663,7 @@ async def config_query( content = { "app_type": release.app.type, "variables": workflow_service.get_start_node_variables(release.config), + "memory": workflow_service.is_memory_enable(release.config), "features": release.config.get("features") } elif release.app.type == AppType.AGENT: diff --git a/api/app/core/workflow/engine/stream_output_coordinator.py b/api/app/core/workflow/engine/stream_output_coordinator.py index c2885ab0..ddee9adc 100644 --- a/api/app/core/workflow/engine/stream_output_coordinator.py +++ b/api/app/core/workflow/engine/stream_output_coordinator.py @@ -5,7 +5,7 @@ import re from typing import AsyncGenerator -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, PrivateAttr from app.core.logging_config import get_logger from app.core.workflow.engine.variable_pool import VariablePool @@ -52,10 +52,11 @@ class OutputContent(BaseModel): ) ) - _SCOPE: str | None = None + _SCOPE: str | None = PrivateAttr(default=None) - def get_scope(self) -> str: - self._SCOPE = SCOPE_PATTERN.findall(self.literal)[0] + def get_scope(self) -> str | None: + matches = SCOPE_PATTERN.findall(self.literal) + self._SCOPE = matches[0] if matches else None return self._SCOPE def depends_on_scope(self, scope: str) -> bool: @@ -68,6 +69,8 @@ class OutputContent(BaseModel): Returns: bool: True if this segment references the given scope. """ + if not self.is_variable: + return False if self._SCOPE: return self._SCOPE == scope return self.get_scope() == scope @@ -152,7 +155,7 @@ class StreamOutputConfig(BaseModel): """ # Case 1: resolve control branch dependency - if scope in self.control_nodes.keys(): + if scope in self.control_nodes: if status is None: raise RuntimeError("[Stream Output] Control node activation status not provided") if status in self.control_nodes[scope]: diff --git a/api/app/models/workflow_model.py b/api/app/models/workflow_model.py index 4f9ffe68..29fe5369 100644 --- a/api/app/models/workflow_model.py +++ b/api/app/models/workflow_model.py @@ -35,6 +35,7 @@ class WorkflowConfig(Base): # 执行配置 execution_config = Column(JSONB, nullable=False, default=dict) + features = Column(JSONB, nullable=True, default=dict) # 触发器配置(可选) triggers = Column(JSONB, default=list) diff --git a/api/app/schemas/workflow_schema.py b/api/app/schemas/workflow_schema.py index e580833f..d878d97c 100644 --- a/api/app/schemas/workflow_schema.py +++ b/api/app/schemas/workflow_schema.py @@ -80,6 +80,7 @@ class WorkflowConfigCreate(BaseModel): variables: list[VariableDefinition] = Field(default_factory=list, description="变量列表") execution_config: ExecutionConfig = Field(default_factory=ExecutionConfig, description="执行配置") triggers: list[TriggerConfig] = Field(default_factory=list, description="触发器列表") + features: dict = Field(default_factory=dict, description="功能特性配置") class WorkflowConfigUpdate(BaseModel): @@ -87,6 +88,7 @@ class WorkflowConfigUpdate(BaseModel): nodes: list[NodeDefinition] | None = None edges: list[EdgeDefinition] | None = None variables: list[VariableDefinition] | None = None + features: dict | None = None execution_config: ExecutionConfig | None = None triggers: list[TriggerConfig] | None = None @@ -102,6 +104,7 @@ class WorkflowConfig(BaseModel): variables: list[dict[str, Any]] execution_config: dict[str, Any] triggers: list[dict[str, Any]] + features: dict | None is_active: bool created_at: datetime.datetime updated_at: datetime.datetime @@ -114,6 +117,10 @@ class WorkflowConfig(BaseModel): def _serialize_updated_at(self, dt: datetime.datetime): return int(dt.timestamp() * 1000) if dt else None + @field_serializer("features", when_used="json") + def _serialize_features(self, features: dict | None): + return features or {} + # ==================== 工作流执行 ==================== diff --git a/api/app/services/app_service.py b/api/app/services/app_service.py index 5ef34da8..98fdf6c9 100644 --- a/api/app/services/app_service.py +++ b/api/app/services/app_service.py @@ -1609,6 +1609,7 @@ class AppService: variables=[var.model_dump() for var in data.variables] if data.variables else [], execution_config=data.execution_config.model_dump() if data.execution_config else {}, triggers=[trigger.model_dump() for trigger in data.triggers] if data.triggers else [], + features=data.features or {}, is_active=True, created_at=now, updated_at=now @@ -1622,6 +1623,7 @@ class AppService: workflow_cfg.variables = [var.model_dump() for var in data.variables] if data.variables else [] workflow_cfg.execution_config = data.execution_config.model_dump() if data.execution_config else {} workflow_cfg.triggers = [trigger.model_dump() for trigger in data.triggers] if data.triggers else [] + workflow_cfg.features = data.features or {} workflow_cfg.updated_at = now self.db.commit() @@ -1875,7 +1877,8 @@ class AppService: "edges": workflow_cfg.edges, "variables": workflow_cfg.variables, "execution_config": workflow_cfg.execution_config, - "triggers": workflow_cfg.triggers + "triggers": workflow_cfg.triggers, + "features": workflow_cfg.features or {} } is_valid, errors = WorkflowValidator.validate_for_publish(config) diff --git a/api/app/services/memory_perceptual_service.py b/api/app/services/memory_perceptual_service.py index 53d935fe..8a7c86e2 100644 --- a/api/app/services/memory_perceptual_service.py +++ b/api/app/services/memory_perceptual_service.py @@ -5,12 +5,14 @@ from urllib.parse import urlparse, unquote import json_repair from jinja2 import Template +from sqlalchemy import select from sqlalchemy.orm import Session from app.core.error_codes import BizCode from app.core.exceptions import BusinessException from app.core.logging_config import get_business_logger from app.core.models import RedBearLLM, RedBearModelConfig +from app.models import FileMetadata from app.models.memory_perceptual_model import PerceptualType, FileStorageService from app.models.prompt_optimizer_model import RoleType from app.repositories.memory_perceptual_repository import MemoryPerceptualRepository @@ -245,6 +247,18 @@ class MemoryPerceptualService: filename = os.path.basename(path) filename = unquote(filename) file_ext = os.path.splitext(filename)[1] + try: + file_id = uuid.UUID(filename) + stmt = select(FileMetadata).where( + FileMetadata.id == file_id + ) + file = self.db.execute(stmt).scalar_one_or_none() + + if file: + filename = file.file_name + file_ext = file.file_ext + except ValueError: + business_logger.debug(f"Remote file, file_id={filename}") if not file_ext: if file_type == FileType.AUDIO: file_ext = ".mp3" @@ -262,17 +276,17 @@ class MemoryPerceptualService: } if file_type in [FileType.IMAGE, FileType.VIDEO]: file_modalities = { - "scene": content.get("scene") + "scene": content.get("scene", []) } elif file_type in [FileType.DOCUMENT]: file_modalities = { - "section_count": content.get("section_count"), - "title": content.get("title"), - "first_line": content.get("first_line") + "section_count": content.get("section_count", 0), + "title": content.get("title", ""), + "first_line": content.get("first_line", "") } else: file_modalities = { - "speaker_count": content.get("speaker_count") + "speaker_count": content.get("speaker_count", 0) } self.repository.create_perceptual_memory( end_user_id=uuid.UUID(end_user_id), @@ -280,7 +294,7 @@ class MemoryPerceptualService: file_path=file_url, file_name=filename, file_ext=file_ext, - summary=content.get('summary'), + summary=content.get('summary', ""), meta_data={ "content": file_content, "modalities": file_modalities diff --git a/api/app/services/multimodal_service.py b/api/app/services/multimodal_service.py index 908ba953..1f0e1cc2 100644 --- a/api/app/services/multimodal_service.py +++ b/api/app/services/multimodal_service.py @@ -59,22 +59,22 @@ class MultimodalFormatStrategy(ABC): self.file = file @abstractmethod - async def format_image(self, url: str, content: bytes | None = None) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """格式化图片""" pass @abstractmethod - async def format_document(self, file_name: str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """格式化文档""" pass @abstractmethod - async def format_audio(self, file_type: str, url: str, content: bytes | None = None) -> Dict[str, Any]: + async def format_audio(self, file_type: str, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """格式化音频""" pass @abstractmethod - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """格式化视频""" pass @@ -82,16 +82,16 @@ class MultimodalFormatStrategy(ABC): class DashScopeFormatStrategy(MultimodalFormatStrategy): """通义千问策略""" - async def format_image(self, url: str, content: bytes | None = None) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """通义千问图片格式:{"type": "image", "image": "url"}""" - return { + return True, { "type": "image", "image": url } - async def format_document(self, file_name: str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """通义千问文档格式""" - return { + return True, { "type": "text", "text": f"\n{text}\n" } @@ -102,26 +102,26 @@ class DashScopeFormatStrategy(MultimodalFormatStrategy): url: str, content: bytes | None = None, transcription: Optional[str] = None - ) -> Dict[str, Any]: + ) -> tuple[bool, Dict[str, Any]]: """ 通义千问音频格式 - 原生支持: qwen-audio 系列 - 其他模型: 需要转录为文本 """ if transcription: - return { + return True, { "type": "text", "text": f"" } # 通义千问音频格式:{"type": "audio", "audio": "url"} - return { + return True, { "type": "audio", "audio": url } - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """通义千问视频格式(qwen-vl 系列原生支持)""" - return { + return True, { "type": "video", "video": url } @@ -130,7 +130,7 @@ class DashScopeFormatStrategy(MultimodalFormatStrategy): class BedrockFormatStrategy(MultimodalFormatStrategy): """Bedrock/Anthropic 策略""" - async def format_image(self, url: str, content: bytes | None = None) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """ Bedrock/Anthropic 格式: base64 编码 {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}} @@ -153,7 +153,7 @@ class BedrockFormatStrategy(MultimodalFormatStrategy): logger.info(f"图片编码完成: media_type={media_type}, size={len(base64_data)}") - return { + return True, { "type": "image", "source": { "type": "base64", @@ -162,13 +162,13 @@ class BedrockFormatStrategy(MultimodalFormatStrategy): } } - async def format_document(self, file_name: str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """Bedrock/Anthropic 文档格式(需要 base64 编码)""" # Bedrock 文档需要 base64 编码 text_bytes = text.encode('utf-8') base64_text = base64.b64encode(text_bytes).decode('utf-8') - return { + return True, { "type": "document", "source": { "type": "base64", @@ -182,24 +182,24 @@ class BedrockFormatStrategy(MultimodalFormatStrategy): url: str, content: bytes | None = None, transcription: Optional[str] = None - ) -> Dict[str, Any]: + ) -> tuple[bool, Dict[str, Any]]: """ Bedrock/Anthropic 音频格式 不支持原生音频,必须转录为文本 """ if transcription: - return { + return True, { "type": "text", "text": f"[音频转录]\n{transcription}" } - return { + return False, { "type": "text", "text": "[音频文件:Bedrock 不支持原生音频,请启用音频转文本功能]" } - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """Bedrock/Anthropic 视频格式""" - return { + return False, { "type": "text", "text": f"" } @@ -208,18 +208,18 @@ class BedrockFormatStrategy(MultimodalFormatStrategy): class OpenAIFormatStrategy(MultimodalFormatStrategy): """OpenAI 策略""" - async def format_image(self, url: str, content: bytes | None = None) -> Dict[str, Any]: + async def format_image(self, url: str, content: bytes | None = None) -> tuple[bool, Dict[str, Any]]: """OpenAI 格式: {"type": "image_url", "image_url": {"url": "..."}}""" - return { + return True, { "type": "image_url", "image_url": { "url": url } } - async def format_document(self, file_name: str, text: str) -> Dict[str, Any]: + async def format_document(self, file_name: str, text: str) -> tuple[bool, Dict[str, Any]]: """OpenAI 文档格式""" - return { + return True, { "type": "text", "text": f"\n{text}\n" } @@ -230,14 +230,14 @@ class OpenAIFormatStrategy(MultimodalFormatStrategy): url: str, content: bytes | None = None, transcription: Optional[str] = None - ) -> Dict[str, Any]: + ) -> tuple[bool, Dict[str, Any]]: """ OpenAI 音频格式 - gpt-4o-audio 系列支持原生音频(需要 base64 编码) - 其他模型使用转录文本 """ if transcription: - return { + return True, { "type": "text", "text": f"" } @@ -266,7 +266,7 @@ class OpenAIFormatStrategy(MultimodalFormatStrategy): # supported_ext = {"wav", "mp3", "mp4", "ogg", "flac", "webm", "m4a", "wave", "x-m4a"} file_ext = "wav" if not file_ext else file_ext - return { + return True, { "type": "input_audio", "input_audio": { "data": f"data:;base64,{base64_audio}", @@ -275,14 +275,14 @@ class OpenAIFormatStrategy(MultimodalFormatStrategy): } except Exception as e: logger.error(f"下载音频失败: {e}") - return { + return False, { "type": "text", "text": f"[音频处理失败: {str(e)}]" } - async def format_video(self, url: str) -> Dict[str, Any]: + async def format_video(self, url: str) -> tuple[bool, Dict[str, Any]]: """OpenAI 视频格式""" - return { + return True, { "type": "video_url", "video_url": { "url": url @@ -377,21 +377,25 @@ class MultimodalService: file.url = await self.get_file_url(file) try: if file.type == FileType.IMAGE and "vision" in self.capability: - content = await self._process_image(file, strategy) + is_support, content = await self._process_image(file, strategy) result.append(content) - self.write_perceptual_memory(end_user_id, file.type, file.url, content) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) elif file.type == FileType.DOCUMENT: - content = await self._process_document(file, strategy) + is_support, content = await self._process_document(file, strategy) result.append(content) - self.write_perceptual_memory(end_user_id, file.type, file.url, content) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) elif file.type == FileType.AUDIO and "audio" in self.capability: - content = await self._process_audio(file, strategy) + is_support, content = await self._process_audio(file, strategy) result.append(content) - self.write_perceptual_memory(end_user_id, file.type, file.url, content) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) elif file.type == FileType.VIDEO and "video" in self.capability: - content = await self._process_video(file, strategy) + is_support, content = await self._process_video(file, strategy) result.append(content) - self.write_perceptual_memory(end_user_id, file.type, file.url, content) + if is_support: + self.write_perceptual_memory(end_user_id, file.type, file.url, content) else: logger.warning(f"不支持的文件类型: {file.type}") except Exception as e: @@ -424,7 +428,7 @@ class MultimodalService: if end_user_id and self.api_config: write_perceptual_memory.delay(end_user_id, self.api_config.model_dump(), file_type, file_url, file_message) - async def _process_image(self, file: FileInput, strategy) -> Dict[str, Any]: + async def _process_image(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理图片文件 @@ -440,12 +444,12 @@ class MultimodalService: return await strategy.format_image(file.url, content=file.get_content()) except Exception as e: logger.error(f"处理图片失败: {e}", exc_info=True) - return { + return False, { "type": "text", "text": f"[图片处理失败: {str(e)}]" } - async def _process_document(self, file: FileInput, strategy) -> Dict[str, Any]: + async def _process_document(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理文档文件(PDF、Word 等) @@ -457,7 +461,7 @@ class MultimodalService: Dict: 根据 provider 返回不同格式的文档内容 """ if file.transfer_method == TransferMethod.REMOTE_URL: - return { + return True, { "type": "text", "text": f"\n{await self._extract_document_text(file)}\n" } @@ -475,7 +479,7 @@ class MultimodalService: # 使用策略格式化文档 return await strategy.format_document(file_name, text) - async def _process_audio(self, file: FileInput, strategy) -> Dict[str, Any]: + async def _process_audio(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理音频文件 @@ -503,12 +507,12 @@ class MultimodalService: return await strategy.format_audio(file.file_type, file.url, file.get_content(), transcription) except Exception as e: logger.error(f"处理音频失败: {e}", exc_info=True) - return { + return False, { "type": "text", "text": f"[音频处理失败: {str(e)}]" } - async def _process_video(self, file: FileInput, strategy) -> Dict[str, Any]: + async def _process_video(self, file: FileInput, strategy) -> tuple[bool, Dict[str, Any]]: """ 处理视频文件 @@ -524,7 +528,7 @@ class MultimodalService: return await strategy.format_video(file.url) except Exception as e: logger.error(f"处理视频失败: {e}", exc_info=True) - return { + return False, { "type": "text", "text": f"[视频处理失败: {str(e)}]" } diff --git a/api/app/services/workflow_service.py b/api/app/services/workflow_service.py index 4e7268d3..9f421976 100644 --- a/api/app/services/workflow_service.py +++ b/api/app/services/workflow_service.py @@ -570,6 +570,9 @@ class WorkflowService: message=f"工作流配置不存在: app_id={app_id}" ) + feature_configs = config.features or {} + self._validate_file_upload(feature_configs, payload.files) + input_data = { "message": payload.message, "variables": payload.variables, "conversation_id": payload.conversation_id, @@ -737,6 +740,8 @@ class WorkflowService: code=BizCode.CONFIG_MISSING, message=f"工作流配置不存在: app_id={app_id}" ) + feature_configs = config.features or {} + self._validate_file_upload(feature_configs, payload.files) input_data = { "message": payload.message, "variables": payload.variables, @@ -845,7 +850,10 @@ class WorkflowService: yield event except Exception as e: - logger.error(f"工作流流式执行失败: execution_id={execution.execution_id}, error={e}", exc_info=True) + logger.error( + f"Workflow streaming execution failed: execution_id={execution.execution_id}, error={e}", + exc_info=True + ) self.update_execution_status( execution.execution_id, "failed", @@ -868,6 +876,80 @@ class WorkflowService: return node.get("config", {}).get("variables", []) raise BusinessException("workflow config error - start node not found") + @staticmethod + def is_memory_enable(config: dict) -> bool: + nodes = config.get("nodes", []) + for node in nodes: + if node.get("type") in [NodeType.MEMORY_READ, NodeType.MEMORY_WRITE]: + return True + return False + + @staticmethod + def _validate_file_upload( + features_config: dict[str, Any], + files: Optional[list[FileInput]] + ) -> None: + """校验上传文件是否符合 file_upload 配置""" + if not files: + return + fu = features_config.get("file_upload") + if fu is None: + return + if not (isinstance(fu, dict) and fu.get("enabled")): + raise BusinessException( + "The application does not have file upload functionality enabled", + BizCode.BAD_REQUEST + ) + max_count = fu.get("max_file_count", 5) + if len(files) > max_count: + raise BusinessException( + f"File count exceeds limit (maximum {max_count} files)", + BizCode.BAD_REQUEST + ) + + # 校验传输方式 + allowed_methods = fu.get("allowed_transfer_methods", ["local_file", "remote_url"]) + for f in files: + if f.transfer_method.value not in allowed_methods: + raise BusinessException( + f"Unsupport file transfer method:{f.transfer_method.value}," + f"allowed method:{', '.join(allowed_methods)}", + BizCode.BAD_REQUEST + ) + + # 各类型对应的开关和大小限制配置键 + type_cfg = { + "image": ("image_enabled", "image_max_size_mb", 20, "image"), + "audio": ("audio_enabled", "audio_max_size_mb", 50, "audio"), + "document": ("document_enabled", "document_max_size_mb", 100, "document"), + "video": ("video_enabled", "video_max_size_mb", 500, "video"), + } + + for f in files: + ftype = str(f.type) # 如 "image", "audio", "document", "video" + cfg = type_cfg.get(ftype) + if cfg is None: + continue + enabled_key, size_key, default_max_mb, label = cfg + + # 校验类型开关 + if not fu.get(enabled_key): + raise BusinessException( + f"The application has not enabled {label} file upload", + BizCode.BAD_REQUEST + ) + + # 校验文件大小(仅当内容已加载时) + content = f.get_content() + if content is not None: + max_mb = fu.get(size_key, default_max_mb) + size_mb = len(content) / (1024 * 1024) + if size_mb > max_mb: + raise BusinessException( + f"{label} File size exceeds the limit (maximum {max_mb} MB, current {size_mb:.1f} MB)", + BizCode.BAD_REQUEST + ) + # ==================== 依赖注入函数 ==================== diff --git a/web/src/components/AudioRecorder/index.tsx b/web/src/components/AudioRecorder/index.tsx index 10b8eca9..639a9109 100644 --- a/web/src/components/AudioRecorder/index.tsx +++ b/web/src/components/AudioRecorder/index.tsx @@ -2,10 +2,12 @@ * @Author: ZhaoYing * @Date: 2026-02-06 21:11:51 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-03-13 17:11:14 + * @Last Modified time: 2026-03-17 18:39:09 */ import { type FC, useRef, useState } from 'react' import RecordRTC from 'recordrtc' +import { App } from 'antd' +import { useTranslation } from 'react-i18next'; import { fileUploadUrlWithoutApiPrefix } from '@/api/fileStorage' import { request } from '@/utils/request' @@ -19,14 +21,20 @@ interface AudioRecorderProps { action?: string; /** Additional config passed to the upload request */ requestConfig?: Record; + disabled?: boolean; + maxSize?: number; } const AudioRecorder: FC = ({ onRecordingComplete, className = '', action = fileUploadUrlWithoutApiPrefix, - requestConfig = {} + requestConfig = {}, + disabled = false, + maxSize, }) => { + const { message } = App.useApp() + const { t } = useTranslation(); // Whether the recorder is currently capturing audio const [isRecording, setIsRecording] = useState(false) // Holds the RecordRTC instance across renders @@ -34,6 +42,7 @@ const AudioRecorder: FC = ({ /** Request microphone access and start recording */ const startRecording = async () => { + if (disabled) return try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }) recorderRef.current = new RecordRTC(stream, { @@ -49,10 +58,17 @@ const AudioRecorder: FC = ({ /** Stop recording, upload the audio blob, then invoke the completion callback */ const stopRecording = () => { + if (disabled) return if (recorderRef.current) { recorderRef.current.stopRecording(() => { const blob = recorderRef.current!.getBlob() const url = recorderRef.current!.toURL() + + if (maxSize && blob.size > maxSize * 1024 * 1024) { + message.error(t('common.fileSizeTip', { size: maxSize })); + return + } + const formData = new FormData() formData.append('file', blob, `recording_${Date.now()}.webm`) request @@ -76,7 +92,7 @@ const AudioRecorder: FC = ({ // swap background image to reflect current state return (
= ({ align="center" justify={cicle ? 'center' : 'start'} gap={4} - className={clsx("rb:flex rb:items-center rb:cursor-pointer rb:border rb:hover:bg-[#F6F6F6]", { + className={clsx("rb:flex rb:items-center rb:cursor-pointer rb:px-2! rb:border rb:hover:bg-[#F6F6F6]", { 'rb:size-7 rb:rounded-[14px] rb:border-[0.5px] rb:border-[#EBEBEB]': cicle, - 'rb:rounded-lg rb:px-2 rb:text-[12px] rb:h-6': !cicle, + 'rb:rounded-lg rb:text-[12px] rb:h-6': !cicle, // Checked state: blue background and border "rb:bg-[rgba(21,94,239,0.06)] rb:border-[rgba(21,94,239,0.25)] rb:hover:bg-[rgba(21,94,239,0.06)] rb:text-[#155EEF]": checked, // Unchecked state: gray border and dark text diff --git a/web/src/components/Chat/ChatContent.tsx b/web/src/components/Chat/ChatContent.tsx index c1f5223c..15dcd496 100644 --- a/web/src/components/Chat/ChatContent.tsx +++ b/web/src/components/Chat/ChatContent.tsx @@ -2,13 +2,14 @@ * @Author: ZhaoYing * @Date: 2025-12-10 16:46:17 * @Last Modified by: ZhaoYing - * @Last Modified time: 2026-02-06 21:05:52 + * @Last Modified time: 2026-03-17 14:11:24 */ -import { type FC, useRef, useEffect } from 'react' +import { type FC, useRef, useEffect, useState } from 'react' import clsx from 'clsx' import Markdown from '@/components/Markdown' import type { ChatContentProps } from './types' -import { Spin } from 'antd' +import { Spin, Divider, Space } from 'antd' +import { SoundOutlined } from '@ant-design/icons' /** * Chat Content Display Component @@ -28,7 +29,25 @@ const ChatContent: FC = ({ // Scroll container reference for controlling auto-scroll to bottom const scrollContainerRef = useRef<(HTMLDivElement | null)>(null) const prevDataLengthRef = useRef(data.length); - const isScrolledToBottomRef = useRef(true); // Track if user is scrolled to bottom + const isScrolledToBottomRef = useRef(true); + const audioRef = useRef(null) + const [playingIndex, setPlayingIndex] = useState(null) + + const handlePlay = (index: number, audioUrl: string) => { + if (playingIndex === index) { + audioRef.current?.pause() + setPlayingIndex(null) + return + } + if (audioRef.current) { + audioRef.current.pause() + } + const audio = new Audio(audioUrl) + audioRef.current = audio + audio.play() + setPlayingIndex(index) + audio.onended = () => setPlayingIndex(null) + } // Track scroll position to determine if user is at bottom useEffect(() => { @@ -101,6 +120,19 @@ const ChatContent: FC = ({ {item.subContent && renderRuntime && renderRuntime(item, index)} {/* Render message content using Markdown component */} + + {item.audioUrl && <> + + + {playingIndex !== index + ? handlePlay(index, item.audioUrl!)} /> + :
handlePlay(index, item.audioUrl!)} + /> + } + + }
{/* Bottom label (such as timestamp, username, etc.) */} {labelPosition === 'bottom' && diff --git a/web/src/components/Chat/ChatToolbar.tsx b/web/src/components/Chat/ChatToolbar.tsx new file mode 100644 index 00000000..883ac98a --- /dev/null +++ b/web/src/components/Chat/ChatToolbar.tsx @@ -0,0 +1,204 @@ +/* + * @Author: ZhaoYing + * @Date: 2026-03-17 14:22:25 + * @Last Modified by: ZhaoYing + * @Last Modified time: 2026-03-18 15:55:13 + */ +// Toolbar component for chat input area, supporting file upload, audio recording, and variable configuration +import { useRef, forwardRef, useImperativeHandle, type ReactNode, useEffect } from 'react' +import { Flex, Dropdown, Divider, App, Form, type MenuProps } from 'antd' +import { SettingOutlined } from '@ant-design/icons' +import { useTranslation } from 'react-i18next' +import clsx from 'clsx' + +import AudioRecorder from '@/components/AudioRecorder' +import UploadFiles from '@/views/Conversation/components/FileUpload' +import UploadFileListModal from '@/views/Conversation/components/UploadFileListModal' +import VariableConfigModal from '@/views/Workflow/components/Chat/VariableConfigModal' +import type { FeaturesConfigForm } from '@/views/ApplicationConfig/types' +import type { UploadFileListModalRef } from '@/views/Conversation/types' +import type { VariableConfigModalRef } from '@/views/Workflow/types' +import type { Variable } from '@/views/Workflow/components/Properties/VariableList/types' + +// Exposed methods via ref for parent components to access/set form state +export interface ChatToolbarRef { + getFiles: () => any[] + getVariables: () => Variable[] + setFiles: (files: any[]) => void + setVariables: (variables: Variable[]) => void +} + +// Props for configuring toolbar features, upload settings, and event callbacks +export interface ChatToolbarProps { + features: FeaturesConfigForm + extra?: ReactNode + uploadAction?: string + uploadRequestConfig?: { + data?: Record + headers?: Record + } + onFilesChange?: (files: any[]) => void + onVariablesChange?: (variables: Variable[]) => void + onRecordingComplete?: (file: any) => void; + defaultValue?: { memory: boolean } +} + +interface FormValues { + files: any[] + variables: Variable[]; + memory?: boolean; +} + +const ChatToolbar = forwardRef(({ + features, + extra, + uploadAction, + uploadRequestConfig, + onFilesChange, + onVariablesChange, + onRecordingComplete, + defaultValue, +}, ref) => { + const { t } = useTranslation() + const { message: messageApi } = App.useApp() + const uploadFileListModalRef = useRef(null) + const variableConfigModalRef = useRef(null) + const [form] = Form.useForm() + const queryValues = Form.useWatch([], form) + + useEffect(() => { + if (!defaultValue) return + form.setFieldsValue(defaultValue) + }, [defaultValue]) + + useImperativeHandle(ref, () => ({ + getFiles: () => form.getFieldValue('files') || [], + getVariables: () => form.getFieldValue('variables') || [], + setFiles: (files) => form.setFieldValue('files', files), + setVariables: (variables) => { + console.log('variables', variables) + form.setFieldValue('variables', variables) + }, + })) + + const { file_upload } = features || {} + + // Append newly uploaded file to the file list when upload is complete + const fileChange = (file?: any) => { + if (file?.status !== 'done') return + const files = [...(queryValues?.files || []), file] + form.setFieldValue('files', files) + onFilesChange?.(files) + } + + // Append recorded audio file to the file list and notify parent + const handleRecordingComplete = (file: any) => { + const files = [...(queryValues?.files || []), file] + form.setFieldValue('files', files) + onFilesChange?.(files) + onRecordingComplete?.(file) + } + + // Merge a batch of files (e.g. from remote URL modal) into the file list + const addFileList = (list?: any[]) => { + if (!list?.length) return + const files = [...(queryValues?.files || []), ...list] + form.setFieldValue('files', files) + onFilesChange?.(files) + } + + // Persist variable values from the config modal and notify parent + const handleVariablesSave = (values: Variable[]) => { + form.setFieldValue('variables', values) + onVariablesChange?.(values) + } + + // True when any required variable is missing a value, used to highlight the config button + const isNeedVariableConfig = queryValues?.variables?.some( + vo => vo.required && (vo.value === null || vo.value === undefined || vo.value === '') + ) + + // Build dropdown menu items based on allowed transfer methods + const fileMenus: MenuProps['items'] = [] + const enabledTypes = ['image', 'document', 'video', 'audio'].filter( + type => file_upload?.[`${type}_enabled` as keyof FeaturesConfigForm['file_upload']] + ) + if (file_upload?.allowed_transfer_methods?.includes('remote_url') && enabledTypes.length > 0) { + fileMenus.push({ + key: 'url', + label: t('memoryConversation.addRemoteFile'), + onClick: () => { + if ((queryValues?.files?.length || 0) >= file_upload.max_file_count) { + messageApi.warning(t('common.fileNumTip', { num: file_upload.max_file_count })) + return + } + uploadFileListModalRef.current?.handleOpen() + } + }) + } + if (file_upload?.allowed_transfer_methods?.includes('local_file') && enabledTypes.length > 0) { + fileMenus.push({ + key: 'upload', + label: ( + = file_upload.max_file_count} + /> + ) + }) + } + + return ( +
+ + +