Merge #60 into develop_web from feature/20251219_yjp
feat(knowledgeBase): add media file validation and PDF enhancement method selection * feature/20251219_yjp: (1 commits) feat(knowledgeBase): add media file validation and PDF enhancement method selection Signed-off-by: vrhs@163.com <accounts_660b6454a0eb398d3f8d2c76@mail.teambition.com> Merged-by: vrhs@163.com <accounts_660b6454a0eb398d3f8d2c76@mail.teambition.com> CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/60
This commit is contained in:
@@ -619,6 +619,11 @@ export const en = {
|
||||
qaMode: 'QA Mode',
|
||||
fileParsingSettings: 'File Parsing Settings',
|
||||
pdfEnhancementAnalysis: 'PDF Enhancement Analysis',
|
||||
fileSizeExceeds: 'File size exceeds the limit',
|
||||
sizeLimitError: 'The file size exceeds the limit. The maximum supported size is 256MB. The current file size is',
|
||||
fileDurationExceeds: 'File duration exceeds the limit',
|
||||
fileDurationLimitError: 'The duration of the media file exceeds the limit. The maximum supported duration is 150 seconds. Current duration',
|
||||
unableReadFile:'Unable to read the information of the media file. Please check the file format.',
|
||||
createForm:{
|
||||
name: 'Name',
|
||||
embedding_id: 'Embedding',
|
||||
|
||||
@@ -242,6 +242,11 @@ export const zh = {
|
||||
qaMode: '问答模式',
|
||||
fileParsingSettings: '文件解析设置',
|
||||
pdfEnhancementAnalysis: 'PDF增强解析',
|
||||
fileSizeExceeds: '文件大小超过限制',
|
||||
sizeLimitError: '文件大小超过限制,最大支持256MB,当前文件大小',
|
||||
fileDurationExceeds:'文件时长超过限制',
|
||||
fileDurationLimitError: '媒体文件时长超过限制,最大支持150秒,当前时长',
|
||||
unableReadFile:'无法读取媒体文件信息,请检查文件格式',
|
||||
createForm: {
|
||||
name: '名称',
|
||||
embedding_id: '嵌入模型',
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useMemo,useRef, useState, useEffect } from 'react';
|
||||
import { Button, Flex, Radio, Steps, Modal, Input, Spin, message, Checkbox} from 'antd';
|
||||
import { Button, Flex, Radio, Steps, Modal, Input, Spin, message, Checkbox, Select} from 'antd';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useLocation, useNavigate, useParams } from 'react-router-dom';
|
||||
import Table, { type TableRef } from '@/components/Table'
|
||||
@@ -81,9 +81,10 @@ const CreateDataset = () => {
|
||||
const [blockSize, setBlockSize] = useState<number>(130);
|
||||
const [processingMethod, setProcessingMethod] = useState<ProcessingMethod>('directBlock');
|
||||
const [parameterSettings, setParameterSettings] = useState<ParameterSettings>('defaultSettings');
|
||||
const [pdfEnhancementEnabled, setPdfEnhancementEnabled] = useState<boolean>(false);
|
||||
const [pdfEnhancementEnabled, setPdfEnhancementEnabled] = useState<boolean>(true);
|
||||
const [pdfEnhancementMethod, setPdfEnhancementMethod] = useState<string>('deepdoc');
|
||||
const [messageApi, contextHolder] = message.useMessage();
|
||||
const fileType = ['pdf', 'doc', 'docx', 'xls', 'xlsx', 'csv', 'md', 'htm', 'html', 'json', 'ppt', 'pptx', 'txt','png','jpg']
|
||||
const fileType = ['pdf', 'doc', 'docx', 'xls', 'xlsx', 'csv', 'md', 'htm', 'html', 'json', 'ppt', 'pptx', 'txt','png','jpg','mp3','mp4','mov','wav']
|
||||
const steps = useMemo(
|
||||
() => [
|
||||
{ title: t('knowledgeBase.selectFile') },
|
||||
@@ -119,7 +120,7 @@ const CreateDataset = () => {
|
||||
const params = {
|
||||
progress: 0,
|
||||
parser_config: {
|
||||
layout_recognize:'DeepDOC',
|
||||
layout_recognize: pdfEnhancementMethod || 'DeepDOC',
|
||||
delimiter: delimiter,
|
||||
chunk_token_num: blockSize,
|
||||
auto_questions: processingMethod === 'directBlock' ? 0 : 1,
|
||||
@@ -244,11 +245,61 @@ const CreateDataset = () => {
|
||||
),
|
||||
},
|
||||
];
|
||||
// 上传文件
|
||||
const handleUpload = (options: UploadRequestOption) => {
|
||||
const { file, onSuccess, onError, onProgress, filename = 'file' } = options;
|
||||
const formData = new FormData();
|
||||
// 检查媒体文件时长的辅助函数
|
||||
const checkMediaDuration = (file: File): Promise<number> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const url = URL.createObjectURL(file);
|
||||
const media = document.createElement(file.type.startsWith('video/') ? 'video' : 'audio');
|
||||
|
||||
media.onloadedmetadata = () => {
|
||||
URL.revokeObjectURL(url);
|
||||
resolve(media.duration);
|
||||
};
|
||||
|
||||
media.onerror = () => {
|
||||
URL.revokeObjectURL(url);
|
||||
reject(new Error('无法读取媒体文件'));
|
||||
};
|
||||
|
||||
media.src = url;
|
||||
});
|
||||
};
|
||||
|
||||
// 上传文件
|
||||
const handleUpload = async (options: UploadRequestOption) => {
|
||||
const { file, onSuccess, onError, onProgress, filename = 'file' } = options;
|
||||
|
||||
// 获取文件扩展名
|
||||
const fileExtension = (file as File).name.split('.').pop()?.toLowerCase();
|
||||
const mediaExtensions = ['mp3', 'mp4', 'mov', 'wav'];
|
||||
|
||||
// 如果是媒体文件,进行大小和时长检查
|
||||
if (fileExtension && mediaExtensions.includes(fileExtension)) {
|
||||
const fileSizeInMB = (file as File).size / (1024 * 1024);
|
||||
|
||||
// 检查文件大小(256MB限制)
|
||||
if (fileSizeInMB > 256) {
|
||||
messageApi.error(`${t('knowledgeBase.sizeLimitError')}:${fileSizeInMB.toFixed(2)}MB`);
|
||||
onError?.(new Error(`${t('knowledgeBase.fileSizeExceeds')}`));
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// 检查媒体时长(150秒限制)
|
||||
const duration = await checkMediaDuration(file as File);
|
||||
if (duration > 150) {
|
||||
messageApi.error(`${t('knowledgeBase.fileDurationLimitError')}:${Math.round(duration)}秒`);
|
||||
onError?.(new Error(`${t('knowledgeBase.fileDurationExceeds')}`));
|
||||
return;
|
||||
}
|
||||
} catch (error) {
|
||||
messageApi.error(`${t('knowledgeBase.unableReadFile')}`);
|
||||
onError?.(error as Error);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append(filename, file as File);
|
||||
if (knowledgeBaseId) {
|
||||
formData.append('kb_id', knowledgeBaseId);
|
||||
@@ -469,7 +520,7 @@ const CreateDataset = () => {
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
<div className='rb:text-base rb:font-medium rb:text-gray-800'>
|
||||
<div className='rb:text-base rb:font-medium rb:text-gray-800 rb:mt-4'>
|
||||
{t('knowledgeBase.fileParsingSettings')}
|
||||
</div>
|
||||
<div className='rb:mt-4'>
|
||||
@@ -477,7 +528,7 @@ const CreateDataset = () => {
|
||||
className={`rb:flex rb:items-center rb:w-full rb:border rb:rounded-lg rb:p-4 rb:cursor-pointer ${
|
||||
pdfEnhancementEnabled ? 'rb:border-blue-500' : 'rb:border-gray-300'
|
||||
}`}
|
||||
onClick={() => setPdfEnhancementEnabled(!pdfEnhancementEnabled)}
|
||||
// onClick={() => setPdfEnhancementEnabled(!pdfEnhancementEnabled)}
|
||||
>
|
||||
<Checkbox
|
||||
checked={pdfEnhancementEnabled}
|
||||
@@ -487,7 +538,22 @@ const CreateDataset = () => {
|
||||
<span className='rb:text-base rb:font-medium rb:text-gray-800 rb:pl-[22px]'>
|
||||
{t('knowledgeBase.pdfEnhancementAnalysis')}
|
||||
</span>
|
||||
{pdfEnhancementEnabled && (
|
||||
<div className='rb:ml-10'>
|
||||
<Select
|
||||
value={pdfEnhancementMethod}
|
||||
onChange={(value) => setPdfEnhancementMethod(value)}
|
||||
className='rb:w-48'
|
||||
options={[
|
||||
{ value: 'deepdoc', label: 'DeepDoc' },
|
||||
{ value: 'mineru', label: 'MinerU' },
|
||||
{ value: 'textln', label: 'TextLN' }
|
||||
]}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div className='rb:text-base rb:font-medium rb:text-gray-800 rb:mt-6'>
|
||||
{t('knowledgeBase.dataProcessingSettings')}
|
||||
|
||||
@@ -512,6 +512,22 @@ const Private: FC = () => {
|
||||
);
|
||||
},
|
||||
},
|
||||
{
|
||||
title: t('knowledgeBase.status'),
|
||||
dataIndex: 'progress',
|
||||
key: 'progress',
|
||||
render: (value: string | number) => {
|
||||
return (
|
||||
<span className="rb:text-xs rb:border rb:border-[#DFE4ED] rb:bg-[#FBFDFF] rb:rounded rb:items-center rb:text-[#212332] rb:py-1 rb:px-2">
|
||||
<span
|
||||
className="rb:inline-block rb:w-[5px] rb:h-[5px] rb:mr-2 rb:rounded-full"
|
||||
style={{ backgroundColor: value === 1 ? '#369F21' : value === 0 ? '#FF0000' : '#FF8A4C' }}
|
||||
></span>
|
||||
<span>{value === 1 ? t('knowledgeBase.completed') : value === 0 ? t('knowledgeBase.pending') : t('knowledgeBase.processing')}</span>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
},
|
||||
{
|
||||
title: t('knowledgeBase.processingMode'),
|
||||
dataIndex: 'parser_id',
|
||||
@@ -532,22 +548,7 @@ const Private: FC = () => {
|
||||
)
|
||||
}
|
||||
},
|
||||
{
|
||||
title: t('knowledgeBase.status'),
|
||||
dataIndex: 'progress',
|
||||
key: 'progress',
|
||||
render: (value: string | number) => {
|
||||
return (
|
||||
<span className="rb:text-xs rb:border rb:border-[#DFE4ED] rb:bg-[#FBFDFF] rb:rounded rb:items-center rb:text-[#212332] rb:py-1 rb:px-2">
|
||||
<span
|
||||
className="rb:inline-block rb:w-[5px] rb:h-[5px] rb:mr-2 rb:rounded-full"
|
||||
style={{ backgroundColor: value === 1 ? '#369F21' : value === 0 ? '#FF0000' : '#FF8A4C' }}
|
||||
></span>
|
||||
<span>{value === 1 ? t('knowledgeBase.completed') : value === 0 ? t('knowledgeBase.pending') : t('knowledgeBase.processing')}</span>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
title: t('common.operation'),
|
||||
key: 'action',
|
||||
|
||||
Reference in New Issue
Block a user