Merge #60 into develop_web from feature/20251219_yjp
feat(knowledgeBase): add media file validation and PDF enhancement method selection

* feature/20251219_yjp: (1 commits)
  feat(knowledgeBase): add media file validation and PDF enhancement method selection

Signed-off-by: vrhs@163.com <accounts_660b6454a0eb398d3f8d2c76@mail.teambition.com>
Merged-by: vrhs@163.com <accounts_660b6454a0eb398d3f8d2c76@mail.teambition.com>
CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/60
This commit is contained in:
@@ -619,6 +619,11 @@ export const en = {
|
|||||||
qaMode: 'QA Mode',
|
qaMode: 'QA Mode',
|
||||||
fileParsingSettings: 'File Parsing Settings',
|
fileParsingSettings: 'File Parsing Settings',
|
||||||
pdfEnhancementAnalysis: 'PDF Enhancement Analysis',
|
pdfEnhancementAnalysis: 'PDF Enhancement Analysis',
|
||||||
|
fileSizeExceeds: 'File size exceeds the limit',
|
||||||
|
sizeLimitError: 'The file size exceeds the limit. The maximum supported size is 256MB. The current file size is',
|
||||||
|
fileDurationExceeds: 'File duration exceeds the limit',
|
||||||
|
fileDurationLimitError: 'The duration of the media file exceeds the limit. The maximum supported duration is 150 seconds. Current duration',
|
||||||
|
unableReadFile:'Unable to read the information of the media file. Please check the file format.',
|
||||||
createForm:{
|
createForm:{
|
||||||
name: 'Name',
|
name: 'Name',
|
||||||
embedding_id: 'Embedding',
|
embedding_id: 'Embedding',
|
||||||
|
|||||||
@@ -242,6 +242,11 @@ export const zh = {
|
|||||||
qaMode: '问答模式',
|
qaMode: '问答模式',
|
||||||
fileParsingSettings: '文件解析设置',
|
fileParsingSettings: '文件解析设置',
|
||||||
pdfEnhancementAnalysis: 'PDF增强解析',
|
pdfEnhancementAnalysis: 'PDF增强解析',
|
||||||
|
fileSizeExceeds: '文件大小超过限制',
|
||||||
|
sizeLimitError: '文件大小超过限制,最大支持256MB,当前文件大小',
|
||||||
|
fileDurationExceeds:'文件时长超过限制',
|
||||||
|
fileDurationLimitError: '媒体文件时长超过限制,最大支持150秒,当前时长',
|
||||||
|
unableReadFile:'无法读取媒体文件信息,请检查文件格式',
|
||||||
createForm: {
|
createForm: {
|
||||||
name: '名称',
|
name: '名称',
|
||||||
embedding_id: '嵌入模型',
|
embedding_id: '嵌入模型',
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { useMemo,useRef, useState, useEffect } from 'react';
|
import { useMemo,useRef, useState, useEffect } from 'react';
|
||||||
import { Button, Flex, Radio, Steps, Modal, Input, Spin, message, Checkbox} from 'antd';
|
import { Button, Flex, Radio, Steps, Modal, Input, Spin, message, Checkbox, Select} from 'antd';
|
||||||
import { useTranslation } from 'react-i18next';
|
import { useTranslation } from 'react-i18next';
|
||||||
import { useLocation, useNavigate, useParams } from 'react-router-dom';
|
import { useLocation, useNavigate, useParams } from 'react-router-dom';
|
||||||
import Table, { type TableRef } from '@/components/Table'
|
import Table, { type TableRef } from '@/components/Table'
|
||||||
@@ -81,9 +81,10 @@ const CreateDataset = () => {
|
|||||||
const [blockSize, setBlockSize] = useState<number>(130);
|
const [blockSize, setBlockSize] = useState<number>(130);
|
||||||
const [processingMethod, setProcessingMethod] = useState<ProcessingMethod>('directBlock');
|
const [processingMethod, setProcessingMethod] = useState<ProcessingMethod>('directBlock');
|
||||||
const [parameterSettings, setParameterSettings] = useState<ParameterSettings>('defaultSettings');
|
const [parameterSettings, setParameterSettings] = useState<ParameterSettings>('defaultSettings');
|
||||||
const [pdfEnhancementEnabled, setPdfEnhancementEnabled] = useState<boolean>(false);
|
const [pdfEnhancementEnabled, setPdfEnhancementEnabled] = useState<boolean>(true);
|
||||||
|
const [pdfEnhancementMethod, setPdfEnhancementMethod] = useState<string>('deepdoc');
|
||||||
const [messageApi, contextHolder] = message.useMessage();
|
const [messageApi, contextHolder] = message.useMessage();
|
||||||
const fileType = ['pdf', 'doc', 'docx', 'xls', 'xlsx', 'csv', 'md', 'htm', 'html', 'json', 'ppt', 'pptx', 'txt','png','jpg']
|
const fileType = ['pdf', 'doc', 'docx', 'xls', 'xlsx', 'csv', 'md', 'htm', 'html', 'json', 'ppt', 'pptx', 'txt','png','jpg','mp3','mp4','mov','wav']
|
||||||
const steps = useMemo(
|
const steps = useMemo(
|
||||||
() => [
|
() => [
|
||||||
{ title: t('knowledgeBase.selectFile') },
|
{ title: t('knowledgeBase.selectFile') },
|
||||||
@@ -119,7 +120,7 @@ const CreateDataset = () => {
|
|||||||
const params = {
|
const params = {
|
||||||
progress: 0,
|
progress: 0,
|
||||||
parser_config: {
|
parser_config: {
|
||||||
layout_recognize:'DeepDOC',
|
layout_recognize: pdfEnhancementMethod || 'DeepDOC',
|
||||||
delimiter: delimiter,
|
delimiter: delimiter,
|
||||||
chunk_token_num: blockSize,
|
chunk_token_num: blockSize,
|
||||||
auto_questions: processingMethod === 'directBlock' ? 0 : 1,
|
auto_questions: processingMethod === 'directBlock' ? 0 : 1,
|
||||||
@@ -244,11 +245,61 @@ const CreateDataset = () => {
|
|||||||
),
|
),
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
// 上传文件
|
// 检查媒体文件时长的辅助函数
|
||||||
const handleUpload = (options: UploadRequestOption) => {
|
const checkMediaDuration = (file: File): Promise<number> => {
|
||||||
const { file, onSuccess, onError, onProgress, filename = 'file' } = options;
|
return new Promise((resolve, reject) => {
|
||||||
const formData = new FormData();
|
const url = URL.createObjectURL(file);
|
||||||
|
const media = document.createElement(file.type.startsWith('video/') ? 'video' : 'audio');
|
||||||
|
|
||||||
|
media.onloadedmetadata = () => {
|
||||||
|
URL.revokeObjectURL(url);
|
||||||
|
resolve(media.duration);
|
||||||
|
};
|
||||||
|
|
||||||
|
media.onerror = () => {
|
||||||
|
URL.revokeObjectURL(url);
|
||||||
|
reject(new Error('无法读取媒体文件'));
|
||||||
|
};
|
||||||
|
|
||||||
|
media.src = url;
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// 上传文件
|
||||||
|
const handleUpload = async (options: UploadRequestOption) => {
|
||||||
|
const { file, onSuccess, onError, onProgress, filename = 'file' } = options;
|
||||||
|
|
||||||
|
// 获取文件扩展名
|
||||||
|
const fileExtension = (file as File).name.split('.').pop()?.toLowerCase();
|
||||||
|
const mediaExtensions = ['mp3', 'mp4', 'mov', 'wav'];
|
||||||
|
|
||||||
|
// 如果是媒体文件,进行大小和时长检查
|
||||||
|
if (fileExtension && mediaExtensions.includes(fileExtension)) {
|
||||||
|
const fileSizeInMB = (file as File).size / (1024 * 1024);
|
||||||
|
|
||||||
|
// 检查文件大小(256MB限制)
|
||||||
|
if (fileSizeInMB > 256) {
|
||||||
|
messageApi.error(`${t('knowledgeBase.sizeLimitError')}:${fileSizeInMB.toFixed(2)}MB`);
|
||||||
|
onError?.(new Error(`${t('knowledgeBase.fileSizeExceeds')}`));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// 检查媒体时长(150秒限制)
|
||||||
|
const duration = await checkMediaDuration(file as File);
|
||||||
|
if (duration > 150) {
|
||||||
|
messageApi.error(`${t('knowledgeBase.fileDurationLimitError')}:${Math.round(duration)}秒`);
|
||||||
|
onError?.(new Error(`${t('knowledgeBase.fileDurationExceeds')}`));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
messageApi.error(`${t('knowledgeBase.unableReadFile')}`);
|
||||||
|
onError?.(error as Error);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const formData = new FormData();
|
||||||
formData.append(filename, file as File);
|
formData.append(filename, file as File);
|
||||||
if (knowledgeBaseId) {
|
if (knowledgeBaseId) {
|
||||||
formData.append('kb_id', knowledgeBaseId);
|
formData.append('kb_id', knowledgeBaseId);
|
||||||
@@ -469,7 +520,7 @@ const CreateDataset = () => {
|
|||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
<div className='rb:text-base rb:font-medium rb:text-gray-800'>
|
<div className='rb:text-base rb:font-medium rb:text-gray-800 rb:mt-4'>
|
||||||
{t('knowledgeBase.fileParsingSettings')}
|
{t('knowledgeBase.fileParsingSettings')}
|
||||||
</div>
|
</div>
|
||||||
<div className='rb:mt-4'>
|
<div className='rb:mt-4'>
|
||||||
@@ -477,7 +528,7 @@ const CreateDataset = () => {
|
|||||||
className={`rb:flex rb:items-center rb:w-full rb:border rb:rounded-lg rb:p-4 rb:cursor-pointer ${
|
className={`rb:flex rb:items-center rb:w-full rb:border rb:rounded-lg rb:p-4 rb:cursor-pointer ${
|
||||||
pdfEnhancementEnabled ? 'rb:border-blue-500' : 'rb:border-gray-300'
|
pdfEnhancementEnabled ? 'rb:border-blue-500' : 'rb:border-gray-300'
|
||||||
}`}
|
}`}
|
||||||
onClick={() => setPdfEnhancementEnabled(!pdfEnhancementEnabled)}
|
// onClick={() => setPdfEnhancementEnabled(!pdfEnhancementEnabled)}
|
||||||
>
|
>
|
||||||
<Checkbox
|
<Checkbox
|
||||||
checked={pdfEnhancementEnabled}
|
checked={pdfEnhancementEnabled}
|
||||||
@@ -487,7 +538,22 @@ const CreateDataset = () => {
|
|||||||
<span className='rb:text-base rb:font-medium rb:text-gray-800 rb:pl-[22px]'>
|
<span className='rb:text-base rb:font-medium rb:text-gray-800 rb:pl-[22px]'>
|
||||||
{t('knowledgeBase.pdfEnhancementAnalysis')}
|
{t('knowledgeBase.pdfEnhancementAnalysis')}
|
||||||
</span>
|
</span>
|
||||||
|
{pdfEnhancementEnabled && (
|
||||||
|
<div className='rb:ml-10'>
|
||||||
|
<Select
|
||||||
|
value={pdfEnhancementMethod}
|
||||||
|
onChange={(value) => setPdfEnhancementMethod(value)}
|
||||||
|
className='rb:w-48'
|
||||||
|
options={[
|
||||||
|
{ value: 'deepdoc', label: 'DeepDoc' },
|
||||||
|
{ value: 'mineru', label: 'MinerU' },
|
||||||
|
{ value: 'textln', label: 'TextLN' }
|
||||||
|
]}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
<div className='rb:text-base rb:font-medium rb:text-gray-800 rb:mt-6'>
|
<div className='rb:text-base rb:font-medium rb:text-gray-800 rb:mt-6'>
|
||||||
{t('knowledgeBase.dataProcessingSettings')}
|
{t('knowledgeBase.dataProcessingSettings')}
|
||||||
|
|||||||
@@ -512,6 +512,22 @@ const Private: FC = () => {
|
|||||||
);
|
);
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
title: t('knowledgeBase.status'),
|
||||||
|
dataIndex: 'progress',
|
||||||
|
key: 'progress',
|
||||||
|
render: (value: string | number) => {
|
||||||
|
return (
|
||||||
|
<span className="rb:text-xs rb:border rb:border-[#DFE4ED] rb:bg-[#FBFDFF] rb:rounded rb:items-center rb:text-[#212332] rb:py-1 rb:px-2">
|
||||||
|
<span
|
||||||
|
className="rb:inline-block rb:w-[5px] rb:h-[5px] rb:mr-2 rb:rounded-full"
|
||||||
|
style={{ backgroundColor: value === 1 ? '#369F21' : value === 0 ? '#FF0000' : '#FF8A4C' }}
|
||||||
|
></span>
|
||||||
|
<span>{value === 1 ? t('knowledgeBase.completed') : value === 0 ? t('knowledgeBase.pending') : t('knowledgeBase.processing')}</span>
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
title: t('knowledgeBase.processingMode'),
|
title: t('knowledgeBase.processingMode'),
|
||||||
dataIndex: 'parser_id',
|
dataIndex: 'parser_id',
|
||||||
@@ -532,22 +548,7 @@ const Private: FC = () => {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
title: t('knowledgeBase.status'),
|
|
||||||
dataIndex: 'progress',
|
|
||||||
key: 'progress',
|
|
||||||
render: (value: string | number) => {
|
|
||||||
return (
|
|
||||||
<span className="rb:text-xs rb:border rb:border-[#DFE4ED] rb:bg-[#FBFDFF] rb:rounded rb:items-center rb:text-[#212332] rb:py-1 rb:px-2">
|
|
||||||
<span
|
|
||||||
className="rb:inline-block rb:w-[5px] rb:h-[5px] rb:mr-2 rb:rounded-full"
|
|
||||||
style={{ backgroundColor: value === 1 ? '#369F21' : value === 0 ? '#FF0000' : '#FF8A4C' }}
|
|
||||||
></span>
|
|
||||||
<span>{value === 1 ? t('knowledgeBase.completed') : value === 0 ? t('knowledgeBase.pending') : t('knowledgeBase.processing')}</span>
|
|
||||||
</span>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
title: t('common.operation'),
|
title: t('common.operation'),
|
||||||
key: 'action',
|
key: 'action',
|
||||||
|
|||||||
Reference in New Issue
Block a user