From 909c536b473f554d1bab52fea7b9685bbaa950ca Mon Sep 17 00:00:00 2001
From: lixiangcheng1
Date: Tue, 30 Dec 2025 18:20:34 +0800
Subject: [PATCH] [fix]parsed excel document error:float division by zero

---
 api/app/core/rag/deepdoc/parser/excel_parser.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py
index b6e1e4a1..856155f1 100644
--- a/api/app/core/rag/deepdoc/parser/excel_parser.py
+++ b/api/app/core/rag/deepdoc/parser/excel_parser.py
@@ -52,13 +52,32 @@ class RAGExcelParser:
                 raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}")
 
     @staticmethod
-    def _clean_dataframe(df: pd.DataFrame):
+    def _clean_dataframe(df):
+        """Replace every ILLEGAL_CHARACTERS_RE match in string cells with a space.
+
+        Args:
+            df: A single DataFrame, or a dict of DataFrames keyed by sheet
+                (the multi-sheet case); each dict value is cleaned recursively.
+
+        Returns:
+            The cleaned DataFrame, or a new dict with the same keys and
+            cleaned values. Non-string cells are returned unchanged.
+
+        Raises:
+            ValueError: If ``df`` is neither a dict nor a DataFrame.
+        """
         def clean_string(s):
             if isinstance(s, str):
                 return ILLEGAL_CHARACTERS_RE.sub(" ", s)
             return s
 
-        return df.apply(lambda col: col.map(clean_string))
+        # Handle a single DataFrame or a dict of DataFrames (multiple sheets).
+        if isinstance(df, dict):
+            return {sheet: RAGExcelParser._clean_dataframe(sheet_df) for sheet, sheet_df in df.items()}
+        elif isinstance(df, pd.DataFrame):
+            return df.apply(lambda col: col.map(clean_string))
+        else:
+            raise ValueError(f"Unsupported type for cleaning: {type(df)}")
 
     @staticmethod
     def _dataframe_to_workbook(df):