Merge branch 'feature/20251219_lxc' into develop

This commit is contained in:
lixiangcheng1
2025-12-31 13:00:02 +08:00

View File

@@ -48,7 +48,6 @@ class RAGExcelParser:
logging.info(f"pandas with default engine load error: {ex}, try calamine instead") logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
file_like_object.seek(0) file_like_object.seek(0)
df = pd.read_excel(file_like_object, engine="calamine") df = pd.read_excel(file_like_object, engine="calamine")
print("lxc1")
return RAGExcelParser._dataframe_to_workbook(df) return RAGExcelParser._dataframe_to_workbook(df)
except Exception as e_pandas: except Exception as e_pandas:
raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}")
@@ -215,8 +214,17 @@ class RAGExcelParser:
continue continue
if not rows: if not rows:
continue continue
# Get the header row
ti = list(rows[0]) ti = list(rows[0])
for r in list(rows[1:]): header_fields = []
for cell in ti:
if cell.value: # 只添加有值的表头
header_fields.append(str(cell.value))
# If there are data rows, process them; otherwise handle only the header
data_rows = rows[1:]
if data_rows:
for r in data_rows:
fields = [] fields = []
for i, c in enumerate(r): for i, c in enumerate(r):
if not c.value: if not c.value:
@@ -228,6 +236,13 @@ class RAGExcelParser:
if sheetname.lower().find("sheet") < 0: if sheetname.lower().find("sheet") < 0:
line += " ——" + sheetname line += " ——" + sheetname
res.append(line) res.append(line)
else:
# Header-only case (no data rows)
if header_fields:
line = "; ".join(header_fields)
if sheetname.lower().find("sheet") < 0:
line += " ——" + sheetname
res.append(line)
return res return res
@staticmethod @staticmethod