Merge branch 'feature/20251219_lxc' into develop

This commit is contained in:
lixiangcheng1
2025-12-31 13:00:02 +08:00

View File

@@ -48,7 +48,6 @@ class RAGExcelParser:
logging.info(f"pandas with default engine load error: {ex}, try calamine instead") logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
file_like_object.seek(0) file_like_object.seek(0)
df = pd.read_excel(file_like_object, engine="calamine") df = pd.read_excel(file_like_object, engine="calamine")
print("lxc1")
return RAGExcelParser._dataframe_to_workbook(df) return RAGExcelParser._dataframe_to_workbook(df)
except Exception as e_pandas: except Exception as e_pandas:
raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}")
@@ -215,19 +214,35 @@ class RAGExcelParser:
continue continue
if not rows: if not rows:
continue continue
# 获取表头
ti = list(rows[0]) ti = list(rows[0])
for r in list(rows[1:]): header_fields = []
fields = [] for cell in ti:
for i, c in enumerate(r): if cell.value: # 只添加有值的表头
if not c.value: header_fields.append(str(cell.value))
continue
t = str(ti[i].value) if i < len(ti) else "" # 如果有数据行,处理数据行;否则只处理表头
t += ("" if t else "") + str(c.value) data_rows = rows[1:]
fields.append(t) if data_rows:
line = "; ".join(fields) for r in data_rows:
if sheetname.lower().find("sheet") < 0: fields = []
line += " ——" + sheetname for i, c in enumerate(r):
res.append(line) if not c.value:
continue
t = str(ti[i].value) if i < len(ti) else ""
t += ("" if t else "") + str(c.value)
fields.append(t)
line = "; ".join(fields)
if sheetname.lower().find("sheet") < 0:
line += " ——" + sheetname
res.append(line)
else:
# 只有表头的情况
if header_fields:
line = "; ".join(header_fields)
if sheetname.lower().find("sheet") < 0:
line += " ——" + sheetname
res.append(line)
return res return res
@staticmethod @staticmethod