[fix] parse excel: enable html4excel only when it is set to "true"; change default chunk_token_num from 128 to 130

This commit is contained in:
lixiangcheng1
2026-02-10 14:05:01 +08:00
parent e3074b833f
commit 26abf7b586
3 changed files with 5 additions and 5 deletions

View File

@@ -670,7 +670,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
with open(filename, "rb") as f:
binary = f.read()
excel_parser = ExcelParser()
-if parser_config.get("html4excel"):
+if parser_config.get("html4excel") and parser_config.get("html4excel").lower() == "true":
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
parser_config["chunk_token_num"] = 0
else:

View File

@@ -19,7 +19,7 @@ class Document(Base):
parser_config = Column(JSON, nullable=False,
default={
"layout_recognize": "DeepDOC",
-"chunk_token_num": 128,
+"chunk_token_num": 130,
"delimiter": "\n",
"auto_keywords": 0,
"auto_questions": 0,

View File

@@ -495,7 +495,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
parser_id="naive",
parser_config={
"layout_recognize": "DeepDOC",
-"chunk_token_num": 128,
+"chunk_token_num": 130,
"delimiter": "\n",
"auto_keywords": 0,
"auto_questions": 0,
@@ -658,7 +658,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
parser_id="naive",
parser_config={
"layout_recognize": "DeepDOC",
-"chunk_token_num": 128,
+"chunk_token_num": 130,
"delimiter": "\n",
"auto_keywords": 0,
"auto_questions": 0,
@@ -811,7 +811,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
parser_id="naive",
parser_config={
"layout_recognize": "DeepDOC",
-"chunk_token_num": 128,
+"chunk_token_num": 130,
"delimiter": "\n",
"auto_keywords": 0,
"auto_questions": 0,