[fix] parse excel
This commit is contained in:
@@ -670,7 +670,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
with open(filename, "rb") as f:
|
||||
binary = f.read()
|
||||
excel_parser = ExcelParser()
|
||||
- if parser_config.get("html4excel"):
|
||||
+ if parser_config.get("html4excel") and parser_config.get("html4excel").lower() == "true":
|
||||
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
||||
parser_config["chunk_token_num"] = 0
|
||||
else:
|
||||
|
||||
@@ -19,7 +19,7 @@ class Document(Base):
|
||||
parser_config = Column(JSON, nullable=False,
|
||||
default={
|
||||
"layout_recognize": "DeepDOC",
|
||||
- "chunk_token_num": 128,
|
||||
+ "chunk_token_num": 130,
|
||||
"delimiter": "\n",
|
||||
"auto_keywords": 0,
|
||||
"auto_questions": 0,
|
||||
|
||||
@@ -495,7 +495,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
parser_id="naive",
|
||||
parser_config={
|
||||
"layout_recognize": "DeepDOC",
|
||||
- "chunk_token_num": 128,
|
||||
+ "chunk_token_num": 130,
|
||||
"delimiter": "\n",
|
||||
"auto_keywords": 0,
|
||||
"auto_questions": 0,
|
||||
@@ -658,7 +658,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
parser_id="naive",
|
||||
parser_config={
|
||||
"layout_recognize": "DeepDOC",
|
||||
- "chunk_token_num": 128,
|
||||
+ "chunk_token_num": 130,
|
||||
"delimiter": "\n",
|
||||
"auto_keywords": 0,
|
||||
"auto_questions": 0,
|
||||
@@ -811,7 +811,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
parser_id="naive",
|
||||
parser_config={
|
||||
"layout_recognize": "DeepDOC",
|
||||
- "chunk_token_num": 128,
|
||||
+ "chunk_token_num": 130,
|
||||
"delimiter": "\n",
|
||||
"auto_keywords": 0,
|
||||
"auto_questions": 0,
|
||||
|
||||
Reference in New Issue
Block a user