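[fix] Parse Excel: enable html4excel only when its value is the string "true"; change default chunk_token_num from 128 to 130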
This commit is contained in:
@@ -670,7 +670,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||||||
with open(filename, "rb") as f:
|
with open(filename, "rb") as f:
|
||||||
binary = f.read()
|
binary = f.read()
|
||||||
excel_parser = ExcelParser()
|
excel_parser = ExcelParser()
|
||||||
if parser_config.get("html4excel"):
|
if parser_config.get("html4excel") and parser_config.get("html4excel").lower() == "true":
|
||||||
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
||||||
parser_config["chunk_token_num"] = 0
|
parser_config["chunk_token_num"] = 0
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class Document(Base):
|
|||||||
parser_config = Column(JSON, nullable=False,
|
parser_config = Column(JSON, nullable=False,
|
||||||
default={
|
default={
|
||||||
"layout_recognize": "DeepDOC",
|
"layout_recognize": "DeepDOC",
|
||||||
"chunk_token_num": 128,
|
"chunk_token_num": 130,
|
||||||
"delimiter": "\n",
|
"delimiter": "\n",
|
||||||
"auto_keywords": 0,
|
"auto_keywords": 0,
|
||||||
"auto_questions": 0,
|
"auto_questions": 0,
|
||||||
|
|||||||
@@ -495,7 +495,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
|||||||
parser_id="naive",
|
parser_id="naive",
|
||||||
parser_config={
|
parser_config={
|
||||||
"layout_recognize": "DeepDOC",
|
"layout_recognize": "DeepDOC",
|
||||||
"chunk_token_num": 128,
|
"chunk_token_num": 130,
|
||||||
"delimiter": "\n",
|
"delimiter": "\n",
|
||||||
"auto_keywords": 0,
|
"auto_keywords": 0,
|
||||||
"auto_questions": 0,
|
"auto_questions": 0,
|
||||||
@@ -658,7 +658,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
|||||||
parser_id="naive",
|
parser_id="naive",
|
||||||
parser_config={
|
parser_config={
|
||||||
"layout_recognize": "DeepDOC",
|
"layout_recognize": "DeepDOC",
|
||||||
"chunk_token_num": 128,
|
"chunk_token_num": 130,
|
||||||
"delimiter": "\n",
|
"delimiter": "\n",
|
||||||
"auto_keywords": 0,
|
"auto_keywords": 0,
|
||||||
"auto_questions": 0,
|
"auto_questions": 0,
|
||||||
@@ -811,7 +811,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
|||||||
parser_id="naive",
|
parser_id="naive",
|
||||||
parser_config={
|
parser_config={
|
||||||
"layout_recognize": "DeepDOC",
|
"layout_recognize": "DeepDOC",
|
||||||
"chunk_token_num": 128,
|
"chunk_token_num": 130,
|
||||||
"delimiter": "\n",
|
"delimiter": "\n",
|
||||||
"auto_keywords": 0,
|
"auto_keywords": 0,
|
||||||
"auto_questions": 0,
|
"auto_questions": 0,
|
||||||
|
|||||||
Reference in New Issue
Block a user