diff --git a/api/app/core/rag/app/naive.py b/api/app/core/rag/app/naive.py index 2b8d0e50..72272347 100644 --- a/api/app/core/rag/app/naive.py +++ b/api/app/core/rag/app/naive.py @@ -670,7 +670,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, with open(filename, "rb") as f: binary = f.read() excel_parser = ExcelParser() - if parser_config.get("html4excel"): + if parser_config.get("html4excel") and parser_config.get("html4excel").lower() == "true": sections = [(_, "") for _ in excel_parser.html(binary, 12) if _] parser_config["chunk_token_num"] = 0 else: diff --git a/api/app/models/document_model.py b/api/app/models/document_model.py index fb43d44d..48be241c 100644 --- a/api/app/models/document_model.py +++ b/api/app/models/document_model.py @@ -19,7 +19,7 @@ class Document(Base): parser_config = Column(JSON, nullable=False, default={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0, diff --git a/api/app/tasks.py b/api/app/tasks.py index 91b2e070..8d89e0d1 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -497,7 +497,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID): parser_id="naive", parser_config={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0, @@ -666,7 +666,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID): parser_id="naive", parser_config={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0, @@ -828,7 +828,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID): parser_id="naive", parser_config={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0,