From 26abf7b5861b7fa0b773e6a5e17fbfa933d2cd5f Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Tue, 10 Feb 2026 14:05:01 +0800 Subject: [PATCH] [fix] parse excel --- api/app/core/rag/app/naive.py | 2 +- api/app/models/document_model.py | 2 +- api/app/tasks.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/app/core/rag/app/naive.py b/api/app/core/rag/app/naive.py index 2b8d0e50..72272347 100644 --- a/api/app/core/rag/app/naive.py +++ b/api/app/core/rag/app/naive.py @@ -670,7 +670,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, with open(filename, "rb") as f: binary = f.read() excel_parser = ExcelParser() - if parser_config.get("html4excel"): + if parser_config.get("html4excel") and parser_config.get("html4excel").lower() == "true": sections = [(_, "") for _ in excel_parser.html(binary, 12) if _] parser_config["chunk_token_num"] = 0 else: diff --git a/api/app/models/document_model.py b/api/app/models/document_model.py index fb43d44d..48be241c 100644 --- a/api/app/models/document_model.py +++ b/api/app/models/document_model.py @@ -19,7 +19,7 @@ class Document(Base): parser_config = Column(JSON, nullable=False, default={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0, diff --git a/api/app/tasks.py b/api/app/tasks.py index 712c0ee5..e6bdeb18 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -495,7 +495,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID): parser_id="naive", parser_config={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0, @@ -658,7 +658,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID): parser_id="naive", parser_config={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0, @@ -811,7 +811,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID): parser_id="naive", parser_config={ "layout_recognize": "DeepDOC", - "chunk_token_num": 128, + "chunk_token_num": 130, "delimiter": "\n", "auto_keywords": 0, "auto_questions": 0,