From 33238d34c94d421050c5c0579c6bb1e005bd05a8 Mon Sep 17 00:00:00 2001
From: lixiangcheng1 <lixiangcheng1@wanda.cn>
Date: Thu, 26 Feb 2026 10:17:44 +0800
Subject: [PATCH 1/3] [fix] Run the Trio task in a dedicated worker thread (to avoid
 inheriting the state of the parent process)

---
 api/app/tasks.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/api/app/tasks.py b/api/app/tasks.py
index d60af6e5..499b93a5 100644
--- a/api/app/tasks.py
+++ b/api/app/tasks.py
@@ -255,7 +255,7 @@ def parse_document(file_path: str, document_id: uuid.UUID):
                 progress_msg += f"{datetime.now().strftime('%H:%M:%S')} GraphRAG task result for task {task}:\n{result}\n"
                 return result
 
-            try:
+            def sync_task():
                 trio.run(
                     lambda: _run(
                         row=task,
@@ -270,6 +270,10 @@ def parse_document(file_path: str, document_id: uuid.UUID):
                         with_community=with_community,
                     )
                 )
+            try:
+                with ThreadPoolExecutor(max_workers=1) as executor:
+                    future = executor.submit(sync_task)
+                    future.result()  # Blocks until the task completes
             except Exception as e:
                 progress_msg += f"{datetime.now().strftime('%H:%M:%S')} GraphRAG task failed for task {task}:\n{str(e)}\n"
             progress_msg += f"{datetime.now().strftime('%H:%M:%S')} Knowledge Graph done ({time.time() - start_time}s)"

From 4f0b653a822119c0ccf2fbbc0b75262113743794 Mon Sep 17 00:00:00 2001
From: lixiangcheng1 <lixiangcheng1@wanda.cn>
Date: Thu, 26 Feb 2026 19:04:42 +0800
Subject: [PATCH 2/3] [fix] The complexity and volume of the document content
 require an extended timeframe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/app/celery_app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/app/celery_app.py b/api/app/celery_app.py
index db78a368..265cd2ab 100644
--- a/api/app/celery_app.py
+++ b/api/app/celery_app.py
@@ -43,8 +43,8 @@ celery_app.conf.update(
     task_ignore_result=False,
     
     # 超时设置
-    task_time_limit=1800,  # 30分钟硬超时
-    task_soft_time_limit=1500,  # 25分钟软超时
+    task_time_limit=3600,  # 60分钟硬超时
+    task_soft_time_limit=3000,  # 50分钟软超时
     
     # Worker 设置 (per-worker settings are in docker-compose command line)
     worker_prefetch_multiplier=1,  # Don't hoard tasks, fairer distribution

From d4c4160215f9bdf35f941d7c3242dbed9795fa1c Mon Sep 17 00:00:00 2001
From: lixiangcheng1 <lixiangcheng1@wanda.cn>
Date: Wed, 4 Mar 2026 15:28:17 +0800
Subject: [PATCH 3/3] [ADD] Knowledge base retrieval supports file set retrieval
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/app/controllers/chunk_controller.py | 8 ++++----
 api/app/schemas/chunk_schema.py         | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/api/app/controllers/chunk_controller.py b/api/app/controllers/chunk_controller.py
index 620d8a1a..988aa706 100644
--- a/api/app/controllers/chunk_controller.py
+++ b/api/app/controllers/chunk_controller.py
@@ -441,14 +441,14 @@ async def retrieve_chunks(
     # 1 participle search, 2 semantic search, 3 hybrid search
     match retrieve_data.retrieve_type:
         case chunk_schema.RetrieveType.PARTICIPLE:
-            rs = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold)
+            rs = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold, file_names_filter=retrieve_data.file_names_filter)
             return success(data=rs, msg="retrieval successful")
         case chunk_schema.RetrieveType.SEMANTIC:
-            rs = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight)
+            rs = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight, file_names_filter=retrieve_data.file_names_filter)
             return success(data=rs, msg="retrieval successful")
         case _:
-            rs1 = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight)
-            rs2 = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold)
+            rs1 = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight, file_names_filter=retrieve_data.file_names_filter)
+            rs2 = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold, file_names_filter=retrieve_data.file_names_filter)
             # Efficient deduplication
             seen_ids = set()
             unique_rs = []
diff --git a/api/app/schemas/chunk_schema.py b/api/app/schemas/chunk_schema.py
index cef9b9cb..ce8f70f2 100644
--- a/api/app/schemas/chunk_schema.py
+++ b/api/app/schemas/chunk_schema.py
@@ -46,6 +46,7 @@ class ChunkUpdate(BaseModel):
 class ChunkRetrieve(BaseModel):
     query: str
     kb_ids: list[uuid.UUID]
+    file_names_filter: list[str] | None = Field(None)
     similarity_threshold: float | None = Field(None)
     vector_similarity_weight: float | None = Field(None)
     top_k: int | None = Field(None)