Merge branch 'feature/knowledge_lxc' into develop

This commit is contained in:
lixiangcheng1
2026-03-04 15:42:06 +08:00
4 changed files with 12 additions and 7 deletions

View File

@@ -44,8 +44,8 @@ celery_app.conf.update(
task_ignore_result=False, task_ignore_result=False,
# 超时设置 # 超时设置
task_time_limit=1800, # 30分钟硬超时 task_time_limit=3600, # 60分钟硬超时
task_soft_time_limit=1500, # 25分钟软超时 task_soft_time_limit=3000, # 50分钟软超时
# Worker 设置 (per-worker settings are in docker-compose command line) # Worker 设置 (per-worker settings are in docker-compose command line)
worker_prefetch_multiplier=1, # Don't hoard tasks, fairer distribution worker_prefetch_multiplier=1, # Don't hoard tasks, fairer distribution

View File

@@ -441,14 +441,14 @@ async def retrieve_chunks(
# 1 participle search, 2 semantic search, 3 hybrid search # 1 participle search, 2 semantic search, 3 hybrid search
match retrieve_data.retrieve_type: match retrieve_data.retrieve_type:
case chunk_schema.RetrieveType.PARTICIPLE: case chunk_schema.RetrieveType.PARTICIPLE:
rs = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold) rs = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold, file_names_filter=retrieve_data.file_names_filter)
return success(data=rs, msg="retrieval successful") return success(data=rs, msg="retrieval successful")
case chunk_schema.RetrieveType.SEMANTIC: case chunk_schema.RetrieveType.SEMANTIC:
rs = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight) rs = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight, file_names_filter=retrieve_data.file_names_filter)
return success(data=rs, msg="retrieval successful") return success(data=rs, msg="retrieval successful")
case _: case _:
rs1 = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight) rs1 = vector_service.search_by_vector(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.vector_similarity_weight, file_names_filter=retrieve_data.file_names_filter)
rs2 = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold) rs2 = vector_service.search_by_full_text(query=retrieve_data.query, top_k=retrieve_data.top_k, indices=indices, score_threshold=retrieve_data.similarity_threshold, file_names_filter=retrieve_data.file_names_filter)
# Efficient deduplication # Efficient deduplication
seen_ids = set() seen_ids = set()
unique_rs = [] unique_rs = []

View File

@@ -46,6 +46,7 @@ class ChunkUpdate(BaseModel):
class ChunkRetrieve(BaseModel): class ChunkRetrieve(BaseModel):
query: str query: str
kb_ids: list[uuid.UUID] kb_ids: list[uuid.UUID]
file_names_filter: list[str] | None = Field(None)
similarity_threshold: float | None = Field(None) similarity_threshold: float | None = Field(None)
vector_similarity_weight: float | None = Field(None) vector_similarity_weight: float | None = Field(None)
top_k: int | None = Field(None) top_k: int | None = Field(None)

View File

@@ -257,7 +257,7 @@ def parse_document(file_path: str, document_id: uuid.UUID):
progress_msg += f"{datetime.now().strftime('%H:%M:%S')} GraphRAG task result for task {task}:\n{result}\n" progress_msg += f"{datetime.now().strftime('%H:%M:%S')} GraphRAG task result for task {task}:\n{result}\n"
return result return result
try: def sync_task():
trio.run( trio.run(
lambda: _run( lambda: _run(
row=task, row=task,
@@ -272,6 +272,10 @@ def parse_document(file_path: str, document_id: uuid.UUID):
with_community=with_community, with_community=with_community,
) )
) )
try:
with ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(sync_task)
future.result() # Blocks until the task completes
except Exception as e: except Exception as e:
progress_msg += f"{datetime.now().strftime('%H:%M:%S')} GraphRAG task failed for task {task}:\n{str(e)}\n" progress_msg += f"{datetime.now().strftime('%H:%M:%S')} GraphRAG task failed for task {task}:\n{str(e)}\n"
progress_msg += f"{datetime.now().strftime('%H:%M:%S')} Knowledge Graph done ({time.time() - start_time}s)" progress_msg += f"{datetime.now().strftime('%H:%M:%S')} Knowledge Graph done ({time.time() - start_time}s)"