[add] The application layer introduces the clustering community-retrieval module

This commit is contained in:
lanceyq
2026-03-17 14:51:04 +08:00
parent 19d149c129
commit 8a0d83b340
7 changed files with 180 additions and 49 deletions

View File

@@ -42,6 +42,13 @@ async def create_fulltext_indexes():
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
""")
print("✓ Created: summariesFulltext")
# Create the Community full-text index
await connector.execute_query("""
CREATE FULLTEXT INDEX communitiesFulltext IF NOT EXISTS FOR (c:Community) ON EACH [c.name, c.summary]
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
""")
print("✓ Created: communitiesFulltext")
print("\nFull-text indexes created successfully with BM25 support.")
except Exception as e:
@@ -112,6 +119,18 @@ async def create_vector_indexes():
}}
""")
print("✓ Created: summary_embedding_index")
# Community summary embedding index
await connector.execute_query("""
CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS
FOR (c:Community)
ON c.summary_embedding
OPTIONS {indexConfig: {
`vector.dimensions`: 1024,
`vector.similarity_function`: 'cosine'
}}
""")
print("✓ Created: community_summary_embedding_index")
# Dialogue embedding index (optional)
await connector.execute_query("""

View File

@@ -305,6 +305,7 @@ async def search_graph(
results = {}
for key, result in zip(task_keys, task_results):
if isinstance(result, Exception):
logger.warning(f"search_graph: {key} 关键词查询异常: {result}")
results[key] = []
else:
results[key] = result
@@ -361,7 +362,11 @@ async def search_graph_by_embedding(
print(f"[PERF] Embedding generation took: {embed_time:.4f}s")
if not embeddings or not embeddings[0]:
return {"statements": [], "chunks": [], "entities": [], "summaries": []}
logger.warning(
f"search_graph_by_embedding: embedding 生成失败或为空,"
f"query='{query_text[:50]}', end_user_id={end_user_id},向量检索跳过"
)
return {"statements": [], "chunks": [], "entities": [], "summaries": [], "communities": []}
embedding = embeddings[0]
# Prepare tasks for parallel execution
@@ -435,6 +440,7 @@ async def search_graph_by_embedding(
for key, result in zip(task_keys, task_results):
if isinstance(result, Exception):
logger.warning(f"search_graph_by_embedding: {key} 向量查询异常: {result}")
results[key] = []
else:
results[key] = result