Merge pull request #573 from SuanmoSuanyangTechnology/feature/node-aggregation
Feature/node aggregation
This commit is contained in:
@@ -13,12 +13,17 @@ from app.repositories.neo4j.cypher_queries import (
|
||||
ENTITY_LEAVE_ALL_COMMUNITIES,
|
||||
GET_ENTITY_NEIGHBORS,
|
||||
GET_ALL_ENTITIES_FOR_USER,
|
||||
GET_ENTITY_COUNT_FOR_USER,
|
||||
GET_ALL_ENTITY_IDS_FOR_USER,
|
||||
GET_ENTITIES_PAGE,
|
||||
GET_COMMUNITY_MEMBERS,
|
||||
GET_ALL_COMMUNITY_MEMBERS_BATCH,
|
||||
GET_ALL_ENTITY_NEIGHBORS_BATCH,
|
||||
GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS,
|
||||
CHECK_USER_HAS_COMMUNITIES,
|
||||
UPDATE_COMMUNITY_MEMBER_COUNT,
|
||||
UPDATE_COMMUNITY_METADATA,
|
||||
BATCH_UPDATE_COMMUNITY_METADATA,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -110,6 +115,65 @@ class CommunityRepository:
|
||||
logger.error(f"get_all_entities failed: {e}")
|
||||
return []
|
||||
|
||||
async def get_entity_count(self, end_user_id: str) -> int:
|
||||
"""仅返回用户实体总数,不加载实体数据。"""
|
||||
try:
|
||||
result = await self.connector.execute_query(
|
||||
GET_ENTITY_COUNT_FOR_USER,
|
||||
end_user_id=end_user_id,
|
||||
)
|
||||
return result[0]["entity_count"] if result else 0
|
||||
except Exception as e:
|
||||
logger.error(f"get_entity_count failed: {e}")
|
||||
return 0
|
||||
|
||||
async def get_all_entity_ids(self, end_user_id: str) -> List[str]:
|
||||
"""仅返回用户所有实体 ID 列表,不加载 embedding 等大字段。"""
|
||||
try:
|
||||
result = await self.connector.execute_query(
|
||||
GET_ALL_ENTITY_IDS_FOR_USER,
|
||||
end_user_id=end_user_id,
|
||||
)
|
||||
return [r["id"] for r in result]
|
||||
except Exception as e:
|
||||
logger.error(f"get_all_entity_ids failed: {e}")
|
||||
return []
|
||||
|
||||
async def get_entities_page(
|
||||
self, end_user_id: str, skip: int, limit: int
|
||||
) -> List[Dict]:
|
||||
"""分页拉取实体,用于全量聚类分批处理。"""
|
||||
try:
|
||||
return await self.connector.execute_query(
|
||||
GET_ENTITIES_PAGE,
|
||||
end_user_id=end_user_id,
|
||||
skip=skip,
|
||||
limit=limit,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"get_entities_page failed: {e}")
|
||||
return []
|
||||
|
||||
async def get_entity_neighbors_for_ids(
|
||||
self, entity_ids: List[str], end_user_id: str
|
||||
) -> Dict[str, List[Dict]]:
|
||||
"""批量拉取指定实体列表的邻居,返回 {entity_id: [neighbors]}。"""
|
||||
try:
|
||||
rows = await self.connector.execute_query(
|
||||
GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS,
|
||||
entity_ids=entity_ids,
|
||||
end_user_id=end_user_id,
|
||||
)
|
||||
result: Dict[str, List[Dict]] = {}
|
||||
for row in rows:
|
||||
eid = row["entity_id"]
|
||||
neighbor = {k: v for k, v in row.items() if k != "entity_id"}
|
||||
result.setdefault(eid, []).append(neighbor)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"get_entity_neighbors_for_ids failed: {e}")
|
||||
return {}
|
||||
|
||||
async def get_community_members(
|
||||
self, community_id: str, end_user_id: str
|
||||
) -> List[Dict]:
|
||||
@@ -177,8 +241,9 @@ class CommunityRepository:
|
||||
name: str,
|
||||
summary: str,
|
||||
core_entities: List[str],
|
||||
summary_embedding: Optional[List[float]] = None,
|
||||
) -> bool:
|
||||
"""更新社区的名称、摘要和核心实体列表。"""
|
||||
"""更新社区的名称、摘要、核心实体列表和摘要向量。"""
|
||||
try:
|
||||
result = await self.connector.execute_query(
|
||||
UPDATE_COMMUNITY_METADATA,
|
||||
@@ -187,8 +252,31 @@ class CommunityRepository:
|
||||
name=name,
|
||||
summary=summary,
|
||||
core_entities=core_entities,
|
||||
summary_embedding=summary_embedding,
|
||||
)
|
||||
return bool(result)
|
||||
except Exception as e:
|
||||
logger.error(f"update_community_metadata failed: {e}")
|
||||
return False
|
||||
|
||||
async def batch_update_community_metadata(
|
||||
self,
|
||||
communities: List[Dict],
|
||||
) -> bool:
|
||||
"""批量更新多个社区的元数据。
|
||||
|
||||
Args:
|
||||
communities: 每项包含 community_id, end_user_id, name, summary,
|
||||
core_entities, summary_embedding
|
||||
"""
|
||||
if not communities:
|
||||
return True
|
||||
try:
|
||||
await self.connector.execute_query(
|
||||
BATCH_UPDATE_COMMUNITY_METADATA,
|
||||
communities=communities,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"batch_update_community_metadata failed: {e}")
|
||||
return False
|
||||
|
||||
@@ -42,6 +42,13 @@ async def create_fulltext_indexes():
|
||||
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
|
||||
""")
|
||||
print("✓ Created: summariesFulltext")
|
||||
|
||||
# 创建 Community 索引
|
||||
await connector.execute_query("""
|
||||
CREATE FULLTEXT INDEX communitiesFulltext IF NOT EXISTS FOR (c:Community) ON EACH [c.name, c.summary]
|
||||
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
|
||||
""")
|
||||
print("✓ Created: communitiesFulltext")
|
||||
|
||||
print("\nFull-text indexes created successfully with BM25 support.")
|
||||
except Exception as e:
|
||||
@@ -124,6 +131,18 @@ async def create_vector_indexes():
|
||||
}}
|
||||
""")
|
||||
print("✓ Created: dialogue_embedding_index")
|
||||
|
||||
# Community summary embedding index
|
||||
await connector.execute_query("""
|
||||
CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS
|
||||
FOR (c:Community)
|
||||
ON c.summary_embedding
|
||||
OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 1024,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
""")
|
||||
print("✓ Created: community_summary_embedding_index")
|
||||
|
||||
print("\nVector indexes created successfully!")
|
||||
print("\nExpected performance improvement:")
|
||||
|
||||
@@ -1122,21 +1122,33 @@ RETURN e.id AS id,
|
||||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||||
"""
|
||||
|
||||
GET_ENTITY_COUNT_FOR_USER = """
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||||
RETURN count(e) AS entity_count
|
||||
"""
|
||||
|
||||
GET_ALL_ENTITY_IDS_FOR_USER = """
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||||
RETURN e.id AS id
|
||||
"""
|
||||
|
||||
GET_COMMUNITY_MEMBERS = """
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community {community_id: $community_id})
|
||||
RETURN e.id AS id, e.name AS name, e.entity_type AS entity_type,
|
||||
e.importance_score AS importance_score, e.activation_value AS activation_value,
|
||||
e.name_embedding AS name_embedding
|
||||
e.name_embedding AS name_embedding,
|
||||
e.aliases AS aliases, e.description AS description
|
||||
ORDER BY coalesce(e.activation_value, 0) DESC
|
||||
"""
|
||||
|
||||
GET_ALL_COMMUNITY_MEMBERS_BATCH = """
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||||
WHERE c.community_id IN $community_ids
|
||||
RETURN c.community_id AS community_id,
|
||||
e.id AS id,
|
||||
e.id AS id, e.name AS name, e.entity_type AS entity_type,
|
||||
e.importance_score AS importance_score, e.activation_value AS activation_value,
|
||||
e.name_embedding AS name_embedding,
|
||||
e.activation_value AS activation_value
|
||||
e.aliases AS aliases, e.description AS description
|
||||
ORDER BY c.community_id, coalesce(e.activation_value, 0) DESC
|
||||
"""
|
||||
|
||||
CHECK_USER_HAS_COMMUNITIES = """
|
||||
@@ -1153,13 +1165,58 @@ RETURN c.community_id AS community_id, cnt AS member_count
|
||||
|
||||
UPDATE_COMMUNITY_METADATA = """
|
||||
MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
|
||||
SET c.name = $name,
|
||||
c.summary = $summary,
|
||||
c.core_entities = $core_entities,
|
||||
c.updated_at = datetime()
|
||||
SET c.name = $name,
|
||||
c.summary = $summary,
|
||||
c.core_entities = $core_entities,
|
||||
c.summary_embedding = $summary_embedding,
|
||||
c.updated_at = datetime()
|
||||
RETURN c.community_id AS community_id
|
||||
"""
|
||||
|
||||
BATCH_UPDATE_COMMUNITY_METADATA = """
|
||||
UNWIND $communities AS row
|
||||
MATCH (c:Community {community_id: row.community_id, end_user_id: row.end_user_id})
|
||||
SET c.name = row.name,
|
||||
c.summary = row.summary,
|
||||
c.core_entities = row.core_entities,
|
||||
c.summary_embedding = row.summary_embedding,
|
||||
c.updated_at = datetime()
|
||||
RETURN c.community_id AS community_id
|
||||
"""
|
||||
|
||||
GET_ENTITIES_PAGE = """
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||||
OPTIONAL MATCH (e)-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||||
RETURN e.id AS id,
|
||||
e.name AS name,
|
||||
e.name_embedding AS name_embedding,
|
||||
e.activation_value AS activation_value,
|
||||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||||
ORDER BY e.id
|
||||
SKIP $skip LIMIT $limit
|
||||
"""
|
||||
|
||||
GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS = """
|
||||
// 批量拉取指定实体列表的邻居(用于分批全量聚类)
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||||
WHERE e.id IN $entity_ids
|
||||
OPTIONAL MATCH (e)-[:EXTRACTED_RELATIONSHIP]-(nb1:ExtractedEntity {end_user_id: $end_user_id})
|
||||
OPTIONAL MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||||
OPTIONAL MATCH (s)-[:REFERENCES_ENTITY]->(nb2:ExtractedEntity {end_user_id: $end_user_id})
|
||||
WHERE nb2.id <> e.id
|
||||
WITH e, collect(DISTINCT nb1) + collect(DISTINCT nb2) AS all_neighbors
|
||||
UNWIND all_neighbors AS nb
|
||||
WITH e, nb WHERE nb IS NOT NULL
|
||||
OPTIONAL MATCH (nb)-[:BELONGS_TO_COMMUNITY]->(c:Community)
|
||||
RETURN DISTINCT
|
||||
e.id AS entity_id,
|
||||
nb.id AS id,
|
||||
nb.name AS name,
|
||||
nb.name_embedding AS name_embedding,
|
||||
nb.activation_value AS activation_value,
|
||||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||||
"""
|
||||
|
||||
GET_ALL_ENTITY_NEIGHBORS_BATCH = """
|
||||
// 批量拉取某用户下所有实体的邻居(用于全量聚类预加载)
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})
|
||||
@@ -1185,20 +1242,59 @@ RETURN DISTINCT
|
||||
CASE WHEN c IS NOT NULL THEN c.community_id ELSE null END AS community_id
|
||||
"""
|
||||
|
||||
GET_COMMUNITY_GRAPH_DATA = """
|
||||
MATCH (c:Community {end_user_id: $end_user_id})
|
||||
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[b:BELONGS_TO_COMMUNITY]->(c)
|
||||
OPTIONAL MATCH (e)-[r:EXTRACTED_RELATIONSHIP]-(e2:ExtractedEntity {end_user_id: $end_user_id})
|
||||
RETURN
|
||||
elementId(c) AS c_id,
|
||||
properties(c) AS c_props,
|
||||
elementId(e) AS e_id,
|
||||
properties(e) AS e_props,
|
||||
elementId(b) AS b_id,
|
||||
elementId(e2) AS e2_id,
|
||||
properties(e2) AS e2_props,
|
||||
elementId(r) AS r_id,
|
||||
type(r) AS r_type,
|
||||
properties(r) AS r_props,
|
||||
startNode(r) = e AS r_from_e
|
||||
|
||||
# Community keyword search: matches name or summary via fulltext index
|
||||
SEARCH_COMMUNITIES_BY_KEYWORD = """
|
||||
CALL db.index.fulltext.queryNodes("communitiesFulltext", $q) YIELD node AS c, score
|
||||
WHERE ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||||
RETURN c.community_id AS id,
|
||||
c.name AS name,
|
||||
c.summary AS content,
|
||||
c.core_entities AS core_entities,
|
||||
c.member_count AS member_count,
|
||||
c.end_user_id AS end_user_id,
|
||||
c.updated_at AS updated_at,
|
||||
score
|
||||
ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
# Community 向量检索 ──────────────────────────────────────────────────
|
||||
# Community embedding-based search: cosine similarity on Community.summary_embedding
|
||||
COMMUNITY_EMBEDDING_SEARCH = """
|
||||
CALL db.index.vector.queryNodes('community_summary_embedding_index', $limit * 100, $embedding)
|
||||
YIELD node AS c, score
|
||||
WHERE c.summary_embedding IS NOT NULL
|
||||
AND ($end_user_id IS NULL OR c.end_user_id = $end_user_id)
|
||||
RETURN c.community_id AS id,
|
||||
c.name AS name,
|
||||
c.summary AS content,
|
||||
c.core_entities AS core_entities,
|
||||
c.member_count AS member_count,
|
||||
c.end_user_id AS end_user_id,
|
||||
c.updated_at AS updated_at,
|
||||
score
|
||||
ORDER BY score DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
# Community 展开检索 ──────────────────────────────────────────────────
|
||||
# 命中社区后,拉取该社区所有成员实体关联的 Statement 节点(主题→细节两级检索)
|
||||
EXPAND_COMMUNITY_STATEMENTS = """
|
||||
MATCH (c:Community {community_id: $community_id})
|
||||
MATCH (e:ExtractedEntity)-[:BELONGS_TO_COMMUNITY]->(c)
|
||||
MATCH (s:Statement)-[:REFERENCES_ENTITY]->(e)
|
||||
WHERE s.end_user_id = $end_user_id
|
||||
RETURN s.statement AS statement,
|
||||
s.id AS id,
|
||||
s.end_user_id AS end_user_id,
|
||||
s.created_at AS created_at,
|
||||
s.valid_at AS valid_at,
|
||||
s.invalid_at AS invalid_at,
|
||||
COALESCE(s.activation_value, s.importance_score, 0.5) AS activation_value,
|
||||
COALESCE(s.importance_score, 0.5) AS importance_score,
|
||||
e.name AS source_entity,
|
||||
c.name AS community_name
|
||||
ORDER BY COALESCE(s.activation_value, 0) DESC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
# 使用新的仓储层
|
||||
@@ -158,11 +157,12 @@ async def save_dialog_and_statements_to_neo4j(
|
||||
statement_chunk_edges: List[StatementChunkEdge],
|
||||
statement_entity_edges: List[StatementEntityEdge],
|
||||
connector: Neo4jConnector,
|
||||
config_id: Optional[str] = None,
|
||||
llm_model_id: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Save dialogue nodes, chunk nodes, statement nodes, entities, and all relationships to Neo4j using graph models.
|
||||
|
||||
只负责数据写入,不触发聚类。聚类由调用方在写入成功后通过
|
||||
schedule_clustering_after_write() 显式触发。
|
||||
|
||||
Args:
|
||||
dialogue_nodes: List of DialogueNode objects to save
|
||||
chunk_nodes: List of ChunkNode objects to save
|
||||
@@ -293,9 +293,6 @@ async def save_dialog_and_statements_to_neo4j(
|
||||
logger.info("Transaction completed. Summary: %s", summary)
|
||||
logger.debug("Full transaction results: %r", results)
|
||||
|
||||
# 写入成功后,异步触发聚类(不阻塞写入响应)
|
||||
schedule_clustering_after_write(entity_nodes, config_id=config_id, llm_model_id=llm_model_id)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -309,6 +306,7 @@ def schedule_clustering_after_write(
|
||||
entity_nodes: List,
|
||||
config_id: Optional[str] = None,
|
||||
llm_model_id: Optional[str] = None,
|
||||
embedding_model_id: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
写入 Neo4j 成功后,调度后台聚类任务。
|
||||
@@ -327,7 +325,7 @@ def schedule_clustering_after_write(
|
||||
end_user_id = entity_nodes[0].end_user_id
|
||||
new_entity_ids = [e.id for e in entity_nodes]
|
||||
logger.info(f"[Clustering] 准备触发聚类,实体数: {len(new_entity_ids)}, end_user_id: {end_user_id}")
|
||||
asyncio.create_task(_trigger_clustering(new_entity_ids, end_user_id, config_id=config_id, llm_model_id=llm_model_id))
|
||||
asyncio.create_task(_trigger_clustering(new_entity_ids, end_user_id, config_id=config_id, llm_model_id=llm_model_id, embedding_model_id=embedding_model_id))
|
||||
|
||||
|
||||
async def _trigger_clustering(
|
||||
@@ -335,6 +333,7 @@ async def _trigger_clustering(
|
||||
end_user_id: str,
|
||||
config_id: Optional[str] = None,
|
||||
llm_model_id: Optional[str] = None,
|
||||
embedding_model_id: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
聚类触发函数,自动判断全量初始化还是增量更新。
|
||||
@@ -344,7 +343,7 @@ async def _trigger_clustering(
|
||||
from app.core.memory.storage_services.clustering_engine import LabelPropagationEngine
|
||||
logger.info(f"[Clustering] 开始聚类,end_user_id={end_user_id}, 实体数={len(new_entity_ids)}")
|
||||
connector = Neo4jConnector()
|
||||
engine = LabelPropagationEngine(connector, config_id=config_id, llm_model_id=llm_model_id)
|
||||
engine = LabelPropagationEngine(connector, config_id=config_id, llm_model_id=llm_model_id, embedding_model_id=embedding_model_id)
|
||||
await engine.run(end_user_id=end_user_id, new_entity_ids=new_entity_ids)
|
||||
logger.info(f"[Clustering] 聚类完成,end_user_id={end_user_id}")
|
||||
except Exception as e:
|
||||
|
||||
@@ -4,10 +4,13 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from app.repositories.neo4j.cypher_queries import (
|
||||
CHUNK_EMBEDDING_SEARCH,
|
||||
COMMUNITY_EMBEDDING_SEARCH,
|
||||
ENTITY_EMBEDDING_SEARCH,
|
||||
EXPAND_COMMUNITY_STATEMENTS,
|
||||
MEMORY_SUMMARY_EMBEDDING_SEARCH,
|
||||
SEARCH_CHUNK_BY_CHUNK_ID,
|
||||
SEARCH_CHUNKS_BY_CONTENT,
|
||||
SEARCH_COMMUNITIES_BY_KEYWORD,
|
||||
SEARCH_DIALOGUE_BY_DIALOG_ID,
|
||||
SEARCH_ENTITIES_BY_NAME,
|
||||
SEARCH_MEMORY_SUMMARIES_BY_KEYWORD,
|
||||
@@ -285,6 +288,15 @@ async def search_graph(
|
||||
limit=limit,
|
||||
))
|
||||
task_keys.append("summaries")
|
||||
|
||||
if "communities" in include:
|
||||
tasks.append(connector.execute_query(
|
||||
SEARCH_COMMUNITIES_BY_KEYWORD,
|
||||
q=q,
|
||||
end_user_id=end_user_id,
|
||||
limit=limit,
|
||||
))
|
||||
task_keys.append("communities")
|
||||
|
||||
# Execute all queries in parallel
|
||||
task_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
@@ -396,6 +408,16 @@ async def search_graph_by_embedding(
|
||||
))
|
||||
task_keys.append("summaries")
|
||||
|
||||
# Communities (向量语义匹配)
|
||||
if "communities" in include:
|
||||
tasks.append(connector.execute_query(
|
||||
COMMUNITY_EMBEDDING_SEARCH,
|
||||
embedding=embedding,
|
||||
end_user_id=end_user_id,
|
||||
limit=limit,
|
||||
))
|
||||
task_keys.append("communities")
|
||||
|
||||
# Execute all queries in parallel
|
||||
query_start = time.time()
|
||||
task_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
@@ -408,6 +430,7 @@ async def search_graph_by_embedding(
|
||||
"chunks": [],
|
||||
"entities": [],
|
||||
"summaries": [],
|
||||
"communities": [],
|
||||
}
|
||||
|
||||
for key, result in zip(task_keys, task_results):
|
||||
@@ -661,6 +684,62 @@ async def search_graph_by_chunk_id(
|
||||
return {"chunks": chunks}
|
||||
|
||||
|
||||
async def search_graph_community_expand(
|
||||
connector: Neo4jConnector,
|
||||
community_ids: List[str],
|
||||
end_user_id: str,
|
||||
limit: int = 10,
|
||||
) -> Dict[str, List[Dict[str, Any]]]:
|
||||
"""
|
||||
三期:社区展开检索 —— 主题 → 细节两级检索。
|
||||
|
||||
命中 Community 节点后,沿 BELONGS_TO_COMMUNITY 关系拉取成员实体,
|
||||
再沿 REFERENCES_ENTITY 关系拉取关联的 Statement 节点,
|
||||
按 activation_value 降序返回,实现"主题摘要 → 具体记忆"的深度召回。
|
||||
|
||||
Args:
|
||||
connector: Neo4j 连接器
|
||||
community_ids: 已命中的社区 ID 列表
|
||||
end_user_id: 用户 ID,用于数据隔离
|
||||
limit: 每个社区最多返回的 Statement 数量
|
||||
|
||||
Returns:
|
||||
{"expanded_statements": [Statement 列表,含 community_name / source_entity 字段]}
|
||||
"""
|
||||
if not community_ids or not end_user_id:
|
||||
return {"expanded_statements": []}
|
||||
|
||||
tasks = [
|
||||
connector.execute_query(
|
||||
EXPAND_COMMUNITY_STATEMENTS,
|
||||
community_id=cid,
|
||||
end_user_id=end_user_id,
|
||||
limit=limit,
|
||||
)
|
||||
for cid in community_ids
|
||||
]
|
||||
|
||||
task_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
expanded: List[Dict[str, Any]] = []
|
||||
for cid, result in zip(community_ids, task_results):
|
||||
if isinstance(result, Exception):
|
||||
logger.warning(f"社区展开检索失败 community_id={cid}: {result}")
|
||||
else:
|
||||
expanded.extend(result)
|
||||
|
||||
# 按 activation_value 全局排序后去重
|
||||
from app.core.memory.src.search import _deduplicate_results
|
||||
expanded.sort(
|
||||
key=lambda x: float(x.get("activation_value") or 0),
|
||||
reverse=True,
|
||||
)
|
||||
expanded = _deduplicate_results(expanded)
|
||||
|
||||
logger.info(f"社区展开检索完成: community_ids={community_ids}, 展开 statements={len(expanded)}")
|
||||
return {"expanded_statements": expanded}
|
||||
|
||||
|
||||
async def search_graph_by_created_at(
|
||||
connector: Neo4jConnector,
|
||||
end_user_id: Optional[str] = None,
|
||||
|
||||
Reference in New Issue
Block a user