Merge pull request #659 from SuanmoSuanyangTechnology/feature/rich-summary

[add] Introduce examples and triples to enrich the community summaries
This commit is contained in:
Ke Sun
2026-03-20 20:50:35 +08:00
committed by GitHub
4 changed files with 47 additions and 6 deletions

View File

@@ -439,15 +439,17 @@ class LabelPropagationEngine:
@staticmethod
def _build_entity_lines(members: List[Dict]) -> List[str]:
    """Format community members as bullet lines for the LLM prompt.

    Each member produces exactly one line: the entity name followed by
    whichever of aliases, description and example are present. Fields that
    are missing, ``None`` or empty are omitted from the line.

    Args:
        members: Entity dicts. The keys ``name``, ``aliases``,
            ``description`` and ``example`` are read; all are optional.

    Returns:
        One ``"- ..."`` line per member, in input order.
    """
    lines = []
    for m in members:
        # `or` fallbacks also cover keys that exist but hold None.
        m_name = m.get("name") or ""
        aliases = m.get("aliases") or []
        description = m.get("description") or ""
        example = m.get("example") or ""
        # Delimit every field so adjacent values cannot run together, and
        # close each parenthesis that is opened.
        aliases_str = f"(别名:{'、'.join(aliases)})" if aliases else ""
        desc_str = f":{description}" if description else ""
        example_str = f"(示例:{example})" if example else ""
        # Single append per member; the line always includes the example.
        lines.append(f"- {m_name}{aliases_str}{desc_str}{example_str}")
    return lines
async def _generate_community_metadata(
@@ -481,11 +483,24 @@ class LabelPropagationEngine:
core_entities = [m["name"] for m in sorted_members[:CORE_ENTITY_LIMIT] if m.get("name")]
entity_list_str = "\n".join(self._build_entity_lines(members))
# 方案四:注入社区内实体间关系三元组
relationships = await self.repo.get_community_relationships(cid, end_user_id)
rel_lines = [
f"- {r['subject']}{r['predicate']}{r['object']}"
for r in relationships
if r.get("subject") and r.get("predicate") and r.get("object")
]
rel_section = (
f"\n实体间关系:\n" + "\n".join(rel_lines)
if rel_lines else ""
)
prompt = (
f"以下是一组语义相关的实体:\n{entity_list_str}\n\n"
f"以下是一组语义相关的实体:\n{entity_list_str}{rel_section}\n\n"
f"请为这组实体所代表的主题:\n"
f"1. 起一个简洁的中文名称不超过10个字\n"
f"2. 写一句话摘要(不超过50个字\n\n"
f"2. 写一句话摘要(不超过80个字\n\n"
f"严格按以下格式输出,不要有其他内容:\n"
f"名称:<名称>\n摘要:<摘要>"
)

View File

@@ -17,6 +17,7 @@ from app.repositories.neo4j.cypher_queries import (
GET_ALL_ENTITY_IDS_FOR_USER,
GET_ENTITIES_PAGE,
GET_COMMUNITY_MEMBERS,
GET_COMMUNITY_RELATIONSHIPS,
GET_ALL_COMMUNITY_MEMBERS_BATCH,
GET_ALL_ENTITY_NEIGHBORS_BATCH,
GET_ENTITY_NEIGHBORS_BATCH_FOR_IDS,
@@ -177,7 +178,7 @@ class CommunityRepository:
async def get_community_members(
self, community_id: str, end_user_id: str
) -> List[Dict]:
"""查询社区成员列表。"""
"""查询社区成员列表(含 example 字段)"""
try:
return await self.connector.execute_query(
GET_COMMUNITY_MEMBERS,
@@ -188,6 +189,20 @@ class CommunityRepository:
logger.error(f"get_community_members failed: {e}")
return []
async def get_community_relationships(
    self, community_id: str, end_user_id: str
) -> List[Dict]:
    """Fetch relationship triples (subject, predicate, object) for a community.

    Executes ``GET_COMMUNITY_RELATIONSHIPS`` through the connector, passing
    ``community_id`` and ``end_user_id`` as query parameters.

    Returns:
        Whatever the connector returns for the query. If the query raises,
        the error is logged and an empty list is returned instead.
    """
    query_params = {"community_id": community_id, "end_user_id": end_user_id}
    try:
        triples = await self.connector.execute_query(
            GET_COMMUNITY_RELATIONSHIPS, **query_params
        )
    except Exception as exc:
        # Best effort: a failed lookup yields an empty result, not an error.
        logger.error(f"get_community_relationships failed: {exc}")
        return []
    return triples
async def get_all_community_members_batch(
self, community_ids: List[str], end_user_id: str
) -> Dict[str, List[Dict]]:

View File

@@ -1137,10 +1137,20 @@ MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(
RETURN e.id AS id, e.name AS name, e.entity_type AS entity_type,
e.importance_score AS importance_score, e.activation_value AS activation_value,
e.name_embedding AS name_embedding,
e.aliases AS aliases, e.description AS description
e.aliases AS aliases, e.description AS description,
e.example AS example
ORDER BY coalesce(e.activation_value, 0) DESC
"""
# Cypher query: relationship triples between members of one community.
# Parameters: $end_user_id, $community_id.
# Matches two ExtractedEntity nodes of the given end user that both belong to
# the same Community node and are linked by a directed EXTRACTED_RELATIONSHIP
# edge (e1 -> e2). Returns e1.name as `subject`, the edge's `predicate`
# property as `predicate` and e2.name as `object`.
# Rows are ordered by subject, predicate, object so the LIMIT picks the same
# rows on every run; at most 20 triples are returned.
GET_COMMUNITY_RELATIONSHIPS = """
MATCH (e1:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community {community_id: $community_id})
MATCH (e2:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c)
MATCH (e1)-[r:EXTRACTED_RELATIONSHIP]->(e2)
RETURN e1.name AS subject, r.predicate AS predicate, e2.name AS object
ORDER BY e1.name, r.predicate, e2.name
LIMIT 20
"""
GET_ALL_COMMUNITY_MEMBERS_BATCH = """
MATCH (e:ExtractedEntity {end_user_id: $end_user_id})-[:BELONGS_TO_COMMUNITY]->(c:Community)
RETURN c.community_id AS community_id,

View File

@@ -1,4 +1,5 @@
import asyncio
import os
from typing import List, Optional
# 使用新的仓储层