Fix/fact summary (#333)
* [fix] Disable the fact_summary-related code * [fix] Disable the fact_summary-related code * [fix] Revise the code based on the AI review feedback
This commit is contained in:
@@ -186,10 +186,11 @@ def create_hybrid_retrieval_tool_async(memory_config, **search_params):
|
|||||||
清理后的数据
|
清理后的数据
|
||||||
"""
|
"""
|
||||||
# 需要过滤的字段列表
|
# 需要过滤的字段列表
|
||||||
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
fields_to_remove = {
|
fields_to_remove = {
|
||||||
'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids',
|
'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids',
|
||||||
'expired_at', 'created_at', 'chunk_id', 'id', 'apply_id',
|
'expired_at', 'created_at', 'chunk_id', 'id', 'apply_id',
|
||||||
'user_id', 'statement_ids', 'updated_at',"chunk_ids","fact_summary"
|
'user_id', 'statement_ids', 'updated_at',"chunk_ids" ,"fact_summary"
|
||||||
}
|
}
|
||||||
|
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
|
|||||||
@@ -413,7 +413,8 @@ class ExtractedEntityNode(Node):
|
|||||||
description="Entity aliases - alternative names for this entity"
|
description="Entity aliases - alternative names for this entity"
|
||||||
)
|
)
|
||||||
name_embedding: Optional[List[float]] = Field(default_factory=list, description="Name embedding vector")
|
name_embedding: Optional[List[float]] = Field(default_factory=list, description="Name embedding vector")
|
||||||
fact_summary: str = Field(default="", description="Summary of the fact about this entity")
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# fact_summary: str = Field(default="", description="Summary of the fact about this entity")
|
||||||
connect_strength: str = Field(..., description="Strong VS Weak about this entity")
|
connect_strength: str = Field(..., description="Strong VS Weak about this entity")
|
||||||
config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this entity (integer or string)")
|
config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this entity (integer or string)")
|
||||||
|
|
||||||
|
|||||||
@@ -134,42 +134,45 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode):
|
|||||||
if len(desc_b) > len(desc_a):
|
if len(desc_b) > len(desc_a):
|
||||||
canonical.description = desc_b
|
canonical.description = desc_b
|
||||||
# 合并事实摘要:统一保留一个“实体: name”行,来源行去重保序
|
# 合并事实摘要:统一保留一个“实体: name”行,来源行去重保序
|
||||||
fact_a = getattr(canonical, "fact_summary", "") or ""
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
fact_b = getattr(ent, "fact_summary", "") or ""
|
# fact_a = getattr(canonical, "fact_summary", "") or ""
|
||||||
def _extract_sources(txt: str) -> List[str]:
|
# fact_b = getattr(ent, "fact_summary", "") or ""
|
||||||
sources: List[str] = []
|
# def _extract_sources(txt: str) -> List[str]:
|
||||||
if not txt:
|
# sources: List[str] = []
|
||||||
return sources
|
# if not txt:
|
||||||
for line in str(txt).splitlines():
|
# return sources
|
||||||
ln = line.strip()
|
# for line in str(txt).splitlines():
|
||||||
|
# ln = line.strip()
|
||||||
# 支持“来源:”或“来源:”前缀
|
# 支持“来源:”或“来源:”前缀
|
||||||
m = re.match(r"^来源[::]\s*(.+)$", ln)
|
# m = re.match(r"^来源[::]\s*(.+)$", ln)
|
||||||
if m:
|
# if m:
|
||||||
content = m.group(1).strip()
|
# content = m.group(1).strip()
|
||||||
if content:
|
# if content:
|
||||||
sources.append(content)
|
# sources.append(content)
|
||||||
# 如果不存在“来源”前缀,则将整体文本视为一个来源片段,避免信息丢失
|
# 如果不存在“来源”前缀,则将整体文本视为一个来源片段,避免信息丢失
|
||||||
if not sources and txt.strip():
|
# if not sources and txt.strip():
|
||||||
sources.append(txt.strip())
|
# sources.append(txt.strip())
|
||||||
return sources
|
# return sources
|
||||||
try:
|
try:
|
||||||
src_a = _extract_sources(fact_a)
|
# src_a = _extract_sources(fact_a)
|
||||||
src_b = _extract_sources(fact_b)
|
# src_b = _extract_sources(fact_b)
|
||||||
seen = set()
|
# seen = set()
|
||||||
merged_sources: List[str] = []
|
# merged_sources: List[str] = []
|
||||||
for s in src_a + src_b:
|
# for s in src_a + src_b:
|
||||||
if s and s not in seen:
|
# if s and s not in seen:
|
||||||
seen.add(s)
|
# seen.add(s)
|
||||||
merged_sources.append(s)
|
# merged_sources.append(s)
|
||||||
if merged_sources:
|
# if merged_sources:
|
||||||
name_line = f"实体: {getattr(canonical, 'name', '')}".strip()
|
# name_line = f"实体: {getattr(canonical, 'name', '')}".strip()
|
||||||
canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources])
|
# canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources])
|
||||||
elif fact_b and not fact_a:
|
# elif fact_b and not fact_a:
|
||||||
canonical.fact_summary = fact_b
|
# canonical.fact_summary = fact_b
|
||||||
|
pass
|
||||||
except Exception:
|
except Exception:
|
||||||
# 兜底:若解析失败,保留较长文本
|
# 兜底:若解析失败,保留较长文本
|
||||||
if len(fact_b) > len(fact_a):
|
# if len(fact_b) > len(fact_a):
|
||||||
canonical.fact_summary = fact_b
|
# canonical.fact_summary = fact_b
|
||||||
|
pass
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -145,10 +145,13 @@ def _choose_canonical(a: ExtractedEntityNode, b: ExtractedEntityNode) -> int: #
|
|||||||
# 2. 第二优先级:按“描述+事实摘要”的总长度排序(内容越长,信息越完整)
|
# 2. 第二优先级:按“描述+事实摘要”的总长度排序(内容越长,信息越完整)
|
||||||
desc_a = (getattr(a, "description", "") or "")
|
desc_a = (getattr(a, "description", "") or "")
|
||||||
desc_b = (getattr(b, "description", "") or "")
|
desc_b = (getattr(b, "description", "") or "")
|
||||||
fact_a = (getattr(a, "fact_summary", "") or "")
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
fact_b = (getattr(b, "fact_summary", "") or "")
|
# fact_a = (getattr(a, "fact_summary", "") or "")
|
||||||
score_a = len(desc_a) + len(fact_a)
|
# fact_b = (getattr(b, "fact_summary", "") or "")
|
||||||
score_b = len(desc_b) + len(fact_b)
|
# score_a = len(desc_a) + len(fact_a)
|
||||||
|
# score_b = len(desc_b) + len(fact_b)
|
||||||
|
score_a = len(desc_a)
|
||||||
|
score_b = len(desc_b)
|
||||||
if score_a != score_b:
|
if score_a != score_b:
|
||||||
return 0 if score_a >= score_b else 1
|
return 0 if score_a >= score_b else 1
|
||||||
return 0
|
return 0
|
||||||
@@ -189,7 +192,8 @@ async def _judge_pair(
|
|||||||
"entity_type": getattr(a, "entity_type", None),
|
"entity_type": getattr(a, "entity_type", None),
|
||||||
"description": getattr(a, "description", None),
|
"description": getattr(a, "description", None),
|
||||||
"aliases": getattr(a, "aliases", None) or [],
|
"aliases": getattr(a, "aliases", None) or [],
|
||||||
"fact_summary": getattr(a, "fact_summary", None),
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# "fact_summary": getattr(a, "fact_summary", None),
|
||||||
"connect_strength": getattr(a, "connect_strength", None),
|
"connect_strength": getattr(a, "connect_strength", None),
|
||||||
}
|
}
|
||||||
entity_b = {
|
entity_b = {
|
||||||
@@ -197,7 +201,8 @@ async def _judge_pair(
|
|||||||
"entity_type": getattr(b, "entity_type", None),
|
"entity_type": getattr(b, "entity_type", None),
|
||||||
"description": getattr(b, "description", None),
|
"description": getattr(b, "description", None),
|
||||||
"aliases": getattr(b, "aliases", None) or [],
|
"aliases": getattr(b, "aliases", None) or [],
|
||||||
"fact_summary": getattr(b, "fact_summary", None),
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# "fact_summary": getattr(b, "fact_summary", None),
|
||||||
"connect_strength": getattr(b, "connect_strength", None),
|
"connect_strength": getattr(b, "connect_strength", None),
|
||||||
}
|
}
|
||||||
# 5. 渲染LLM提示词(用工具函数填充模板,包含实体信息、上下文、输出格式)
|
# 5. 渲染LLM提示词(用工具函数填充模板,包含实体信息、上下文、输出格式)
|
||||||
@@ -248,7 +253,8 @@ async def _judge_pair_disamb(
|
|||||||
"entity_type": getattr(a, "entity_type", None),
|
"entity_type": getattr(a, "entity_type", None),
|
||||||
"description": getattr(a, "description", None),
|
"description": getattr(a, "description", None),
|
||||||
"aliases": getattr(a, "aliases", None) or [],
|
"aliases": getattr(a, "aliases", None) or [],
|
||||||
"fact_summary": getattr(a, "fact_summary", None),
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# "fact_summary": getattr(a, "fact_summary", None),
|
||||||
"connect_strength": getattr(a, "connect_strength", None),
|
"connect_strength": getattr(a, "connect_strength", None),
|
||||||
}
|
}
|
||||||
entity_b = {
|
entity_b = {
|
||||||
@@ -256,7 +262,8 @@ async def _judge_pair_disamb(
|
|||||||
"entity_type": getattr(b, "entity_type", None),
|
"entity_type": getattr(b, "entity_type", None),
|
||||||
"description": getattr(b, "description", None),
|
"description": getattr(b, "description", None),
|
||||||
"aliases": getattr(b, "aliases", None) or [],
|
"aliases": getattr(b, "aliases", None) or [],
|
||||||
"fact_summary": getattr(b, "fact_summary", None),
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# "fact_summary": getattr(b, "fact_summary", None),
|
||||||
"connect_strength": getattr(b, "connect_strength", None),
|
"connect_strength": getattr(b, "connect_strength", None),
|
||||||
}
|
}
|
||||||
prompt = render_entity_dedup_prompt(
|
prompt = render_entity_dedup_prompt(
|
||||||
|
|||||||
@@ -72,7 +72,8 @@ def _row_to_entity(row: Dict[str, Any]) -> ExtractedEntityNode:
|
|||||||
description=row.get("description") or "",
|
description=row.get("description") or "",
|
||||||
aliases=row.get("aliases") or [],
|
aliases=row.get("aliases") or [],
|
||||||
name_embedding=row.get("name_embedding") or [],
|
name_embedding=row.get("name_embedding") or [],
|
||||||
fact_summary=row.get("fact_summary") or "",
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# fact_summary=row.get("fact_summary") or "",
|
||||||
connect_strength=row.get("connect_strength") or "",
|
connect_strength=row.get("connect_strength") or "",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1085,7 +1085,8 @@ class ExtractionOrchestrator:
|
|||||||
entity_type=getattr(entity, 'type', 'unknown'), # 使用 type 而不是 entity_type
|
entity_type=getattr(entity, 'type', 'unknown'), # 使用 type 而不是 entity_type
|
||||||
description=getattr(entity, 'description', ''), # 添加必需的 description 字段
|
description=getattr(entity, 'description', ''), # 添加必需的 description 字段
|
||||||
example=getattr(entity, 'example', ''), # 新增:传递示例字段
|
example=getattr(entity, 'example', ''), # 新增:传递示例字段
|
||||||
fact_summary=getattr(entity, 'fact_summary', ''), # 添加必需的 fact_summary 字段
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# fact_summary=getattr(entity, 'fact_summary', ''), # 添加必需的 fact_summary 字段
|
||||||
connect_strength=entity_connect_strength if entity_connect_strength is not None else 'Strong', # 添加必需的 connect_strength 字段
|
connect_strength=entity_connect_strength if entity_connect_strength is not None else 'Strong', # 添加必需的 connect_strength 字段
|
||||||
aliases=getattr(entity, 'aliases', []) or [], # 传递从三元组提取阶段获取的aliases
|
aliases=getattr(entity, 'aliases', []) or [], # 传递从三元组提取阶段获取的aliases
|
||||||
name_embedding=getattr(entity, 'name_embedding', None),
|
name_embedding=getattr(entity, 'name_embedding', None),
|
||||||
|
|||||||
@@ -296,7 +296,9 @@ def resolve_alias_cycles(entities: List[Any], cycles: Dict[str, Set[str]]) -> Li
|
|||||||
key=lambda eid: (
|
key=lambda eid: (
|
||||||
_strength_rank(eid),
|
_strength_rank(eid),
|
||||||
len(getattr(entity_by_id.get(eid), 'description', '') or ''),
|
len(getattr(entity_by_id.get(eid), 'description', '') or ''),
|
||||||
len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '')
|
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
# len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '')
|
||||||
|
0 # 临时占位
|
||||||
),
|
),
|
||||||
reverse=True
|
reverse=True
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,7 +9,8 @@
|
|||||||
- 类型: "{{ entity_a.entity_type | default('') }}"
|
- 类型: "{{ entity_a.entity_type | default('') }}"
|
||||||
- 描述: "{{ entity_a.description | default('') }}"
|
- 描述: "{{ entity_a.description | default('') }}"
|
||||||
- 别名: {{ entity_a.aliases | default([]) }}
|
- 别名: {{ entity_a.aliases | default([]) }}
|
||||||
- 摘要: "{{ entity_a.fact_summary | default('') }}"
|
{# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 #}
|
||||||
|
{# - 摘要: "{{ entity_a.fact_summary | default('') }}" #}
|
||||||
- 连接强弱: "{{ entity_a.connect_strength | default('') }}"
|
- 连接强弱: "{{ entity_a.connect_strength | default('') }}"
|
||||||
|
|
||||||
实体B:
|
实体B:
|
||||||
@@ -17,7 +18,8 @@
|
|||||||
- 类型: "{{ entity_b.entity_type | default('') }}"
|
- 类型: "{{ entity_b.entity_type | default('') }}"
|
||||||
- 描述: "{{ entity_b.description | default('') }}"
|
- 描述: "{{ entity_b.description | default('') }}"
|
||||||
- 别名: {{ entity_b.aliases | default([]) }}
|
- 别名: {{ entity_b.aliases | default([]) }}
|
||||||
- 摘要: "{{ entity_b.fact_summary | default('') }}"
|
{# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 #}
|
||||||
|
{# - 摘要: "{{ entity_b.fact_summary | default('') }}" #}
|
||||||
- 连接强弱: "{{ entity_b.connect_strength | default('') }}"
|
- 连接强弱: "{{ entity_b.connect_strength | default('') }}"
|
||||||
|
|
||||||
上下文:
|
上下文:
|
||||||
|
|||||||
@@ -86,7 +86,8 @@ class MemoryConfigRepository:
|
|||||||
n.description AS description,
|
n.description AS description,
|
||||||
n.entity_type AS entity_type,
|
n.entity_type AS entity_type,
|
||||||
n.name AS name,
|
n.name AS name,
|
||||||
COALESCE(n.fact_summary, '') AS fact_summary,
|
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
// COALESCE(n.fact_summary, '') AS fact_summary,
|
||||||
n.end_user_id AS end_user_id,
|
n.end_user_id AS end_user_id,
|
||||||
n.apply_id AS apply_id,
|
n.apply_id AS apply_id,
|
||||||
n.user_id AS user_id,
|
n.user_id AS user_id,
|
||||||
|
|||||||
@@ -101,10 +101,11 @@ SET e.name = CASE WHEN entity.name IS NOT NULL AND entity.name <> '' THEN entity
|
|||||||
e.name_embedding = CASE
|
e.name_embedding = CASE
|
||||||
WHEN entity.name_embedding IS NOT NULL AND size(entity.name_embedding) > 0 THEN entity.name_embedding
|
WHEN entity.name_embedding IS NOT NULL AND size(entity.name_embedding) > 0 THEN entity.name_embedding
|
||||||
ELSE e.name_embedding END,
|
ELSE e.name_embedding END,
|
||||||
e.fact_summary = CASE
|
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> ''
|
// e.fact_summary = CASE
|
||||||
AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary))
|
// WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> ''
|
||||||
THEN entity.fact_summary ELSE e.fact_summary END,
|
// AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary))
|
||||||
|
// THEN entity.fact_summary ELSE e.fact_summary END,
|
||||||
e.connect_strength = CASE
|
e.connect_strength = CASE
|
||||||
WHEN entity.connect_strength IS NULL OR entity.connect_strength = '' THEN e.connect_strength
|
WHEN entity.connect_strength IS NULL OR entity.connect_strength = '' THEN e.connect_strength
|
||||||
ELSE CASE
|
ELSE CASE
|
||||||
@@ -321,7 +322,8 @@ RETURN e.id AS id,
|
|||||||
e.description AS description,
|
e.description AS description,
|
||||||
e.aliases AS aliases,
|
e.aliases AS aliases,
|
||||||
e.name_embedding AS name_embedding,
|
e.name_embedding AS name_embedding,
|
||||||
COALESCE(e.fact_summary, '') AS fact_summary,
|
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||||
|
// COALESCE(e.fact_summary, '') AS fact_summary,
|
||||||
e.connect_strength AS connect_strength,
|
e.connect_strength AS connect_strength,
|
||||||
collect(DISTINCT s.id) AS statement_ids,
|
collect(DISTINCT s.id) AS statement_ids,
|
||||||
collect(DISTINCT c.id) AS chunk_ids,
|
collect(DISTINCT c.id) AS chunk_ids,
|
||||||
|
|||||||
Reference in New Issue
Block a user