Fix/fact summary (#333)
* [fix] Disable the contents related to fact_summary * [fix] Disable the contents related to fact_summary * [fix] Modify the code based on the AI review
This commit is contained in:
@@ -186,10 +186,11 @@ def create_hybrid_retrieval_tool_async(memory_config, **search_params):
|
||||
清理后的数据
|
||||
"""
|
||||
# 需要过滤的字段列表
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
fields_to_remove = {
|
||||
'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids',
|
||||
'expired_at', 'created_at', 'chunk_id', 'id', 'apply_id',
|
||||
'user_id', 'statement_ids', 'updated_at',"chunk_ids","fact_summary"
|
||||
'user_id', 'statement_ids', 'updated_at',"chunk_ids" ,"fact_summary"
|
||||
}
|
||||
|
||||
if isinstance(data, dict):
|
||||
|
||||
@@ -413,7 +413,8 @@ class ExtractedEntityNode(Node):
|
||||
description="Entity aliases - alternative names for this entity"
|
||||
)
|
||||
name_embedding: Optional[List[float]] = Field(default_factory=list, description="Name embedding vector")
|
||||
fact_summary: str = Field(default="", description="Summary of the fact about this entity")
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# fact_summary: str = Field(default="", description="Summary of the fact about this entity")
|
||||
connect_strength: str = Field(..., description="Strong VS Weak about this entity")
|
||||
config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this entity (integer or string)")
|
||||
|
||||
|
||||
@@ -134,42 +134,45 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode):
|
||||
if len(desc_b) > len(desc_a):
|
||||
canonical.description = desc_b
|
||||
# 合并事实摘要:统一保留一个“实体: name”行,来源行去重保序
|
||||
fact_a = getattr(canonical, "fact_summary", "") or ""
|
||||
fact_b = getattr(ent, "fact_summary", "") or ""
|
||||
def _extract_sources(txt: str) -> List[str]:
|
||||
sources: List[str] = []
|
||||
if not txt:
|
||||
return sources
|
||||
for line in str(txt).splitlines():
|
||||
ln = line.strip()
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# fact_a = getattr(canonical, "fact_summary", "") or ""
|
||||
# fact_b = getattr(ent, "fact_summary", "") or ""
|
||||
# def _extract_sources(txt: str) -> List[str]:
|
||||
# sources: List[str] = []
|
||||
# if not txt:
|
||||
# return sources
|
||||
# for line in str(txt).splitlines():
|
||||
# ln = line.strip()
|
||||
# 支持“来源:”或“来源:”前缀
|
||||
m = re.match(r"^来源[::]\s*(.+)$", ln)
|
||||
if m:
|
||||
content = m.group(1).strip()
|
||||
if content:
|
||||
sources.append(content)
|
||||
# m = re.match(r"^来源[::]\s*(.+)$", ln)
|
||||
# if m:
|
||||
# content = m.group(1).strip()
|
||||
# if content:
|
||||
# sources.append(content)
|
||||
# 如果不存在“来源”前缀,则将整体文本视为一个来源片段,避免信息丢失
|
||||
if not sources and txt.strip():
|
||||
sources.append(txt.strip())
|
||||
return sources
|
||||
# if not sources and txt.strip():
|
||||
# sources.append(txt.strip())
|
||||
# return sources
|
||||
try:
|
||||
src_a = _extract_sources(fact_a)
|
||||
src_b = _extract_sources(fact_b)
|
||||
seen = set()
|
||||
merged_sources: List[str] = []
|
||||
for s in src_a + src_b:
|
||||
if s and s not in seen:
|
||||
seen.add(s)
|
||||
merged_sources.append(s)
|
||||
if merged_sources:
|
||||
name_line = f"实体: {getattr(canonical, 'name', '')}".strip()
|
||||
canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources])
|
||||
elif fact_b and not fact_a:
|
||||
canonical.fact_summary = fact_b
|
||||
# src_a = _extract_sources(fact_a)
|
||||
# src_b = _extract_sources(fact_b)
|
||||
# seen = set()
|
||||
# merged_sources: List[str] = []
|
||||
# for s in src_a + src_b:
|
||||
# if s and s not in seen:
|
||||
# seen.add(s)
|
||||
# merged_sources.append(s)
|
||||
# if merged_sources:
|
||||
# name_line = f"实体: {getattr(canonical, 'name', '')}".strip()
|
||||
# canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources])
|
||||
# elif fact_b and not fact_a:
|
||||
# canonical.fact_summary = fact_b
|
||||
pass
|
||||
except Exception:
|
||||
# 兜底:若解析失败,保留较长文本
|
||||
if len(fact_b) > len(fact_a):
|
||||
canonical.fact_summary = fact_b
|
||||
# if len(fact_b) > len(fact_a):
|
||||
# canonical.fact_summary = fact_b
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -145,10 +145,13 @@ def _choose_canonical(a: ExtractedEntityNode, b: ExtractedEntityNode) -> int: #
|
||||
# 2. 第二优先级:按“描述+事实摘要”的总长度排序(内容越长,信息越完整)
|
||||
desc_a = (getattr(a, "description", "") or "")
|
||||
desc_b = (getattr(b, "description", "") or "")
|
||||
fact_a = (getattr(a, "fact_summary", "") or "")
|
||||
fact_b = (getattr(b, "fact_summary", "") or "")
|
||||
score_a = len(desc_a) + len(fact_a)
|
||||
score_b = len(desc_b) + len(fact_b)
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# fact_a = (getattr(a, "fact_summary", "") or "")
|
||||
# fact_b = (getattr(b, "fact_summary", "") or "")
|
||||
# score_a = len(desc_a) + len(fact_a)
|
||||
# score_b = len(desc_b) + len(fact_b)
|
||||
score_a = len(desc_a)
|
||||
score_b = len(desc_b)
|
||||
if score_a != score_b:
|
||||
return 0 if score_a >= score_b else 1
|
||||
return 0
|
||||
@@ -189,7 +192,8 @@ async def _judge_pair(
|
||||
"entity_type": getattr(a, "entity_type", None),
|
||||
"description": getattr(a, "description", None),
|
||||
"aliases": getattr(a, "aliases", None) or [],
|
||||
"fact_summary": getattr(a, "fact_summary", None),
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# "fact_summary": getattr(a, "fact_summary", None),
|
||||
"connect_strength": getattr(a, "connect_strength", None),
|
||||
}
|
||||
entity_b = {
|
||||
@@ -197,7 +201,8 @@ async def _judge_pair(
|
||||
"entity_type": getattr(b, "entity_type", None),
|
||||
"description": getattr(b, "description", None),
|
||||
"aliases": getattr(b, "aliases", None) or [],
|
||||
"fact_summary": getattr(b, "fact_summary", None),
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# "fact_summary": getattr(b, "fact_summary", None),
|
||||
"connect_strength": getattr(b, "connect_strength", None),
|
||||
}
|
||||
# 5. 渲染LLM提示词(用工具函数填充模板,包含实体信息、上下文、输出格式)
|
||||
@@ -248,7 +253,8 @@ async def _judge_pair_disamb(
|
||||
"entity_type": getattr(a, "entity_type", None),
|
||||
"description": getattr(a, "description", None),
|
||||
"aliases": getattr(a, "aliases", None) or [],
|
||||
"fact_summary": getattr(a, "fact_summary", None),
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# "fact_summary": getattr(a, "fact_summary", None),
|
||||
"connect_strength": getattr(a, "connect_strength", None),
|
||||
}
|
||||
entity_b = {
|
||||
@@ -256,7 +262,8 @@ async def _judge_pair_disamb(
|
||||
"entity_type": getattr(b, "entity_type", None),
|
||||
"description": getattr(b, "description", None),
|
||||
"aliases": getattr(b, "aliases", None) or [],
|
||||
"fact_summary": getattr(b, "fact_summary", None),
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# "fact_summary": getattr(b, "fact_summary", None),
|
||||
"connect_strength": getattr(b, "connect_strength", None),
|
||||
}
|
||||
prompt = render_entity_dedup_prompt(
|
||||
|
||||
@@ -72,7 +72,8 @@ def _row_to_entity(row: Dict[str, Any]) -> ExtractedEntityNode:
|
||||
description=row.get("description") or "",
|
||||
aliases=row.get("aliases") or [],
|
||||
name_embedding=row.get("name_embedding") or [],
|
||||
fact_summary=row.get("fact_summary") or "",
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# fact_summary=row.get("fact_summary") or "",
|
||||
connect_strength=row.get("connect_strength") or "",
|
||||
)
|
||||
|
||||
|
||||
@@ -1085,7 +1085,8 @@ class ExtractionOrchestrator:
|
||||
entity_type=getattr(entity, 'type', 'unknown'), # 使用 type 而不是 entity_type
|
||||
description=getattr(entity, 'description', ''), # 添加必需的 description 字段
|
||||
example=getattr(entity, 'example', ''), # 新增:传递示例字段
|
||||
fact_summary=getattr(entity, 'fact_summary', ''), # 添加必需的 fact_summary 字段
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# fact_summary=getattr(entity, 'fact_summary', ''), # 添加必需的 fact_summary 字段
|
||||
connect_strength=entity_connect_strength if entity_connect_strength is not None else 'Strong', # 添加必需的 connect_strength 字段
|
||||
aliases=getattr(entity, 'aliases', []) or [], # 传递从三元组提取阶段获取的aliases
|
||||
name_embedding=getattr(entity, 'name_embedding', None),
|
||||
|
||||
@@ -296,7 +296,9 @@ def resolve_alias_cycles(entities: List[Any], cycles: Dict[str, Set[str]]) -> Li
|
||||
key=lambda eid: (
|
||||
_strength_rank(eid),
|
||||
len(getattr(entity_by_id.get(eid), 'description', '') or ''),
|
||||
len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '')
|
||||
# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
# len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '')
|
||||
0 # 临时占位
|
||||
),
|
||||
reverse=True
|
||||
)
|
||||
|
||||
@@ -9,7 +9,8 @@
|
||||
- 类型: "{{ entity_a.entity_type | default('') }}"
|
||||
- 描述: "{{ entity_a.description | default('') }}"
|
||||
- 别名: {{ entity_a.aliases | default([]) }}
|
||||
- 摘要: "{{ entity_a.fact_summary | default('') }}"
|
||||
{# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 #}
|
||||
{# - 摘要: "{{ entity_a.fact_summary | default('') }}" #}
|
||||
- 连接强弱: "{{ entity_a.connect_strength | default('') }}"
|
||||
|
||||
实体B:
|
||||
@@ -17,7 +18,8 @@
|
||||
- 类型: "{{ entity_b.entity_type | default('') }}"
|
||||
- 描述: "{{ entity_b.description | default('') }}"
|
||||
- 别名: {{ entity_b.aliases | default([]) }}
|
||||
- 摘要: "{{ entity_b.fact_summary | default('') }}"
|
||||
{# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 #}
|
||||
{# - 摘要: "{{ entity_b.fact_summary | default('') }}" #}
|
||||
- 连接强弱: "{{ entity_b.connect_strength | default('') }}"
|
||||
|
||||
上下文:
|
||||
|
||||
@@ -86,7 +86,8 @@ class MemoryConfigRepository:
|
||||
n.description AS description,
|
||||
n.entity_type AS entity_type,
|
||||
n.name AS name,
|
||||
COALESCE(n.fact_summary, '') AS fact_summary,
|
||||
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
// COALESCE(n.fact_summary, '') AS fact_summary,
|
||||
n.end_user_id AS end_user_id,
|
||||
n.apply_id AS apply_id,
|
||||
n.user_id AS user_id,
|
||||
|
||||
@@ -101,10 +101,11 @@ SET e.name = CASE WHEN entity.name IS NOT NULL AND entity.name <> '' THEN entity
|
||||
e.name_embedding = CASE
|
||||
WHEN entity.name_embedding IS NOT NULL AND size(entity.name_embedding) > 0 THEN entity.name_embedding
|
||||
ELSE e.name_embedding END,
|
||||
e.fact_summary = CASE
|
||||
WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> ''
|
||||
AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary))
|
||||
THEN entity.fact_summary ELSE e.fact_summary END,
|
||||
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
// e.fact_summary = CASE
|
||||
// WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> ''
|
||||
// AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary))
|
||||
// THEN entity.fact_summary ELSE e.fact_summary END,
|
||||
e.connect_strength = CASE
|
||||
WHEN entity.connect_strength IS NULL OR entity.connect_strength = '' THEN e.connect_strength
|
||||
ELSE CASE
|
||||
@@ -321,7 +322,8 @@ RETURN e.id AS id,
|
||||
e.description AS description,
|
||||
e.aliases AS aliases,
|
||||
e.name_embedding AS name_embedding,
|
||||
COALESCE(e.fact_summary, '') AS fact_summary,
|
||||
// TODO: fact_summary 功能暂时禁用,待后续开发完善后启用
|
||||
// COALESCE(e.fact_summary, '') AS fact_summary,
|
||||
e.connect_strength AS connect_strength,
|
||||
collect(DISTINCT s.id) AS statement_ids,
|
||||
collect(DISTINCT c.id) AS chunk_ids,
|
||||
|
||||
Reference in New Issue
Block a user