Fix/fact summary (#333)

* [fix]Disable the contents related to fact_summary * [fix]Disable the contents related to fact_summary * [fix]Modify the code based on the AI review
2026-02-05 15:56:43 +08:00
parent aca7d25001
commit 47b25d7a26
10 changed files with 73 additions and 52 deletions
--- a/api/app/core/memory/agent/langgraph_graph/tools/tool.py
+++ b/api/app/core/memory/agent/langgraph_graph/tools/tool.py
@@ -186,10 +186,11 @@ def create_hybrid_retrieval_tool_async(memory_config, **search_params):
            清理后的数据
        """
        # 需要过滤的字段列表
+        # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
        fields_to_remove = {
            'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids', 
            'expired_at', 'created_at', 'chunk_id', 'id', 'apply_id', 
-            'user_id', 'statement_ids', 'updated_at',"chunk_ids","fact_summary"
+            'user_id', 'statement_ids', 'updated_at',"chunk_ids"  ,"fact_summary"
        }
        
        if isinstance(data, dict):
--- a/api/app/core/memory/models/graph_models.py
+++ b/api/app/core/memory/models/graph_models.py
@@ -413,7 +413,8 @@ class ExtractedEntityNode(Node):
        description="Entity aliases - alternative names for this entity"
    )
    name_embedding: Optional[List[float]] = Field(default_factory=list, description="Name embedding vector")
-    fact_summary: str = Field(default="", description="Summary of the fact about this entity")
+    # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+    # fact_summary: str = Field(default="", description="Summary of the fact about this entity")
    connect_strength: str = Field(..., description="Strong VS Weak about this entity")
    config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this entity (integer or string)")
    
--- a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py
+++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py
@@ -134,42 +134,45 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode):
        if len(desc_b) > len(desc_a):
            canonical.description = desc_b
        # 合并事实摘要：统一保留一个“实体: name”行，来源行去重保序
-        fact_a = getattr(canonical, "fact_summary", "") or ""
-        fact_b = getattr(ent, "fact_summary", "") or ""
-        def _extract_sources(txt: str) -> List[str]:
-            sources: List[str] = []
-            if not txt:
-                return sources
-            for line in str(txt).splitlines():
-                ln = line.strip()
+        # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+        # fact_a = getattr(canonical, "fact_summary", "") or ""
+        # fact_b = getattr(ent, "fact_summary", "") or ""
+        # def _extract_sources(txt: str) -> List[str]:
+            # sources: List[str] = []
+            # if not txt:
+                # return sources
+            # for line in str(txt).splitlines():
+                # ln = line.strip()
                # 支持“来源:”或“来源：”前缀
-                m = re.match(r"^来源[:：]\s*(.+)$", ln)
-                if m:
-                    content = m.group(1).strip()
-                    if content:
-                        sources.append(content)
+                # m = re.match(r"^来源[:：]\s*(.+)$", ln)
+                # if m:
+                    # content = m.group(1).strip()
+                    # if content:
+                        # sources.append(content)
            # 如果不存在“来源”前缀，则将整体文本视为一个来源片段，避免信息丢失
-            if not sources and txt.strip():
-                sources.append(txt.strip())
-            return sources
+            # if not sources and txt.strip():
+                # sources.append(txt.strip())
+            # return sources
        try:
-            src_a = _extract_sources(fact_a)
-            src_b = _extract_sources(fact_b)
-            seen = set()
-            merged_sources: List[str] = []
-            for s in src_a + src_b:
-                if s and s not in seen:
-                    seen.add(s)
-                    merged_sources.append(s)
-            if merged_sources:
-                name_line = f"实体: {getattr(canonical, 'name', '')}".strip()
-                canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources])
-            elif fact_b and not fact_a:
-                canonical.fact_summary = fact_b
+            #     src_a = _extract_sources(fact_a)
+            #     src_b = _extract_sources(fact_b)
+            #     seen = set()
+            #     merged_sources: List[str] = []
+            #     for s in src_a + src_b:
+            #         if s and s not in seen:
+            #             seen.add(s)
+            #             merged_sources.append(s)
+            #     if merged_sources:
+            #         name_line = f"实体: {getattr(canonical, 'name', '')}".strip()
+            #         canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources])
+            #     elif fact_b and not fact_a:
+            #         canonical.fact_summary = fact_b
+            pass
        except Exception:
            # 兜底：若解析失败，保留较长文本
-            if len(fact_b) > len(fact_a):
-                canonical.fact_summary = fact_b
+            # if len(fact_b) > len(fact_a):
+            #     canonical.fact_summary = fact_b
+            pass
    except Exception:
        pass

--- a/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py
+++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py
@@ -145,10 +145,13 @@ def _choose_canonical(a: ExtractedEntityNode, b: ExtractedEntityNode) -> int: #
     # 2. 第二优先级：按“描述+事实摘要”的总长度排序（内容越长，信息越完整）
    desc_a = (getattr(a, "description", "") or "")
    desc_b = (getattr(b, "description", "") or "")
-    fact_a = (getattr(a, "fact_summary", "") or "")
-    fact_b = (getattr(b, "fact_summary", "") or "")
-    score_a = len(desc_a) + len(fact_a)
-    score_b = len(desc_b) + len(fact_b)
+    # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+    # fact_a = (getattr(a, "fact_summary", "") or "")
+    # fact_b = (getattr(b, "fact_summary", "") or "")
+    # score_a = len(desc_a) + len(fact_a)
+    # score_b = len(desc_b) + len(fact_b)
+    score_a = len(desc_a)
+    score_b = len(desc_b)
    if score_a != score_b:
        return 0 if score_a >= score_b else 1
    return 0
@@ -189,7 +192,8 @@ async def _judge_pair(
        "entity_type": getattr(a, "entity_type", None),
        "description": getattr(a, "description", None),
        "aliases": getattr(a, "aliases", None) or [],
-        "fact_summary": getattr(a, "fact_summary", None),
+        # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+        # "fact_summary": getattr(a, "fact_summary", None),
        "connect_strength": getattr(a, "connect_strength", None),
    }
    entity_b = {
@@ -197,7 +201,8 @@ async def _judge_pair(
        "entity_type": getattr(b, "entity_type", None),
        "description": getattr(b, "description", None),
        "aliases": getattr(b, "aliases", None) or [],
-        "fact_summary": getattr(b, "fact_summary", None),
+        # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+        # "fact_summary": getattr(b, "fact_summary", None),
        "connect_strength": getattr(b, "connect_strength", None),
    }
 # 5. 渲染LLM提示词（用工具函数填充模板，包含实体信息、上下文、输出格式）
@@ -248,7 +253,8 @@ async def _judge_pair_disamb(
        "entity_type": getattr(a, "entity_type", None),
        "description": getattr(a, "description", None),
        "aliases": getattr(a, "aliases", None) or [],
-        "fact_summary": getattr(a, "fact_summary", None),
+        # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+        # "fact_summary": getattr(a, "fact_summary", None),
        "connect_strength": getattr(a, "connect_strength", None),
    }
    entity_b = {
@@ -256,7 +262,8 @@ async def _judge_pair_disamb(
        "entity_type": getattr(b, "entity_type", None),
        "description": getattr(b, "description", None),
        "aliases": getattr(b, "aliases", None) or [],
-        "fact_summary": getattr(b, "fact_summary", None),
+        # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+        # "fact_summary": getattr(b, "fact_summary", None),
        "connect_strength": getattr(b, "connect_strength", None),
    }
    prompt = render_entity_dedup_prompt(
--- a/api/app/core/memory/storage_services/extraction_engine/deduplication/second_layer_dedup.py
+++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/second_layer_dedup.py
@@ -72,7 +72,8 @@ def _row_to_entity(row: Dict[str, Any]) -> ExtractedEntityNode:
        description=row.get("description") or "",
        aliases=row.get("aliases") or [],
        name_embedding=row.get("name_embedding") or [],
-        fact_summary=row.get("fact_summary") or "",
+        # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+        # fact_summary=row.get("fact_summary") or "",
        connect_strength=row.get("connect_strength") or "",
    )

--- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
+++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
@@ -1085,7 +1085,8 @@ class ExtractionOrchestrator:
                                    entity_type=getattr(entity, 'type', 'unknown'),  # 使用 type 而不是 entity_type
                                    description=getattr(entity, 'description', ''),  # 添加必需的 description 字段
                                    example=getattr(entity, 'example', ''),  # 新增：传递示例字段
-                                    fact_summary=getattr(entity, 'fact_summary', ''),  # 添加必需的 fact_summary 字段
+                                    # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+                                    # fact_summary=getattr(entity, 'fact_summary', ''),  # 添加必需的 fact_summary 字段
                                    connect_strength=entity_connect_strength if entity_connect_strength is not None else 'Strong',  # 添加必需的 connect_strength 字段
                                    aliases=getattr(entity, 'aliases', []) or [],  # 传递从三元组提取阶段获取的aliases
                                    name_embedding=getattr(entity, 'name_embedding', None),
--- a/api/app/core/memory/utils/alias_utils.py
+++ b/api/app/core/memory/utils/alias_utils.py
@@ -296,7 +296,9 @@ def resolve_alias_cycles(entities: List[Any], cycles: Dict[str, Set[str]]) -> Li
            key=lambda eid: (
                _strength_rank(eid),
                len(getattr(entity_by_id.get(eid), 'description', '') or ''),
-                len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '')
+                # TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+                # len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '')
+                0  # 临时占位
            ),
            reverse=True
        )
--- a/api/app/core/memory/utils/prompt/prompts/entity_dedup.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/entity_dedup.jinja2
@@ -9,7 +9,8 @@
 - 类型: "{{ entity_a.entity_type | default('') }}"
 - 描述: "{{ entity_a.description | default('') }}"
 - 别名: {{ entity_a.aliases | default([]) }}
- 摘要: "{{ entity_a.fact_summary | default('') }}"
+{# TODO: fact_summary 功能暂时禁用，待后续开发完善后启用 #}
+{# - 摘要: "{{ entity_a.fact_summary | default('') }}" #}
 - 连接强弱: "{{ entity_a.connect_strength | default('') }}"

 实体B:
@@ -17,7 +18,8 @@
 - 类型: "{{ entity_b.entity_type | default('') }}"
 - 描述: "{{ entity_b.description | default('') }}"
 - 别名: {{ entity_b.aliases | default([]) }}
- 摘要: "{{ entity_b.fact_summary | default('') }}"
+{# TODO: fact_summary 功能暂时禁用，待后续开发完善后启用 #}
+{# - 摘要: "{{ entity_b.fact_summary | default('') }}" #}
 - 连接强弱: "{{ entity_b.connect_strength | default('') }}"

 上下文：
--- a/api/app/repositories/memory_config_repository.py
+++ b/api/app/repositories/memory_config_repository.py
@@ -86,7 +86,8 @@ class MemoryConfigRepository:
        n.description AS description, 
        n.entity_type AS entity_type, 
        n.name AS name,
-        COALESCE(n.fact_summary, '') AS fact_summary,
+        // TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+        // COALESCE(n.fact_summary, '') AS fact_summary,
        n.end_user_id AS end_user_id,
        n.apply_id AS apply_id,
        n.user_id AS user_id,
--- a/api/app/repositories/neo4j/cypher_queries.py
+++ b/api/app/repositories/neo4j/cypher_queries.py
@@ -101,10 +101,11 @@ SET e.name = CASE WHEN entity.name IS NOT NULL AND entity.name <> '' THEN entity
    e.name_embedding = CASE
        WHEN entity.name_embedding IS NOT NULL AND size(entity.name_embedding) > 0 THEN entity.name_embedding
        ELSE e.name_embedding END,
-    e.fact_summary = CASE
-        WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> ''
-         AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary))
-        THEN entity.fact_summary ELSE e.fact_summary END,
+    // TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+    // e.fact_summary = CASE
+    //     WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> ''
+    //      AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary))
+    //     THEN entity.fact_summary ELSE e.fact_summary END,
    e.connect_strength = CASE
        WHEN entity.connect_strength IS NULL OR entity.connect_strength = '' THEN e.connect_strength
        ELSE CASE
@@ -321,7 +322,8 @@ RETURN e.id AS id,
       e.description AS description,
       e.aliases AS aliases,
       e.name_embedding AS name_embedding,
-       COALESCE(e.fact_summary, '') AS fact_summary,
+       // TODO: fact_summary 功能暂时禁用，待后续开发完善后启用
+       // COALESCE(e.fact_summary, '') AS fact_summary,
       e.connect_strength AS connect_strength,
       collect(DISTINCT s.id) AS statement_ids,
       collect(DISTINCT c.id) AS chunk_ids,