diff --git a/api/app/core/memory/agent/langgraph_graph/tools/tool.py b/api/app/core/memory/agent/langgraph_graph/tools/tool.py index c4814de1..fcbb18e3 100644 --- a/api/app/core/memory/agent/langgraph_graph/tools/tool.py +++ b/api/app/core/memory/agent/langgraph_graph/tools/tool.py @@ -186,10 +186,11 @@ def create_hybrid_retrieval_tool_async(memory_config, **search_params): 清理后的数据 """ # 需要过滤的字段列表 + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 fields_to_remove = { 'invalid_at', 'valid_at', 'chunk_id_from_rel', 'entity_ids', 'expired_at', 'created_at', 'chunk_id', 'id', 'apply_id', - 'user_id', 'statement_ids', 'updated_at',"chunk_ids","fact_summary" + 'user_id', 'statement_ids', 'updated_at',"chunk_ids" ,"fact_summary" } if isinstance(data, dict): diff --git a/api/app/core/memory/models/graph_models.py b/api/app/core/memory/models/graph_models.py index 79b88fdc..1880b9ab 100644 --- a/api/app/core/memory/models/graph_models.py +++ b/api/app/core/memory/models/graph_models.py @@ -413,7 +413,8 @@ class ExtractedEntityNode(Node): description="Entity aliases - alternative names for this entity" ) name_embedding: Optional[List[float]] = Field(default_factory=list, description="Name embedding vector") - fact_summary: str = Field(default="", description="Summary of the fact about this entity") + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # fact_summary: str = Field(default="", description="Summary of the fact about this entity") connect_strength: str = Field(..., description="Strong VS Weak about this entity") config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this entity (integer or string)") diff --git a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py index a425e0ed..f2f14d9e 100644 --- a/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py +++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/deduped_and_disamb.py @@ -134,42 +134,45 @@ def _merge_attribute(canonical: ExtractedEntityNode, ent: ExtractedEntityNode): if len(desc_b) > len(desc_a): canonical.description = desc_b # 合并事实摘要:统一保留一个“实体: name”行,来源行去重保序 - fact_a = getattr(canonical, "fact_summary", "") or "" - fact_b = getattr(ent, "fact_summary", "") or "" - def _extract_sources(txt: str) -> List[str]: - sources: List[str] = [] - if not txt: - return sources - for line in str(txt).splitlines(): - ln = line.strip() + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # fact_a = getattr(canonical, "fact_summary", "") or "" + # fact_b = getattr(ent, "fact_summary", "") or "" + # def _extract_sources(txt: str) -> List[str]: + # sources: List[str] = [] + # if not txt: + # return sources + # for line in str(txt).splitlines(): + # ln = line.strip() # 支持“来源:”或“来源:”前缀 - m = re.match(r"^来源[::]\s*(.+)$", ln) - if m: - content = m.group(1).strip() - if content: - sources.append(content) + # m = re.match(r"^来源[::]\s*(.+)$", ln) + # if m: + # content = m.group(1).strip() + # if content: + # sources.append(content) # 如果不存在“来源”前缀,则将整体文本视为一个来源片段,避免信息丢失 - if not sources and txt.strip(): - sources.append(txt.strip()) - return sources + # if not sources and txt.strip(): + # sources.append(txt.strip()) + # return sources try: - src_a = _extract_sources(fact_a) - src_b = _extract_sources(fact_b) - seen = set() - merged_sources: List[str] = [] - for s in src_a + src_b: - if s and s not in seen: - seen.add(s) - merged_sources.append(s) - if merged_sources: - name_line = f"实体: {getattr(canonical, 'name', '')}".strip() - canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources]) - elif fact_b and not fact_a: - canonical.fact_summary = fact_b + # src_a = _extract_sources(fact_a) + # src_b = _extract_sources(fact_b) + # seen = set() + # merged_sources: List[str] = [] + # for s in src_a + src_b: + # if s and s not in seen: + # seen.add(s) + # merged_sources.append(s) + # if merged_sources: + # name_line = f"实体: {getattr(canonical, 'name', '')}".strip() + # canonical.fact_summary = "\n".join([name_line] + [f"来源: {s}" for s in merged_sources]) + # elif fact_b and not fact_a: + # canonical.fact_summary = fact_b + pass except Exception: # 兜底:若解析失败,保留较长文本 - if len(fact_b) > len(fact_a): - canonical.fact_summary = fact_b + # if len(fact_b) > len(fact_a): + # canonical.fact_summary = fact_b + pass except Exception: pass diff --git a/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py b/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py index 0249ac1f..a028e916 100644 --- a/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py +++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py @@ -145,10 +145,13 @@ def _choose_canonical(a: ExtractedEntityNode, b: ExtractedEntityNode) -> int: # # 2. 第二优先级:按“描述+事实摘要”的总长度排序(内容越长,信息越完整) desc_a = (getattr(a, "description", "") or "") desc_b = (getattr(b, "description", "") or "") - fact_a = (getattr(a, "fact_summary", "") or "") - fact_b = (getattr(b, "fact_summary", "") or "") - score_a = len(desc_a) + len(fact_a) - score_b = len(desc_b) + len(fact_b) + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # fact_a = (getattr(a, "fact_summary", "") or "") + # fact_b = (getattr(b, "fact_summary", "") or "") + # score_a = len(desc_a) + len(fact_a) + # score_b = len(desc_b) + len(fact_b) + score_a = len(desc_a) + score_b = len(desc_b) if score_a != score_b: return 0 if score_a >= score_b else 1 return 0 @@ -189,7 +192,8 @@ async def _judge_pair( "entity_type": getattr(a, "entity_type", None), "description": getattr(a, "description", None), "aliases": getattr(a, "aliases", None) or [], - "fact_summary": getattr(a, "fact_summary", None), + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # "fact_summary": getattr(a, "fact_summary", None), "connect_strength": getattr(a, "connect_strength", None), } entity_b = { @@ -197,7 +201,8 @@ async def _judge_pair( "entity_type": getattr(b, "entity_type", None), "description": getattr(b, "description", None), "aliases": getattr(b, "aliases", None) or [], - "fact_summary": getattr(b, "fact_summary", None), + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # "fact_summary": getattr(b, "fact_summary", None), "connect_strength": getattr(b, "connect_strength", None), } # 5. 渲染LLM提示词(用工具函数填充模板,包含实体信息、上下文、输出格式) @@ -248,7 +253,8 @@ async def _judge_pair_disamb( "entity_type": getattr(a, "entity_type", None), "description": getattr(a, "description", None), "aliases": getattr(a, "aliases", None) or [], - "fact_summary": getattr(a, "fact_summary", None), + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # "fact_summary": getattr(a, "fact_summary", None), "connect_strength": getattr(a, "connect_strength", None), } entity_b = { @@ -256,7 +262,8 @@ async def _judge_pair_disamb( "entity_type": getattr(b, "entity_type", None), "description": getattr(b, "description", None), "aliases": getattr(b, "aliases", None) or [], - "fact_summary": getattr(b, "fact_summary", None), + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # "fact_summary": getattr(b, "fact_summary", None), "connect_strength": getattr(b, "connect_strength", None), } prompt = render_entity_dedup_prompt( diff --git a/api/app/core/memory/storage_services/extraction_engine/deduplication/second_layer_dedup.py b/api/app/core/memory/storage_services/extraction_engine/deduplication/second_layer_dedup.py index dbc697d9..028a926f 100644 --- a/api/app/core/memory/storage_services/extraction_engine/deduplication/second_layer_dedup.py +++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/second_layer_dedup.py @@ -72,7 +72,8 @@ def _row_to_entity(row: Dict[str, Any]) -> ExtractedEntityNode: description=row.get("description") or "", aliases=row.get("aliases") or [], name_embedding=row.get("name_embedding") or [], - fact_summary=row.get("fact_summary") or "", + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # fact_summary=row.get("fact_summary") or "", connect_strength=row.get("connect_strength") or "", ) diff --git a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py index 7b7e854b..8a99cb40 100644 --- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py +++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py @@ -1085,7 +1085,8 @@ class ExtractionOrchestrator: entity_type=getattr(entity, 'type', 'unknown'), # 使用 type 而不是 entity_type description=getattr(entity, 'description', ''), # 添加必需的 description 字段 example=getattr(entity, 'example', ''), # 新增:传递示例字段 - fact_summary=getattr(entity, 'fact_summary', ''), # 添加必需的 fact_summary 字段 + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # fact_summary=getattr(entity, 'fact_summary', ''), # 添加必需的 fact_summary 字段 connect_strength=entity_connect_strength if entity_connect_strength is not None else 'Strong', # 添加必需的 connect_strength 字段 aliases=getattr(entity, 'aliases', []) or [], # 传递从三元组提取阶段获取的aliases name_embedding=getattr(entity, 'name_embedding', None), diff --git a/api/app/core/memory/utils/alias_utils.py b/api/app/core/memory/utils/alias_utils.py index df75752a..ff139128 100644 --- a/api/app/core/memory/utils/alias_utils.py +++ b/api/app/core/memory/utils/alias_utils.py @@ -296,7 +296,9 @@ def resolve_alias_cycles(entities: List[Any], cycles: Dict[str, Set[str]]) -> Li key=lambda eid: ( _strength_rank(eid), len(getattr(entity_by_id.get(eid), 'description', '') or ''), - len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '') + # TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + # len(getattr(entity_by_id.get(eid), 'fact_summary', '') or '') + 0 # 临时占位 ), reverse=True ) diff --git a/api/app/core/memory/utils/prompt/prompts/entity_dedup.jinja2 b/api/app/core/memory/utils/prompt/prompts/entity_dedup.jinja2 index be53c9d4..7fb465a2 100644 --- a/api/app/core/memory/utils/prompt/prompts/entity_dedup.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/entity_dedup.jinja2 @@ -9,7 +9,8 @@ - 类型: "{{ entity_a.entity_type | default('') }}" - 描述: "{{ entity_a.description | default('') }}" - 别名: {{ entity_a.aliases | default([]) }} -- 摘要: "{{ entity_a.fact_summary | default('') }}" +{# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 #} +{# - 摘要: "{{ entity_a.fact_summary | default('') }}" #} - 连接强弱: "{{ entity_a.connect_strength | default('') }}" 实体B: @@ -17,7 +18,8 @@ - 类型: "{{ entity_b.entity_type | default('') }}" - 描述: "{{ entity_b.description | default('') }}" - 别名: {{ entity_b.aliases | default([]) }} -- 摘要: "{{ entity_b.fact_summary | default('') }}" +{# TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 #} +{# - 摘要: "{{ entity_b.fact_summary | default('') }}" #} - 连接强弱: "{{ entity_b.connect_strength | default('') }}" 上下文: diff --git a/api/app/repositories/memory_config_repository.py b/api/app/repositories/memory_config_repository.py index acb68ba0..68e7cb04 100644 --- a/api/app/repositories/memory_config_repository.py +++ b/api/app/repositories/memory_config_repository.py @@ -86,7 +86,8 @@ class MemoryConfigRepository: n.description AS description, n.entity_type AS entity_type, n.name AS name, - COALESCE(n.fact_summary, '') AS fact_summary, + // TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + // COALESCE(n.fact_summary, '') AS fact_summary, n.end_user_id AS end_user_id, n.apply_id AS apply_id, n.user_id AS user_id, diff --git a/api/app/repositories/neo4j/cypher_queries.py b/api/app/repositories/neo4j/cypher_queries.py index cf1732fd..aabd0050 100644 --- a/api/app/repositories/neo4j/cypher_queries.py +++ b/api/app/repositories/neo4j/cypher_queries.py @@ -101,10 +101,11 @@ SET e.name = CASE WHEN entity.name IS NOT NULL AND entity.name <> '' THEN entity e.name_embedding = CASE WHEN entity.name_embedding IS NOT NULL AND size(entity.name_embedding) > 0 THEN entity.name_embedding ELSE e.name_embedding END, - e.fact_summary = CASE - WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> '' - AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary)) - THEN entity.fact_summary ELSE e.fact_summary END, + // TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + // e.fact_summary = CASE + // WHEN entity.fact_summary IS NOT NULL AND entity.fact_summary <> '' + // AND (e.fact_summary IS NULL OR size(e.fact_summary) = 0 OR size(entity.fact_summary) > size(e.fact_summary)) + // THEN entity.fact_summary ELSE e.fact_summary END, e.connect_strength = CASE WHEN entity.connect_strength IS NULL OR entity.connect_strength = '' THEN e.connect_strength ELSE CASE @@ -321,7 +322,8 @@ RETURN e.id AS id, e.description AS description, e.aliases AS aliases, e.name_embedding AS name_embedding, - COALESCE(e.fact_summary, '') AS fact_summary, + // TODO: fact_summary 功能暂时禁用,待后续开发完善后启用 + // COALESCE(e.fact_summary, '') AS fact_summary, e.connect_strength AS connect_strength, collect(DISTINCT s.id) AS statement_ids, collect(DISTINCT c.id) AS chunk_ids,