【change】 1. Standardize logging conventions; 2. Trigger clustering explicitly

2026-03-23 16:38:47 +08:00
parent 37bc4beab4
commit 31b8a3764e
13 changed files with 186 additions and 158 deletions
--- a/api/app/repositories/neo4j/add_nodes.py
+++ b/api/app/repositories/neo4j/add_nodes.py
@@ -1,10 +1,13 @@
 from typing import List, Optional
+import logging

 from app.repositories.neo4j.cypher_queries import DIALOGUE_NODE_SAVE, STATEMENT_NODE_SAVE, CHUNK_NODE_SAVE,MEMORY_SUMMARY_NODE_SAVE
 from app.core.memory.models.graph_models import DialogueNode, StatementNode, ChunkNode, MemorySummaryNode
 # 使用新的仓储层
 from app.repositories.neo4j.neo4j_connector import Neo4jConnector

+logger = logging.getLogger(__name__)
+

 async def delete_all_nodes(end_user_id: str, connector: Neo4jConnector):
    """Delete all nodes in the database."""
@@ -217,10 +220,10 @@ async def add_memory_summary_nodes(summaries: List[MemorySummaryNode], connector
            summaries=flattened
        )
        created_ids = [record.get("uuid") for record in result]
-        print(f"Successfully saved {len(created_ids)} MemorySummary nodes to Neo4j")
+        logger.info(f"Successfully saved {len(created_ids)} MemorySummary nodes to Neo4j")
        return created_ids
    except Exception as e:
-        print(f"Failed to save MemorySummary nodes to Neo4j: {e}")
+        logger.error(f"Failed to save MemorySummary nodes to Neo4j: {e}")
        return None


--- a/api/app/repositories/neo4j/community_repository.py
+++ b/api/app/repositories/neo4j/community_repository.py
@@ -300,7 +300,7 @@ class CommunityRepository:
            )
            return bool(result)
        except Exception as e:
-            logger.error(f"update_community_metadata failed: {e}")
+            logger.error(f"update_community_metadata failed: {e}", exc_info=True)
            return False

    async def batch_update_community_metadata(
--- a/api/app/repositories/neo4j/cypher_queries.py
+++ b/api/app/repositories/neo4j/cypher_queries.py
@@ -1069,6 +1069,7 @@ Graph_Node_query = """

 COMMUNITY_NODE_UPSERT = """
 MERGE (c:Community {community_id: $community_id})
+ON CREATE SET c.id = $community_id
 SET c.end_user_id = $end_user_id,
    c.member_count = $member_count,
    c.updated_at = datetime()
@@ -1175,7 +1176,8 @@ RETURN c.community_id AS community_id, cnt AS member_count

 UPDATE_COMMUNITY_METADATA = """
 MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
-SET c.name             = $name,
+SET c.id               = coalesce(c.id, $community_id),
+    c.name             = $name,
    c.summary          = $summary,
    c.core_entities    = $core_entities,
    c.summary_embedding = $summary_embedding,
@@ -1186,7 +1188,8 @@ RETURN c.community_id AS community_id
 BATCH_UPDATE_COMMUNITY_METADATA = """
 UNWIND $communities AS row
 MATCH (c:Community {community_id: row.community_id, end_user_id: row.end_user_id})
-SET c.name             = row.name,
+SET c.id               = coalesce(c.id, row.community_id),
+    c.name             = row.name,
    c.summary          = row.summary,
    c.core_entities    = row.core_entities,
    c.summary_embedding = row.summary_embedding,
@@ -1270,6 +1273,40 @@ RETURN
    startNode(r) = e      AS r_from_e
 """

+CHECK_COMMUNITY_IS_COMPLETE = """
+MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
+RETURN (
+    c.name IS NOT NULL AND c.name <> '' AND
+    c.summary IS NOT NULL AND c.summary <> '' AND
+    c.core_entities IS NOT NULL
+) AS is_complete
+"""
+
+CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING = """
+MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
+RETURN (
+    c.name IS NOT NULL AND c.name <> '' AND
+    c.summary IS NOT NULL AND c.summary <> '' AND
+    c.core_entities IS NOT NULL AND
+    c.summary_embedding IS NOT NULL
+) AS is_complete
+"""
+
+GET_INCOMPLETE_COMMUNITIES = """
+MATCH (c:Community {end_user_id: $end_user_id})
+WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL
+   OR c.name = '' OR c.summary = ''
+RETURN c.community_id AS community_id
+"""
+
+GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING = """
+MATCH (c:Community {end_user_id: $end_user_id})
+WHERE c.name IS NULL OR c.name = ''
+   OR c.summary IS NULL OR c.summary = ''
+   OR c.core_entities IS NULL
+   OR (c.summary_embedding IS NULL AND c.summary IS NOT NULL AND c.summary <> '(empty)')
+RETURN c.community_id AS community_id
+"""

 # Community keyword search: matches name or summary via fulltext index
 SEARCH_COMMUNITIES_BY_KEYWORD = """
@@ -1325,39 +1362,4 @@ RETURN s.statement AS statement,
       c.name AS community_name
 ORDER BY COALESCE(s.activation_value, 0) DESC
 LIMIT $limit
-"""
-
-CHECK_COMMUNITY_IS_COMPLETE = """
-MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
-RETURN (
-    c.name IS NOT NULL AND c.name <> '' AND
-    c.summary IS NOT NULL AND c.summary <> '' AND
-    c.core_entities IS NOT NULL
-) AS is_complete
-"""
-
-CHECK_COMMUNITY_IS_COMPLETE_WITH_EMBEDDING = """
-MATCH (c:Community {community_id: $community_id, end_user_id: $end_user_id})
-RETURN (
-    c.name IS NOT NULL AND c.name <> '' AND
-    c.summary IS NOT NULL AND c.summary <> '' AND
-    c.core_entities IS NOT NULL AND
-    c.summary_embedding IS NOT NULL
-) AS is_complete
-"""
-
-GET_INCOMPLETE_COMMUNITIES = """
-MATCH (c:Community {end_user_id: $end_user_id})
-WHERE c.name IS NULL OR c.summary IS NULL OR c.core_entities IS NULL
-   OR c.name = '' OR c.summary = ''
-RETURN c.community_id AS community_id
-"""
-
-GET_INCOMPLETE_COMMUNITIES_WITH_EMBEDDING = """
-MATCH (c:Community {end_user_id: $end_user_id})
-WHERE c.name IS NULL OR c.name = ''
-   OR c.summary IS NULL OR c.summary = ''
-   OR c.core_entities IS NULL
-   OR (c.summary_embedding IS NULL AND c.summary IS NOT NULL AND c.summary <> '(empty)')
-RETURN c.community_id AS community_id
-"""
+"""
--- a/api/app/repositories/neo4j/graph_saver.py
+++ b/api/app/repositories/neo4j/graph_saver.py
@@ -162,7 +162,7 @@ async def save_dialog_and_statements_to_neo4j(
    """Save dialogue nodes, chunk nodes, statement nodes, entities, and all relationships to Neo4j using graph models.

    只负责数据写入，不触发聚类。聚类由调用方在写入成功后通过
-    schedule_clustering_after_write() 显式触发。
+    _trigger_clustering_sync() 显式触发。

    Args:
        dialogue_nodes: List of DialogueNode objects to save
@@ -303,16 +303,13 @@ async def save_dialog_and_statements_to_neo4j(
        return False


-def schedule_clustering_after_write(
+async def _trigger_clustering_sync(
    entity_nodes: List,
    llm_model_id: Optional[str] = None,
    embedding_model_id: Optional[str] = None,
 ) -> None:
    """
-    写入 Neo4j 成功后，调度后台聚类任务。
-
-    可通过环境变量 CLUSTERING_ENABLED=false 禁用（用于基准测试对比）。
-    使用 asyncio.create_task 异步触发，不阻塞写入响应。
+    同步等待聚类完成，避免与其他 LLM 任务并发冲突。
    """
    if not entity_nodes:
        return
@@ -324,8 +321,8 @@ def schedule_clustering_after_write(

    end_user_id = entity_nodes[0].end_user_id
    new_entity_ids = [e.id for e in entity_nodes]
-    logger.info(f"[Clustering] 准备触发聚类，实体数: {len(new_entity_ids)}, end_user_id: {end_user_id}")
-    asyncio.create_task(_trigger_clustering(new_entity_ids, end_user_id, llm_model_id=llm_model_id, embedding_model_id=embedding_model_id))
+    logger.info(f"[Clustering] 准备触发聚类（同步），实体数: {len(new_entity_ids)}, end_user_id: {end_user_id}")
+    await _trigger_clustering(new_entity_ids, end_user_id, llm_model_id=llm_model_id, embedding_model_id=embedding_model_id)


 async def _trigger_clustering(