diff --git a/api/app/core/memory/agent/utils/write_tools.py b/api/app/core/memory/agent/utils/write_tools.py index 55bcb8ba..3af9326e 100644 --- a/api/app/core/memory/agent/utils/write_tools.py +++ b/api/app/core/memory/agent/utils/write_tools.py @@ -151,11 +151,6 @@ async def write( # Step 3: Save all data to Neo4j database step_start = time.time() - from app.repositories.neo4j.create_indexes import create_fulltext_indexes - try: - await create_fulltext_indexes() - except Exception as e: - logger.error(f"Error creating indexes: {e}", exc_info=True) # 添加死锁重试机制 max_retries = 3 diff --git a/api/app/main.py b/api/app/main.py index f4c23ca8..9e501f11 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -1,5 +1,6 @@ import os import subprocess +from app.repositories.neo4j.create_indexes import create_all_indexes from contextlib import asynccontextmanager from fastapi import FastAPI, APIRouter @@ -60,8 +61,10 @@ async def lifespan(app: FastAPI): logger.warning(f"加载预定义模型时出错: {str(e)}") else: logger.info("预定义模型加载已禁用 (LOAD_MODEL=false)") - + await create_all_indexes() logger.info("应用程序启动完成") + + yield # 应用关闭事件 logger.info("应用程序正在关闭") diff --git a/api/app/repositories/neo4j/create_indexes.py b/api/app/repositories/neo4j/create_indexes.py index d9e94117..5132aa09 100644 --- a/api/app/repositories/neo4j/create_indexes.py +++ b/api/app/repositories/neo4j/create_indexes.py @@ -1,62 +1,47 @@ +import asyncio from app.repositories.neo4j.neo4j_connector import Neo4jConnector - - async def create_fulltext_indexes(): """Create full-text indexes for keyword search with BM25 scoring.""" connector = Neo4jConnector() try: - print("\n" + "=" * 70) - print("Creating Full-Text Indexes (for keyword search)") - print("=" * 70) + # 创建 Statements 索引 await connector.execute_query(""" CREATE FULLTEXT INDEX statementsFulltext IF NOT EXISTS FOR (s:Statement) ON EACH [s.statement] OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } - """) - print("✓ Created: statementsFulltext") + """) # # 创建 Dialogues 索引 # await connector.execute_query(""" # CREATE FULLTEXT INDEX dialoguesFulltext IF NOT EXISTS FOR (d:Dialogue) ON EACH [d.content] # OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } # """) - # 创建 Entities 索引 await connector.execute_query(""" CREATE FULLTEXT INDEX entitiesFulltext IF NOT EXISTS FOR (e:ExtractedEntity) ON EACH [e.name] OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } - """) - print("✓ Created: entitiesFulltext") + """) # 创建 Chunks 索引 await connector.execute_query(""" CREATE FULLTEXT INDEX chunksFulltext IF NOT EXISTS FOR (c:Chunk) ON EACH [c.content] OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } - """) - print("✓ Created: chunksFulltext") + """) # 创建 MemorySummary 索引 await connector.execute_query(""" CREATE FULLTEXT INDEX summariesFulltext IF NOT EXISTS FOR (m:MemorySummary) ON EACH [m.content] OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } - """) - print("✓ Created: summariesFulltext") - + """) # 创建 Community 索引 await connector.execute_query(""" CREATE FULLTEXT INDEX communitiesFulltext IF NOT EXISTS FOR (c:Community) ON EACH [c.name, c.summary] OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } """) - print("✓ Created: communitiesFulltext") - print("\nFull-text indexes created successfully with BM25 support.") - except Exception as e: - print(f"✗ Error creating full-text indexes: {e}") finally: await connector.close() - - async def create_vector_indexes(): """Create vector indexes for fast embedding similarity search. @@ -65,12 +50,7 @@ async def create_vector_indexes(): """ connector = Neo4jConnector() try: - print("\n" + "=" * 70) - print("Creating Vector Indexes (for embedding search)") - print("=" * 70) - print("Note: Adjust vector.dimensions if using different embedding model") - print(" Current setting: 1024 dimensions (for bge-m3)") - print() + # Statement embedding index await connector.execute_query(""" @@ -82,7 +62,7 @@ async def create_vector_indexes(): `vector.similarity_function`: 'cosine' }} """) - print("✓ Created: statement_embedding_index") + # Chunk embedding index await connector.execute_query(""" @@ -94,7 +74,7 @@ async def create_vector_indexes(): `vector.similarity_function`: 'cosine' }} """) - print("✓ Created: chunk_embedding_index") + # Entity name embedding index await connector.execute_query(""" @@ -106,7 +86,7 @@ async def create_vector_indexes(): `vector.similarity_function`: 'cosine' }} """) - print("✓ Created: entity_embedding_index") + # Memory summary embedding index await connector.execute_query(""" @@ -118,8 +98,7 @@ async def create_vector_indexes(): `vector.similarity_function`: 'cosine' }} """) - print("✓ Created: summary_embedding_index") - + # Community summary embedding index await connector.execute_query(""" CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS @@ -129,8 +108,7 @@ async def create_vector_indexes(): `vector.dimensions`: 1024, `vector.similarity_function`: 'cosine' }} - """) - print("✓ Created: community_summary_embedding_index") + """) # Dialogue embedding index (optional) await connector.execute_query(""" @@ -142,91 +120,15 @@ async def create_vector_indexes(): `vector.similarity_function`: 'cosine' }} """) - print("✓ Created: dialogue_embedding_index") - - # Community summary embedding index - await connector.execute_query(""" - CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS - FOR (c:Community) - ON c.summary_embedding - OPTIONS {indexConfig: { - `vector.dimensions`: 1024, - `vector.similarity_function`: 'cosine' - }} - """) - print("✓ Created: community_summary_embedding_index") - print("\nVector indexes created successfully!") - print("\nExpected performance improvement:") - print(" Before: ~1.4s for embedding search") - print(" After: ~0.05-0.2s for embedding search (10-30x faster!)") - - except Exception as e: - print(f"✗ Error creating vector indexes: {e}") finally: await connector.close() - - -async def create_config_id_indexes(): - """Create indexes on config_id fields for improved query performance. - - These indexes enable fast filtering of nodes by configuration ID, - which is essential for configuration isolation and multi-tenant scenarios. - """ - connector = Neo4jConnector() - try: - print("\n" + "=" * 70) - print("Creating Config ID Indexes") - print("=" * 70) - - # Dialogue.config_id index - await connector.execute_query(""" - CREATE INDEX dialogue_config_id_index IF NOT EXISTS - FOR (d:Dialogue) ON (d.config_id) - """) - print("✓ Created: dialogue_config_id_index") - - # Statement.config_id index - await connector.execute_query(""" - CREATE INDEX statement_config_id_index IF NOT EXISTS - FOR (s:Statement) ON (s.config_id) - """) - print("✓ Created: statement_config_id_index") - - # ExtractedEntity.config_id index - await connector.execute_query(""" - CREATE INDEX entity_config_id_index IF NOT EXISTS - FOR (e:ExtractedEntity) ON (e.config_id) - """) - print("✓ Created: entity_config_id_index") - - # MemorySummary.config_id index - await connector.execute_query(""" - CREATE INDEX summary_config_id_index IF NOT EXISTS - FOR (m:MemorySummary) ON (m.config_id) - """) - print("✓ Created: summary_config_id_index") - - print("\nConfig ID indexes created successfully!") - print("These indexes enable fast filtering by configuration ID.") - - except Exception as e: - print(f"✗ Error creating config_id indexes: {e}") - finally: - await connector.close() - - async def create_unique_constraints(): """Create uniqueness constraints for core node identifiers. - Ensures concurrent MERGE operations remain safe and prevents duplicates. """ connector = Neo4jConnector() - try: - print("\n" + "=" * 70) - print("Creating Unique Constraints") - print("=" * 70) - + try: # Dialogue.id unique await connector.execute_query( """ @@ -234,8 +136,7 @@ async def create_unique_constraints(): FOR (d:Dialogue) REQUIRE d.id IS UNIQUE """ ) - print("✓ Created: dialog_id_unique") - + # Statement.id unique await connector.execute_query( """ @@ -243,8 +144,7 @@ async def create_unique_constraints(): FOR (s:Statement) REQUIRE s.id IS UNIQUE """ ) - print("✓ Created: statement_id_unique") - + # Chunk.id unique await connector.execute_query( """ @@ -252,112 +152,13 @@ async def create_unique_constraints(): FOR (c:Chunk) REQUIRE c.id IS UNIQUE """ ) - print("✓ Created: chunk_id_unique") - - print("\nUnique constraints ensured for Dialogue, Statement, and Chunk.") - except Exception as e: - print(f"✗ Error creating unique constraints: {e}") + finally: await connector.close() - - async def create_all_indexes(): """Create all indexes and constraints in one go.""" - print("\n" + "=" * 70) - print("Neo4j Index & Constraint Setup") - print("=" * 70) - print("This will create:") - print(" 1. Full-text indexes (for keyword/BM25 search)") - print(" 2. Vector indexes (for embedding similarity search)") - print(" 3. Config ID indexes (for configuration isolation)") - print(" 4. Unique constraints (for data integrity)") - print("=" * 70) - await create_fulltext_indexes() await create_vector_indexes() - await create_config_id_indexes() await create_unique_constraints() - - print("\n" + "=" * 70) print("✓ All indexes and constraints created successfully!") - print("=" * 70) - print("\nTo verify, run in Neo4j Browser:") - print(" SHOW INDEXES") - print(" SHOW CONSTRAINTS") - print() - - -async def check_indexes(): - """Check what indexes currently exist.""" - connector = Neo4jConnector() - - try: - print("\n" + "=" * 70) - print("Checking Existing Indexes") - print("=" * 70) - query = "SHOW INDEXES" - result = await connector.execute_query(query) - - fulltext_indexes = [idx for idx in result if idx.get('type') == 'FULLTEXT'] - vector_indexes = [idx for idx in result if idx.get('type') == 'VECTOR'] - range_indexes = [idx for idx in result if idx.get('type') == 'RANGE'] - - print(f"\nFull-text indexes: {len(fulltext_indexes)}") - for idx in fulltext_indexes: - print(f" ✓ {idx.get('name')}") - - print(f"\nVector indexes: {len(vector_indexes)}") - for idx in vector_indexes: - print(f" ✓ {idx.get('name')}") - - print(f"\nRange indexes (including config_id): {len(range_indexes)}") - for idx in range_indexes: - print(f" ✓ {idx.get('name')}") - - if not vector_indexes: - print("\n⚠️ WARNING: No vector indexes found!") - print(" Embedding search will be VERY SLOW (~1.4s)") - print(" Run: python create_indexes.py") - - # Check for config_id indexes - config_id_indexes = [idx for idx in range_indexes if 'config_id' in idx.get('name', '')] - if len(config_id_indexes) < 4: - print("\n⚠️ WARNING: Not all config_id indexes found!") - print(f" Expected 4, found {len(config_id_indexes)}") - print(" Run: python create_indexes.py config_id") - - print("=" * 70) - - finally: - await connector.close() - - -if __name__ == "__main__": - import asyncio - import sys - - if len(sys.argv) > 1: - command = sys.argv[1] - if command == "check": - asyncio.run(check_indexes()) - elif command == "fulltext": - asyncio.run(create_fulltext_indexes()) - elif command == "vector": - asyncio.run(create_vector_indexes()) - elif command == "config_id": - asyncio.run(create_config_id_indexes()) - elif command == "constraints": - asyncio.run(create_unique_constraints()) - else: - print(f"Unknown command: {command}") - print("\nUsage:") - print(" python create_indexes.py # Create all indexes") - print(" python create_indexes.py check # Check existing indexes") - print(" python create_indexes.py fulltext # Create only full-text indexes") - print(" python create_indexes.py vector # Create only vector indexes") - print(" python create_indexes.py config_id # Create only config_id indexes") - print(" python create_indexes.py constraints # Create only constraints") - else: - asyncio.run(create_all_indexes()) -