[fix] Modify Index Creation

This commit is contained in:
lanceyq
2026-03-30 18:14:31 +08:00
parent 052c7c19b3
commit 418114ef72
2 changed files with 18 additions and 163 deletions

View File

@@ -1,5 +1,6 @@
import os import os
import subprocess import subprocess
from app.repositories.neo4j.create_indexes import create_all_indexes
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from fastapi import FastAPI, APIRouter from fastapi import FastAPI, APIRouter
@@ -60,16 +61,9 @@ async def lifespan(app: FastAPI):
logger.warning(f"加载预定义模型时出错: {str(e)}") logger.warning(f"加载预定义模型时出错: {str(e)}")
else: else:
logger.info("预定义模型加载已禁用 (LOAD_MODEL=false)") logger.info("预定义模型加载已禁用 (LOAD_MODEL=false)")
await create_all_indexes()
logger.info("应用程序启动完成") logger.info("应用程序启动完成")
# 初始化 Neo4j 索引和约束(仅启动时执行一次)
try:
from app.repositories.neo4j.create_indexes import create_all_indexes
await create_all_indexes()
logger.info("Neo4j 索引和约束初始化完成")
except Exception as e:
logger.warning(f"Neo4j 索引初始化失败(服务仍可启动,但查询性能可能受影响): {e}")
yield yield
# 应用关闭事件 # 应用关闭事件

View File

@@ -1,62 +1,47 @@
import asyncio
from app.repositories.neo4j.neo4j_connector import Neo4jConnector from app.repositories.neo4j.neo4j_connector import Neo4jConnector
async def create_fulltext_indexes(): async def create_fulltext_indexes():
"""Create full-text indexes for keyword search with BM25 scoring.""" """Create full-text indexes for keyword search with BM25 scoring."""
connector = Neo4jConnector() connector = Neo4jConnector()
try: try:
print("\n" + "=" * 70)
print("Creating Full-Text Indexes (for keyword search)")
print("=" * 70)
# 创建 Statements 索引 # 创建 Statements 索引
await connector.execute_query(""" await connector.execute_query("""
CREATE FULLTEXT INDEX statementsFulltext IF NOT EXISTS FOR (s:Statement) ON EACH [s.statement] CREATE FULLTEXT INDEX statementsFulltext IF NOT EXISTS FOR (s:Statement) ON EACH [s.statement]
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
""") """)
print("✓ Created: statementsFulltext")
# # 创建 Dialogues 索引 # # 创建 Dialogues 索引
# await connector.execute_query(""" # await connector.execute_query("""
# CREATE FULLTEXT INDEX dialoguesFulltext IF NOT EXISTS FOR (d:Dialogue) ON EACH [d.content] # CREATE FULLTEXT INDEX dialoguesFulltext IF NOT EXISTS FOR (d:Dialogue) ON EACH [d.content]
# OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } # OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
# """) # """)
# 创建 Entities 索引 # 创建 Entities 索引
await connector.execute_query(""" await connector.execute_query("""
CREATE FULLTEXT INDEX entitiesFulltext IF NOT EXISTS FOR (e:ExtractedEntity) ON EACH [e.name] CREATE FULLTEXT INDEX entitiesFulltext IF NOT EXISTS FOR (e:ExtractedEntity) ON EACH [e.name]
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
""") """)
print("✓ Created: entitiesFulltext")
# 创建 Chunks 索引 # 创建 Chunks 索引
await connector.execute_query(""" await connector.execute_query("""
CREATE FULLTEXT INDEX chunksFulltext IF NOT EXISTS FOR (c:Chunk) ON EACH [c.content] CREATE FULLTEXT INDEX chunksFulltext IF NOT EXISTS FOR (c:Chunk) ON EACH [c.content]
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
""") """)
print("✓ Created: chunksFulltext")
# 创建 MemorySummary 索引 # 创建 MemorySummary 索引
await connector.execute_query(""" await connector.execute_query("""
CREATE FULLTEXT INDEX summariesFulltext IF NOT EXISTS FOR (m:MemorySummary) ON EACH [m.content] CREATE FULLTEXT INDEX summariesFulltext IF NOT EXISTS FOR (m:MemorySummary) ON EACH [m.content]
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
""") """)
print("✓ Created: summariesFulltext")
# 创建 Community 索引 # 创建 Community 索引
await connector.execute_query(""" await connector.execute_query("""
CREATE FULLTEXT INDEX communitiesFulltext IF NOT EXISTS FOR (c:Community) ON EACH [c.name, c.summary] CREATE FULLTEXT INDEX communitiesFulltext IF NOT EXISTS FOR (c:Community) ON EACH [c.name, c.summary]
OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } } OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
""") """)
print("✓ Created: communitiesFulltext")
print("\nFull-text indexes created successfully with BM25 support.")
except Exception as e:
print(f"✗ Error creating full-text indexes: {e}")
finally: finally:
await connector.close() await connector.close()
async def create_vector_indexes(): async def create_vector_indexes():
"""Create vector indexes for fast embedding similarity search. """Create vector indexes for fast embedding similarity search.
@@ -65,12 +50,7 @@ async def create_vector_indexes():
""" """
connector = Neo4jConnector() connector = Neo4jConnector()
try: try:
print("\n" + "=" * 70)
print("Creating Vector Indexes (for embedding search)")
print("=" * 70)
print("Note: Adjust vector.dimensions if using different embedding model")
print(" Current setting: 1024 dimensions (for bge-m3)")
print()
# Statement embedding index # Statement embedding index
await connector.execute_query(""" await connector.execute_query("""
@@ -82,7 +62,7 @@ async def create_vector_indexes():
`vector.similarity_function`: 'cosine' `vector.similarity_function`: 'cosine'
}} }}
""") """)
print("✓ Created: statement_embedding_index")
# Chunk embedding index # Chunk embedding index
await connector.execute_query(""" await connector.execute_query("""
@@ -94,7 +74,7 @@ async def create_vector_indexes():
`vector.similarity_function`: 'cosine' `vector.similarity_function`: 'cosine'
}} }}
""") """)
print("✓ Created: chunk_embedding_index")
# Entity name embedding index # Entity name embedding index
await connector.execute_query(""" await connector.execute_query("""
@@ -106,7 +86,7 @@ async def create_vector_indexes():
`vector.similarity_function`: 'cosine' `vector.similarity_function`: 'cosine'
}} }}
""") """)
print("✓ Created: entity_embedding_index")
# Memory summary embedding index # Memory summary embedding index
await connector.execute_query(""" await connector.execute_query("""
@@ -118,8 +98,7 @@ async def create_vector_indexes():
`vector.similarity_function`: 'cosine' `vector.similarity_function`: 'cosine'
}} }}
""") """)
print("✓ Created: summary_embedding_index")
# Community summary embedding index # Community summary embedding index
await connector.execute_query(""" await connector.execute_query("""
CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS CREATE VECTOR INDEX community_summary_embedding_index IF NOT EXISTS
@@ -129,8 +108,7 @@ async def create_vector_indexes():
`vector.dimensions`: 1024, `vector.dimensions`: 1024,
`vector.similarity_function`: 'cosine' `vector.similarity_function`: 'cosine'
}} }}
""") """)
print("✓ Created: community_summary_embedding_index")
# Dialogue embedding index (optional) # Dialogue embedding index (optional)
await connector.execute_query(""" await connector.execute_query("""
@@ -142,31 +120,15 @@ async def create_vector_indexes():
`vector.similarity_function`: 'cosine' `vector.similarity_function`: 'cosine'
}} }}
""") """)
print("✓ Created: dialogue_embedding_index")
print("\nVector indexes created successfully!")
print("\nExpected performance improvement:")
print(" Before: ~1.4s for embedding search")
print(" After: ~0.05-0.2s for embedding search (10-30x faster!)")
except Exception as e:
print(f"✗ Error creating vector indexes: {e}")
finally: finally:
await connector.close() await connector.close()
async def create_unique_constraints(): async def create_unique_constraints():
"""Create uniqueness constraints for core node identifiers. """Create uniqueness constraints for core node identifiers.
Ensures concurrent MERGE operations remain safe and prevents duplicates. Ensures concurrent MERGE operations remain safe and prevents duplicates.
""" """
connector = Neo4jConnector() connector = Neo4jConnector()
try: try:
print("\n" + "=" * 70)
print("Creating Unique Constraints")
print("=" * 70)
# Dialogue.id unique # Dialogue.id unique
await connector.execute_query( await connector.execute_query(
""" """
@@ -174,8 +136,7 @@ async def create_unique_constraints():
FOR (d:Dialogue) REQUIRE d.id IS UNIQUE FOR (d:Dialogue) REQUIRE d.id IS UNIQUE
""" """
) )
print("✓ Created: dialog_id_unique")
# Statement.id unique # Statement.id unique
await connector.execute_query( await connector.execute_query(
""" """
@@ -183,8 +144,7 @@ async def create_unique_constraints():
FOR (s:Statement) REQUIRE s.id IS UNIQUE FOR (s:Statement) REQUIRE s.id IS UNIQUE
""" """
) )
print("✓ Created: statement_id_unique")
# Chunk.id unique # Chunk.id unique
await connector.execute_query( await connector.execute_query(
""" """
@@ -192,112 +152,13 @@ async def create_unique_constraints():
FOR (c:Chunk) REQUIRE c.id IS UNIQUE FOR (c:Chunk) REQUIRE c.id IS UNIQUE
""" """
) )
print("✓ Created: chunk_id_unique")
print("\nUnique constraints ensured for Dialogue, Statement, and Chunk.")
except Exception as e:
print(f"✗ Error creating unique constraints: {e}")
finally: finally:
await connector.close() await connector.close()
async def create_all_indexes(): async def create_all_indexes():
"""Create all indexes and constraints in one go.""" """Create all indexes and constraints in one go."""
print("\n" + "=" * 70)
print("Neo4j Index & Constraint Setup")
print("=" * 70)
print("This will create:")
print(" 1. Full-text indexes (for keyword/BM25 search)")
print(" 2. Vector indexes (for embedding similarity search)")
print(" 3. Config ID indexes (for configuration isolation)")
print(" 4. Unique constraints (for data integrity)")
print("=" * 70)
await create_fulltext_indexes() await create_fulltext_indexes()
await create_vector_indexes() await create_vector_indexes()
await create_config_id_indexes()
await create_unique_constraints() await create_unique_constraints()
print("\n" + "=" * 70)
print("✓ All indexes and constraints created successfully!") print("✓ All indexes and constraints created successfully!")
print("=" * 70)
print("\nTo verify, run in Neo4j Browser:")
print(" SHOW INDEXES")
print(" SHOW CONSTRAINTS")
print()
async def check_indexes():
"""Check what indexes currently exist."""
connector = Neo4jConnector()
try:
print("\n" + "=" * 70)
print("Checking Existing Indexes")
print("=" * 70)
query = "SHOW INDEXES"
result = await connector.execute_query(query)
fulltext_indexes = [idx for idx in result if idx.get('type') == 'FULLTEXT']
vector_indexes = [idx for idx in result if idx.get('type') == 'VECTOR']
range_indexes = [idx for idx in result if idx.get('type') == 'RANGE']
print(f"\nFull-text indexes: {len(fulltext_indexes)}")
for idx in fulltext_indexes:
print(f"{idx.get('name')}")
print(f"\nVector indexes: {len(vector_indexes)}")
for idx in vector_indexes:
print(f"{idx.get('name')}")
print(f"\nRange indexes (including config_id): {len(range_indexes)}")
for idx in range_indexes:
print(f"{idx.get('name')}")
if not vector_indexes:
print("\n⚠️ WARNING: No vector indexes found!")
print(" Embedding search will be VERY SLOW (~1.4s)")
print(" Run: python create_indexes.py")
# Check for config_id indexes
config_id_indexes = [idx for idx in range_indexes if 'config_id' in idx.get('name', '')]
if len(config_id_indexes) < 4:
print("\n⚠️ WARNING: Not all config_id indexes found!")
print(f" Expected 4, found {len(config_id_indexes)}")
print(" Run: python create_indexes.py config_id")
print("=" * 70)
finally:
await connector.close()
if __name__ == "__main__":
import asyncio
import sys
if len(sys.argv) > 1:
command = sys.argv[1]
if command == "check":
asyncio.run(check_indexes())
elif command == "fulltext":
asyncio.run(create_fulltext_indexes())
elif command == "vector":
asyncio.run(create_vector_indexes())
elif command == "config_id":
asyncio.run(create_config_id_indexes())
elif command == "constraints":
asyncio.run(create_unique_constraints())
else:
print(f"Unknown command: {command}")
print("\nUsage:")
print(" python create_indexes.py # Create all indexes")
print(" python create_indexes.py check # Check existing indexes")
print(" python create_indexes.py fulltext # Create only full-text indexes")
print(" python create_indexes.py vector # Create only vector indexes")
print(" python create_indexes.py config_id # Create only config_id indexes")
print(" python create_indexes.py constraints # Create only constraints")
else:
asyncio.run(create_all_indexes())