diff --git a/api/app/core/memory/read_services/content_search.py b/api/app/core/memory/read_services/content_search.py
index 58356e84..54d99060 100644
--- a/api/app/core/memory/read_services/content_search.py
+++ b/api/app/core/memory/read_services/content_search.py
@@ -12,8 +12,8 @@ from app.repositories.neo4j.neo4j_connector import Neo4jConnector
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_ALPHA = 0.7
-DEFAULT_FULLTEXT_SCORE_THRESHOLD = 1
+DEFAULT_ALPHA = 0.6
+DEFAULT_FULLTEXT_SCORE_THRESHOLD = 1.5
 DEFAULT_COSINE_SCORE_THRESHOLD = 0.5
 DEFAULT_CONTENT_SCORE_THRESHOLD = 0.5
 
@@ -112,7 +112,8 @@ class Neo4jSearchService:
             kw = float(combined[item_id].get("kw_score", 0) or 0)
             emb = float(combined[item_id].get("embedding_score", 0) or 0)
             base = self.alpha * emb + (1 - self.alpha) * kw
-            combined[item_id]["content_score"] = base + min(1 - base, kw * emb)
+            # Damped kw*emb interaction bonus; the min() cap keeps the total at or below 1.
+            combined[item_id]["content_score"] = base + min(1 - base, 0.1 * kw * emb)
         results = sorted(combined.values(), key=lambda x: x["content_score"], reverse=True)
         # results = [
         #     res for res in results
diff --git a/api/app/core/memory/read_services/result_builder.py b/api/app/core/memory/read_services/result_builder.py
index 949ff3ed..dd376c7c 100644
--- a/api/app/core/memory/read_services/result_builder.py
+++ b/api/app/core/memory/read_services/result_builder.py
@@ -61,14 +61,19 @@ class EntityBuilder(BaseBuilder):
     def data(self) -> dict:
         return {
             "id": self.record.get("id"),
-            "content": self.record.get("name"),
+            "name": self.record.get("name"),
+            "description": self.record.get("description"),
             "kw_score": self.record.get("kw_score", 0.0),
             "emb_score": self.record.get("embedding_score", 0.0)
         }
 
     @property
     def content(self) -> str:
-        return self.record.get("name")
+        """Return the entity name directly followed by its description."""
+        # `or ""` keeps a missing field from being rendered as the text "None".
+        name = self.record.get("name") or ""
+        description = self.record.get("description") or ""
+        return f"{name}{description}"
 
 
 class SummaryBuilder(BaseBuilder):
diff --git a/api/app/repositories/neo4j/create_indexes.py b/api/app/repositories/neo4j/create_indexes.py
index 7caeea8a..0a9aaf71 100644
--- a/api/app/repositories/neo4j/create_indexes.py
+++ b/api/app/repositories/neo4j/create_indexes.py
@@ -19,7 +19,10 @@ async def create_fulltext_indexes():
         # """)
         # 创建 Entities 索引
+        # NOTE(review): IF NOT EXISTS leaves an already-created entitiesFulltext index
+        # untouched, so existing databases likely need it dropped first - confirm.
         await connector.execute_query("""
-            CREATE FULLTEXT INDEX entitiesFulltext IF NOT EXISTS FOR (e:ExtractedEntity) ON EACH [e.name]
+            CREATE FULLTEXT INDEX entitiesFulltext IF NOT EXISTS
+            FOR (e:ExtractedEntity) ON EACH [e.name, e.description, e.aliases]
             OPTIONS { indexConfig: { `fulltext.analyzer`: 'cjk' } }
         """)
 
@@ -139,6 +142,25 @@ async def create_vector_indexes():
         await connector.close()
 
 
+async def create_user_indexes():
+    """Create the ``user_perceptual`` index on ``Perceptual.end_user_id``.
+
+    Opens its own ``Neo4jConnector`` and always closes it, even when the
+    query raises, so the connection is not leaked.
+    """
+    connector = Neo4jConnector()
+    try:
+        await connector.execute_query(
+            """
+            CREATE INDEX user_perceptual IF NOT EXISTS
+            FOR (p:Perceptual) ON (p.end_user_id);
+            """
+        )
+    finally:
+        # Mirror create_vector_indexes(), which also closes its connector.
+        await connector.close()
+
+
 async def create_unique_constraints():
     """Create uniqueness constraints for core node identifiers.
     Ensures concurrent MERGE operations remain safe and prevents duplicates.
diff --git a/api/app/repositories/neo4j/graph_search.py b/api/app/repositories/neo4j/graph_search.py
index 354c0e23..70913267 100644
--- a/api/app/repositories/neo4j/graph_search.py
+++ b/api/app/repositories/neo4j/graph_search.py
@@ -45,14 +45,17 @@ def cosine_similarity_search(
     vectors: np.ndarray = np.array(vectors, dtype=np.float32)
     vectors_norm = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
     query: np.ndarray = np.array(query, dtype=np.float32)
-    query_norm = query / np.linalg.norm(query)
+    norm = np.linalg.norm(query)
+    if norm == 0:
+        return {}
+    query_norm = query / norm
 
     similarities = vectors_norm @ query_norm
     similarities = np.clip(similarities, 0, 1)
     top_k = min(limit, similarities.shape[0])
     if top_k <= 0:
         return {}
-    top_indices = np.argpartition(-similarities, top_k - 1)[-top_k:]
+    top_indices = np.argpartition(-similarities, top_k - 1)[:top_k]
     top_indices = top_indices[np.argsort(-similarities[top_indices])]
     result = {}
     for idx in top_indices:
@@ -510,7 +513,7 @@ async def search_graph_by_embedding(
     task_keys = []
 
     for node_type in include:
-        tasks.append(search_by_embedding(connector, node_type, end_user_id, embedding, limit))
+        tasks.append(search_by_embedding(connector, node_type, end_user_id, embedding, limit * 2))
         task_keys.append(node_type.value)
 
     task_results = await asyncio.gather(*tasks, return_exceptions=True)