refactor(memory): redesign metadata extraction as async pipeline step

- Replace extract_user_metadata_task with entity-level extract_metadata_batch_task
- Add MetadataExtractionStep following ExtractionStep pattern with Jinja2 prompts
- Flatten MetadataExtractionResponse to 9-field schema (aliases, core_facts, etc.)
- Add Cypher queries for incremental metadata writeback and alias edge redirection
- Wire _extract_metadata into WritePipeline as Step 3.6 (fire-and-forget)
- Add pilot_write() to MemoryService; refactor pilot_run_service to use it
- Extract snapshot logic into WriteSnapshotRecorder
This commit is contained in:
lanceyq
2026-04-29 18:16:24 +08:00
parent 4af9b02815
commit d66d601e41
23 changed files with 1437 additions and 819 deletions

View File

@@ -12,7 +12,6 @@ from typing import Awaitable, Callable, Optional
from app.core.config import settings
from app.core.logging_config import get_memory_logger, log_time
- from app.core.memory.pipelines.pilot_write_pipeline import PilotWritePipeline
from app.core.memory.models.message_models import (
ConversationContext,
ConversationMessage,
@@ -306,14 +305,11 @@ async def run_pilot_extraction(
logger.warning(f"Failed to load ontology types: {e}", exc_info=True)
if use_refactored:
- pilot_pipeline = PilotWritePipeline(
- llm_client=llm_client,
- embedder_client=embedder_client,
- pipeline_config=get_pipeline_config(memory_config),
- progress_callback=progress_callback,
- embedding_id=str(memory_config.embedding_model_id),
- language=language,
- ontology_types=ontology_types,
+ from app.core.memory.memory_service import MemoryService
+ memory_service = MemoryService(
+ memory_config=memory_config,
+ end_user_id=str(memory_config.workspace_id),
)
log_time("Pilot Pipeline Initialization", time.time() - step_start, log_file)
@@ -325,7 +321,11 @@ async def run_pilot_extraction(
if progress_callback:
await progress_callback("knowledge_extraction", "正在知识抽取...")
- pilot_result = await pilot_pipeline.run(chunked_dialogs)
+ pilot_result = await memory_service.pilot_write(
+ chunked_dialogs=chunked_dialogs,
+ language=language,
+ progress_callback=progress_callback,
+ )
dialog_data_list = pilot_result.dialog_data_list
graph = pilot_result.graph
chunk_nodes = graph.chunk_nodes