refactor(memory): remove first-person pronoun replacement and inline metadata utils

- Remove _replace_first_person_with_user from StatementExtractor to preserve
  original user text for downstream metadata/alias extraction
- Delete metadata_utils.py module, inline clean_metadata into Celery task
- Remove unused imports and commented-out collect_user_raw_messages method
- Apply formatting cleanup across metadata models and extraction orchestrator
This commit is contained in:
lanceyq
2026-04-10 00:29:18 +08:00
parent 15a863b41a
commit e0b7e95af6
6 changed files with 69 additions and 127 deletions

View File

@@ -2969,7 +2969,6 @@ def extract_user_metadata_task(
async def _run() -> Dict[str, Any]:
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.metadata_extractor import MetadataExtractor
from app.core.memory.utils.metadata_utils import clean_metadata, validate_metadata
from app.repositories.end_user_info_repository import EndUserInfoRepository
from app.repositories.end_user_repository import EndUserRepository
from app.services.memory_config_service import MemoryConfigService
@@ -3029,6 +3028,14 @@ def extract_user_metadata_task(
logger.info(f"[CELERY METADATA] LLM 别名新增: {aliases_to_add}, 移除: {aliases_to_remove}")
# 4. 清洗元数据、覆盖写入元数据和别名
def clean_metadata(raw: dict) -> dict:
    """Return a copy of *raw* with empty values removed, recursing into dicts.

    A key is dropped when its value is:

    * an empty string (``""``),
    * an empty list (``[]``), or
    * a dict that is empty once it has itself been cleaned.

    Nested dicts that survive are replaced by their cleaned copies. Lists are
    not descended into, so a non-empty list is kept exactly as given, even if
    it contains empty strings or empty dicts. Other falsy values such as
    ``None``, ``0`` and ``False`` are kept.

    Args:
        raw: Mapping to clean. It is not modified.

    Returns:
        A new dict holding only the non-empty entries.
    """
    result = {}
    for key, value in raw.items():
        if isinstance(value, dict):
            # Recurse exactly once per nested dict and reuse the result for
            # both the emptiness check and the stored value; cleaning it twice
            # doubles the work at every nesting level.
            value = clean_metadata(value)
            if not value:
                continue
        elif value == "" or value == []:
            continue
        result[key] = value
    return result
raw_dict = user_metadata.model_dump(exclude_none=True) if user_metadata else {}
logger.info(f"[CELERY METADATA] LLM 输出完整元数据: {json.dumps(raw_dict, ensure_ascii=False)}")