From cd018814feba3fca3ac3d5f6beadb555fc272739 Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Fri, 10 Apr 2026 00:42:11 +0800
Subject: [PATCH] fix(memory): improve metadata language detection and clean_metadata logic

- Make MetadataExtractor language param optional (default None) to support
  auto-detection fallback when no language is explicitly set
- Refactor clean_metadata from walrus-operator dict comprehension to explicit
  loop for correctness and readability
---
 .../knowledge_extraction/metadata_extractor.py | 10 +++++++---
 api/app/tasks.py                               | 16 +++++++++++-----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/metadata_extractor.py b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/metadata_extractor.py index 8b749c40..19f1e533 100644 --- a/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/metadata_extractor.py +++ b/api/app/core/memory/storage_services/extraction_engine/knowledge_extraction/metadata_extractor.py @@ -31,7 +31,7 @@ def _is_user_entity(ent: ExtractedEntityNode) -> bool: class MetadataExtractor: """Extracts user metadata from post-dedup graph data via independent LLM call.""" - def __init__(self, llm_client, language: str = "zh"): + def __init__(self, llm_client, language: Optional[str] = None): self.llm_client = llm_client self.language = language @@ -134,8 +134,12 @@ class MetadataExtractor: try: from app.core.memory.utils.prompt.prompt_utils import prompt_env - detected_language = self.detect_language(statements) - logger.info(f"元数据提取语言检测结果: {detected_language}") + if self.language: + detected_language = self.language + logger.info(f"元数据提取使用显式指定语言: {detected_language}") + else: + detected_language = self.detect_language(statements) + logger.info(f"元数据提取语言自动检测结果: {detected_language}") template = prompt_env.get_template("extract_user_metadata.jinja2") prompt = template.render( diff --git 
a/api/app/tasks.py b/api/app/tasks.py index 6fd5f8d6..9afb6225 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -3030,11 +3030,17 @@ def extract_user_metadata_task( # 4. 清洗元数据、覆盖写入元数据和别名 def clean_metadata(raw: dict) -> dict: """递归移除空字符串、空列表、空字典。""" - return { - k: (cleaned if isinstance(v, dict) and (cleaned := clean_metadata(v)) else v) - for k, v in raw.items() - if not (v == "" or v == [] or (isinstance(v, dict) and not clean_metadata(v))) - } + result = {} + for k, v in raw.items(): + if v == "" or v == []: + continue + if isinstance(v, dict): + cleaned = clean_metadata(v) + if cleaned: + result[k] = cleaned + else: + result[k] = v + return result raw_dict = user_metadata.model_dump(exclude_none=True) if user_metadata else {} logger.info(f"[CELERY METADATA] LLM 输出完整元数据: {json.dumps(raw_dict, ensure_ascii=False)}")