fix(memory): improve metadata language detection and clean_metadata logic
- Make MetadataExtractor language param optional (default None): an explicitly set language is now used as-is, and auto-detection is the fallback when no language is explicitly set
- Refactor clean_metadata from walrus-operator dict comprehension to explicit loop for correctness and readability
This commit is contained in:
@@ -31,7 +31,7 @@ def _is_user_entity(ent: ExtractedEntityNode) -> bool:
|
|||||||
class MetadataExtractor:
|
class MetadataExtractor:
|
||||||
"""Extracts user metadata from post-dedup graph data via independent LLM call."""
|
"""Extracts user metadata from post-dedup graph data via independent LLM call."""
|
||||||
|
|
||||||
def __init__(self, llm_client, language: str = "zh"):
|
def __init__(self, llm_client, language: Optional[str] = None):
|
||||||
self.llm_client = llm_client
|
self.llm_client = llm_client
|
||||||
self.language = language
|
self.language = language
|
||||||
|
|
||||||
@@ -134,8 +134,12 @@ class MetadataExtractor:
|
|||||||
try:
|
try:
|
||||||
from app.core.memory.utils.prompt.prompt_utils import prompt_env
|
from app.core.memory.utils.prompt.prompt_utils import prompt_env
|
||||||
|
|
||||||
detected_language = self.detect_language(statements)
|
if self.language:
|
||||||
logger.info(f"元数据提取语言检测结果: {detected_language}")
|
detected_language = self.language
|
||||||
|
logger.info(f"元数据提取使用显式指定语言: {detected_language}")
|
||||||
|
else:
|
||||||
|
detected_language = self.detect_language(statements)
|
||||||
|
logger.info(f"元数据提取语言自动检测结果: {detected_language}")
|
||||||
|
|
||||||
template = prompt_env.get_template("extract_user_metadata.jinja2")
|
template = prompt_env.get_template("extract_user_metadata.jinja2")
|
||||||
prompt = template.render(
|
prompt = template.render(
|
||||||
|
|||||||
@@ -3030,11 +3030,17 @@ def extract_user_metadata_task(
|
|||||||
# 4. 清洗元数据、覆盖写入元数据和别名
|
# 4. 清洗元数据、覆盖写入元数据和别名
|
||||||
def clean_metadata(raw: dict) -> dict:
|
def clean_metadata(raw: dict) -> dict:
|
||||||
"""递归移除空字符串、空列表、空字典。"""
|
"""递归移除空字符串、空列表、空字典。"""
|
||||||
return {
|
result = {}
|
||||||
k: (cleaned if isinstance(v, dict) and (cleaned := clean_metadata(v)) else v)
|
for k, v in raw.items():
|
||||||
for k, v in raw.items()
|
if v == "" or v == []:
|
||||||
if not (v == "" or v == [] or (isinstance(v, dict) and not clean_metadata(v)))
|
continue
|
||||||
}
|
if isinstance(v, dict):
|
||||||
|
cleaned = clean_metadata(v)
|
||||||
|
if cleaned:
|
||||||
|
result[k] = cleaned
|
||||||
|
else:
|
||||||
|
result[k] = v
|
||||||
|
return result
|
||||||
|
|
||||||
raw_dict = user_metadata.model_dump(exclude_none=True) if user_metadata else {}
|
raw_dict = user_metadata.model_dump(exclude_none=True) if user_metadata else {}
|
||||||
logger.info(f"[CELERY METADATA] LLM 输出完整元数据: {json.dumps(raw_dict, ensure_ascii=False)}")
|
logger.info(f"[CELERY METADATA] LLM 输出完整元数据: {json.dumps(raw_dict, ensure_ascii=False)}")
|
||||||
|
|||||||
Reference in New Issue
Block a user