From df34735a9bb2f15f7d9d19e92edbe857eaf75c5d Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Wed, 4 Mar 2026 12:08:57 +0800 Subject: [PATCH] [add] Set cache for the distribution of interest tags --- api/app/cache/__init__.py | 3 +- api/app/cache/memory/__init__.py | 2 + api/app/cache/memory/interest_memory.py | 122 ++++++++++++++++++ .../controllers/memory_agent_controller.py | 19 +++ api/app/core/config.py | 2 +- .../core/memory/analytics/hot_memory_tags.py | 23 +++- .../prompt/prompts/interest_filter.jinja2 | 84 +++++++----- api/env.example | 2 +- 8 files changed, 215 insertions(+), 42 deletions(-) create mode 100644 api/app/cache/memory/interest_memory.py diff --git a/api/app/cache/__init__.py b/api/app/cache/__init__.py index a79d4cb2..46d1c959 100644 --- a/api/app/cache/__init__.py +++ b/api/app/cache/__init__.py @@ -3,9 +3,10 @@ Cache 缓存模块 提供各种缓存功能的统一入口 """ -from .memory import EmotionMemoryCache, ImplicitMemoryCache +from .memory import EmotionMemoryCache, ImplicitMemoryCache, InterestMemoryCache __all__ = [ "EmotionMemoryCache", "ImplicitMemoryCache", + "InterestMemoryCache", ] diff --git a/api/app/cache/memory/__init__.py b/api/app/cache/memory/__init__.py index 4ada3153..0e21df0f 100644 --- a/api/app/cache/memory/__init__.py +++ b/api/app/cache/memory/__init__.py @@ -5,8 +5,10 @@ Memory 缓存模块 """ from .emotion_memory import EmotionMemoryCache from .implicit_memory import ImplicitMemoryCache +from .interest_memory import InterestMemoryCache __all__ = [ "EmotionMemoryCache", "ImplicitMemoryCache", + "InterestMemoryCache", ] diff --git a/api/app/cache/memory/interest_memory.py b/api/app/cache/memory/interest_memory.py new file mode 100644 index 00000000..108e2a37 --- /dev/null +++ b/api/app/cache/memory/interest_memory.py @@ -0,0 +1,122 @@ +""" +Interest Distribution Cache + +兴趣分布缓存模块 +用于缓存用户的兴趣分布标签数据,避免重复调用模型生成 +""" +import json +import logging +from typing import Optional, List, Dict, Any +from datetime import datetime + +from app.aioRedis 
import aio_redis + +logger = logging.getLogger(__name__) + +# Cache expiry in seconds (24 hours) +INTEREST_CACHE_EXPIRE = 86400 + + +class InterestMemoryCache: + """Redis-backed cache for per-user interest distributions.""" + + PREFIX = "cache:memory:interest_distribution" + + @classmethod + def _get_key(cls, end_user_id: str, language: str) -> str: + """Build the Redis key for one user and language. + + Args: + end_user_id: End-user ID. + language: Language code; it is part of the key, so each language is cached separately. + + Returns: + The full Redis key. + """ + return f"{cls.PREFIX}:by_user:{end_user_id}:{language}" + + @classmethod + async def set_interest_distribution( + cls, + end_user_id: str, + language: str, + data: List[Dict[str, Any]], + expire: int = INTEREST_CACHE_EXPIRE, + ) -> bool: + """Store a user's interest distribution in the cache. + + Args: + end_user_id: End-user ID. + language: Language code. + data: Interest distribution list, e.g. [{"name": "...", "frequency": ...}, ...]. + expire: Time-to-live in seconds; defaults to 24 hours. + + Returns: + True on success; False if serialization or the Redis write raised (the error is logged). + """ + try: + key = cls._get_key(end_user_id, language) + payload = { + "data": data, + "generated_at": datetime.now().astimezone().isoformat(), # timezone-aware, so the UTC offset is stored + "cached": True, + } + value = json.dumps(payload, ensure_ascii=False) + await aio_redis.set(key, value, ex=expire) + logger.info(f"设置兴趣分布缓存成功: {key}, 过期时间: {expire}秒") + return True + except Exception as e: + logger.error(f"设置兴趣分布缓存失败: {e}", exc_info=True) + return False + + @classmethod + async def get_interest_distribution( + cls, + end_user_id: str, + language: str, + ) -> Optional[List[Dict[str, Any]]]: + """Fetch a user's cached interest distribution. + + Args: + end_user_id: End-user ID. + language: Language code. + + Returns: + The cached list, or None when the key is missing/expired or the read failed (the error is logged). + """ + try: + key = cls._get_key(end_user_id, language) + value = await aio_redis.get(key) + if value: + payload = json.loads(value) + logger.info(f"命中兴趣分布缓存: {key}") + return payload.get("data") + logger.info(f"兴趣分布缓存不存在或已过期: {key}") + return None + except Exception as e: + logger.error(f"获取兴趣分布缓存失败: {e}", exc_info=True) + return None + + @classmethod + async def delete_interest_distribution( + cls, + end_user_id: str, + language: str, + ) -> bool: + """Delete a user's cached interest distribution. + + Args: + end_user_id: End-user ID. + language: Language code. + + Returns: + True if a key was removed; False if nothing was deleted or the call failed. + """ + try: + key = 
cls._get_key(end_user_id, language) + result = await aio_redis.delete(key) + logger.info(f"删除兴趣分布缓存: {key}, 结果: {result}") + return result > 0 + except Exception as e: + logger.error(f"删除兴趣分布缓存失败: {e}", exc_info=True) + return False diff --git a/api/app/controllers/memory_agent_controller.py b/api/app/controllers/memory_agent_controller.py index 8f2e5c31..1f070eb6 100644 --- a/api/app/controllers/memory_agent_controller.py +++ b/api/app/controllers/memory_agent_controller.py @@ -1,5 +1,6 @@ from typing import List, Optional +from app.cache.memory.interest_memory import InterestMemoryCache from app.celery_app import celery_app from app.core.error_codes import BizCode from app.core.language_utils import get_language_from_header @@ -684,11 +685,29 @@ async def get_interest_distribution_by_user_api( language = get_language_from_header(language_type) api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}") try: + # Try the cache first; the key does not include limit, so a hit is trimmed to the requested size + cached = await InterestMemoryCache.get_interest_distribution( + end_user_id=end_user_id, + language=language, + ) + if cached is not None: + api_logger.info(f"Interest distribution cache hit: end_user_id={end_user_id}") + return success(data=cached[:limit], msg="获取兴趣分布标签成功") + + # Cache miss: compute the distribution through the service result = await memory_agent_service.get_interest_distribution_by_user( end_user_id=end_user_id, limit=limit, language=language ) + + # Cache the fresh result (default TTL: 24 hours) + await InterestMemoryCache.set_interest_distribution( + end_user_id=end_user_id, + language=language, + data=result, + ) + return success(data=result, msg="获取兴趣分布标签成功") except Exception as e: api_logger.error(f"Interest distribution by user failed: {str(e)}") diff --git a/api/app/core/config.py b/api/app/core/config.py index 6a2cf206..d9132be2 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -230,7 +230,7 @@ class Settings: # General Ontology Type Configuration # ======================================================================== # 通用本体文件路径列表(逗号分隔) - 
GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "app/core/memory/ontology_services/General_purpose_entity.ttl") + GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "api/app/core/memory/ontology_services/General_purpose_entity.ttl") # 是否启用通用本体类型功能 ENABLE_GENERAL_ONTOLOGY_TYPES: bool = os.getenv("ENABLE_GENERAL_ONTOLOGY_TYPES", "true").lower() == "true" diff --git a/api/app/core/memory/analytics/hot_memory_tags.py b/api/app/core/memory/analytics/hot_memory_tags.py index da08e88e..1d2d5259 100644 --- a/api/app/core/memory/analytics/hot_memory_tags.py +++ b/api/app/core/memory/analytics/hot_memory_tags.py @@ -281,16 +281,25 @@ async def get_interest_distribution(end_user_id: str, limit: int = 10, by_user: return [] raw_tag_names = [tag for tag, freq in raw_tags_with_freq] + raw_freq_map = {tag: freq for tag, freq in raw_tags_with_freq} - # 使用兴趣活动专用prompt进行筛选 + # Filter with the interest-specific prompt (the LLM may also infer new tags semantically) interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language) - # 保留原始频率,按兴趣筛选结果过滤 - final_tags = [ - (tag, freq) - for tag, freq in raw_tags_with_freq - if tag in interest_tag_names - ] + # Build the final tag list: + # - tags present in the raw list keep their original frequency + # - tags newly inferred by the LLM (absent from the raw list) get a default frequency of 1 + final_tags = [] + seen = set() + for tag in interest_tag_names: + if tag in seen: + continue + seen.add(tag) + freq = raw_freq_map.get(tag, 1) + final_tags.append((tag, freq)) + + # Sort by frequency, descending + final_tags.sort(key=lambda x: x[1], reverse=True) return final_tags[:limit] finally: diff --git a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 index 1e3aac55..7957bf1c 100644 --- a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 @@ -1,47 +1,67 @@ {% if language == "zh" %} -You are a user interest analysis expert. 
Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in Chinese. +You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent. -**Keep Rules** (keep if any condition is met): -- Tags representing sports or physical activities the user actively participates in (e.g., '攀岩', '篮球', '游泳', '跑步') -- Tags representing cultural or entertainment hobbies (e.g., '读书', '看电影', '听音乐', '摄影') -- Tags representing learning or creative activities (e.g., '编程', '绘画', '写作', '烹饪') -- Tags representing specific interest domains or hobby categories (e.g., '历史', '天文', '园艺') +**Step 1 - Infer the underlying interest from each tag**: +Look at each tag and ask: "What hobby or interest does this tag suggest the user has?" -**Filter Rules** (remove if any condition is met): -- Pure object or tool names that do not represent an activity (e.g., '篮球鞋', '相机', '书桌') -- Pure location or venue names (e.g., '篮球场', '图书馆', '健身房') -- Abstract concepts or quality descriptions (e.g., '核心力量', '团队合作', '专注力') -- Person names, brand names, or proper nouns (e.g., '乔丹', 'Nike') +Examples of inference: +- '攀岩', '室内攀岩馆', '攀岩者数据仪表盘', '路线解锁地图', '指力', '路线等级', '当日攀岩流畅度' → '攀岩' +- '风光摄影元数据增强器', 'EXIF数据', '.CR2文件', '.NEF文件', '日出拍摄点', '曝光补偿', '光圈', '太阳高度角', '云量预测图层' → '摄影' +- '晨间冥想坚持天数', '身心协同峰值' → '冥想' +- '川味可视化', '川菜' → '烹饪' +- '开源项目命名建议', 'climbviz', '可视化', '力量增长雷达图' → '编程' 或 '数据可视化' +- '吉他', '指弹', '琴谱' → '吉他' +- '跑步', '5公里', '跑鞋' → '跑步' +- '瑜伽垫', '瑜伽课' → '瑜伽' -**Merge Rules**: For semantically similar tags, keep only the most representative one. -For example: keep '篮球' over '打篮球'; keep '读书' over '阅读'. 
+**Step 2 - Consolidate and deduplicate**: +- Merge tags that point to the same interest into one representative label +- Use concise, standard hobby names (e.g., '攀岩', '摄影', '编程', '烹饪', '冥想', '吉他', '跑步') +- If multiple tags all point to '攀岩', output '攀岩' only once + +**Step 3 - Filter out non-interest tags**: +Remove tags that do NOT suggest any hobby or interest: +- Generic system/assistant terms (e.g., '助手', '用户', 'AI') +- Pure abstract metrics with no clear hobby link (e.g., '完成时间', '日期', '自我评分') +- Location names with no clear hobby link (e.g., '青城山后山' alone — but if combined with photography context, infer '摄影') + +**Output format**: Return a list of concise interest activity names in Chinese. **Example**: -Input: ['攀岩', '篮球场', '篮球鞋', '篮球', '《三体》', '历史', '核心力量', '烹饪', '菜刀'] -Output: ['攀岩', '篮球', '历史', '烹饪'] +Input: ['攀岩', '攀岩者数据仪表盘', '路线解锁地图', '指力', '风光摄影元数据增强器', 'EXIF数据', '晨间冥想坚持天数', '川味可视化', '可视化', '助手', '完成时间'] +Output: ['攀岩', '摄影', '冥想', '烹饪', '编程'] -Please filter the following tag list and return only the tags that represent user interest activities in Chinese: {{ tag_list }} +Now process the following tag list and return the inferred interest activities in Chinese: {{ tag_list }} {% else %} -You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in English. +You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent. 
-**Keep Rules** (keep if any condition is met): -- Tags representing sports or physical activities the user actively participates in (e.g., 'rock climbing', 'basketball', 'swimming', 'running') -- Tags representing cultural or entertainment hobbies (e.g., 'reading', 'watching movies', 'listening to music', 'photography') -- Tags representing learning or creative activities (e.g., 'programming', 'painting', 'writing', 'cooking') -- Tags representing specific interest domains or hobby categories (e.g., 'history', 'astronomy', 'gardening') +**Step 1 - Infer the underlying interest from each tag**: +Look at each tag and ask: "What hobby or interest does this tag suggest the user has?" -**Filter Rules** (remove if any condition is met): -- Pure object or tool names that do not represent an activity (e.g., 'basketball shoes', 'camera', 'desk') -- Pure location or venue names (e.g., 'basketball court', 'library', 'gym') -- Abstract concepts or quality descriptions (e.g., 'core strength', 'teamwork', 'focus') -- Person names, brand names, or proper nouns (e.g., 'Jordan', 'Nike') +Examples of inference: +- 'rock climbing', 'indoor climbing gym', 'climber dashboard', 'route map', 'finger strength' → 'rock climbing' +- 'landscape photography metadata enhancer', 'EXIF data', 'sunrise shooting spot', 'exposure compensation' → 'photography' +- 'morning meditation streak', 'mind-body peak' → 'meditation' +- 'Sichuan cuisine visualization', 'Sichuan food' → 'cooking' +- 'open source project', 'data visualization tool', 'Python' → 'programming' +- 'guitar', 'fingerpicking', 'sheet music' → 'guitar' +- 'running', '5km', 'running shoes' → 'running' -**Merge Rules**: For semantically similar tags, keep only the most representative one. -For example: keep 'basketball' over 'playing basketball'; keep 'reading' over 'reading books'. 
+**Step 2 - Consolidate and deduplicate**: +- Merge tags that point to the same interest into one representative label +- Use concise, standard hobby names (e.g., 'rock climbing', 'photography', 'programming', 'cooking', 'meditation') +- If multiple tags all point to 'rock climbing', output 'rock climbing' only once + +**Step 3 - Filter out non-interest tags**: +Remove tags that do NOT suggest any hobby or interest: +- Generic system/assistant terms (e.g., 'assistant', 'user', 'AI') +- Pure abstract metrics with no clear hobby link (e.g., 'completion time', 'date', 'self-rating') + +**Output format**: Return a list of concise interest activity names in English. **Example**: -Input: ['rock climbing', 'basketball court', 'basketball shoes', 'basketball', 'The Three-Body Problem', 'history', 'core strength', 'cooking', 'kitchen knife'] -Output: ['rock climbing', 'basketball', 'history', 'cooking'] +Input: ['rock climbing', 'climber dashboard', 'route map', 'finger strength', 'landscape photography metadata enhancer', 'EXIF data', 'morning meditation streak', 'Sichuan cuisine visualization', 'visualization', 'assistant', 'completion time'] +Output: ['rock climbing', 'photography', 'meditation', 'cooking', 'programming'] -Please filter the following tag list and return only the tags that represent user interest activities in English: {{ tag_list }} +Now process the following tag list and return the inferred interest activities in English: {{ tag_list }} {% endif %} diff --git a/api/env.example b/api/env.example index d67bbf7c..1dc4536c 100644 --- a/api/env.example +++ b/api/env.example @@ -139,7 +139,7 @@ SMTP_USER= SMTP_PASSWORD= # 本体类型融合配置 (记得写入env_example) -GENERAL_ONTOLOGY_FILES=app/core/memory/ontology_services/General_purpose_entity.ttl # 指定要加载的本体文件路径,多个文件用逗号分隔 +GENERAL_ONTOLOGY_FILES=api/app/core/memory/ontology_services/General_purpose_entity.ttl # 指定要加载的本体文件路径,多个文件用逗号分隔 ENABLE_GENERAL_ONTOLOGY_TYPES=true # 总开关,控制是否启用通用本体类型融合功能(false = 不使用任何本体类型指导) 
MAX_ONTOLOGY_TYPES_IN_PROMPT=100 # 限制传给 LLM 的类型数量,防止 Prompt 过长 CORE_GENERAL_TYPES=Person,Organization,Place,Event,Work,Concept # 定义核心类型列表,这些类型会优先包含在合并结果中