[add] Set cache for the distribution of interest tags
This commit is contained in:
3
api/app/cache/__init__.py
vendored
3
api/app/cache/__init__.py
vendored
@@ -3,9 +3,10 @@ Cache 缓存模块
|
|||||||
|
|
||||||
提供各种缓存功能的统一入口
|
提供各种缓存功能的统一入口
|
||||||
"""
|
"""
|
||||||
from .memory import EmotionMemoryCache, ImplicitMemoryCache
|
from .memory import EmotionMemoryCache, ImplicitMemoryCache, InterestMemoryCache
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"EmotionMemoryCache",
|
"EmotionMemoryCache",
|
||||||
"ImplicitMemoryCache",
|
"ImplicitMemoryCache",
|
||||||
|
"InterestMemoryCache",
|
||||||
]
|
]
|
||||||
|
|||||||
2
api/app/cache/memory/__init__.py
vendored
2
api/app/cache/memory/__init__.py
vendored
@@ -5,8 +5,10 @@ Memory 缓存模块
|
|||||||
"""
|
"""
|
||||||
from .emotion_memory import EmotionMemoryCache
|
from .emotion_memory import EmotionMemoryCache
|
||||||
from .implicit_memory import ImplicitMemoryCache
|
from .implicit_memory import ImplicitMemoryCache
|
||||||
|
from .interest_memory import InterestMemoryCache
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"EmotionMemoryCache",
|
"EmotionMemoryCache",
|
||||||
"ImplicitMemoryCache",
|
"ImplicitMemoryCache",
|
||||||
|
"InterestMemoryCache",
|
||||||
]
|
]
|
||||||
|
|||||||
122
api/app/cache/memory/interest_memory.py
vendored
Normal file
122
api/app/cache/memory/interest_memory.py
vendored
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
"""
|
||||||
|
Interest Distribution Cache
|
||||||
|
|
||||||
|
兴趣分布缓存模块
|
||||||
|
用于缓存用户的兴趣分布标签数据,避免重复调用模型生成
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from app.aioRedis import aio_redis
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# 缓存过期时间:24小时
|
||||||
|
INTEREST_CACHE_EXPIRE = 86400
|
||||||
|
|
||||||
|
|
||||||
|
class InterestMemoryCache:
    """Redis-backed cache for a user's interest-distribution tags.

    Each entry is stored as a JSON document under a key derived from the
    end-user ID and the language, so that the tags do not have to be
    regenerated on every request.
    """

    PREFIX = "cache:memory:interest_distribution"

    @classmethod
    def _get_key(cls, end_user_id: str, language: str) -> str:
        """Build the Redis key for one user/language pair.

        Args:
            end_user_id: Identifier of the end user.
            language: Language code the tags were generated in.

        Returns:
            The full Redis key, namespaced under ``PREFIX``.
        """
        return f"{cls.PREFIX}:by_user:{end_user_id}:{language}"

    @classmethod
    async def set_interest_distribution(
        cls,
        end_user_id: str,
        language: str,
        data: List[Dict[str, Any]],
        expire: int = INTEREST_CACHE_EXPIRE,
    ) -> bool:
        """Store the interest distribution for a user.

        The data is wrapped in an envelope that also records the local
        generation timestamp and a ``cached`` marker before being
        serialized to JSON.

        Args:
            end_user_id: Identifier of the end user.
            language: Language code the tags were generated in.
            data: Interest distribution entries to cache.
            expire: Time-to-live in seconds, passed to Redis as ``ex``;
                defaults to ``INTEREST_CACHE_EXPIRE``.

        Returns:
            True when the value was written, False when any error occurred
            (the error is logged, never raised).
        """
        try:
            key = cls._get_key(end_user_id, language)
            envelope = {
                "data": data,
                "generated_at": datetime.now().isoformat(),
                "cached": True,
            }
            # ensure_ascii=False keeps non-ASCII tag names readable in Redis.
            serialized = json.dumps(envelope, ensure_ascii=False)
            await aio_redis.set(key, serialized, ex=expire)
            logger.info(f"设置兴趣分布缓存成功: {key}, 过期时间: {expire}秒")
            return True
        except Exception as e:
            # Caching is best-effort: report the failure to the caller
            # through the return value instead of propagating.
            logger.error(f"设置兴趣分布缓存失败: {e}", exc_info=True)
            return False

    @classmethod
    async def get_interest_distribution(
        cls,
        end_user_id: str,
        language: str,
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch the cached interest distribution for a user.

        Args:
            end_user_id: Identifier of the end user.
            language: Language code the tags were generated in.

        Returns:
            The ``data`` field of the cached envelope, or None when the
            key holds no value or any error occurred while reading or
            decoding it (the error is logged, never raised).
        """
        try:
            key = cls._get_key(end_user_id, language)
            raw = await aio_redis.get(key)
            if not raw:
                # Missing and expired keys are indistinguishable here.
                logger.info(f"兴趣分布缓存不存在或已过期: {key}")
                return None

            envelope = json.loads(raw)
            logger.info(f"命中兴趣分布缓存: {key}")
            return envelope.get("data")
        except Exception as e:
            logger.error(f"获取兴趣分布缓存失败: {e}", exc_info=True)
            return None

    @classmethod
    async def delete_interest_distribution(
        cls,
        end_user_id: str,
        language: str,
    ) -> bool:
        """Remove the cached interest distribution for a user.

        Args:
            end_user_id: Identifier of the end user.
            language: Language code the tags were generated in.

        Returns:
            True when the delete call reported a count greater than zero,
            False otherwise or when any error occurred (the error is
            logged, never raised).
        """
        try:
            key = cls._get_key(end_user_id, language)
            result = await aio_redis.delete(key)
            logger.info(f"删除兴趣分布缓存: {key}, 结果: {result}")
            return result > 0
        except Exception as e:
            logger.error(f"删除兴趣分布缓存失败: {e}", exc_info=True)
            return False
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from app.cache.memory.interest_memory import InterestMemoryCache
|
||||||
from app.celery_app import celery_app
|
from app.celery_app import celery_app
|
||||||
from app.core.error_codes import BizCode
|
from app.core.error_codes import BizCode
|
||||||
from app.core.language_utils import get_language_from_header
|
from app.core.language_utils import get_language_from_header
|
||||||
@@ -684,11 +685,29 @@ async def get_interest_distribution_by_user_api(
|
|||||||
language = get_language_from_header(language_type)
|
language = get_language_from_header(language_type)
|
||||||
api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}")
|
api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}")
|
||||||
try:
|
try:
|
||||||
|
# 优先读取缓存
|
||||||
|
cached = await InterestMemoryCache.get_interest_distribution(
|
||||||
|
end_user_id=end_user_id,
|
||||||
|
language=language,
|
||||||
|
)
|
||||||
|
if cached is not None:
|
||||||
|
api_logger.info(f"Interest distribution cache hit: end_user_id={end_user_id}")
|
||||||
|
return success(data=cached, msg="获取兴趣分布标签成功")
|
||||||
|
|
||||||
|
# 缓存未命中,调用模型生成
|
||||||
result = await memory_agent_service.get_interest_distribution_by_user(
|
result = await memory_agent_service.get_interest_distribution_by_user(
|
||||||
end_user_id=end_user_id,
|
end_user_id=end_user_id,
|
||||||
limit=limit,
|
limit=limit,
|
||||||
language=language
|
language=language
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 写入缓存,24小时过期
|
||||||
|
await InterestMemoryCache.set_interest_distribution(
|
||||||
|
end_user_id=end_user_id,
|
||||||
|
language=language,
|
||||||
|
data=result,
|
||||||
|
)
|
||||||
|
|
||||||
return success(data=result, msg="获取兴趣分布标签成功")
|
return success(data=result, msg="获取兴趣分布标签成功")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
api_logger.error(f"Interest distribution by user failed: {str(e)}")
|
api_logger.error(f"Interest distribution by user failed: {str(e)}")
|
||||||
|
|||||||
@@ -230,7 +230,7 @@ class Settings:
|
|||||||
# General Ontology Type Configuration
|
# General Ontology Type Configuration
|
||||||
# ========================================================================
|
# ========================================================================
|
||||||
# 通用本体文件路径列表(逗号分隔)
|
# 通用本体文件路径列表(逗号分隔)
|
||||||
GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "app/core/memory/ontology_services/General_purpose_entity.ttl")
|
GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "api/app/core/memory/ontology_services/General_purpose_entity.ttl")
|
||||||
|
|
||||||
# 是否启用通用本体类型功能
|
# 是否启用通用本体类型功能
|
||||||
ENABLE_GENERAL_ONTOLOGY_TYPES: bool = os.getenv("ENABLE_GENERAL_ONTOLOGY_TYPES", "true").lower() == "true"
|
ENABLE_GENERAL_ONTOLOGY_TYPES: bool = os.getenv("ENABLE_GENERAL_ONTOLOGY_TYPES", "true").lower() == "true"
|
||||||
|
|||||||
@@ -281,16 +281,25 @@ async def get_interest_distribution(end_user_id: str, limit: int = 10, by_user:
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
raw_tag_names = [tag for tag, freq in raw_tags_with_freq]
|
raw_tag_names = [tag for tag, freq in raw_tags_with_freq]
|
||||||
|
raw_freq_map = {tag: freq for tag, freq in raw_tags_with_freq}
|
||||||
|
|
||||||
# 使用兴趣活动专用prompt进行筛选
|
# 使用兴趣活动专用prompt进行筛选(支持语义推断出新标签)
|
||||||
interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language)
|
interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language)
|
||||||
|
|
||||||
# 保留原始频率,按兴趣筛选结果过滤
|
# 构建最终标签列表:
|
||||||
final_tags = [
|
# - 原始标签中存在的,保留原始频率
|
||||||
(tag, freq)
|
# - LLM推断出的新标签(不在原始列表中),赋予默认频率1
|
||||||
for tag, freq in raw_tags_with_freq
|
final_tags = []
|
||||||
if tag in interest_tag_names
|
seen = set()
|
||||||
]
|
for tag in interest_tag_names:
|
||||||
|
if tag in seen:
|
||||||
|
continue
|
||||||
|
seen.add(tag)
|
||||||
|
freq = raw_freq_map.get(tag, 1)
|
||||||
|
final_tags.append((tag, freq))
|
||||||
|
|
||||||
|
# 按频率降序排列
|
||||||
|
final_tags.sort(key=lambda x: x[1], reverse=True)
|
||||||
|
|
||||||
return final_tags[:limit]
|
return final_tags[:limit]
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -1,47 +1,67 @@
|
|||||||
{% if language == "zh" %}
|
{% if language == "zh" %}
|
||||||
You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in Chinese.
|
You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent.
|
||||||
|
|
||||||
**Keep Rules** (keep if any condition is met):
|
**Step 1 - Infer the underlying interest from each tag**:
|
||||||
- Tags representing sports or physical activities the user actively participates in (e.g., '攀岩', '篮球', '游泳', '跑步')
|
Look at each tag and ask: "What hobby or interest does this tag suggest the user has?"
|
||||||
- Tags representing cultural or entertainment hobbies (e.g., '读书', '看电影', '听音乐', '摄影')
|
|
||||||
- Tags representing learning or creative activities (e.g., '编程', '绘画', '写作', '烹饪')
|
|
||||||
- Tags representing specific interest domains or hobby categories (e.g., '历史', '天文', '园艺')
|
|
||||||
|
|
||||||
**Filter Rules** (remove if any condition is met):
|
Examples of inference:
|
||||||
- Pure object or tool names that do not represent an activity (e.g., '篮球鞋', '相机', '书桌')
|
- '攀岩', '室内攀岩馆', '攀岩者数据仪表盘', '路线解锁地图', '指力', '路线等级', '当日攀岩流畅度' → '攀岩'
|
||||||
- Pure location or venue names (e.g., '篮球场', '图书馆', '健身房')
|
- '风光摄影元数据增强器', 'EXIF数据', '.CR2文件', '.NEF文件', '日出拍摄点', '曝光补偿', '光圈', '太阳高度角', '云量预测图层' → '摄影'
|
||||||
- Abstract concepts or quality descriptions (e.g., '核心力量', '团队合作', '专注力')
|
- '晨间冥想坚持天数', '身心协同峰值' → '冥想'
|
||||||
- Person names, brand names, or proper nouns (e.g., '乔丹', 'Nike')
|
- '川味可视化', '川菜' → '烹饪'
|
||||||
|
- '开源项目命名建议', 'climbviz', '可视化', '力量增长雷达图' → '编程' 或 '数据可视化'
|
||||||
|
- '吉他', '指弹', '琴谱' → '吉他'
|
||||||
|
- '跑步', '5公里', '跑鞋' → '跑步'
|
||||||
|
- '瑜伽垫', '瑜伽课' → '瑜伽'
|
||||||
|
|
||||||
**Merge Rules**: For semantically similar tags, keep only the most representative one.
|
**Step 2 - Consolidate and deduplicate**:
|
||||||
For example: keep '篮球' over '打篮球'; keep '读书' over '阅读'.
|
- Merge tags that point to the same interest into one representative label
|
||||||
|
- Use concise, standard hobby names (e.g., '攀岩', '摄影', '编程', '烹饪', '冥想', '吉他', '跑步')
|
||||||
|
- If multiple tags all point to '攀岩', output '攀岩' only once
|
||||||
|
|
||||||
|
**Step 3 - Filter out non-interest tags**:
|
||||||
|
Remove tags that do NOT suggest any hobby or interest:
|
||||||
|
- Generic system/assistant terms (e.g., '助手', '用户', 'AI')
|
||||||
|
- Pure abstract metrics with no clear hobby link (e.g., '完成时间', '日期', '自我评分')
|
||||||
|
- Location names with no clear hobby link (e.g., '青城山后山' alone — but if combined with photography context, infer '摄影')
|
||||||
|
|
||||||
|
**Output format**: Return a list of concise interest activity names in Chinese.
|
||||||
|
|
||||||
**Example**:
|
**Example**:
|
||||||
Input: ['攀岩', '篮球场', '篮球鞋', '篮球', '《三体》', '历史', '核心力量', '烹饪', '菜刀']
|
Input: ['攀岩', '攀岩者数据仪表盘', '路线解锁地图', '指力', '风光摄影元数据增强器', 'EXIF数据', '晨间冥想坚持天数', '川味可视化', '可视化', '助手', '完成时间']
|
||||||
Output: ['攀岩', '篮球', '历史', '烹饪']
|
Output: ['攀岩', '摄影', '冥想', '烹饪', '编程']
|
||||||
|
|
||||||
Please filter the following tag list and return only the tags that represent user interest activities in Chinese: {{ tag_list }}
|
Now process the following tag list and return the inferred interest activities in Chinese: {{ tag_list }}
|
||||||
{% else %}
|
{% else %}
|
||||||
You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in English.
|
You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent.
|
||||||
|
|
||||||
**Keep Rules** (keep if any condition is met):
|
**Step 1 - Infer the underlying interest from each tag**:
|
||||||
- Tags representing sports or physical activities the user actively participates in (e.g., 'rock climbing', 'basketball', 'swimming', 'running')
|
Look at each tag and ask: "What hobby or interest does this tag suggest the user has?"
|
||||||
- Tags representing cultural or entertainment hobbies (e.g., 'reading', 'watching movies', 'listening to music', 'photography')
|
|
||||||
- Tags representing learning or creative activities (e.g., 'programming', 'painting', 'writing', 'cooking')
|
|
||||||
- Tags representing specific interest domains or hobby categories (e.g., 'history', 'astronomy', 'gardening')
|
|
||||||
|
|
||||||
**Filter Rules** (remove if any condition is met):
|
Examples of inference:
|
||||||
- Pure object or tool names that do not represent an activity (e.g., 'basketball shoes', 'camera', 'desk')
|
- 'rock climbing', 'indoor climbing gym', 'climber dashboard', 'route map', 'finger strength' → 'rock climbing'
|
||||||
- Pure location or venue names (e.g., 'basketball court', 'library', 'gym')
|
- 'landscape photography metadata enhancer', 'EXIF data', 'sunrise shooting spot', 'exposure compensation' → 'photography'
|
||||||
- Abstract concepts or quality descriptions (e.g., 'core strength', 'teamwork', 'focus')
|
- 'morning meditation streak', 'mind-body peak' → 'meditation'
|
||||||
- Person names, brand names, or proper nouns (e.g., 'Jordan', 'Nike')
|
- 'Sichuan cuisine visualization', 'Sichuan food' → 'cooking'
|
||||||
|
- 'open source project', 'data visualization tool', 'Python' → 'programming'
|
||||||
|
- 'guitar', 'fingerpicking', 'sheet music' → 'guitar'
|
||||||
|
- 'running', '5km', 'running shoes' → 'running'
|
||||||
|
|
||||||
**Merge Rules**: For semantically similar tags, keep only the most representative one.
|
**Step 2 - Consolidate and deduplicate**:
|
||||||
For example: keep 'basketball' over 'playing basketball'; keep 'reading' over 'reading books'.
|
- Merge tags that point to the same interest into one representative label
|
||||||
|
- Use concise, standard hobby names (e.g., 'rock climbing', 'photography', 'programming', 'cooking', 'meditation')
|
||||||
|
- If multiple tags all point to 'rock climbing', output 'rock climbing' only once
|
||||||
|
|
||||||
|
**Step 3 - Filter out non-interest tags**:
|
||||||
|
Remove tags that do NOT suggest any hobby or interest:
|
||||||
|
- Generic system/assistant terms (e.g., 'assistant', 'user', 'AI')
|
||||||
|
- Pure abstract metrics with no clear hobby link (e.g., 'completion time', 'date', 'self-rating')
|
||||||
|
|
||||||
|
**Output format**: Return a list of concise interest activity names in English.
|
||||||
|
|
||||||
**Example**:
|
**Example**:
|
||||||
Input: ['rock climbing', 'basketball court', 'basketball shoes', 'basketball', 'The Three-Body Problem', 'history', 'core strength', 'cooking', 'kitchen knife']
|
Input: ['rock climbing', 'climber dashboard', 'route map', 'finger strength', 'landscape photography metadata enhancer', 'EXIF data', 'morning meditation streak', 'Sichuan cuisine visualization', 'visualization', 'assistant', 'completion time']
|
||||||
Output: ['rock climbing', 'basketball', 'history', 'cooking']
|
Output: ['rock climbing', 'photography', 'meditation', 'cooking', 'programming']
|
||||||
|
|
||||||
Please filter the following tag list and return only the tags that represent user interest activities in English: {{ tag_list }}
|
Now process the following tag list and return the inferred interest activities in English: {{ tag_list }}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|||||||
@@ -139,7 +139,7 @@ SMTP_USER=
|
|||||||
SMTP_PASSWORD=
|
SMTP_PASSWORD=
|
||||||
|
|
||||||
# 本体类型融合配置 (记得写入env_example)
|
# 本体类型融合配置 (记得写入env_example)
|
||||||
GENERAL_ONTOLOGY_FILES=app/core/memory/ontology_services/General_purpose_entity.ttl # 指定要加载的本体文件路径,多个文件用逗号分隔
|
GENERAL_ONTOLOGY_FILES=api/app/core/memory/ontology_services/General_purpose_entity.ttl # 指定要加载的本体文件路径,多个文件用逗号分隔
|
||||||
ENABLE_GENERAL_ONTOLOGY_TYPES=true # 总开关,控制是否启用通用本体类型融合功能(false = 不使用任何本体类型指导)
|
ENABLE_GENERAL_ONTOLOGY_TYPES=true # 总开关,控制是否启用通用本体类型融合功能(false = 不使用任何本体类型指导)
|
||||||
MAX_ONTOLOGY_TYPES_IN_PROMPT=100 # 限制传给 LLM 的类型数量,防止 Prompt 过长
|
MAX_ONTOLOGY_TYPES_IN_PROMPT=100 # 限制传给 LLM 的类型数量,防止 Prompt 过长
|
||||||
CORE_GENERAL_TYPES=Person,Organization,Place,Event,Work,Concept # 定义核心类型列表,这些类型会优先包含在合并结果中
|
CORE_GENERAL_TYPES=Person,Organization,Place,Event,Work,Concept # 定义核心类型列表,这些类型会优先包含在合并结果中
|
||||||
|
|||||||
Reference in New Issue
Block a user