From 8466c8e0192c84641ec2bd8f088075604b814bb2 Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Tue, 3 Mar 2026 23:30:54 +0800
Subject: [PATCH 1/6] [fix] Revise the selection method for interest
 analysis tags

---
 .../controllers/memory_agent_controller.py    |  32 ++---
 .../core/memory/analytics/hot_memory_tags.py  | 112 ++++++++++++++++++
 .../core/memory/utils/prompt/prompt_utils.py  |  17 +++
 .../prompt/prompts/interest_filter.jinja2     |  47 ++++++++
 api/app/services/memory_agent_service.py      |  32 ++---
 5 files changed, 210 insertions(+), 30 deletions(-)
 create mode 100644 api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2

diff --git a/api/app/controllers/memory_agent_controller.py b/api/app/controllers/memory_agent_controller.py
index b88e65ff..8f2e5c31 100644
--- a/api/app/controllers/memory_agent_controller.py
+++ b/api/app/controllers/memory_agent_controller.py
@@ -661,34 +661,38 @@ async def get_knowledge_type_stats_api(
         return fail(BizCode.INTERNAL_ERROR, "获取知识库类型统计失败", str(e))
 
 
-@router.get("/analytics/hot_memory_tags/by_user", response_model=ApiResponse)
-async def get_hot_memory_tags_by_user_api(
-    end_user_id: Optional[str] = Query(None, description="用户ID（可选）"),
-    limit: int = Query(20, description="返回标签数量限制"),
+@router.get("/analytics/interest_distribution/by_user", response_model=ApiResponse)
+async def get_interest_distribution_by_user_api(
+    end_user_id: Optional[str] = Query(None, description="用户ID（必填）"),
+    limit: int = Query(5, le=5, description="返回兴趣标签数量限制，最多5个"),
+    language_type: str = Header(default=None, alias="X-Language-Type"),
     current_user: User = Depends(get_current_user),
-    db: Session=Depends(get_db),
+    db: Session = Depends(get_db),
 ):
     """
-    获取指定用户的热门记忆标签
+    获取指定用户的兴趣分布标签
     
-    注意：标签语言由写入时的 X-Language-Type 决定，查询时不进行翻译
+    与热门标签不同，此接口专注于识别用户的兴趣活动（运动、爱好、学习、创作等），
+    过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
     
     返回格式：
     [
-        {"name": "标签名", "frequency": 频次},
+        {"name": "兴趣活动名", "frequency": 频次},
         ...
     ]
     """
-    api_logger.info(f"Hot memory tags by user requested: end_user_id={end_user_id}")
+    language = get_language_from_header(language_type)
+    api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}")
     try:
-        result = await memory_agent_service.get_hot_memory_tags_by_user(
+        result = await memory_agent_service.get_interest_distribution_by_user(
             end_user_id=end_user_id,
-            limit=limit
+            limit=limit,
+            language=language
         )
-        return success(data=result, msg="获取热门记忆标签成功")
+        return success(data=result, msg="获取兴趣分布标签成功")
     except Exception as e:
-        api_logger.error(f"Hot memory tags by user failed: {str(e)}")
-        return fail(BizCode.INTERNAL_ERROR, "获取热门记忆标签失败", str(e))
+        api_logger.error(f"Interest distribution by user failed: {str(e)}")
+        return fail(BizCode.INTERNAL_ERROR, "获取兴趣分布标签失败", str(e))
 
 
 @router.get("/analytics/user_profile", response_model=ApiResponse)
diff --git a/api/app/core/memory/analytics/hot_memory_tags.py b/api/app/core/memory/analytics/hot_memory_tags.py
index abb0f138..da08e88e 100644
--- a/api/app/core/memory/analytics/hot_memory_tags.py
+++ b/api/app/core/memory/analytics/hot_memory_tags.py
@@ -16,6 +16,10 @@ class FilteredTags(BaseModel):
     """用于接收LLM筛选后的核心标签列表的模型。"""
     meaningful_tags: List[str] = Field(..., description="从原始列表中筛选出的具有核心代表意义的名词列表。")
 
+class InterestTags(BaseModel):
+    """用于接收LLM筛选后的兴趣活动标签列表的模型。"""
+    interest_tags: List[str] = Field(..., description="从原始列表中筛选出的代表用户兴趣活动的标签列表。")
+
 async def filter_tags_with_llm(tags: List[str], end_user_id: str) -> List[str]:
     """
     使用LLM筛选标签列表，仅保留具有代表性的核心名词。
@@ -89,6 +93,70 @@ async def filter_tags_with_llm(tags: List[str], end_user_id: str) -> List[str]:
         # 在LLM失败时返回原始标签，确保流程继续
         return tags
 
+async def filter_interests_with_llm(tags: List[str], end_user_id: str, language: str = "zh") -> List[str]:
+    """
+    使用LLM从标签列表中筛选出代表用户兴趣活动的标签。
+    
+    与 filter_tags_with_llm 不同，此函数专注于识别"活动/行为"类兴趣，
+    过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
+    
+    Args:
+        tags: 原始标签列表
+        end_user_id: 用户ID，用于获取LLM配置
+        
+    Returns:
+        筛选后的兴趣活动标签列表
+    """
+    try:
+        with get_db_context() as db:
+            from app.services.memory_agent_service import (
+                get_end_user_connected_config,
+            )
+            connected_config = get_end_user_connected_config(end_user_id, db)
+            config_id = connected_config.get("memory_config_id")
+            workspace_id = connected_config.get("workspace_id")
+
+            if not config_id and not workspace_id:
+                raise ValueError(
+                    f"No memory_config_id found for end_user_id: {end_user_id}."
+                )
+
+            config_service = MemoryConfigService(db)
+            memory_config = config_service.load_memory_config(
+                config_id=config_id,
+                workspace_id=workspace_id
+            )
+
+            if not memory_config.llm_model_id:
+                raise ValueError(
+                    f"No llm_model_id found in memory config {config_id}."
+                )
+
+            factory = MemoryClientFactory(db)
+            llm_client = factory.get_llm_client(memory_config.llm_model_id)
+
+        tag_list_str = ", ".join(tags)
+        from app.core.memory.utils.prompt.prompt_utils import render_interest_filter_prompt
+        rendered_prompt = render_interest_filter_prompt(tag_list_str, language=language)
+        messages = [
+            {
+                "role": "user",
+                "content": rendered_prompt
+            }
+        ]
+
+        structured_response = await llm_client.response_structured(
+            messages=messages,
+            response_model=InterestTags
+        )
+
+        return structured_response.interest_tags
+
+    except Exception as e:
+        print(f"兴趣标签LLM筛选过程中发生错误: {e}")
+        return tags
+
+
 async def get_raw_tags_from_db(
     connector: Neo4jConnector,
     end_user_id: str,
@@ -183,3 +251,47 @@ async def get_hot_memory_tags(end_user_id: str, limit: int = 10, by_user: bool =
     finally:
         # 确保关闭连接
         await connector.close()
+
+async def get_interest_distribution(end_user_id: str, limit: int = 10, by_user: bool = False, language: str = "zh") -> List[Tuple[str, int]]:
+    """
+    获取用户的兴趣分布标签。
+    
+    与 get_hot_memory_tags 不同，此函数使用专门针对"活动/行为"的LLM prompt，
+    过滤掉纯物品、工具、地点等，只保留能代表用户兴趣爱好的活动类标签。
+
+    Args:
+        end_user_id: 必需参数。如果by_user=False，则为end_user_id；如果by_user=True，则为user_id
+        limit: 最终返回的标签数量限制（默认10）
+        by_user: 是否按user_id查询（默认False，按end_user_id查询）
+
+    Raises:
+        ValueError: 如果end_user_id未提供或为空
+    """
+    if not end_user_id or not end_user_id.strip():
+        raise ValueError(
+            "end_user_id is required. Please provide a valid end_user_id or user_id."
+        )
+
+    connector = Neo4jConnector()
+    try:
+        # 查询更多原始标签，给LLM提供充足上下文
+        query_limit = 40
+        raw_tags_with_freq = await get_raw_tags_from_db(connector, end_user_id, query_limit, by_user=by_user)
+        if not raw_tags_with_freq:
+            return []
+
+        raw_tag_names = [tag for tag, freq in raw_tags_with_freq]
+
+        # 使用兴趣活动专用prompt进行筛选
+        interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language)
+
+        # 保留原始频率，按兴趣筛选结果过滤
+        final_tags = [
+            (tag, freq)
+            for tag, freq in raw_tags_with_freq
+            if tag in interest_tag_names
+        ]
+
+        return final_tags[:limit]
+    finally:
+        await connector.close()
diff --git a/api/app/core/memory/utils/prompt/prompt_utils.py b/api/app/core/memory/utils/prompt/prompt_utils.py
index d88f50cf..0cea98f2 100644
--- a/api/app/core/memory/utils/prompt/prompt_utils.py
+++ b/api/app/core/memory/utils/prompt/prompt_utils.py
@@ -548,3 +548,20 @@ async def render_ontology_extraction_prompt(
     })
     
     return rendered_prompt
+
+
+def render_interest_filter_prompt(tag_list: str, language: str = "zh") -> str:
+    """
+    Renders the interest filter prompt using the interest_filter.jinja2 template.
+
+    Args:
+        tag_list: Comma-separated string of raw tags to filter
+        language: Output language ("zh" for Chinese, "en" for English)
+
+    Returns:
+        Rendered prompt content as string
+    """
+    template = prompt_env.get_template("interest_filter.jinja2")
+    rendered_prompt = template.render(tag_list=tag_list, language=language)
+    log_prompt_rendering('interest filter', rendered_prompt)
+    return rendered_prompt
diff --git a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
new file mode 100644
index 00000000..1e3aac55
--- /dev/null
+++ b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
@@ -0,0 +1,47 @@
+{% if language == "zh" %}
+You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in Chinese.
+
+**Keep Rules** (keep if any condition is met):
+- Tags representing sports or physical activities the user actively participates in (e.g., '攀岩', '篮球', '游泳', '跑步')
+- Tags representing cultural or entertainment hobbies (e.g., '读书', '看电影', '听音乐', '摄影')
+- Tags representing learning or creative activities (e.g., '编程', '绘画', '写作', '烹饪')
+- Tags representing specific interest domains or hobby categories (e.g., '历史', '天文', '园艺')
+
+**Filter Rules** (remove if any condition is met):
+- Pure object or tool names that do not represent an activity (e.g., '篮球鞋', '相机', '书桌')
+- Pure location or venue names (e.g., '篮球场', '图书馆', '健身房')
+- Abstract concepts or quality descriptions (e.g., '核心力量', '团队合作', '专注力')
+- Person names, brand names, or proper nouns (e.g., '乔丹', 'Nike')
+
+**Merge Rules**: For semantically similar tags, keep only the most representative one.
+For example: keep '篮球' over '打篮球'; keep '读书' over '阅读'.
+
+**Example**:
+Input: ['攀岩', '篮球场', '篮球鞋', '篮球', '《三体》', '历史', '核心力量', '烹饪', '菜刀']
+Output: ['攀岩', '篮球', '历史', '烹饪']
+
+Please filter the following tag list and return only the tags that represent user interest activities in Chinese: {{ tag_list }}
+{% else %}
+You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in English.
+
+**Keep Rules** (keep if any condition is met):
+- Tags representing sports or physical activities the user actively participates in (e.g., 'rock climbing', 'basketball', 'swimming', 'running')
+- Tags representing cultural or entertainment hobbies (e.g., 'reading', 'watching movies', 'listening to music', 'photography')
+- Tags representing learning or creative activities (e.g., 'programming', 'painting', 'writing', 'cooking')
+- Tags representing specific interest domains or hobby categories (e.g., 'history', 'astronomy', 'gardening')
+
+**Filter Rules** (remove if any condition is met):
+- Pure object or tool names that do not represent an activity (e.g., 'basketball shoes', 'camera', 'desk')
+- Pure location or venue names (e.g., 'basketball court', 'library', 'gym')
+- Abstract concepts or quality descriptions (e.g., 'core strength', 'teamwork', 'focus')
+- Person names, brand names, or proper nouns (e.g., 'Jordan', 'Nike')
+
+**Merge Rules**: For semantically similar tags, keep only the most representative one.
+For example: keep 'basketball' over 'playing basketball'; keep 'reading' over 'reading books'.
+
+**Example**:
+Input: ['rock climbing', 'basketball court', 'basketball shoes', 'basketball', 'The Three-Body Problem', 'history', 'core strength', 'cooking', 'kitchen knife']
+Output: ['rock climbing', 'basketball', 'history', 'cooking']
+
+Please filter the following tag list and return only the tags that represent user interest activities in English: {{ tag_list }}
+{% endif %}
diff --git a/api/app/services/memory_agent_service.py b/api/app/services/memory_agent_service.py
index 1f3667a6..16aee283 100644
--- a/api/app/services/memory_agent_service.py
+++ b/api/app/services/memory_agent_service.py
@@ -36,7 +36,7 @@ from app.core.memory.agent.utils.messages_tools import (
 )
 from app.core.memory.agent.utils.type_classifier import status_typle
 from app.core.memory.agent.utils.write_tools import write  # 新增：直接导入 write 函数
-from app.core.memory.analytics.hot_memory_tags import get_hot_memory_tags
+from app.core.memory.analytics.hot_memory_tags import get_hot_memory_tags, get_interest_distribution
 from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
 from app.db import get_db_context
 from app.models.knowledge_model import Knowledge, KnowledgeType
@@ -890,36 +890,36 @@ class MemoryAgentService:
         return result
 
 
-    async def get_hot_memory_tags_by_user(
+
+    async def get_interest_distribution_by_user(
         self,
         end_user_id: Optional[str] = None,
-        limit: int = 20
+        limit: int = 5,
+        language: str = "zh"
     ) -> List[Dict[str, Any]]:
         """
-        获取指定用户的热门记忆标签
+        获取指定用户的兴趣分布标签。
+        
+        与热门标签不同，此接口专注于识别用户的兴趣活动（运动、爱好、学习等），
+        过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
 
         参数：
-        - end_user_id: 用户ID（可选），对应Neo4j中的end_user_id字段
+        - end_user_id: 用户ID（必填）
         - limit: 返回标签数量限制
+        - language: 输出语言（"zh" 中文, "en" 英文）
 
         返回格式：
         [
-            {"name": "标签名", "frequency": 频次},
+            {"name": "兴趣活动名", "frequency": 频次},
             ...
         ]
-        
-        注意：标签语言由写入时的 X-Language-Type 决定，查询时不进行翻译
         """
         try:
-            # by_user=False 表示按 end_user_id 查询（在Neo4j中，end_user_id就是用户维度）
-            tags = await get_hot_memory_tags(end_user_id, limit=limit, by_user=False)
-            payload = []
-            for tag, freq in tags:
-                payload.append({"name": tag, "frequency": freq})
-            return payload
+            tags = await get_interest_distribution(end_user_id, limit=limit, by_user=False, language=language)
+            return [{"name": tag, "frequency": freq} for tag, freq in tags]
         except Exception as e:
-            logger.error(f"热门记忆标签查询失败: {e}")
-            raise Exception(f"热门记忆标签查询失败: {e}")
+            logger.error(f"兴趣分布标签查询失败: {e}")
+            raise Exception(f"兴趣分布标签查询失败: {e}")
 
 
     async def get_user_profile(

From 9115ad6950dc30c5fa20aaa0c0d9f3aa23cca7a9 Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Tue, 3 Mar 2026 23:30:54 +0800
Subject: [PATCH 2/6] [fix] Revise the selection method for interest
 analysis tags

---
 .../controllers/memory_agent_controller.py    |  32 ++---
 .../core/memory/analytics/hot_memory_tags.py  | 112 ++++++++++++++++++
 .../core/memory/utils/prompt/prompt_utils.py  |  17 +++
 .../prompt/prompts/interest_filter.jinja2     |  47 ++++++++
 api/app/services/memory_agent_service.py      |  32 ++---
 5 files changed, 210 insertions(+), 30 deletions(-)
 create mode 100644 api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2

diff --git a/api/app/controllers/memory_agent_controller.py b/api/app/controllers/memory_agent_controller.py
index b88e65ff..8f2e5c31 100644
--- a/api/app/controllers/memory_agent_controller.py
+++ b/api/app/controllers/memory_agent_controller.py
@@ -661,34 +661,38 @@ async def get_knowledge_type_stats_api(
         return fail(BizCode.INTERNAL_ERROR, "获取知识库类型统计失败", str(e))
 
 
-@router.get("/analytics/hot_memory_tags/by_user", response_model=ApiResponse)
-async def get_hot_memory_tags_by_user_api(
-    end_user_id: Optional[str] = Query(None, description="用户ID（可选）"),
-    limit: int = Query(20, description="返回标签数量限制"),
+@router.get("/analytics/interest_distribution/by_user", response_model=ApiResponse)
+async def get_interest_distribution_by_user_api(
+    end_user_id: Optional[str] = Query(None, description="用户ID（必填）"),
+    limit: int = Query(5, le=5, description="返回兴趣标签数量限制，最多5个"),
+    language_type: str = Header(default=None, alias="X-Language-Type"),
     current_user: User = Depends(get_current_user),
-    db: Session=Depends(get_db),
+    db: Session = Depends(get_db),
 ):
     """
-    获取指定用户的热门记忆标签
+    获取指定用户的兴趣分布标签
     
-    注意：标签语言由写入时的 X-Language-Type 决定，查询时不进行翻译
+    与热门标签不同，此接口专注于识别用户的兴趣活动（运动、爱好、学习、创作等），
+    过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
     
     返回格式：
     [
-        {"name": "标签名", "frequency": 频次},
+        {"name": "兴趣活动名", "frequency": 频次},
         ...
     ]
     """
-    api_logger.info(f"Hot memory tags by user requested: end_user_id={end_user_id}")
+    language = get_language_from_header(language_type)
+    api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}")
     try:
-        result = await memory_agent_service.get_hot_memory_tags_by_user(
+        result = await memory_agent_service.get_interest_distribution_by_user(
             end_user_id=end_user_id,
-            limit=limit
+            limit=limit,
+            language=language
         )
-        return success(data=result, msg="获取热门记忆标签成功")
+        return success(data=result, msg="获取兴趣分布标签成功")
     except Exception as e:
-        api_logger.error(f"Hot memory tags by user failed: {str(e)}")
-        return fail(BizCode.INTERNAL_ERROR, "获取热门记忆标签失败", str(e))
+        api_logger.error(f"Interest distribution by user failed: {str(e)}")
+        return fail(BizCode.INTERNAL_ERROR, "获取兴趣分布标签失败", str(e))
 
 
 @router.get("/analytics/user_profile", response_model=ApiResponse)
diff --git a/api/app/core/memory/analytics/hot_memory_tags.py b/api/app/core/memory/analytics/hot_memory_tags.py
index abb0f138..da08e88e 100644
--- a/api/app/core/memory/analytics/hot_memory_tags.py
+++ b/api/app/core/memory/analytics/hot_memory_tags.py
@@ -16,6 +16,10 @@ class FilteredTags(BaseModel):
     """用于接收LLM筛选后的核心标签列表的模型。"""
     meaningful_tags: List[str] = Field(..., description="从原始列表中筛选出的具有核心代表意义的名词列表。")
 
+class InterestTags(BaseModel):
+    """用于接收LLM筛选后的兴趣活动标签列表的模型。"""
+    interest_tags: List[str] = Field(..., description="从原始列表中筛选出的代表用户兴趣活动的标签列表。")
+
 async def filter_tags_with_llm(tags: List[str], end_user_id: str) -> List[str]:
     """
     使用LLM筛选标签列表，仅保留具有代表性的核心名词。
@@ -89,6 +93,70 @@ async def filter_tags_with_llm(tags: List[str], end_user_id: str) -> List[str]:
         # 在LLM失败时返回原始标签，确保流程继续
         return tags
 
+async def filter_interests_with_llm(tags: List[str], end_user_id: str, language: str = "zh") -> List[str]:
+    """
+    使用LLM从标签列表中筛选出代表用户兴趣活动的标签。
+    
+    与 filter_tags_with_llm 不同，此函数专注于识别"活动/行为"类兴趣，
+    过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
+    
+    Args:
+        tags: 原始标签列表
+        end_user_id: 用户ID，用于获取LLM配置
+        
+    Returns:
+        筛选后的兴趣活动标签列表
+    """
+    try:
+        with get_db_context() as db:
+            from app.services.memory_agent_service import (
+                get_end_user_connected_config,
+            )
+            connected_config = get_end_user_connected_config(end_user_id, db)
+            config_id = connected_config.get("memory_config_id")
+            workspace_id = connected_config.get("workspace_id")
+
+            if not config_id and not workspace_id:
+                raise ValueError(
+                    f"No memory_config_id found for end_user_id: {end_user_id}."
+                )
+
+            config_service = MemoryConfigService(db)
+            memory_config = config_service.load_memory_config(
+                config_id=config_id,
+                workspace_id=workspace_id
+            )
+
+            if not memory_config.llm_model_id:
+                raise ValueError(
+                    f"No llm_model_id found in memory config {config_id}."
+                )
+
+            factory = MemoryClientFactory(db)
+            llm_client = factory.get_llm_client(memory_config.llm_model_id)
+
+        tag_list_str = ", ".join(tags)
+        from app.core.memory.utils.prompt.prompt_utils import render_interest_filter_prompt
+        rendered_prompt = render_interest_filter_prompt(tag_list_str, language=language)
+        messages = [
+            {
+                "role": "user",
+                "content": rendered_prompt
+            }
+        ]
+
+        structured_response = await llm_client.response_structured(
+            messages=messages,
+            response_model=InterestTags
+        )
+
+        return structured_response.interest_tags
+
+    except Exception as e:
+        print(f"兴趣标签LLM筛选过程中发生错误: {e}")
+        return tags
+
+
 async def get_raw_tags_from_db(
     connector: Neo4jConnector,
     end_user_id: str,
@@ -183,3 +251,47 @@ async def get_hot_memory_tags(end_user_id: str, limit: int = 10, by_user: bool =
     finally:
         # 确保关闭连接
         await connector.close()
+
+async def get_interest_distribution(end_user_id: str, limit: int = 10, by_user: bool = False, language: str = "zh") -> List[Tuple[str, int]]:
+    """
+    获取用户的兴趣分布标签。
+    
+    与 get_hot_memory_tags 不同，此函数使用专门针对"活动/行为"的LLM prompt，
+    过滤掉纯物品、工具、地点等，只保留能代表用户兴趣爱好的活动类标签。
+
+    Args:
+        end_user_id: 必需参数。如果by_user=False，则为end_user_id；如果by_user=True，则为user_id
+        limit: 最终返回的标签数量限制（默认10）
+        by_user: 是否按user_id查询（默认False，按end_user_id查询）
+
+    Raises:
+        ValueError: 如果end_user_id未提供或为空
+    """
+    if not end_user_id or not end_user_id.strip():
+        raise ValueError(
+            "end_user_id is required. Please provide a valid end_user_id or user_id."
+        )
+
+    connector = Neo4jConnector()
+    try:
+        # 查询更多原始标签，给LLM提供充足上下文
+        query_limit = 40
+        raw_tags_with_freq = await get_raw_tags_from_db(connector, end_user_id, query_limit, by_user=by_user)
+        if not raw_tags_with_freq:
+            return []
+
+        raw_tag_names = [tag for tag, freq in raw_tags_with_freq]
+
+        # 使用兴趣活动专用prompt进行筛选
+        interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language)
+
+        # 保留原始频率，按兴趣筛选结果过滤
+        final_tags = [
+            (tag, freq)
+            for tag, freq in raw_tags_with_freq
+            if tag in interest_tag_names
+        ]
+
+        return final_tags[:limit]
+    finally:
+        await connector.close()
diff --git a/api/app/core/memory/utils/prompt/prompt_utils.py b/api/app/core/memory/utils/prompt/prompt_utils.py
index d88f50cf..0cea98f2 100644
--- a/api/app/core/memory/utils/prompt/prompt_utils.py
+++ b/api/app/core/memory/utils/prompt/prompt_utils.py
@@ -548,3 +548,20 @@ async def render_ontology_extraction_prompt(
     })
     
     return rendered_prompt
+
+
+def render_interest_filter_prompt(tag_list: str, language: str = "zh") -> str:
+    """
+    Renders the interest filter prompt using the interest_filter.jinja2 template.
+
+    Args:
+        tag_list: Comma-separated string of raw tags to filter
+        language: Output language ("zh" for Chinese, "en" for English)
+
+    Returns:
+        Rendered prompt content as string
+    """
+    template = prompt_env.get_template("interest_filter.jinja2")
+    rendered_prompt = template.render(tag_list=tag_list, language=language)
+    log_prompt_rendering('interest filter', rendered_prompt)
+    return rendered_prompt
diff --git a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
new file mode 100644
index 00000000..1e3aac55
--- /dev/null
+++ b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
@@ -0,0 +1,47 @@
+{% if language == "zh" %}
+You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in Chinese.
+
+**Keep Rules** (keep if any condition is met):
+- Tags representing sports or physical activities the user actively participates in (e.g., '攀岩', '篮球', '游泳', '跑步')
+- Tags representing cultural or entertainment hobbies (e.g., '读书', '看电影', '听音乐', '摄影')
+- Tags representing learning or creative activities (e.g., '编程', '绘画', '写作', '烹饪')
+- Tags representing specific interest domains or hobby categories (e.g., '历史', '天文', '园艺')
+
+**Filter Rules** (remove if any condition is met):
+- Pure object or tool names that do not represent an activity (e.g., '篮球鞋', '相机', '书桌')
+- Pure location or venue names (e.g., '篮球场', '图书馆', '健身房')
+- Abstract concepts or quality descriptions (e.g., '核心力量', '团队合作', '专注力')
+- Person names, brand names, or proper nouns (e.g., '乔丹', 'Nike')
+
+**Merge Rules**: For semantically similar tags, keep only the most representative one.
+For example: keep '篮球' over '打篮球'; keep '读书' over '阅读'.
+
+**Example**:
+Input: ['攀岩', '篮球场', '篮球鞋', '篮球', '《三体》', '历史', '核心力量', '烹饪', '菜刀']
+Output: ['攀岩', '篮球', '历史', '烹饪']
+
+Please filter the following tag list and return only the tags that represent user interest activities in Chinese: {{ tag_list }}
+{% else %}
+You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in English.
+
+**Keep Rules** (keep if any condition is met):
+- Tags representing sports or physical activities the user actively participates in (e.g., 'rock climbing', 'basketball', 'swimming', 'running')
+- Tags representing cultural or entertainment hobbies (e.g., 'reading', 'watching movies', 'listening to music', 'photography')
+- Tags representing learning or creative activities (e.g., 'programming', 'painting', 'writing', 'cooking')
+- Tags representing specific interest domains or hobby categories (e.g., 'history', 'astronomy', 'gardening')
+
+**Filter Rules** (remove if any condition is met):
+- Pure object or tool names that do not represent an activity (e.g., 'basketball shoes', 'camera', 'desk')
+- Pure location or venue names (e.g., 'basketball court', 'library', 'gym')
+- Abstract concepts or quality descriptions (e.g., 'core strength', 'teamwork', 'focus')
+- Person names, brand names, or proper nouns (e.g., 'Jordan', 'Nike')
+
+**Merge Rules**: For semantically similar tags, keep only the most representative one.
+For example: keep 'basketball' over 'playing basketball'; keep 'reading' over 'reading books'.
+
+**Example**:
+Input: ['rock climbing', 'basketball court', 'basketball shoes', 'basketball', 'The Three-Body Problem', 'history', 'core strength', 'cooking', 'kitchen knife']
+Output: ['rock climbing', 'basketball', 'history', 'cooking']
+
+Please filter the following tag list and return only the tags that represent user interest activities in English: {{ tag_list }}
+{% endif %}
diff --git a/api/app/services/memory_agent_service.py b/api/app/services/memory_agent_service.py
index 1f3667a6..16aee283 100644
--- a/api/app/services/memory_agent_service.py
+++ b/api/app/services/memory_agent_service.py
@@ -36,7 +36,7 @@ from app.core.memory.agent.utils.messages_tools import (
 )
 from app.core.memory.agent.utils.type_classifier import status_typle
 from app.core.memory.agent.utils.write_tools import write  # 新增：直接导入 write 函数
-from app.core.memory.analytics.hot_memory_tags import get_hot_memory_tags
+from app.core.memory.analytics.hot_memory_tags import get_hot_memory_tags, get_interest_distribution
 from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
 from app.db import get_db_context
 from app.models.knowledge_model import Knowledge, KnowledgeType
@@ -890,36 +890,36 @@ class MemoryAgentService:
         return result
 
 
-    async def get_hot_memory_tags_by_user(
+
+    async def get_interest_distribution_by_user(
         self,
         end_user_id: Optional[str] = None,
-        limit: int = 20
+        limit: int = 5,
+        language: str = "zh"
     ) -> List[Dict[str, Any]]:
         """
-        获取指定用户的热门记忆标签
+        获取指定用户的兴趣分布标签。
+        
+        与热门标签不同，此接口专注于识别用户的兴趣活动（运动、爱好、学习等），
+        过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
 
         参数：
-        - end_user_id: 用户ID（可选），对应Neo4j中的end_user_id字段
+        - end_user_id: 用户ID（必填）
         - limit: 返回标签数量限制
+        - language: 输出语言（"zh" 中文, "en" 英文）
 
         返回格式：
         [
-            {"name": "标签名", "frequency": 频次},
+            {"name": "兴趣活动名", "frequency": 频次},
             ...
         ]
-        
-        注意：标签语言由写入时的 X-Language-Type 决定，查询时不进行翻译
         """
         try:
-            # by_user=False 表示按 end_user_id 查询（在Neo4j中，end_user_id就是用户维度）
-            tags = await get_hot_memory_tags(end_user_id, limit=limit, by_user=False)
-            payload = []
-            for tag, freq in tags:
-                payload.append({"name": tag, "frequency": freq})
-            return payload
+            tags = await get_interest_distribution(end_user_id, limit=limit, by_user=False, language=language)
+            return [{"name": tag, "frequency": freq} for tag, freq in tags]
         except Exception as e:
-            logger.error(f"热门记忆标签查询失败: {e}")
-            raise Exception(f"热门记忆标签查询失败: {e}")
+            logger.error(f"兴趣分布标签查询失败: {e}")
+            raise Exception(f"兴趣分布标签查询失败: {e}")
 
 
     async def get_user_profile(

From df34735a9bb2f15f7d9d19e92edbe857eaf75c5d Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Wed, 4 Mar 2026 12:08:57 +0800
Subject: [PATCH 3/6] [add] Set cache for the distribution of interest tags

---
 api/app/cache/__init__.py                     |   3 +-
 api/app/cache/memory/__init__.py              |   2 +
 api/app/cache/memory/interest_memory.py       | 122 ++++++++++++++++++
 .../controllers/memory_agent_controller.py    |  19 +++
 api/app/core/config.py                        |   2 +-
 .../core/memory/analytics/hot_memory_tags.py  |  23 +++-
 .../prompt/prompts/interest_filter.jinja2     |  84 +++++++-----
 api/env.example                               |   2 +-
 8 files changed, 215 insertions(+), 42 deletions(-)
 create mode 100644 api/app/cache/memory/interest_memory.py

diff --git a/api/app/cache/__init__.py b/api/app/cache/__init__.py
index a79d4cb2..46d1c959 100644
--- a/api/app/cache/__init__.py
+++ b/api/app/cache/__init__.py
@@ -3,9 +3,10 @@ Cache 缓存模块
 
 提供各种缓存功能的统一入口
 """
-from .memory import EmotionMemoryCache, ImplicitMemoryCache
+from .memory import EmotionMemoryCache, ImplicitMemoryCache, InterestMemoryCache
 
 __all__ = [
     "EmotionMemoryCache",
     "ImplicitMemoryCache",
+    "InterestMemoryCache",
 ]
diff --git a/api/app/cache/memory/__init__.py b/api/app/cache/memory/__init__.py
index 4ada3153..0e21df0f 100644
--- a/api/app/cache/memory/__init__.py
+++ b/api/app/cache/memory/__init__.py
@@ -5,8 +5,10 @@ Memory 缓存模块
 """
 from .emotion_memory import EmotionMemoryCache
 from .implicit_memory import ImplicitMemoryCache
+from .interest_memory import InterestMemoryCache
 
 __all__ = [
     "EmotionMemoryCache",
     "ImplicitMemoryCache",
+    "InterestMemoryCache",
 ]
diff --git a/api/app/cache/memory/interest_memory.py b/api/app/cache/memory/interest_memory.py
new file mode 100644
index 00000000..108e2a37
--- /dev/null
+++ b/api/app/cache/memory/interest_memory.py
@@ -0,0 +1,122 @@
+"""
+Interest Distribution Cache
+
+兴趣分布缓存模块
+用于缓存用户的兴趣分布标签数据，避免重复调用模型生成
+"""
+import json
+import logging
+from typing import Optional, List, Dict, Any
+from datetime import datetime
+
+from app.aioRedis import aio_redis
+
+logger = logging.getLogger(__name__)
+
+# 缓存过期时间：24小时
+INTEREST_CACHE_EXPIRE = 86400
+
+
+class InterestMemoryCache:
+    """Cache for per-user interest-distribution data, stored through ``aio_redis``."""
+
+    PREFIX = "cache:memory:interest_distribution"
+
+    @classmethod
+    def _get_key(cls, end_user_id: str, language: str) -> str:
+        """Build the Redis key for one user/language pair.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+
+        Returns:
+            The full Redis key, ``{PREFIX}:by_user:{end_user_id}:{language}``.
+        """
+        return f"{cls.PREFIX}:by_user:{end_user_id}:{language}"
+
+    @classmethod
+    async def set_interest_distribution(
+        cls,
+        end_user_id: str,
+        language: str,
+        data: List[Dict[str, Any]],
+        expire: int = INTEREST_CACHE_EXPIRE,
+    ) -> bool:
+        """Store a user's interest distribution in the cache.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+            data: Interest distribution list, format [{"name": "...", "frequency": ...}, ...].
+            expire: Expiry in seconds; defaults to INTEREST_CACHE_EXPIRE (24 hours).
+
+        Returns:
+            True if the value was written; False if any exception was raised (it is logged).
+        """
+        try:
+            key = cls._get_key(end_user_id, language)
+            payload = {
+                "data": data,
+                "generated_at": datetime.now().isoformat(),  # naive local time (no tzinfo)
+                "cached": True,
+            }
+            value = json.dumps(payload, ensure_ascii=False)  # keep non-ASCII tag names readable
+            await aio_redis.set(key, value, ex=expire)
+            logger.info(f"设置兴趣分布缓存成功: {key}, 过期时间: {expire}秒")
+            return True
+        except Exception as e:
+            logger.error(f"设置兴趣分布缓存失败: {e}", exc_info=True)
+            return False
+
+    @classmethod
+    async def get_interest_distribution(
+        cls,
+        end_user_id: str,
+        language: str,
+    ) -> Optional[List[Dict[str, Any]]]:
+        """Fetch a user's cached interest distribution.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+
+        Returns:
+            The cached interest list, or None when the key is missing/expired or reading fails.
+        """
+        try:
+            key = cls._get_key(end_user_id, language)
+            value = await aio_redis.get(key)
+            if value:
+                payload = json.loads(value)
+                logger.info(f"命中兴趣分布缓存: {key}")
+                return payload.get("data")  # only the list is returned; the metadata fields are dropped
+            logger.info(f"兴趣分布缓存不存在或已过期: {key}")
+            return None
+        except Exception as e:
+            logger.error(f"获取兴趣分布缓存失败: {e}", exc_info=True)
+            return None
+
+    @classmethod
+    async def delete_interest_distribution(
+        cls,
+        end_user_id: str,
+        language: str,
+    ) -> bool:
+        """Delete a user's cached interest distribution.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+
+        Returns:
+            True if the delete call reported a result greater than 0; False otherwise or on error.
+        """
+        try:
+            key = cls._get_key(end_user_id, language)
+            result = await aio_redis.delete(key)
+            logger.info(f"删除兴趣分布缓存: {key}, 结果: {result}")
+            return result > 0
+        except Exception as e:
+            logger.error(f"删除兴趣分布缓存失败: {e}", exc_info=True)
+            return False
diff --git a/api/app/controllers/memory_agent_controller.py b/api/app/controllers/memory_agent_controller.py
index 8f2e5c31..1f070eb6 100644
--- a/api/app/controllers/memory_agent_controller.py
+++ b/api/app/controllers/memory_agent_controller.py
@@ -1,5 +1,6 @@
 from typing import List, Optional
 
+from app.cache.memory.interest_memory import InterestMemoryCache
 from app.celery_app import celery_app
 from app.core.error_codes import BizCode
 from app.core.language_utils import get_language_from_header
@@ -684,11 +685,29 @@ async def get_interest_distribution_by_user_api(
     language = get_language_from_header(language_type)
     api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}")
     try:
+        # 优先读取缓存
+        cached = await InterestMemoryCache.get_interest_distribution(
+            end_user_id=end_user_id,
+            language=language,
+        )
+        if cached is not None:
+            api_logger.info(f"Interest distribution cache hit: end_user_id={end_user_id}")
+            return success(data=cached, msg="获取兴趣分布标签成功")
+
+        # 缓存未命中，调用模型生成
         result = await memory_agent_service.get_interest_distribution_by_user(
             end_user_id=end_user_id,
             limit=limit,
             language=language
         )
+
+        # 写入缓存，24小时过期
+        await InterestMemoryCache.set_interest_distribution(
+            end_user_id=end_user_id,
+            language=language,
+            data=result,
+        )
+
         return success(data=result, msg="获取兴趣分布标签成功")
     except Exception as e:
         api_logger.error(f"Interest distribution by user failed: {str(e)}")
diff --git a/api/app/core/config.py b/api/app/core/config.py
index 6a2cf206..d9132be2 100644
--- a/api/app/core/config.py
+++ b/api/app/core/config.py
@@ -230,7 +230,7 @@ class Settings:
     # General Ontology Type Configuration
     # ========================================================================
     # 通用本体文件路径列表（逗号分隔）
-    GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "app/core/memory/ontology_services/General_purpose_entity.ttl")
+    GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "api/app/core/memory/ontology_services/General_purpose_entity.ttl")
 
     # 是否启用通用本体类型功能
     ENABLE_GENERAL_ONTOLOGY_TYPES: bool = os.getenv("ENABLE_GENERAL_ONTOLOGY_TYPES", "true").lower() == "true"
diff --git a/api/app/core/memory/analytics/hot_memory_tags.py b/api/app/core/memory/analytics/hot_memory_tags.py
index da08e88e..1d2d5259 100644
--- a/api/app/core/memory/analytics/hot_memory_tags.py
+++ b/api/app/core/memory/analytics/hot_memory_tags.py
@@ -281,16 +281,25 @@ async def get_interest_distribution(end_user_id: str, limit: int = 10, by_user:
             return []
 
         raw_tag_names = [tag for tag, freq in raw_tags_with_freq]
+        raw_freq_map = {tag: freq for tag, freq in raw_tags_with_freq}
 
-        # 使用兴趣活动专用prompt进行筛选
+        # 使用兴趣活动专用prompt进行筛选（支持语义推断出新标签）
         interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language)
 
-        # 保留原始频率，按兴趣筛选结果过滤
-        final_tags = [
-            (tag, freq)
-            for tag, freq in raw_tags_with_freq
-            if tag in interest_tag_names
-        ]
+        # 构建最终标签列表：
+        # - 原始标签中存在的，保留原始频率
+        # - LLM推断出的新标签（不在原始列表中），赋予默认频率1
+        final_tags = []
+        seen = set()
+        for tag in interest_tag_names:
+            if tag in seen:
+                continue
+            seen.add(tag)
+            freq = raw_freq_map.get(tag, 1)
+            final_tags.append((tag, freq))
+
+        # 按频率降序排列
+        final_tags.sort(key=lambda x: x[1], reverse=True)
 
         return final_tags[:limit]
     finally:
diff --git a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
index 1e3aac55..7957bf1c 100644
--- a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
@@ -1,47 +1,67 @@
 {% if language == "zh" %}
-You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in Chinese.
+You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent.
 
-**Keep Rules** (keep if any condition is met):
-- Tags representing sports or physical activities the user actively participates in (e.g., '攀岩', '篮球', '游泳', '跑步')
-- Tags representing cultural or entertainment hobbies (e.g., '读书', '看电影', '听音乐', '摄影')
-- Tags representing learning or creative activities (e.g., '编程', '绘画', '写作', '烹饪')
-- Tags representing specific interest domains or hobby categories (e.g., '历史', '天文', '园艺')
+**Step 1 - Infer the underlying interest from each tag**:
+Look at each tag and ask: "What hobby or interest does this tag suggest the user has?"
 
-**Filter Rules** (remove if any condition is met):
-- Pure object or tool names that do not represent an activity (e.g., '篮球鞋', '相机', '书桌')
-- Pure location or venue names (e.g., '篮球场', '图书馆', '健身房')
-- Abstract concepts or quality descriptions (e.g., '核心力量', '团队合作', '专注力')
-- Person names, brand names, or proper nouns (e.g., '乔丹', 'Nike')
+Examples of inference:
+- '攀岩', '室内攀岩馆', '攀岩者数据仪表盘', '路线解锁地图', '指力', '路线等级', '当日攀岩流畅度' → '攀岩'
+- '风光摄影元数据增强器', 'EXIF数据', '.CR2文件', '.NEF文件', '日出拍摄点', '曝光补偿', '光圈', '太阳高度角', '云量预测图层' → '摄影'
+- '晨间冥想坚持天数', '身心协同峰值' → '冥想'
+- '川味可视化', '川菜' → '烹饪'
+- '开源项目命名建议', 'climbviz', '可视化', '力量增长雷达图' → '编程' 或 '数据可视化'
+- '吉他', '指弹', '琴谱' → '吉他'
+- '跑步', '5公里', '跑鞋' → '跑步'
+- '瑜伽垫', '瑜伽课' → '瑜伽'
 
-**Merge Rules**: For semantically similar tags, keep only the most representative one.
-For example: keep '篮球' over '打篮球'; keep '读书' over '阅读'.
+**Step 2 - Consolidate and deduplicate**:
+- Merge tags that point to the same interest into one representative label
+- Use concise, standard hobby names (e.g., '攀岩', '摄影', '编程', '烹饪', '冥想', '吉他', '跑步')
+- If multiple tags all point to '攀岩', output '攀岩' only once
+
+**Step 3 - Filter out non-interest tags**:
+Remove tags that do NOT suggest any hobby or interest:
+- Generic system/assistant terms (e.g., '助手', '用户', 'AI')
+- Pure abstract metrics with no clear hobby link (e.g., '完成时间', '日期', '自我评分')
+- Location names with no clear hobby link (e.g., '青城山后山' alone — but if combined with photography context, infer '摄影')
+
+**Output format**: Return a list of concise interest activity names in Chinese.
 
 **Example**:
-Input: ['攀岩', '篮球场', '篮球鞋', '篮球', '《三体》', '历史', '核心力量', '烹饪', '菜刀']
-Output: ['攀岩', '篮球', '历史', '烹饪']
+Input: ['攀岩', '攀岩者数据仪表盘', '路线解锁地图', '指力', '风光摄影元数据增强器', 'EXIF数据', '晨间冥想坚持天数', '川味可视化', '可视化', '助手', '完成时间']
+Output: ['攀岩', '摄影', '冥想', '烹饪', '编程']
 
-Please filter the following tag list and return only the tags that represent user interest activities in Chinese: {{ tag_list }}
+Now process the following tag list and return the inferred interest activities in Chinese: {{ tag_list }}
 {% else %}
-You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in English.
+You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent.
 
-**Keep Rules** (keep if any condition is met):
-- Tags representing sports or physical activities the user actively participates in (e.g., 'rock climbing', 'basketball', 'swimming', 'running')
-- Tags representing cultural or entertainment hobbies (e.g., 'reading', 'watching movies', 'listening to music', 'photography')
-- Tags representing learning or creative activities (e.g., 'programming', 'painting', 'writing', 'cooking')
-- Tags representing specific interest domains or hobby categories (e.g., 'history', 'astronomy', 'gardening')
+**Step 1 - Infer the underlying interest from each tag**:
+Look at each tag and ask: "What hobby or interest does this tag suggest the user has?"
 
-**Filter Rules** (remove if any condition is met):
-- Pure object or tool names that do not represent an activity (e.g., 'basketball shoes', 'camera', 'desk')
-- Pure location or venue names (e.g., 'basketball court', 'library', 'gym')
-- Abstract concepts or quality descriptions (e.g., 'core strength', 'teamwork', 'focus')
-- Person names, brand names, or proper nouns (e.g., 'Jordan', 'Nike')
+Examples of inference:
+- 'rock climbing', 'indoor climbing gym', 'climber dashboard', 'route map', 'finger strength' → 'rock climbing'
+- 'landscape photography metadata enhancer', 'EXIF data', 'sunrise shooting spot', 'exposure compensation' → 'photography'
+- 'morning meditation streak', 'mind-body peak' → 'meditation'
+- 'Sichuan cuisine visualization', 'Sichuan food' → 'cooking'
+- 'open source project', 'data visualization tool', 'Python' → 'programming'
+- 'guitar', 'fingerpicking', 'sheet music' → 'guitar'
+- 'running', '5km', 'running shoes' → 'running'
 
-**Merge Rules**: For semantically similar tags, keep only the most representative one.
-For example: keep 'basketball' over 'playing basketball'; keep 'reading' over 'reading books'.
+**Step 2 - Consolidate and deduplicate**:
+- Merge tags that point to the same interest into one representative label
+- Use concise, standard hobby names (e.g., 'rock climbing', 'photography', 'programming', 'cooking', 'meditation')
+- If multiple tags all point to 'rock climbing', output 'rock climbing' only once
+
+**Step 3 - Filter out non-interest tags**:
+Remove tags that do NOT suggest any hobby or interest:
+- Generic system/assistant terms (e.g., 'assistant', 'user', 'AI')
+- Pure abstract metrics with no clear hobby link (e.g., 'completion time', 'date', 'self-rating')
+
+**Output format**: Return a list of concise interest activity names in English.
 
 **Example**:
-Input: ['rock climbing', 'basketball court', 'basketball shoes', 'basketball', 'The Three-Body Problem', 'history', 'core strength', 'cooking', 'kitchen knife']
-Output: ['rock climbing', 'basketball', 'history', 'cooking']
+Input: ['rock climbing', 'climber dashboard', 'route map', 'finger strength', 'landscape photography metadata enhancer', 'EXIF data', 'morning meditation streak', 'Sichuan cuisine visualization', 'visualization', 'assistant', 'completion time']
+Output: ['rock climbing', 'photography', 'meditation', 'cooking', 'programming']
 
-Please filter the following tag list and return only the tags that represent user interest activities in English: {{ tag_list }}
+Now process the following tag list and return the inferred interest activities in English: {{ tag_list }}
 {% endif %}
diff --git a/api/env.example b/api/env.example
index d67bbf7c..1dc4536c 100644
--- a/api/env.example
+++ b/api/env.example
@@ -139,7 +139,7 @@ SMTP_USER=
 SMTP_PASSWORD=
 
 # 本体类型融合配置 (记得写入env_example)
-GENERAL_ONTOLOGY_FILES=app/core/memory/ontology_services/General_purpose_entity.ttl  # 指定要加载的本体文件路径，多个文件用逗号分隔
+GENERAL_ONTOLOGY_FILES=api/app/core/memory/ontology_services/General_purpose_entity.ttl  # 指定要加载的本体文件路径，多个文件用逗号分隔
 ENABLE_GENERAL_ONTOLOGY_TYPES=true # 总开关，控制是否启用通用本体类型融合功能(false = 不使用任何本体类型指导)
 MAX_ONTOLOGY_TYPES_IN_PROMPT=100 # 限制传给 LLM 的类型数量，防止 Prompt 过长
 CORE_GENERAL_TYPES=Person,Organization,Place,Event,Work,Concept # 	定义核心类型列表，这些类型会优先包含在合并结果中

From b5703c1b8282e3e8472e897968c76cec7ac0430b Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Tue, 3 Mar 2026 23:30:54 +0800
Subject: [PATCH 4/6] [fix] Revising the judgment method for the interest
 analysis tags

---
 .../controllers/memory_agent_controller.py    |  32 ++---
 .../core/memory/analytics/hot_memory_tags.py  | 112 ++++++++++++++++++
 .../core/memory/utils/prompt/prompt_utils.py  |  17 +++
 .../prompt/prompts/interest_filter.jinja2     |  47 ++++++++
 api/app/services/memory_agent_service.py      |  32 ++---
 5 files changed, 210 insertions(+), 30 deletions(-)
 create mode 100644 api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2

diff --git a/api/app/controllers/memory_agent_controller.py b/api/app/controllers/memory_agent_controller.py
index b88e65ff..8f2e5c31 100644
--- a/api/app/controllers/memory_agent_controller.py
+++ b/api/app/controllers/memory_agent_controller.py
@@ -661,34 +661,38 @@ async def get_knowledge_type_stats_api(
         return fail(BizCode.INTERNAL_ERROR, "获取知识库类型统计失败", str(e))
 
 
-@router.get("/analytics/hot_memory_tags/by_user", response_model=ApiResponse)
-async def get_hot_memory_tags_by_user_api(
-    end_user_id: Optional[str] = Query(None, description="用户ID（可选）"),
-    limit: int = Query(20, description="返回标签数量限制"),
+@router.get("/analytics/interest_distribution/by_user", response_model=ApiResponse)
+async def get_interest_distribution_by_user_api(
+    end_user_id: Optional[str] = Query(None, description="用户ID（必填）"),
+    limit: int = Query(5, le=5, description="返回兴趣标签数量限制，最多5个"),
+    language_type: str = Header(default=None, alias="X-Language-Type"),
     current_user: User = Depends(get_current_user),
-    db: Session=Depends(get_db),
+    db: Session = Depends(get_db),
 ):
     """
-    获取指定用户的热门记忆标签
+    获取指定用户的兴趣分布标签
     
-    注意：标签语言由写入时的 X-Language-Type 决定，查询时不进行翻译
+    与热门标签不同，此接口专注于识别用户的兴趣活动（运动、爱好、学习、创作等），
+    过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
     
     返回格式：
     [
-        {"name": "标签名", "frequency": 频次},
+        {"name": "兴趣活动名", "frequency": 频次},
         ...
     ]
     """
-    api_logger.info(f"Hot memory tags by user requested: end_user_id={end_user_id}")
+    language = get_language_from_header(language_type)
+    api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}")
     try:
-        result = await memory_agent_service.get_hot_memory_tags_by_user(
+        result = await memory_agent_service.get_interest_distribution_by_user(
             end_user_id=end_user_id,
-            limit=limit
+            limit=limit,
+            language=language
         )
-        return success(data=result, msg="获取热门记忆标签成功")
+        return success(data=result, msg="获取兴趣分布标签成功")
     except Exception as e:
-        api_logger.error(f"Hot memory tags by user failed: {str(e)}")
-        return fail(BizCode.INTERNAL_ERROR, "获取热门记忆标签失败", str(e))
+        api_logger.error(f"Interest distribution by user failed: {str(e)}")
+        return fail(BizCode.INTERNAL_ERROR, "获取兴趣分布标签失败", str(e))
 
 
 @router.get("/analytics/user_profile", response_model=ApiResponse)
diff --git a/api/app/core/memory/analytics/hot_memory_tags.py b/api/app/core/memory/analytics/hot_memory_tags.py
index abb0f138..da08e88e 100644
--- a/api/app/core/memory/analytics/hot_memory_tags.py
+++ b/api/app/core/memory/analytics/hot_memory_tags.py
@@ -16,6 +16,10 @@ class FilteredTags(BaseModel):
     """用于接收LLM筛选后的核心标签列表的模型。"""
     meaningful_tags: List[str] = Field(..., description="从原始列表中筛选出的具有核心代表意义的名词列表。")
 
+class InterestTags(BaseModel):
+    """Model that receives the list of interest-activity tags selected by the LLM."""
+    interest_tags: List[str] = Field(..., description="从原始列表中筛选出的代表用户兴趣活动的标签列表。")
+
 async def filter_tags_with_llm(tags: List[str], end_user_id: str) -> List[str]:
     """
     使用LLM筛选标签列表，仅保留具有代表性的核心名词。
@@ -89,6 +93,70 @@ async def filter_tags_with_llm(tags: List[str], end_user_id: str) -> List[str]:
         # 在LLM失败时返回原始标签，确保流程继续
         return tags
 
+async def filter_interests_with_llm(tags: List[str], end_user_id: str, language: str = "zh") -> List[str]:
+    """
+    Use an LLM to pick, from a tag list, the tags that represent user interest activities.
+
+    Unlike filter_tags_with_llm, this function focuses on "activity/behavior" interests and
+    filters out nouns (pure objects, tools, places) that do not reflect active participation.
+
+    Args:
+        tags: Raw tag list.
+        end_user_id: User ID, used to look up the LLM configuration.
+        language: Passed to the prompt renderer to choose the prompt language (default "zh").
+    Returns:
+        The interest tags from the LLM response; on any exception the input ``tags`` is returned unchanged.
+    """
+    try:
+        with get_db_context() as db:
+            from app.services.memory_agent_service import (  # local import: that module imports this one at top level
+                get_end_user_connected_config,
+            )
+            connected_config = get_end_user_connected_config(end_user_id, db)
+            config_id = connected_config.get("memory_config_id")
+            workspace_id = connected_config.get("workspace_id")
+
+            if not config_id and not workspace_id:  # raises only when both identifiers are missing
+                raise ValueError(
+                    f"No memory_config_id found for end_user_id: {end_user_id}."
+                )
+
+            config_service = MemoryConfigService(db)
+            memory_config = config_service.load_memory_config(
+                config_id=config_id,
+                workspace_id=workspace_id
+            )
+
+            if not memory_config.llm_model_id:
+                raise ValueError(
+                    f"No llm_model_id found in memory config {config_id}."
+                )
+
+            factory = MemoryClientFactory(db)
+            llm_client = factory.get_llm_client(memory_config.llm_model_id)
+
+        tag_list_str = ", ".join(tags)
+        from app.core.memory.utils.prompt.prompt_utils import render_interest_filter_prompt
+        rendered_prompt = render_interest_filter_prompt(tag_list_str, language=language)
+        messages = [
+            {
+                "role": "user",
+                "content": rendered_prompt
+            }
+        ]
+
+        structured_response = await llm_client.response_structured(
+            messages=messages,
+            response_model=InterestTags
+        )
+
+        return structured_response.interest_tags
+
+    except Exception as e:
+        print(f"兴趣标签LLM筛选过程中发生错误: {e}")  # NOTE(review): printed to stdout, not sent to a logger
+        return tags  # best-effort fallback: hand back the unfiltered input
+
+
 async def get_raw_tags_from_db(
     connector: Neo4jConnector,
     end_user_id: str,
@@ -183,3 +251,47 @@ async def get_hot_memory_tags(end_user_id: str, limit: int = 10, by_user: bool =
     finally:
         # 确保关闭连接
         await connector.close()
+
+async def get_interest_distribution(end_user_id: str, limit: int = 10, by_user: bool = False, language: str = "zh") -> List[Tuple[str, int]]:
+    """
+    Get the user's interest-distribution tags as up to ``limit`` (tag, frequency) pairs.
+
+    Unlike get_hot_memory_tags, this uses an LLM prompt aimed at "activities/behaviors",
+    dropping pure objects, tools, places, etc. and keeping only interest-activity tags.
+
+    Args:
+        end_user_id: Required. The end_user_id when by_user=False; the user_id when by_user=True.
+        limit: Maximum number of tags returned (default 10).
+        by_user: Query by user_id when True (default False: query by end_user_id).
+        language: Forwarded to filter_interests_with_llm (default "zh").
+    Raises:
+        ValueError: If end_user_id is missing or blank.
+    """
+    if not end_user_id or not end_user_id.strip():
+        raise ValueError(
+            "end_user_id is required. Please provide a valid end_user_id or user_id."
+        )
+
+    connector = Neo4jConnector()
+    try:
+        # Fetch more raw tags than `limit` so the LLM has enough context
+        query_limit = 40
+        raw_tags_with_freq = await get_raw_tags_from_db(connector, end_user_id, query_limit, by_user=by_user)
+        if not raw_tags_with_freq:
+            return []
+
+        raw_tag_names = [tag for tag, freq in raw_tags_with_freq]
+
+        # Filter with the interest-activity prompt
+        interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language)
+
+        # Keep the original frequencies; retain only tags present in the LLM result
+        final_tags = [
+            (tag, freq)
+            for tag, freq in raw_tags_with_freq
+            if tag in interest_tag_names
+        ]
+
+        return final_tags[:limit]
+    finally:
+        await connector.close()  # always release the connector, including on the early return
diff --git a/api/app/core/memory/utils/prompt/prompt_utils.py b/api/app/core/memory/utils/prompt/prompt_utils.py
index d88f50cf..0cea98f2 100644
--- a/api/app/core/memory/utils/prompt/prompt_utils.py
+++ b/api/app/core/memory/utils/prompt/prompt_utils.py
@@ -548,3 +548,20 @@ async def render_ontology_extraction_prompt(
     })
     
     return rendered_prompt
+
+
+def render_interest_filter_prompt(tag_list: str, language: str = "zh") -> str:
+    """
+    Render the interest filter prompt from the interest_filter.jinja2 template.
+
+    Args:
+        tag_list: Comma-separated string of raw tags to filter
+        language: "zh" selects the Chinese-output prompt; any other value selects the English one
+
+    Returns:
+        Rendered prompt content as string
+    """
+    template = prompt_env.get_template("interest_filter.jinja2")
+    rendered_prompt = template.render(tag_list=tag_list, language=language)
+    log_prompt_rendering('interest filter', rendered_prompt)
+    return rendered_prompt
diff --git a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
new file mode 100644
index 00000000..1e3aac55
--- /dev/null
+++ b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
@@ -0,0 +1,47 @@
+{% if language == "zh" %}
+You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in Chinese.
+
+**Keep Rules** (keep if any condition is met):
+- Tags representing sports or physical activities the user actively participates in (e.g., '攀岩', '篮球', '游泳', '跑步')
+- Tags representing cultural or entertainment hobbies (e.g., '读书', '看电影', '听音乐', '摄影')
+- Tags representing learning or creative activities (e.g., '编程', '绘画', '写作', '烹饪')
+- Tags representing specific interest domains or hobby categories (e.g., '历史', '天文', '园艺')
+
+**Filter Rules** (remove if any condition is met):
+- Pure object or tool names that do not represent an activity (e.g., '篮球鞋', '相机', '书桌')
+- Pure location or venue names (e.g., '篮球场', '图书馆', '健身房')
+- Abstract concepts or quality descriptions (e.g., '核心力量', '团队合作', '专注力')
+- Person names, brand names, or proper nouns (e.g., '乔丹', 'Nike')
+
+**Merge Rules**: For semantically similar tags, keep only the most representative one.
+For example: keep '篮球' over '打篮球'; keep '读书' over '阅读'.
+
+**Example**:
+Input: ['攀岩', '篮球场', '篮球鞋', '篮球', '《三体》', '历史', '核心力量', '烹饪', '菜刀']
+Output: ['攀岩', '篮球', '历史', '烹饪']
+
+Please filter the following tag list and return only the tags that represent user interest activities in Chinese: {{ tag_list }}
+{% else %}
+You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in English.
+
+**Keep Rules** (keep if any condition is met):
+- Tags representing sports or physical activities the user actively participates in (e.g., 'rock climbing', 'basketball', 'swimming', 'running')
+- Tags representing cultural or entertainment hobbies (e.g., 'reading', 'watching movies', 'listening to music', 'photography')
+- Tags representing learning or creative activities (e.g., 'programming', 'painting', 'writing', 'cooking')
+- Tags representing specific interest domains or hobby categories (e.g., 'history', 'astronomy', 'gardening')
+
+**Filter Rules** (remove if any condition is met):
+- Pure object or tool names that do not represent an activity (e.g., 'basketball shoes', 'camera', 'desk')
+- Pure location or venue names (e.g., 'basketball court', 'library', 'gym')
+- Abstract concepts or quality descriptions (e.g., 'core strength', 'teamwork', 'focus')
+- Person names, brand names, or proper nouns (e.g., 'Jordan', 'Nike')
+
+**Merge Rules**: For semantically similar tags, keep only the most representative one.
+For example: keep 'basketball' over 'playing basketball'; keep 'reading' over 'reading books'.
+
+**Example**:
+Input: ['rock climbing', 'basketball court', 'basketball shoes', 'basketball', 'The Three-Body Problem', 'history', 'core strength', 'cooking', 'kitchen knife']
+Output: ['rock climbing', 'basketball', 'history', 'cooking']
+
+Please filter the following tag list and return only the tags that represent user interest activities in English: {{ tag_list }}
+{% endif %}
diff --git a/api/app/services/memory_agent_service.py b/api/app/services/memory_agent_service.py
index 1f3667a6..16aee283 100644
--- a/api/app/services/memory_agent_service.py
+++ b/api/app/services/memory_agent_service.py
@@ -36,7 +36,7 @@ from app.core.memory.agent.utils.messages_tools import (
 )
 from app.core.memory.agent.utils.type_classifier import status_typle
 from app.core.memory.agent.utils.write_tools import write  # 新增：直接导入 write 函数
-from app.core.memory.analytics.hot_memory_tags import get_hot_memory_tags
+from app.core.memory.analytics.hot_memory_tags import get_hot_memory_tags, get_interest_distribution
 from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
 from app.db import get_db_context
 from app.models.knowledge_model import Knowledge, KnowledgeType
@@ -890,36 +890,36 @@ class MemoryAgentService:
         return result
 
 
-    async def get_hot_memory_tags_by_user(
+
+    async def get_interest_distribution_by_user(
         self,
         end_user_id: Optional[str] = None,
-        limit: int = 20
+        limit: int = 5,
+        language: str = "zh"
     ) -> List[Dict[str, Any]]:
         """
-        获取指定用户的热门记忆标签
+        获取指定用户的兴趣分布标签。
+        
+        与热门标签不同，此接口专注于识别用户的兴趣活动（运动、爱好、学习等），
+        过滤掉纯物品、工具、地点等不代表用户主动参与活动的名词。
 
         参数：
-        - end_user_id: 用户ID（可选），对应Neo4j中的end_user_id字段
+        - end_user_id: 用户ID（必填）
         - limit: 返回标签数量限制
+        - language: 输出语言（"zh" 中文, "en" 英文）
 
         返回格式：
         [
-            {"name": "标签名", "frequency": 频次},
+            {"name": "兴趣活动名", "frequency": 频次},
             ...
         ]
-        
-        注意：标签语言由写入时的 X-Language-Type 决定，查询时不进行翻译
         """
         try:
-            # by_user=False 表示按 end_user_id 查询（在Neo4j中，end_user_id就是用户维度）
-            tags = await get_hot_memory_tags(end_user_id, limit=limit, by_user=False)
-            payload = []
-            for tag, freq in tags:
-                payload.append({"name": tag, "frequency": freq})
-            return payload
+            tags = await get_interest_distribution(end_user_id, limit=limit, by_user=False, language=language)
+            return [{"name": tag, "frequency": freq} for tag, freq in tags]
         except Exception as e:
-            logger.error(f"热门记忆标签查询失败: {e}")
-            raise Exception(f"热门记忆标签查询失败: {e}")
+            logger.error(f"兴趣分布标签查询失败: {e}")
+            raise Exception(f"兴趣分布标签查询失败: {e}")
 
 
     async def get_user_profile(

From c31a92bf01a87721afb0c87272975704b0322ad7 Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Wed, 4 Mar 2026 12:08:57 +0800
Subject: [PATCH 5/6] [add] Set cache for the distribution of interest tags

---
 api/app/cache/__init__.py                     |   3 +-
 api/app/cache/memory/__init__.py              |   2 +
 api/app/cache/memory/interest_memory.py       | 122 ++++++++++++++++++
 .../controllers/memory_agent_controller.py    |  19 +++
 api/app/core/config.py                        |   2 +-
 .../core/memory/analytics/hot_memory_tags.py  |  23 +++-
 .../prompt/prompts/interest_filter.jinja2     |  84 +++++++-----
 api/env.example                               |   2 +-
 8 files changed, 215 insertions(+), 42 deletions(-)
 create mode 100644 api/app/cache/memory/interest_memory.py

diff --git a/api/app/cache/__init__.py b/api/app/cache/__init__.py
index a79d4cb2..46d1c959 100644
--- a/api/app/cache/__init__.py
+++ b/api/app/cache/__init__.py
@@ -3,9 +3,10 @@ Cache 缓存模块
 
 提供各种缓存功能的统一入口
 """
-from .memory import EmotionMemoryCache, ImplicitMemoryCache
+from .memory import EmotionMemoryCache, ImplicitMemoryCache, InterestMemoryCache
 
 __all__ = [
     "EmotionMemoryCache",
     "ImplicitMemoryCache",
+    "InterestMemoryCache",
 ]
diff --git a/api/app/cache/memory/__init__.py b/api/app/cache/memory/__init__.py
index 4ada3153..0e21df0f 100644
--- a/api/app/cache/memory/__init__.py
+++ b/api/app/cache/memory/__init__.py
@@ -5,8 +5,10 @@ Memory 缓存模块
 """
 from .emotion_memory import EmotionMemoryCache
 from .implicit_memory import ImplicitMemoryCache
+from .interest_memory import InterestMemoryCache
 
 __all__ = [
     "EmotionMemoryCache",
     "ImplicitMemoryCache",
+    "InterestMemoryCache",
 ]
diff --git a/api/app/cache/memory/interest_memory.py b/api/app/cache/memory/interest_memory.py
new file mode 100644
index 00000000..108e2a37
--- /dev/null
+++ b/api/app/cache/memory/interest_memory.py
@@ -0,0 +1,122 @@
+"""
+Interest Distribution Cache
+
+兴趣分布缓存模块
+用于缓存用户的兴趣分布标签数据，避免重复调用模型生成
+"""
+import json
+import logging
+from typing import Optional, List, Dict, Any
+from datetime import datetime
+
+from app.aioRedis import aio_redis
+
+logger = logging.getLogger(__name__)
+
+# 缓存过期时间：24小时
+INTEREST_CACHE_EXPIRE = 86400
+
+
+class InterestMemoryCache:
+    """Redis-backed cache for per-user interest distribution tags."""
+
+    PREFIX = "cache:memory:interest_distribution"  # common prefix of every key built by _get_key
+
+    @classmethod
+    def _get_key(cls, end_user_id: str, language: str) -> str:
+        """Build the Redis key for one user/language pair.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+
+        Returns:
+            The full Redis key, "<PREFIX>:by_user:<end_user_id>:<language>".
+        """
+        return f"{cls.PREFIX}:by_user:{end_user_id}:{language}"
+
+    @classmethod
+    async def set_interest_distribution(
+        cls,
+        end_user_id: str,
+        language: str,
+        data: List[Dict[str, Any]],
+        expire: int = INTEREST_CACHE_EXPIRE,
+    ) -> bool:
+        """Store a user's interest distribution in the cache.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+            data: Interest distribution list, shaped like [{"name": "...", "frequency": ...}, ...].
+            expire: Expiry in seconds; defaults to INTEREST_CACHE_EXPIRE (24 hours).
+
+        Returns:
+            True if the value was written; False if any exception occurred (it is logged, not raised).
+        """
+        try:
+            key = cls._get_key(end_user_id, language)
+            payload = {
+                "data": data,
+                "generated_at": datetime.now().isoformat(),  # naive timestamp (no tz passed to now())
+                "cached": True,
+            }
+            value = json.dumps(payload, ensure_ascii=False)  # keep non-ASCII tag names unescaped
+            await aio_redis.set(key, value, ex=expire)
+            logger.info(f"设置兴趣分布缓存成功: {key}, 过期时间: {expire}秒")
+            return True
+        except Exception as e:
+            logger.error(f"设置兴趣分布缓存失败: {e}", exc_info=True)
+            return False
+
+    @classmethod
+    async def get_interest_distribution(
+        cls,
+        end_user_id: str,
+        language: str,
+    ) -> Optional[List[Dict[str, Any]]]:
+        """Read a user's interest distribution from the cache.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+
+        Returns:
+            The payload's "data" entry, or None on a miss (absent or expired key) or on any error.
+        """
+        try:
+            key = cls._get_key(end_user_id, language)
+            value = await aio_redis.get(key)
+            if value:
+                payload = json.loads(value)
+                logger.info(f"命中兴趣分布缓存: {key}")
+                return payload.get("data")
+            logger.info(f"兴趣分布缓存不存在或已过期: {key}")
+            return None
+        except Exception as e:
+            logger.error(f"获取兴趣分布缓存失败: {e}", exc_info=True)
+            return None
+
+    @classmethod
+    async def delete_interest_distribution(
+        cls,
+        end_user_id: str,
+        language: str,
+    ) -> bool:
+        """Delete a user's interest distribution from the cache.
+
+        Args:
+            end_user_id: User ID.
+            language: Language type.
+
+        Returns:
+            True if the delete call returned a value greater than 0; otherwise False (also on any error).
+        """
+        try:
+            key = cls._get_key(end_user_id, language)
+            result = await aio_redis.delete(key)
+            logger.info(f"删除兴趣分布缓存: {key}, 结果: {result}")
+            return result > 0
+        except Exception as e:
+            logger.error(f"删除兴趣分布缓存失败: {e}", exc_info=True)
+            return False
diff --git a/api/app/controllers/memory_agent_controller.py b/api/app/controllers/memory_agent_controller.py
index 8f2e5c31..1f070eb6 100644
--- a/api/app/controllers/memory_agent_controller.py
+++ b/api/app/controllers/memory_agent_controller.py
@@ -1,5 +1,6 @@
 from typing import List, Optional
 
+from app.cache.memory.interest_memory import InterestMemoryCache
 from app.celery_app import celery_app
 from app.core.error_codes import BizCode
 from app.core.language_utils import get_language_from_header
@@ -684,11 +685,35 @@ async def get_interest_distribution_by_user_api(
     language = get_language_from_header(language_type)
     api_logger.info(f"Interest distribution by user requested: end_user_id={end_user_id}, language={language}")
     try:
+        # The cache key is (end_user_id, language) only, so an entry always
+        # holds the top `max_limit` tags and every response is sliced to the
+        # requested `limit`; otherwise a request with a different `limit`
+        # would be served a list of the wrong length until the entry expires.
+        max_limit = 5  # keep in sync with `le=5` on the `limit` query parameter
+
+        # Prefer the cache.
+        cached = await InterestMemoryCache.get_interest_distribution(
+            end_user_id=end_user_id,
+            language=language,
+        )
+        if cached is not None:
+            api_logger.info(f"Interest distribution cache hit: end_user_id={end_user_id}")
+            return success(data=cached[:limit], msg="获取兴趣分布标签成功")
+
+        # Cache miss: generate the tags with the model.
         result = await memory_agent_service.get_interest_distribution_by_user(
             end_user_id=end_user_id,
-            limit=limit,
+            limit=max_limit,
             language=language
         )
-        return success(data=result, msg="获取兴趣分布标签成功")
+
+        # Store the full list (the cache applies its default 24-hour expiry).
+        await InterestMemoryCache.set_interest_distribution(
+            end_user_id=end_user_id,
+            language=language,
+            data=result,
+        )
+
+        return success(data=result[:limit], msg="获取兴趣分布标签成功")
     except Exception as e:
         api_logger.error(f"Interest distribution by user failed: {str(e)}")
diff --git a/api/app/core/config.py b/api/app/core/config.py
index 4472d373..d04e2a43 100644
--- a/api/app/core/config.py
+++ b/api/app/core/config.py
@@ -229,7 +229,7 @@ class Settings:
     # General Ontology Type Configuration
     # ========================================================================
     # 通用本体文件路径列表（逗号分隔）
-    GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "app/core/memory/ontology_services/General_purpose_entity.ttl")
+    GENERAL_ONTOLOGY_FILES: str = os.getenv("GENERAL_ONTOLOGY_FILES", "api/app/core/memory/ontology_services/General_purpose_entity.ttl")
 
     # 是否启用通用本体类型功能
     ENABLE_GENERAL_ONTOLOGY_TYPES: bool = os.getenv("ENABLE_GENERAL_ONTOLOGY_TYPES", "true").lower() == "true"
diff --git a/api/app/core/memory/analytics/hot_memory_tags.py b/api/app/core/memory/analytics/hot_memory_tags.py
index da08e88e..1d2d5259 100644
--- a/api/app/core/memory/analytics/hot_memory_tags.py
+++ b/api/app/core/memory/analytics/hot_memory_tags.py
@@ -281,16 +281,25 @@ async def get_interest_distribution(end_user_id: str, limit: int = 10, by_user:
             return []
 
         raw_tag_names = [tag for tag, freq in raw_tags_with_freq]
+        raw_freq_map = {tag: freq for tag, freq in raw_tags_with_freq}
 
-        # 使用兴趣活动专用prompt进行筛选
+        # Filter with the interest-specific prompt (the LLM may also infer new tags semantically)
         interest_tag_names = await filter_interests_with_llm(raw_tag_names, end_user_id, language=language)
 
-        # 保留原始频率，按兴趣筛选结果过滤
-        final_tags = [
-            (tag, freq)
-            for tag, freq in raw_tags_with_freq
-            if tag in interest_tag_names
-        ]
+        # Build the final tag list:
+        # - tags that exist in the raw list keep their original frequency
+        # - tags newly inferred by the LLM (absent from the raw list) get a default frequency of 1
+        final_tags = []
+        seen = set()
+        for tag in interest_tag_names:
+            if tag in seen:
+                continue
+            seen.add(tag)
+            freq = raw_freq_map.get(tag, 1)
+            final_tags.append((tag, freq))
+
+        # Sort by frequency, descending
+        final_tags.sort(key=lambda x: x[1], reverse=True)
 
         return final_tags[:limit]
     finally:
diff --git a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2 b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
index 1e3aac55..7957bf1c 100644
--- a/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/interest_filter.jinja2
@@ -1,47 +1,67 @@
 {% if language == "zh" %}
-You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in Chinese.
+You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent.
 
-**Keep Rules** (keep if any condition is met):
-- Tags representing sports or physical activities the user actively participates in (e.g., '攀岩', '篮球', '游泳', '跑步')
-- Tags representing cultural or entertainment hobbies (e.g., '读书', '看电影', '听音乐', '摄影')
-- Tags representing learning or creative activities (e.g., '编程', '绘画', '写作', '烹饪')
-- Tags representing specific interest domains or hobby categories (e.g., '历史', '天文', '园艺')
+**Step 1 - Infer the underlying interest from each tag**:
+Look at each tag and ask: "What hobby or interest does this tag suggest the user has?"
 
-**Filter Rules** (remove if any condition is met):
-- Pure object or tool names that do not represent an activity (e.g., '篮球鞋', '相机', '书桌')
-- Pure location or venue names (e.g., '篮球场', '图书馆', '健身房')
-- Abstract concepts or quality descriptions (e.g., '核心力量', '团队合作', '专注力')
-- Person names, brand names, or proper nouns (e.g., '乔丹', 'Nike')
+Examples of inference:
+- '攀岩', '室内攀岩馆', '攀岩者数据仪表盘', '路线解锁地图', '指力', '路线等级', '当日攀岩流畅度' → '攀岩'
+- '风光摄影元数据增强器', 'EXIF数据', '.CR2文件', '.NEF文件', '日出拍摄点', '曝光补偿', '光圈', '太阳高度角', '云量预测图层' → '摄影'
+- '晨间冥想坚持天数', '身心协同峰值' → '冥想'
+- '川味可视化', '川菜' → '烹饪'
+- '开源项目命名建议', 'climbviz', '可视化', '力量增长雷达图' → '编程' 或 '数据可视化'
+- '吉他', '指弹', '琴谱' → '吉他'
+- '跑步', '5公里', '跑鞋' → '跑步'
+- '瑜伽垫', '瑜伽课' → '瑜伽'
 
-**Merge Rules**: For semantically similar tags, keep only the most representative one.
-For example: keep '篮球' over '打篮球'; keep '读书' over '阅读'.
+**Step 2 - Consolidate and deduplicate**:
+- Merge tags that point to the same interest into one representative label
+- Use concise, standard hobby names (e.g., '攀岩', '摄影', '编程', '烹饪', '冥想', '吉他', '跑步')
+- If multiple tags all point to '攀岩', output '攀岩' only once
+
+**Step 3 - Filter out non-interest tags**:
+Remove tags that do NOT suggest any hobby or interest:
+- Generic system/assistant terms (e.g., '助手', '用户', 'AI')
+- Pure abstract metrics with no clear hobby link (e.g., '完成时间', '日期', '自我评分')
+- Location names with no clear hobby link (e.g., '青城山后山' alone — but if combined with photography context, infer '摄影')
+
+**Output format**: Return a list of concise interest activity names in Chinese.
 
 **Example**:
-Input: ['攀岩', '篮球场', '篮球鞋', '篮球', '《三体》', '历史', '核心力量', '烹饪', '菜刀']
-Output: ['攀岩', '篮球', '历史', '烹饪']
+Input: ['攀岩', '攀岩者数据仪表盘', '路线解锁地图', '指力', '风光摄影元数据增强器', 'EXIF数据', '晨间冥想坚持天数', '川味可视化', '可视化', '助手', '完成时间']
+Output: ['攀岩', '摄影', '冥想', '烹饪', '编程']
 
-Please filter the following tag list and return only the tags that represent user interest activities in Chinese: {{ tag_list }}
+Now process the following tag list and return the inferred interest activities in Chinese: {{ tag_list }}
 {% else %}
-You are a user interest analysis expert. Your task is to identify activity-based tags from a tag list that represent the user's hobbies and interests. Please output the results in English.
+You are a user interest analysis expert. Your task is to infer and extract the user's core hobby/interest activities from a tag list. The tags may be specific project names, tool names, or compound nouns — your job is to identify the underlying interest they represent.
 
-**Keep Rules** (keep if any condition is met):
-- Tags representing sports or physical activities the user actively participates in (e.g., 'rock climbing', 'basketball', 'swimming', 'running')
-- Tags representing cultural or entertainment hobbies (e.g., 'reading', 'watching movies', 'listening to music', 'photography')
-- Tags representing learning or creative activities (e.g., 'programming', 'painting', 'writing', 'cooking')
-- Tags representing specific interest domains or hobby categories (e.g., 'history', 'astronomy', 'gardening')
+**Step 1 - Infer the underlying interest from each tag**:
+Look at each tag and ask: "What hobby or interest does this tag suggest the user has?"
 
-**Filter Rules** (remove if any condition is met):
-- Pure object or tool names that do not represent an activity (e.g., 'basketball shoes', 'camera', 'desk')
-- Pure location or venue names (e.g., 'basketball court', 'library', 'gym')
-- Abstract concepts or quality descriptions (e.g., 'core strength', 'teamwork', 'focus')
-- Person names, brand names, or proper nouns (e.g., 'Jordan', 'Nike')
+Examples of inference:
+- 'rock climbing', 'indoor climbing gym', 'climber dashboard', 'route map', 'finger strength' → 'rock climbing'
+- 'landscape photography metadata enhancer', 'EXIF data', 'sunrise shooting spot', 'exposure compensation' → 'photography'
+- 'morning meditation streak', 'mind-body peak' → 'meditation'
+- 'Sichuan cuisine visualization', 'Sichuan food' → 'cooking'
+- 'open source project', 'data visualization tool', 'Python' → 'programming'
+- 'guitar', 'fingerpicking', 'sheet music' → 'guitar'
+- 'running', '5km', 'running shoes' → 'running'
 
-**Merge Rules**: For semantically similar tags, keep only the most representative one.
-For example: keep 'basketball' over 'playing basketball'; keep 'reading' over 'reading books'.
+**Step 2 - Consolidate and deduplicate**:
+- Merge tags that point to the same interest into one representative label
+- Use concise, standard hobby names (e.g., 'rock climbing', 'photography', 'programming', 'cooking', 'meditation')
+- If multiple tags all point to 'rock climbing', output 'rock climbing' only once
+
+**Step 3 - Filter out non-interest tags**:
+Remove tags that do NOT suggest any hobby or interest:
+- Generic system/assistant terms (e.g., 'assistant', 'user', 'AI')
+- Pure abstract metrics with no clear hobby link (e.g., 'completion time', 'date', 'self-rating')
+
+**Output format**: Return a list of concise interest activity names in English.
 
 **Example**:
-Input: ['rock climbing', 'basketball court', 'basketball shoes', 'basketball', 'The Three-Body Problem', 'history', 'core strength', 'cooking', 'kitchen knife']
-Output: ['rock climbing', 'basketball', 'history', 'cooking']
+Input: ['rock climbing', 'climber dashboard', 'route map', 'finger strength', 'landscape photography metadata enhancer', 'EXIF data', 'morning meditation streak', 'Sichuan cuisine visualization', 'visualization', 'assistant', 'completion time']
+Output: ['rock climbing', 'photography', 'meditation', 'cooking', 'programming']
 
-Please filter the following tag list and return only the tags that represent user interest activities in English: {{ tag_list }}
+Now process the following tag list and return the inferred interest activities in English: {{ tag_list }}
 {% endif %}
diff --git a/api/env.example b/api/env.example
index d67bbf7c..1dc4536c 100644
--- a/api/env.example
+++ b/api/env.example
@@ -139,7 +139,7 @@ SMTP_USER=
 SMTP_PASSWORD=
 
 # 本体类型融合配置 (记得写入env_example)
-GENERAL_ONTOLOGY_FILES=app/core/memory/ontology_services/General_purpose_entity.ttl  # 指定要加载的本体文件路径，多个文件用逗号分隔
+GENERAL_ONTOLOGY_FILES=api/app/core/memory/ontology_services/General_purpose_entity.ttl  # 指定要加载的本体文件路径，多个文件用逗号分隔
 ENABLE_GENERAL_ONTOLOGY_TYPES=true # 总开关，控制是否启用通用本体类型融合功能(false = 不使用任何本体类型指导)
 MAX_ONTOLOGY_TYPES_IN_PROMPT=100 # 限制传给 LLM 的类型数量，防止 Prompt 过长
 CORE_GENERAL_TYPES=Person,Organization,Place,Event,Work,Concept # 	定义核心类型列表，这些类型会优先包含在合并结果中

From c488eb0cd00e3fccc6f0ce090b6fc909597fd441 Mon Sep 17 00:00:00 2001
From: lanceyq <1982376970@qq.com>
Date: Wed, 4 Mar 2026 12:17:34 +0800
Subject: [PATCH 6/6] [changes] 1. Use structured logs; 2. Align the type
 and default value of "end_user_id" with the semantic meaning of
 "required".

---
 api/app/controllers/memory_agent_controller.py   | 2 +-
 api/app/core/memory/analytics/hot_memory_tags.py | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/api/app/controllers/memory_agent_controller.py b/api/app/controllers/memory_agent_controller.py
index 1f070eb6..ccf93d68 100644
--- a/api/app/controllers/memory_agent_controller.py
+++ b/api/app/controllers/memory_agent_controller.py
@@ -664,7 +664,7 @@ async def get_knowledge_type_stats_api(
 
 @router.get("/analytics/interest_distribution/by_user", response_model=ApiResponse)
 async def get_interest_distribution_by_user_api(
-    end_user_id: Optional[str] = Query(None, description="用户ID（必填）"),
+    end_user_id: str = Query(..., description="用户ID（必填）"),
     limit: int = Query(5, le=5, description="返回兴趣标签数量限制，最多5个"),
     language_type: str = Header(default=None, alias="X-Language-Type"),
     current_user: User = Depends(get_current_user),
diff --git a/api/app/core/memory/analytics/hot_memory_tags.py b/api/app/core/memory/analytics/hot_memory_tags.py
index 1d2d5259..6afcec6d 100644
--- a/api/app/core/memory/analytics/hot_memory_tags.py
+++ b/api/app/core/memory/analytics/hot_memory_tags.py
@@ -1,9 +1,12 @@
 import asyncio
 import json
+import logging
 import os
 from typing import List, Tuple
 
 from app.core.config import settings
+
+logger = logging.getLogger(__name__)
 from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
 from app.db import get_db_context
 from app.repositories.neo4j.neo4j_connector import Neo4jConnector
@@ -89,7 +92,7 @@ async def filter_tags_with_llm(tags: List[str], end_user_id: str) -> List[str]:
         return structured_response.meaningful_tags
 
     except Exception as e:
-        print(f"LLM筛选过程中发生错误: {e}")
+        logger.error(f"LLM筛选过程中发生错误: {e}", exc_info=True)
         # 在LLM失败时返回原始标签，确保流程继续
         return tags
 
@@ -153,7 +156,7 @@ async def filter_interests_with_llm(tags: List[str], end_user_id: str, language:
         return structured_response.interest_tags
 
     except Exception as e:
-        print(f"兴趣标签LLM筛选过程中发生错误: {e}")
+        logger.error(f"兴趣标签LLM筛选过程中发生错误: {e}", exc_info=True)
         return tags