Feature/behavior analysis (#53)

* init behavior analysis * init behavior analysis * feat(implicit-memory): add implicit memory analytics system
2026-01-07 18:14:25 +08:00
parent 28eccd6ce9
commit b3f8de3062
20 changed files with 3447 additions and 22 deletions
--- a/api/app/controllers/init.py
+++ b/api/app/controllers/init.py
@@ -4,41 +4,44 @@
 认证方式: JWT Token
 """
 from fastapi import APIRouter
 from . import (
-    model_controller,
+    api_key_controller,
    task_controller,
    test_controller,
    user_controller,
    auth_controller,
    workspace_controller,
    setup_controller,
    file_controller,
    document_controller,
    knowledge_controller,
    chunk_controller,
    knowledgeshare_controller,
    app_controller,
-    upload_controller,
+    auth_controller,
    chunk_controller,
    document_controller,
    emotion_config_controller,
    emotion_controller,
    file_controller,
    home_page_controller,
    implicit_memory_controller,
    knowledge_controller,
    knowledgeshare_controller,
    memory_agent_controller,
    memory_dashboard_controller,
-    memory_storage_controller,
+    memory_forget_controller,
    memory_dashboard_controller,
    memory_reflection_controller,
    memory_short_term_controller,
-    api_key_controller,
+    memory_storage_controller,
-    release_share_controller,
+    model_controller,
    public_share_controller,
    multi_agent_controller,
    workflow_controller,
    emotion_controller,
    emotion_config_controller,
    prompt_optimizer_controller,
    public_share_controller,
    release_share_controller,
    setup_controller,
    task_controller,
    test_controller,
    tool_controller,
    upload_controller,
    user_controller,
    user_memory_controllers,
    workflow_controller,
    workspace_controller,
    memory_forget_controller,
    home_page_controller,
    memory_perceptual_controller
 )
 from . import user_memory_controllers
 # 创建管理端 API 路由器
 manager_router = APIRouter()
@@ -77,6 +80,7 @@ manager_router.include_router(memory_short_term_controller.router)
 manager_router.include_router(tool_controller.router)
 manager_router.include_router(memory_forget_controller.router)
 manager_router.include_router(home_page_controller.router)
 manager_router.include_router(implicit_memory_controller.router)
 manager_router.include_router(memory_perceptual_controller.router)
 __all__ = ["manager_router"]
--- a/api/app/controllers/implicit_memory_controller.py
+++ b/api/app/controllers/implicit_memory_controller.py
@@ -0,0 +1,302 @@
 from datetime import datetime
 from typing import Optional
 from app.core.error_codes import BizCode
 from app.core.logging_config import get_api_logger
 from app.core.response_utils import fail, success
 from app.db import get_db
 from app.dependencies import (
    cur_workspace_access_guard,
    get_current_user,
 )
 from app.models.user_model import User
 from app.schemas.response_schema import ApiResponse
 from app.services.implicit_memory_service import ImplicitMemoryService
 from fastapi import APIRouter, Depends, Query
 from sqlalchemy.orm import Session
 api_logger = get_api_logger()
 router = APIRouter(
    prefix="/memory/implicit-memory",
    tags=["Implicit Memory"],
 )
 def handle_implicit_memory_error(e: Exception, operation: str, user_id: str = None) -> dict:
    """
    Centralized error handling for implicit memory operations.
    Args:
        e: The exception that occurred
        operation: Description of the operation that failed
        user_id: Optional user ID for logging context
    Returns:
        Standardized error response
    """
    error_context = f"user_id={user_id}" if user_id else "unknown user"
    if isinstance(e, ValueError):
        if "user" in str(e).lower() and "not found" in str(e).lower():
            api_logger.warning(f"Invalid user ID for {operation}: {error_context}")
            return fail(BizCode.INVALID_USER_ID, "无效的用户ID", str(e))
        elif "insufficient" in str(e).lower() or "no data" in str(e).lower():
            api_logger.warning(f"Insufficient data for {operation}: {error_context}")
            return fail(BizCode.INSUFFICIENT_DATA, "数据不足，无法进行分析", str(e))
        else:
            api_logger.warning(f"Invalid parameters for {operation}: {error_context}")
            return fail(BizCode.INVALID_FILTER_PARAMS, "无效的参数", str(e))
    elif isinstance(e, KeyError):
        api_logger.warning(f"Missing required data for {operation}: {error_context}")
        return fail(BizCode.INSUFFICIENT_DATA, "缺少必要的数据", str(e))
    elif isinstance(e, (ConnectionError, TimeoutError)):
        api_logger.error(f"Service unavailable for {operation}: {error_context}")
        return fail(BizCode.SERVICE_UNAVAILABLE, "服务暂时不可用", str(e))
    elif "analysis" in str(e).lower() or "llm" in str(e).lower():
        api_logger.error(f"Analysis failed for {operation}: {error_context}", exc_info=True)
        return fail(BizCode.ANALYSIS_FAILED, "分析处理失败", str(e))
    elif "storage" in str(e).lower() or "database" in str(e).lower():
        api_logger.error(f"Storage error for {operation}: {error_context}", exc_info=True)
        return fail(BizCode.PROFILE_STORAGE_ERROR, "数据存储失败", str(e))
    else:
        api_logger.error(f"Unexpected error for {operation}: {error_context}", exc_info=True)
        return fail(BizCode.INTERNAL_ERROR, f"{operation}失败", str(e))
 def validate_user_id(user_id: str) -> None:
    """
    Validate user ID format and constraints.
    Args:
        user_id: User ID to validate
    Raises:
        ValueError: If user ID is invalid
    """
    if not user_id or not user_id.strip():
        raise ValueError("User ID cannot be empty")
    if len(user_id.strip()) < 1:
        raise ValueError("User ID is too short")
 def validate_date_range(start_date: Optional[datetime], end_date: Optional[datetime]) -> None:
    """
    Validate date range parameters.
    Args:
        start_date: Start date
        end_date: End date
    Raises:
        ValueError: If date range is invalid
    """
    if (start_date and not end_date) or (end_date and not start_date):
        raise ValueError("Both start_date and end_date must be provided together")
    if start_date and end_date and start_date >= end_date:
        raise ValueError("start_date must be before end_date")
    if start_date and start_date > datetime.now():
        raise ValueError("start_date cannot be in the future")
 def validate_confidence_threshold(threshold: float) -> None:
    """
    Validate confidence threshold parameter.
    Args:
        threshold: Confidence threshold to validate
    Raises:
        ValueError: If threshold is invalid
    """
    if not 0.0 <= threshold <= 1.0:
        raise ValueError("confidence_threshold must be between 0.0 and 1.0")
@router.get("/preferences/{user_id}", response_model=ApiResponse)
@cur_workspace_access_guard()
 async def get_preference_tags(
    user_id: str,
    confidence_threshold: float = Query(0.5, ge=0.0, le=1.0, description="Minimum confidence threshold"),
    tag_category: Optional[str] = Query(None, description="Filter by tag category"),
    start_date: Optional[datetime] = Query(None, description="Filter start date"),
    end_date: Optional[datetime] = Query(None, description="Filter end date"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
 ) -> ApiResponse:
    """
    Get user preference tags with filtering options.
    Args:
        user_id: Target user ID
        confidence_threshold: Minimum confidence score (0.0-1.0)
        tag_category: Optional category filter
        start_date: Optional start date filter
        end_date: Optional end date filter
    Returns:
        List of preference tags matching the filters
    """
    api_logger.info(f"Preference tags requested for user: {user_id}")
    try:
        # Validate inputs
        validate_user_id(user_id)
        validate_confidence_threshold(confidence_threshold)
        validate_date_range(start_date, end_date)
        # Create service with user-specific config
        service = ImplicitMemoryService(db=db, end_user_id=user_id)
        # Build date range
        date_range = None
        if start_date and end_date:
            from app.schemas.implicit_memory_schema import DateRange
            date_range = DateRange(start_date=start_date, end_date=end_date)
        # Get preference tags
        tags = await service.get_preference_tags(
            user_id=user_id,
            confidence_threshold=confidence_threshold,
            tag_category=tag_category,
            date_range=date_range
        )
        api_logger.info(f"Retrieved {len(tags)} preference tags for user: {user_id}")
        return success(data=[tag.dict() for tag in tags], msg="偏好标签获取成功")
    except Exception as e:
        return handle_implicit_memory_error(e, "偏好标签获取", user_id)
@router.get("/portrait/{user_id}", response_model=ApiResponse)
@cur_workspace_access_guard()
 async def get_dimension_portrait(
    user_id: str,
    include_history: bool = Query(False, description="Include historical trends"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
 ) -> ApiResponse:
    """
    Get user's four-dimension personality portrait.
    Args:
        user_id: Target user ID
        include_history: Whether to include historical trend data
    Returns:
        Four-dimension personality portrait with scores and evidence
    """
    api_logger.info(f"Dimension portrait requested for user: {user_id}")
    try:
        # Validate inputs
        validate_user_id(user_id)
        # Create service with user-specific config
        service = ImplicitMemoryService(db=db, end_user_id=user_id)
        portrait = await service.get_dimension_portrait(
            user_id=user_id,
            include_history=include_history
        )
        api_logger.info(f"Dimension portrait retrieved for user: {user_id}")
        return success(data=portrait.dict(), msg="四维画像获取成功")
    except Exception as e:
        return handle_implicit_memory_error(e, "四维画像获取", user_id)
@router.get("/interest-areas/{user_id}", response_model=ApiResponse)
@cur_workspace_access_guard()
 async def get_interest_area_distribution(
    user_id: str,
    include_trends: bool = Query(False, description="Include trend analysis"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
 ) -> ApiResponse:
    """
    Get user's interest area distribution across four areas.
    Args:
        user_id: Target user ID
        include_trends: Whether to include trend analysis data
    Returns:
        Interest area distribution with percentages and evidence
    """
    api_logger.info(f"Interest area distribution requested for user: {user_id}")
    try:
        # Validate inputs
        validate_user_id(user_id)
        # Create service with user-specific config
        service = ImplicitMemoryService(db=db, end_user_id=user_id)
        distribution = await service.get_interest_area_distribution(
            user_id=user_id,
            include_trends=include_trends
        )
        api_logger.info(f"Interest area distribution retrieved for user: {user_id}")
        return success(data=distribution.dict(), msg="兴趣领域分布获取成功")
    except Exception as e:
        return handle_implicit_memory_error(e, "兴趣领域分布获取", user_id)
@router.get("/habits/{user_id}", response_model=ApiResponse)
@cur_workspace_access_guard()
 async def get_behavior_habits(
    user_id: str,
    confidence_level: Optional[str] = Query(None, regex="^(high|medium|low)$", description="Filter by confidence level"),
    frequency_pattern: Optional[str] = Query(None, regex="^(daily|weekly|monthly|seasonal|occasional|event_triggered)$", description="Filter by frequency pattern"),
    time_period: Optional[str] = Query(None, regex="^(current|past)$", description="Filter by time period"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
 ) -> ApiResponse:
    """
    Get user's behavioral habits with filtering options.
    Args:
        user_id: Target user ID
        confidence_level: Filter by confidence level (high, medium, low)
        frequency_pattern: Filter by frequency pattern (daily, weekly, monthly, seasonal, occasional, event_triggered)
        time_period: Filter by time period (current, past)
    Returns:
        List of behavioral habits matching the filters
    """
    api_logger.info(f"Behavior habits requested for user: {user_id}")
    try:
        # Validate inputs
        validate_user_id(user_id)
        # Create service with user-specific config
        service = ImplicitMemoryService(db=db, end_user_id=user_id)
        habits = await service.get_behavior_habits(
            user_id=user_id,
            confidence_level=confidence_level,
            frequency_pattern=frequency_pattern,
            time_period=time_period
        )
        api_logger.info(f"Retrieved {len(habits)} behavior habits for user: {user_id}")
        return success(data=[habit.dict() for habit in habits], msg="行为习惯获取成功")
    except Exception as e:
        return handle_implicit_memory_error(e, "行为习惯获取", user_id)
--- a/api/app/core/error_codes.py
+++ b/api/app/core/error_codes.py
@@ -82,6 +82,13 @@ class BizCode(IntEnum):
    MEMORY_WRITE_FAILED = 9501
    MEMORY_READ_FAILED = 9502
    MEMORY_CONFIG_NOT_FOUND = 9503
    # Implicit Memory API（96xx）
    INVALID_USER_ID = 9601
    INSUFFICIENT_DATA = 9602
    INVALID_FILTER_PARAMS = 9603
    ANALYSIS_FAILED = 9604
    PROFILE_STORAGE_ERROR = 9605
    # 系统（100xx）
    INTERNAL_ERROR = 10001
@@ -159,6 +166,13 @@ HTTP_MAPPING = {
    BizCode.MEMORY_READ_FAILED: 500,
    BizCode.MEMORY_CONFIG_NOT_FOUND: 400,
    # Implicit Memory API 错误码映射
    BizCode.INVALID_USER_ID: 400,
    BizCode.INSUFFICIENT_DATA: 400,
    BizCode.INVALID_FILTER_PARAMS: 400,
    BizCode.ANALYSIS_FAILED: 500,
    BizCode.PROFILE_STORAGE_ERROR: 500,
    BizCode.INTERNAL_ERROR: 500,
    BizCode.DB_ERROR: 500,
    BizCode.SERVICE_UNAVAILABLE: 503,
--- a/api/app/core/memory/analytics/implicit_memory/init.py
+++ b/api/app/core/memory/analytics/implicit_memory/init.py
@@ -0,0 +1,6 @@
 """Implicit Memory Module
 This module provides behavior analysis capabilities that build comprehensive user profiles
 by analyzing memory summary nodes from Neo4j. It creates detailed user portraits across
 multiple dimensions, tracks interest distributions, and identifies behavioral habits.
 """
--- a/api/app/core/memory/analytics/implicit_memory/analyzers/init.py
+++ b/api/app/core/memory/analytics/implicit_memory/analyzers/init.py
@@ -0,0 +1 @@
 """Analyzers package for implicit memory analysis components."""
--- a/api/app/core/memory/analytics/implicit_memory/analyzers/dimension_analyzer.py
+++ b/api/app/core/memory/analytics/implicit_memory/analyzers/dimension_analyzer.py
@@ -0,0 +1,264 @@
 """Dimension Analyzer for Implicit Memory System
 This module implements LLM-based personality dimension analysis from user memory summaries.
 It analyzes four key dimensions: creativity, aesthetic, technology, and literature,
 providing percentage scores with evidence and reasoning.
 """
 import logging
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
    ConfidenceLevel,
    DimensionPortrait,
    DimensionScore,
    UserMemorySummary,
 )
 from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 class DimensionData(BaseModel):
    """Individual dimension analysis data."""
    percentage: float = Field(ge=0.0, le=100.0)
    evidence: List[str] = Field(default_factory=list)
    reasoning: str = ""
    confidence_level: str = "medium"
 class DimensionAnalysisResponse(BaseModel):
    """Response model for dimension analysis."""
    dimensions: Dict[str, DimensionData] = Field(default_factory=dict)
 class DimensionAnalyzer:
    """Analyzes user memory summaries to extract personality dimensions."""
    # Define the four dimensions we analyze
    DIMENSIONS = ["creativity", "aesthetic", "technology", "literature"]
    def __init__(self, db: Session, llm_model_id: Optional[str] = None):
        """Initialize the dimension analyzer.
        Args:
            db: Database session
            llm_model_id: Optional LLM model ID to use for analysis
        """
        self.db = db
        self.llm_model_id = llm_model_id
        self._llm_client = ImplicitMemoryLLMClient(db, llm_model_id)
    async def analyze_dimensions(
        self,
        user_id: str,
        user_summaries: List[UserMemorySummary],
        existing_portrait: Optional[DimensionPortrait] = None
    ) -> DimensionPortrait:
        """Analyze user summaries to extract personality dimensions.
        Args:
            user_id: Target user ID
            user_summaries: List of user-specific memory summaries
            existing_portrait: Optional existing portrait for incremental updates
        Returns:
            Dimension portrait with four personality dimensions
        Raises:
            LLMClientException: If LLM analysis fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for user {user_id}")
            return self._create_empty_portrait(user_id)
        try:
            logger.info(f"Analyzing dimensions for user {user_id} with {len(user_summaries)} summaries")
            # Use the LLM client wrapper for analysis
            response = await self._llm_client.analyze_dimensions(
                user_summaries=user_summaries,
                user_id=user_id,
                model_id=self.llm_model_id
            )
            # Create dimension scores
            dimension_scores = {}
            current_time = datetime.now()
            for dimension_name in self.DIMENSIONS:
                # Handle response as dictionary
                dimensions_data = response.get("dimensions", {})
                dimension_data = dimensions_data.get(dimension_name)
                if dimension_data:
                    # Validate and create dimension score
                    score = self._create_dimension_score(
                        dimension_name=dimension_name,
                        dimension_data=dimension_data
                    )
                    dimension_scores[dimension_name] = score
                else:
                    # Create default score if missing
                    logger.warning(f"Missing dimension data for {dimension_name}, using default")
                    dimension_scores[dimension_name] = self._create_default_dimension_score(dimension_name)
            # Create dimension portrait
            portrait = DimensionPortrait(
                user_id=user_id,
                creativity=dimension_scores["creativity"],
                aesthetic=dimension_scores["aesthetic"],
                technology=dimension_scores["technology"],
                literature=dimension_scores["literature"],
                analysis_timestamp=current_time,
                total_summaries_analyzed=len(user_summaries),
                historical_trends=self._calculate_historical_trends(existing_portrait) if existing_portrait else None
            )
            logger.info(f"Created dimension portrait for user {user_id}")
            return portrait
        except LLMClientException:
            raise
        except Exception as e:
            logger.error(f"Dimension analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Dimension analysis failed: {e}") from e
    def _create_dimension_score(
        self,
        dimension_name: str,
        dimension_data: dict
    ) -> DimensionScore:
        """Create a dimension score from analysis data.
        Args:
            dimension_name: Name of the dimension
            dimension_data: Analysis data dictionary for the dimension
        Returns:
            DimensionScore object
        """
        # Validate percentage - handle dict access
        percentage = dimension_data.get("percentage", 0.0)
        percentage = max(0.0, min(100.0, float(percentage)))
        # Validate confidence level
        confidence_level_str = dimension_data.get("confidence_level", "low")
        confidence_level = self._validate_confidence_level(confidence_level_str)
        # Ensure evidence is not empty
        evidence = dimension_data.get("evidence", [])
        if not evidence:
            evidence = ["No specific evidence found"]
        # Ensure reasoning is not empty
        reasoning = dimension_data.get("reasoning", "")
        if not reasoning:
            reasoning = f"Analysis for {dimension_name} dimension"
        return DimensionScore(
            dimension_name=dimension_name,
            percentage=percentage,
            evidence=evidence,
            reasoning=reasoning,
            confidence_level=confidence_level
        )
    def _create_default_dimension_score(self, dimension_name: str) -> DimensionScore:
        """Create a default dimension score when analysis fails.
        Args:
            dimension_name: Name of the dimension
        Returns:
            Default DimensionScore object
        """
        return DimensionScore(
            dimension_name=dimension_name,
            percentage=0.0,
            evidence=["Insufficient data for analysis"],
            reasoning=f"No clear evidence found for {dimension_name} dimension",
            confidence_level=ConfidenceLevel.LOW
        )
    def _validate_confidence_level(self, confidence_str: str) -> ConfidenceLevel:
        """Validate and convert confidence level string.
        Args:
            confidence_str: Confidence level as string
        Returns:
            ConfidenceLevel enum value
        """
        if not confidence_str:
            return ConfidenceLevel.MEDIUM
        confidence_str = str(confidence_str).lower().strip()
        if confidence_str in ["high", "높음"]:
            return ConfidenceLevel.HIGH
        elif confidence_str in ["medium", "중간"]:
            return ConfidenceLevel.MEDIUM
        elif confidence_str in ["low", "낮음"]:
            return ConfidenceLevel.LOW
        else:
            logger.warning(f"Unknown confidence level: {confidence_str}, defaulting to medium")
            return ConfidenceLevel.MEDIUM
    def _create_empty_portrait(self, user_id: str) -> DimensionPortrait:
        """Create an empty dimension portrait when no data is available.
        Args:
            user_id: Target user ID
        Returns:
            Empty DimensionPortrait
        """
        current_time = datetime.now()
        return DimensionPortrait(
            user_id=user_id,
            creativity=self._create_default_dimension_score("creativity"),
            aesthetic=self._create_default_dimension_score("aesthetic"),
            technology=self._create_default_dimension_score("technology"),
            literature=self._create_default_dimension_score("literature"),
            analysis_timestamp=current_time,
            total_summaries_analyzed=0,
            historical_trends=None
        )
    def _calculate_historical_trends(
        self,
        existing_portrait: DimensionPortrait
    ) -> List[Dict[str, Any]]:
        """Calculate historical trends from existing portrait.
        Args:
            existing_portrait: Previous dimension portrait
        Returns:
            List of historical trend data
        """
        if not existing_portrait:
            return []
        # Create trend entry from existing portrait
        trend_entry = {
            "timestamp": existing_portrait.analysis_timestamp.isoformat(),
            "creativity": existing_portrait.creativity.percentage,
            "aesthetic": existing_portrait.aesthetic.percentage,
            "technology": existing_portrait.technology.percentage,
            "literature": existing_portrait.literature.percentage,
            "total_summaries": existing_portrait.total_summaries_analyzed
        }
        # Combine with existing trends
        existing_trends = existing_portrait.historical_trends or []
        # Keep only recent trends (last 10 entries)
        all_trends = existing_trends + [trend_entry]
        return all_trends[-10:]
--- a/api/app/core/memory/analytics/implicit_memory/analyzers/habit_analyzer.py
+++ b/api/app/core/memory/analytics/implicit_memory/analyzers/habit_analyzer.py
@@ -0,0 +1,452 @@
 """Habit Analyzer for Implicit Memory System
 This module implements LLM-based behavioral habit analysis from user memory summaries.
 It identifies recurring behavioral patterns, temporal patterns, and consolidates
 similar habits with confidence scoring.
 """
 import logging
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional
 from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
    BehaviorHabit,
    ConfidenceLevel,
    FrequencyPattern,
    UserMemorySummary,
 )
 from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 class HabitData(BaseModel):
    """Individual habit analysis data."""
    habit_description: str
    frequency_pattern: str
    time_context: str
    confidence_level: str
    supporting_summaries: List[str] = Field(default_factory=list)
    specific_examples: List[str] = Field(default_factory=list)
    is_current: bool = True
 class HabitAnalysisResponse(BaseModel):
    """Response model for habit analysis."""
    habits: List[HabitData] = Field(default_factory=list)
 class HabitAnalyzer:
    """Analyzes user memory summaries to extract behavioral habits."""
    def __init__(self, db: Session, llm_model_id: Optional[str] = None):
        """Initialize the habit analyzer.
        Args:
            db: Database session
            llm_model_id: Optional LLM model ID to use for analysis
        """
        self.db = db
        self.llm_model_id = llm_model_id
        self._llm_client = ImplicitMemoryLLMClient(db, llm_model_id)
    async def analyze_habits(
        self,
        user_id: str,
        user_summaries: List[UserMemorySummary],
        existing_habits: Optional[List[BehaviorHabit]] = None
    ) -> List[BehaviorHabit]:
        """Analyze user summaries to extract behavioral habits.
        Args:
            user_id: Target user ID
            user_summaries: List of user-specific memory summaries
            existing_habits: Optional existing habits for consolidation
        Returns:
            List of extracted behavioral habits
        Raises:
            LLMClientException: If LLM analysis fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for user {user_id}")
            return existing_habits or []
        try:
            logger.info(f"Analyzing habits for user {user_id} with {len(user_summaries)} summaries")
            # Use the LLM client wrapper for analysis
            response = await self._llm_client.analyze_habits(
                user_summaries=user_summaries,
                user_id=user_id,
                model_id=self.llm_model_id
            )
            # Convert to BehaviorHabit objects
            behavior_habits = []
            current_time = datetime.now()
            for habit_data in response.get("habits", []):
                try:
                    # Handle habit_data as dictionary
                    supporting_summaries = habit_data.get("supporting_summaries", [])
                    specific_examples = habit_data.get("specific_examples", [])
                    # Determine observation dates from summaries
                    first_observed, last_observed = self._determine_observation_dates(
                        user_summaries, supporting_summaries
                    )
                    behavior_habit = BehaviorHabit(
                        habit_description=habit_data.get("habit_description", ""),
                        frequency_pattern=self._validate_frequency_pattern(habit_data.get("frequency_pattern", "occasional")),
                        time_context=habit_data.get("time_context", ""),
                        confidence_level=self._validate_confidence_level(habit_data.get("confidence_level", "medium")),
                        supporting_summaries=supporting_summaries,
                        specific_examples=specific_examples,
                        first_observed=first_observed,
                        last_observed=last_observed,
                        is_current=habit_data.get("is_current", True)
                    )
                    # Validate habit
                    if self._is_valid_habit(behavior_habit):
                        behavior_habits.append(behavior_habit)
                    else:
                        logger.warning(f"Invalid habit skipped: {behavior_habit.habit_description}")
                except Exception as e:
                    logger.error(f"Error creating behavior habit: {e}")
                    continue
            # Consolidate with existing habits if provided
            if existing_habits:
                behavior_habits = self._consolidate_habits(
                    new_habits=behavior_habits,
                    existing_habits=existing_habits
                )
            # Sort habits by confidence and recency
            behavior_habits = self._sort_habits_by_priority(behavior_habits)
            logger.info(f"Extracted {len(behavior_habits)} habits for user {user_id}")
            return behavior_habits
        except LLMClientException:
            raise
        except Exception as e:
            logger.error(f"Habit analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Habit analysis failed: {e}") from e
    def _validate_frequency_pattern(self, frequency_str: str) -> FrequencyPattern:
        """Validate and convert frequency pattern string.
        Args:
            frequency_str: Frequency pattern as string
        Returns:
            FrequencyPattern enum value
        """
        frequency_str = frequency_str.lower().strip()
        frequency_mapping = {
            "daily": FrequencyPattern.DAILY,
            "weekly": FrequencyPattern.WEEKLY,
            "monthly": FrequencyPattern.MONTHLY,
            "seasonal": FrequencyPattern.SEASONAL,
            "occasional": FrequencyPattern.OCCASIONAL,
            "event_triggered": FrequencyPattern.EVENT_TRIGGERED,
            "event-triggered": FrequencyPattern.EVENT_TRIGGERED,
        }
        return frequency_mapping.get(frequency_str, FrequencyPattern.OCCASIONAL)
    def _validate_confidence_level(self, confidence_str: str) -> ConfidenceLevel:
        """Validate and convert confidence level string.
        Args:
            confidence_str: Confidence level as string
        Returns:
            ConfidenceLevel enum value
        """
        confidence_str = confidence_str.lower().strip()
        if confidence_str in ["high", "높음"]:
            return ConfidenceLevel.HIGH
        elif confidence_str in ["medium", "중간"]:
            return ConfidenceLevel.MEDIUM
        elif confidence_str in ["low", "낮음"]:
            return ConfidenceLevel.LOW
        else:
            logger.warning(f"Unknown confidence level: {confidence_str}, defaulting to medium")
            return ConfidenceLevel.MEDIUM
    def _determine_observation_dates(
        self,
        user_summaries: List[UserMemorySummary],
        supporting_summary_ids: List[str]
    ) -> tuple[datetime, datetime]:
        """Determine first and last observation dates for a habit.
        Args:
            user_summaries: List of user summaries
            supporting_summary_ids: IDs of summaries supporting the habit
        Returns:
            Tuple of (first_observed, last_observed) dates
        """
        from datetime import timezone
        # Find summaries that support this habit
        supporting_summaries = [
            summary for summary in user_summaries
            if summary.summary_id in supporting_summary_ids
        ]
        if not supporting_summaries:
            # Use all summaries if no specific supporting summaries found
            supporting_summaries = user_summaries
        if not supporting_summaries:
            current_time = datetime.now(timezone.utc).replace(tzinfo=None)
            return current_time, current_time
        # Get date range from supporting summaries - normalize to naive datetimes
        timestamps = []
        for summary in supporting_summaries:
            ts = summary.timestamp
            # Convert to naive datetime if it's timezone-aware
            if ts.tzinfo is not None:
                ts = ts.replace(tzinfo=None)
            timestamps.append(ts)
        first_observed = min(timestamps)
        last_observed = max(timestamps)
        return first_observed, last_observed
    def _is_valid_habit(self, habit: BehaviorHabit) -> bool:
        """Validate a behavioral habit.
        Args:
            habit: Behavioral habit to validate
        Returns:
            True if valid, False otherwise
        """
        try:
            # Check required fields
            if not habit.habit_description or not habit.habit_description.strip():
                return False
            # Check time context
            if not habit.time_context or not habit.time_context.strip():
                return False
            # Check supporting summaries
            if not habit.supporting_summaries or len(habit.supporting_summaries) == 0:
                return False
            # Check specific examples
            if not habit.specific_examples or len(habit.specific_examples) == 0:
                return False
            # Check observation dates
            if habit.first_observed > habit.last_observed:
                return False
            return True
        except Exception as e:
            logger.error(f"Error validating habit: {e}")
            return False
    def _consolidate_habits(
        self,
        new_habits: List[BehaviorHabit],
        existing_habits: List[BehaviorHabit],
        similarity_threshold: float = 0.7
    ) -> List[BehaviorHabit]:
        """Consolidate new habits with existing ones.
        Args:
            new_habits: Newly extracted habits
            existing_habits: Existing habits
            similarity_threshold: Threshold for considering habits similar
        Returns:
            Consolidated list of habits
        """
        consolidated = existing_habits.copy()
        current_time = datetime.now()
        for new_habit in new_habits:
            # Find similar existing habit
            similar_habit = self._find_similar_habit(
                new_habit, existing_habits, similarity_threshold
            )
            if similar_habit:
                # Update existing habit
                updated_habit = self._merge_habits(similar_habit, new_habit, current_time)
                # Replace in consolidated list
                for i, habit in enumerate(consolidated):
                    if habit.habit_description == similar_habit.habit_description:
                        consolidated[i] = updated_habit
                        break
            else:
                # Add as new habit
                consolidated.append(new_habit)
        return consolidated
    def _find_similar_habit(
        self,
        target_habit: BehaviorHabit,
        existing_habits: List[BehaviorHabit],
        threshold: float
    ) -> Optional[BehaviorHabit]:
        """Find similar habit in existing list.
        Args:
            target_habit: Habit to find similarity for
            existing_habits: List of existing habits
            threshold: Similarity threshold
        Returns:
            Similar habit if found, None otherwise
        """
        target_desc = target_habit.habit_description.lower().strip()
        for existing_habit in existing_habits:
            existing_desc = existing_habit.habit_description.lower().strip()
            # Check description similarity
            desc_similarity = self._calculate_text_similarity(target_desc, existing_desc)
            # Check frequency pattern match
            frequency_match = (target_habit.frequency_pattern == existing_habit.frequency_pattern)
            # Check time context similarity
            time_similarity = self._calculate_text_similarity(
                target_habit.time_context.lower(),
                existing_habit.time_context.lower()
            )
            # Combined similarity score
            combined_similarity = (desc_similarity * 0.6 + time_similarity * 0.4)
            if frequency_match:
                combined_similarity += 0.1  # Bonus for frequency match
            if combined_similarity >= threshold:
                return existing_habit
        return None
    def _calculate_text_similarity(self, text1: str, text2: str) -> float:
        """Calculate simple text similarity based on common words.
        Args:
            text1: First text
            text2: Second text
        Returns:
            Similarity score between 0.0 and 1.0
        """
        if not text1 or not text2:
            return 0.0
        # Simple word-based similarity
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())
        if not words1 or not words2:
            return 0.0
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 0.0
    def _merge_habits(
        self,
        existing_habit: BehaviorHabit,
        new_habit: BehaviorHabit,
        current_time: datetime
    ) -> BehaviorHabit:
        """Merge two similar habits.
        Args:
            existing_habit: Existing habit
            new_habit: New habit to merge
            current_time: Current timestamp
        Returns:
            Merged behavioral habit
        """
        # Combine supporting summaries
        combined_summaries = list(set(
            existing_habit.supporting_summaries + new_habit.supporting_summaries
        ))
        # Combine specific examples
        combined_examples = list(set(
            existing_habit.specific_examples + new_habit.specific_examples
        ))
        # Update confidence level (take higher confidence)
        confidence_levels = [existing_habit.confidence_level, new_habit.confidence_level]
        new_confidence = max(confidence_levels, key=lambda x: ["low", "medium", "high"].index(x.value))
        # Update observation dates
        first_observed = min(existing_habit.first_observed, new_habit.first_observed)
        last_observed = max(existing_habit.last_observed, new_habit.last_observed)
        # Determine if habit is current (observed within last 30 days)
        is_current = (current_time - last_observed).days <= 30
        # Combine time context
        combined_time_context = existing_habit.time_context
        if new_habit.time_context and new_habit.time_context not in combined_time_context:
            combined_time_context += f"; {new_habit.time_context}"
        return BehaviorHabit(
            habit_description=existing_habit.habit_description,  # Keep original description
            frequency_pattern=existing_habit.frequency_pattern,  # Keep original frequency
            time_context=combined_time_context,
            confidence_level=new_confidence,
            supporting_summaries=combined_summaries,
            specific_examples=combined_examples,
            first_observed=first_observed,
            last_observed=last_observed,
            is_current=is_current
        )
    def _sort_habits_by_priority(self, habits: List[BehaviorHabit]) -> List[BehaviorHabit]:
        """Sort habits by confidence level and recency.
        Args:
            habits: List of habits to sort
        Returns:
            Sorted list of habits
        """
        def priority_score(habit: BehaviorHabit) -> tuple:
            # Confidence level score (high=3, medium=2, low=1)
            confidence_score = {"high": 3, "medium": 2, "low": 1}.get(habit.confidence_level.value, 1)
            # Recency score (more recent = higher score)
            days_since_last = (datetime.now() - habit.last_observed).days
            recency_score = max(0, 365 - days_since_last)  # Max 365 days
            # Current habit bonus
            current_bonus = 100 if habit.is_current else 0
            return (confidence_score, recency_score + current_bonus, habit.last_observed)
        return sorted(habits, key=priority_score, reverse=True)
--- a/api/app/core/memory/analytics/implicit_memory/analyzers/interest_analyzer.py
+++ b/api/app/core/memory/analytics/implicit_memory/analyzers/interest_analyzer.py
@@ -0,0 +1,277 @@
 """Interest Analyzer for Implicit Memory System
 This module implements LLM-based interest area analysis from user memory summaries.
 It categorizes user interests into four areas: tech, lifestyle, music, and art,
 providing percentage distribution that totals 100%.
 """
 import logging
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
    InterestAreaDistribution,
    InterestCategory,
    UserMemorySummary,
 )
 from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 class InterestData(BaseModel):
    """Individual interest category analysis data."""
    percentage: float = Field(ge=0.0, le=100.0)
    evidence: List[str] = Field(default_factory=list)
    trending_direction: Optional[str] = None
 class InterestAnalysisResponse(BaseModel):
    """Response model for interest analysis."""
    interest_distribution: Dict[str, InterestData] = Field(default_factory=dict)
 class InterestAnalyzer:
    """Analyzes user memory summaries to extract interest area distribution."""
    # Define the four interest categories we analyze
    INTEREST_CATEGORIES = ["tech", "lifestyle", "music", "art"]
    def __init__(self, db: Session, llm_model_id: Optional[str] = None):
        """Initialize the interest analyzer.
        Args:
            db: Database session
            llm_model_id: Optional LLM model ID to use for analysis
        """
        self.db = db
        self.llm_model_id = llm_model_id
        self._llm_client = ImplicitMemoryLLMClient(db, llm_model_id)
    async def analyze_interests(
        self,
        user_id: str,
        user_summaries: List[UserMemorySummary],
        existing_distribution: Optional[InterestAreaDistribution] = None
    ) -> InterestAreaDistribution:
        """Analyze user summaries to extract interest area distribution.
        Args:
            user_id: Target user ID
            user_summaries: List of user-specific memory summaries
            existing_distribution: Optional existing distribution for trend tracking
        Returns:
            Interest area distribution across four categories
        Raises:
            LLMClientException: If LLM analysis fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for user {user_id}")
            return self._create_empty_distribution(user_id)
        try:
            logger.info(f"Analyzing interests for user {user_id} with {len(user_summaries)} summaries")
            # Use the LLM client wrapper for analysis
            response = await self._llm_client.analyze_interests(
                user_summaries=user_summaries,
                user_id=user_id,
                model_id=self.llm_model_id
            )
            # Create interest categories
            interest_categories = {}
            current_time = datetime.now()
            # Extract interest_distribution from response dict
            interest_distribution = response.get("interest_distribution", {})
            # Extract and validate interest data
            raw_interests = {}
            for category_name in self.INTEREST_CATEGORIES:
                interest_data_dict = interest_distribution.get(category_name)
                if interest_data_dict:
                    raw_interests[category_name] = InterestData(
                        percentage=interest_data_dict.get("percentage", 0.0),
                        evidence=interest_data_dict.get("evidence", []),
                        trending_direction=interest_data_dict.get("trending_direction")
                    )
                else:
                    # Create default if missing
                    logger.warning(f"Missing interest data for {category_name}, using default")
                    raw_interests[category_name] = InterestData(
                        percentage=0.0,
                        evidence=["No specific evidence found"],
                        trending_direction=None
                    )
            # Normalize percentages to ensure they sum to 100%
            normalized_interests = self._normalize_percentages(raw_interests)
            # Create interest category objects
            for category_name in self.INTEREST_CATEGORIES:
                interest_data = normalized_interests[category_name]
                # Calculate trending direction if we have existing data
                trending_direction = self._calculate_trending_direction(
                    category_name=category_name,
                    current_percentage=interest_data.percentage,
                    existing_distribution=existing_distribution
                ) if existing_distribution else interest_data.trending_direction
                interest_categories[category_name] = InterestCategory(
                    category_name=category_name,
                    percentage=interest_data.percentage,
                    evidence=interest_data.evidence if interest_data.evidence else ["No specific evidence found"],
                    trending_direction=trending_direction
                )
            # Create interest area distribution
            distribution = InterestAreaDistribution(
                user_id=user_id,
                tech=interest_categories["tech"],
                lifestyle=interest_categories["lifestyle"],
                music=interest_categories["music"],
                art=interest_categories["art"],
                analysis_timestamp=current_time,
                total_summaries_analyzed=len(user_summaries)
            )
            # Validate that percentages sum to 100%
            total_percentage = distribution.total_percentage
            if not (99.9 <= total_percentage <= 100.1):
                logger.warning(f"Interest percentages sum to {total_percentage}, expected ~100%")
            logger.info(f"Created interest distribution for user {user_id}")
            return distribution
        except LLMClientException:
            raise
        except Exception as e:
            logger.error(f"Interest analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Interest analysis failed: {e}") from e
    def _normalize_percentages(self, raw_interests: Dict[str, InterestData]) -> Dict[str, InterestData]:
        """Normalize percentages to ensure they sum to 100%.
        Args:
            raw_interests: Raw interest data with potentially unnormalized percentages
        Returns:
            Normalized interest data
        """
        # Calculate current total
        total = sum(interest.percentage for interest in raw_interests.values())
        if total == 0:
            # If all percentages are 0, distribute equally
            equal_percentage = 100.0 / len(self.INTEREST_CATEGORIES)
            normalized = {}
            for category_name, interest_data in raw_interests.items():
                normalized[category_name] = InterestData(
                    percentage=equal_percentage,
                    evidence=interest_data.evidence,
                    trending_direction=interest_data.trending_direction
                )
            return normalized
        # Normalize to sum to 100%
        normalization_factor = 100.0 / total
        normalized = {}
        for category_name, interest_data in raw_interests.items():
            normalized_percentage = interest_data.percentage * normalization_factor
            normalized[category_name] = InterestData(
                percentage=round(normalized_percentage, 1),
                evidence=interest_data.evidence,
                trending_direction=interest_data.trending_direction
            )
        # Handle rounding errors by adjusting the largest category
        current_total = sum(interest.percentage for interest in normalized.values())
        if abs(current_total - 100.0) > 0.1:
            # Find category with largest percentage and adjust
            largest_category = max(normalized.keys(), key=lambda k: normalized[k].percentage)
            adjustment = 100.0 - current_total
            adjusted_percentage = normalized[largest_category].percentage + adjustment
            normalized[largest_category] = InterestData(
                percentage=round(max(0.0, adjusted_percentage), 1),
                evidence=normalized[largest_category].evidence,
                trending_direction=normalized[largest_category].trending_direction
            )
        return normalized
    def _calculate_trending_direction(
        self,
        category_name: str,
        current_percentage: float,
        existing_distribution: InterestAreaDistribution,
        threshold: float = 5.0
    ) -> Optional[str]:
        """Calculate trending direction for an interest category.
        Args:
            category_name: Name of the interest category
            current_percentage: Current percentage for the category
            existing_distribution: Previous distribution for comparison
            threshold: Minimum percentage change to consider a trend
        Returns:
            Trending direction: "increasing", "decreasing", "stable", or None
        """
        try:
            # Get previous percentage
            previous_category = getattr(existing_distribution, category_name, None)
            if not previous_category:
                return None
            previous_percentage = previous_category.percentage
            change = current_percentage - previous_percentage
            if abs(change) < threshold:
                return "stable"
            elif change > 0:
                return "increasing"
            else:
                return "decreasing"
        except Exception as e:
            logger.error(f"Error calculating trending direction for {category_name}: {e}")
            return None
    def _create_empty_distribution(self, user_id: str) -> InterestAreaDistribution:
        """Create an empty interest distribution when no data is available.
        Args:
            user_id: Target user ID
        Returns:
            Empty InterestAreaDistribution with equal percentages
        """
        current_time = datetime.now()
        equal_percentage = 25.0  # 100% / 4 categories
        default_category = lambda name: InterestCategory(
            category_name=name,
            percentage=equal_percentage,
            evidence=["Insufficient data for analysis"],
            trending_direction=None
        )
        return InterestAreaDistribution(
            user_id=user_id,
            tech=default_category("tech"),
            lifestyle=default_category("lifestyle"),
            music=default_category("music"),
            art=default_category("art"),
            analysis_timestamp=current_time,
            total_summaries_analyzed=0
        )
--- a/api/app/core/memory/analytics/implicit_memory/analyzers/preference_analyzer.py
+++ b/api/app/core/memory/analytics/implicit_memory/analyzers/preference_analyzer.py
@@ -0,0 +1,302 @@
 """Preference Analyzer for Implicit Memory System
 This module implements LLM-based preference extraction from user memory summaries.
 It identifies implicit preferences, consolidates similar preferences, and calculates
 confidence scores based on evidence strength.
 """
 import logging
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
    PreferenceTag,
    UserMemorySummary,
 )
 from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 class PreferenceAnalysisResponse(BaseModel):
    """Response model for preference analysis."""
    preferences: List[Dict[str, Any]] = Field(default_factory=list)
 class PreferenceAnalyzer:
    """Analyzes user memory summaries to extract implicit preferences."""
    def __init__(self, db: Session, llm_model_id: Optional[str] = None):
        """Initialize the preference analyzer.
        Args:
            db: Database session
            llm_model_id: Optional LLM model ID to use for analysis
        """
        self.db = db
        self.llm_model_id = llm_model_id
        self._llm_client = ImplicitMemoryLLMClient(db, llm_model_id)
    async def analyze_preferences(
        self,
        user_id: str,
        user_summaries: List[UserMemorySummary],
        existing_preferences: Optional[List[PreferenceTag]] = None
    ) -> List[PreferenceTag]:
        """Analyze user summaries to extract preferences.
        Args:
            user_id: Target user ID
            user_summaries: List of user-specific memory summaries
            existing_preferences: Optional existing preferences for consolidation
        Returns:
            List of extracted preference tags
        Raises:
            LLMClientException: If LLM analysis fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for user {user_id}")
            return []
        try:
            logger.info(f"Analyzing preferences for user {user_id} with {len(user_summaries)} summaries")
            # Use the LLM client wrapper for analysis
            response = await self._llm_client.analyze_preferences(
                user_summaries=user_summaries,
                user_id=user_id,
                model_id=self.llm_model_id
            )
            # Convert to PreferenceTag objects
            preference_tags = []
            current_time = datetime.now()
            for pref_data in response.get("preferences", []):
                try:
                    # Extract conversation references from summaries
                    conversation_refs = [s.summary_id for s in user_summaries]
                    preference_tag = PreferenceTag(
                        tag_name=pref_data.get("tag_name", ""),
                        confidence_score=float(pref_data.get("confidence_score", 0.0)),
                        supporting_evidence=pref_data.get("supporting_evidence", []),
                        context_details=pref_data.get("context_details", ""),
                        category=pref_data.get("category"),
                        conversation_references=conversation_refs,
                        created_at=current_time,
                        updated_at=current_time
                    )
                    # Validate preference tag
                    if self._is_valid_preference(preference_tag):
                        preference_tags.append(preference_tag)
                    else:
                        logger.warning(f"Invalid preference tag skipped: {preference_tag.tag_name}")
                except Exception as e:
                    logger.error(f"Error creating preference tag: {e}")
                    continue
            # Consolidate with existing preferences if provided
            if existing_preferences:
                preference_tags = self._consolidate_preferences(
                    new_preferences=preference_tags,
                    existing_preferences=existing_preferences
                )
            logger.info(f"Extracted {len(preference_tags)} preferences for user {user_id}")
            return preference_tags
        except LLMClientException:
            raise
        except Exception as e:
            logger.error(f"Preference analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Preference analysis failed: {e}") from e
    def _is_valid_preference(self, preference: PreferenceTag) -> bool:
        """Validate a preference tag.
        Args:
            preference: Preference tag to validate
        Returns:
            True if valid, False otherwise
        """
        try:
            # Check required fields
            if not preference.tag_name or not preference.tag_name.strip():
                return False
            # Check confidence score range
            if not (0.0 <= preference.confidence_score <= 1.0):
                return False
            # Check supporting evidence
            if not preference.supporting_evidence or len(preference.supporting_evidence) == 0:
                return False
            # Check context details
            if not preference.context_details or not preference.context_details.strip():
                return False
            return True
        except Exception as e:
            logger.error(f"Error validating preference: {e}")
            return False
    def _consolidate_preferences(
        self,
        new_preferences: List[PreferenceTag],
        existing_preferences: List[PreferenceTag],
        similarity_threshold: float = 0.8
    ) -> List[PreferenceTag]:
        """Consolidate new preferences with existing ones.
        Args:
            new_preferences: Newly extracted preferences
            existing_preferences: Existing preferences
            similarity_threshold: Threshold for considering preferences similar
        Returns:
            Consolidated list of preferences
        """
        consolidated = existing_preferences.copy()
        current_time = datetime.now()
        for new_pref in new_preferences:
            # Find similar existing preference
            similar_pref = self._find_similar_preference(
                new_pref, existing_preferences, similarity_threshold
            )
            if similar_pref:
                # Update existing preference
                updated_pref = self._merge_preferences(similar_pref, new_pref, current_time)
                # Replace in consolidated list
                for i, pref in enumerate(consolidated):
                    if pref.tag_name == similar_pref.tag_name:
                        consolidated[i] = updated_pref
                        break
            else:
                # Add as new preference
                consolidated.append(new_pref)
        return consolidated
    def _find_similar_preference(
        self,
        target_preference: PreferenceTag,
        existing_preferences: List[PreferenceTag],
        threshold: float
    ) -> Optional[PreferenceTag]:
        """Find similar preference in existing list.
        Args:
            target_preference: Preference to find similarity for
            existing_preferences: List of existing preferences
            threshold: Similarity threshold
        Returns:
            Similar preference if found, None otherwise
        """
        target_name = target_preference.tag_name.lower().strip()
        for existing_pref in existing_preferences:
            existing_name = existing_pref.tag_name.lower().strip()
            # Simple similarity check based on common words
            similarity = self._calculate_text_similarity(target_name, existing_name)
            if similarity >= threshold:
                return existing_pref
        return None
    def _calculate_text_similarity(self, text1: str, text2: str) -> float:
        """Calculate simple text similarity based on common words.
        Args:
            text1: First text
            text2: Second text
        Returns:
            Similarity score between 0.0 and 1.0
        """
        if not text1 or not text2:
            return 0.0
        # Simple word-based similarity
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())
        if not words1 or not words2:
            return 0.0
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 0.0
    def _merge_preferences(
        self,
        existing_pref: PreferenceTag,
        new_pref: PreferenceTag,
        current_time: datetime
    ) -> PreferenceTag:
        """Merge two similar preferences.
        Args:
            existing_pref: Existing preference
            new_pref: New preference to merge
            current_time: Current timestamp
        Returns:
            Merged preference tag
        """
        # Combine supporting evidence
        combined_evidence = list(set(
            existing_pref.supporting_evidence + new_pref.supporting_evidence
        ))
        # Combine conversation references
        combined_refs = list(set(
            existing_pref.conversation_references + new_pref.conversation_references
        ))
        # Calculate new confidence score (weighted average)
        evidence_weight = len(new_pref.supporting_evidence)
        total_weight = len(existing_pref.supporting_evidence) + evidence_weight
        if total_weight > 0:
            new_confidence = (
                (existing_pref.confidence_score * len(existing_pref.supporting_evidence) +
                 new_pref.confidence_score * evidence_weight) / total_weight
            )
        else:
            new_confidence = max(existing_pref.confidence_score, new_pref.confidence_score)
        # Ensure confidence doesn't exceed 1.0
        new_confidence = min(new_confidence, 1.0)
        # Combine context details
        combined_context = existing_pref.context_details
        if new_pref.context_details and new_pref.context_details not in combined_context:
            combined_context += f"; {new_pref.context_details}"
        return PreferenceTag(
            tag_name=existing_pref.tag_name,  # Keep original name
            confidence_score=new_confidence,
            supporting_evidence=combined_evidence,
            context_details=combined_context,
            category=existing_pref.category or new_pref.category,
            conversation_references=combined_refs,
            created_at=existing_pref.created_at,
            updated_at=current_time
        )
--- a/api/app/core/memory/analytics/implicit_memory/data_source.py
+++ b/api/app/core/memory/analytics/implicit_memory/data_source.py
@@ -0,0 +1,97 @@
 """
 Memory Data Source
 Handles retrieval and processing of memory data from Neo4j using direct Cypher queries.
 """
 import logging
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 from app.repositories.neo4j.memory_summary_repository import MemorySummaryRepository
 from app.repositories.neo4j.neo4j_connector import Neo4jConnector
 from app.schemas.implicit_memory_schema import TimeRange, UserMemorySummary
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 class MemoryDataSource:
    """Retrieves processed memory data from Neo4j using direct Cypher queries."""
    def __init__(
        self,
        db: Session,
        neo4j_connector: Optional[Neo4jConnector] = None
    ):
        self.db = db
        self.neo4j_connector = neo4j_connector or Neo4jConnector()
        self.memory_summary_repo = MemorySummaryRepository(self.neo4j_connector)
    def _parse_timestamp(self, timestamp: Any) -> datetime:
        """Parse timestamp from various formats."""
        if isinstance(timestamp, str):
            return datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        elif timestamp is None:
            return datetime.now()
        return timestamp
    def _dict_to_user_summary(self, summary_dict: Dict, user_id: str) -> Optional[UserMemorySummary]:
        """Convert a Neo4j dict directly to UserMemorySummary."""
        try:
            content = summary_dict.get("content", summary_dict.get("summary", ""))
            if not content or not content.strip():
                return None
            return UserMemorySummary(
                summary_id=summary_dict.get("id", summary_dict.get("uuid", "")),
                user_id=user_id,
                user_content=content,
                timestamp=self._parse_timestamp(summary_dict.get("created_at")),
                confidence_score=1.0,
                summary_type="memory_summary"
            )
        except Exception as e:
            logger.warning(f"Failed to parse summary {summary_dict.get('id', 'unknown')}: {e}")
            return None
    async def get_user_summaries(
        self,
        user_id: str,
        time_range: Optional[TimeRange] = None,
        limit: int = 1000
    ) -> List[UserMemorySummary]:
        """Retrieve user memory summaries from Neo4j.
        Args:
            user_id: Target user ID
            time_range: Optional time range filter
            limit: Maximum number of summaries
        Returns:
            List of user memory summaries
        """
        try:
            start_date = time_range.start_date if time_range else None
            end_date = time_range.end_date if time_range else None
            summary_dicts = await self.memory_summary_repo.find_by_group_id(
                group_id=user_id,
                limit=limit,
                start_date=start_date,
                end_date=end_date
            )
            summaries = []
            for summary_dict in summary_dicts:
                summary = self._dict_to_user_summary(summary_dict, user_id)
                if summary:
                    summaries.append(summary)
            logger.info(f"Retrieved {len(summaries)} summaries for user {user_id}")
            return summaries
        except Exception as e:
            logger.error(f"Failed to retrieve summaries for user {user_id}: {e}")
            raise
--- a/api/app/core/memory/analytics/implicit_memory/habit_detector.py
+++ b/api/app/core/memory/analytics/implicit_memory/habit_detector.py
@@ -0,0 +1,234 @@
 """Habit Detector for Implicit Memory System
 This module implements the HabitDetector class that specializes in identifying
 and ranking behavioral habits from user memory summaries. It provides advanced
 habit analysis with confidence scoring, recency weighting, and current vs past
 habit distinction.
 """
 import logging
 from datetime import datetime, timedelta
 from typing import List, Optional
 from app.core.memory.analytics.implicit_memory.analyzers.habit_analyzer import (
    HabitAnalyzer,
 )
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
    BehaviorHabit,
    ConfidenceLevel,
    FrequencyPattern,
    UserMemorySummary,
 )
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 class HabitDetector:
    """Detects and ranks behavioral habits from user memory summaries."""
    def __init__(
        self,
        db: Session,
        llm_model_id: Optional[str] = None
    ):
        """Initialize the habit detector.
        Args:
            db: Database session
            llm_model_id: Optional LLM model ID to use for analysis
        """
        self.db = db
        self.llm_model_id = llm_model_id
        self.habit_analyzer = HabitAnalyzer(db, llm_model_id)
    async def detect_habits(
        self,
        user_id: str,
        user_summaries: List[UserMemorySummary],
        existing_habits: Optional[List[BehaviorHabit]] = None
    ) -> List[BehaviorHabit]:
        """Detect behavioral habits from user summaries.
        Args:
            user_id: Target user ID
            user_summaries: List of user-specific memory summaries
            existing_habits: Optional existing habits for consolidation
        Returns:
            List of detected and ranked behavioral habits
        Raises:
            LLMClientException: If habit analysis fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for user {user_id}")
            return existing_habits or []
        logger.info(f"Detecting habits for user {user_id} with {len(user_summaries)} summaries")
        try:
            # Use the habit analyzer to extract habits
            detected_habits = await self.habit_analyzer.analyze_habits(
                user_id=user_id,
                user_summaries=user_summaries,
                existing_habits=existing_habits
            )
            # Apply advanced ranking and filtering
            ranked_habits = self.rank_habits_by_confidence_and_recency(detected_habits)
            # Distinguish current vs past habits
            categorized_habits = self.distinguish_current_vs_past_habits(ranked_habits)
            logger.info(f"Detected {len(categorized_habits)} habits for user {user_id}")
            return categorized_habits
        except LLMClientException:
            logger.error(f"Habit detection failed for user {user_id}")
            raise
        except Exception as e:
            logger.error(f"Habit detection failed for user {user_id}: {e}")
            raise LLMClientException(f"Habit detection failed: {e}") from e
    def rank_habits_by_confidence_and_recency(
        self,
        habits: List[BehaviorHabit],
        confidence_weight: float = 0.6,
        recency_weight: float = 0.4
    ) -> List[BehaviorHabit]:
        """Rank habits by confidence level and recency.
        Args:
            habits: List of habits to rank
            confidence_weight: Weight for confidence score (0.0-1.0)
            recency_weight: Weight for recency score (0.0-1.0)
        Returns:
            List of habits ranked by combined score
        """
        if not habits:
            return []
        logger.info(f"Ranking {len(habits)} habits by confidence and recency")
        def calculate_ranking_score(habit: BehaviorHabit) -> float:
            """Calculate combined ranking score for a habit."""
            # Confidence score (0.0-1.0)
            confidence_scores = {
                ConfidenceLevel.HIGH: 1.0,
                ConfidenceLevel.MEDIUM: 0.6,
                ConfidenceLevel.LOW: 0.3
            }
            confidence_score = confidence_scores.get(habit.confidence_level, 0.3)
            # Recency score (0.0-1.0)
            current_time = datetime.now()
            days_since_last = (current_time - habit.last_observed).days
            # Exponential decay for recency (habits lose relevance over time)
            if days_since_last <= 7:
                recency_score = 1.0  # Very recent
            elif days_since_last <= 30:
                recency_score = 0.8  # Recent
            elif days_since_last <= 90:
                recency_score = 0.5  # Somewhat recent
            elif days_since_last <= 180:
                recency_score = 0.3  # Old
            else:
                recency_score = 0.1  # Very old
            # Frequency pattern bonus
            frequency_bonuses = {
                FrequencyPattern.DAILY: 0.2,
                FrequencyPattern.WEEKLY: 0.15,
                FrequencyPattern.MONTHLY: 0.1,
                FrequencyPattern.SEASONAL: 0.05,
                FrequencyPattern.OCCASIONAL: 0.0,
                FrequencyPattern.EVENT_TRIGGERED: 0.05
            }
            frequency_bonus = frequency_bonuses.get(habit.frequency_pattern, 0.0)
            # Evidence quality bonus
            evidence_bonus = min(len(habit.supporting_summaries) / 10.0, 0.1)  # Max 0.1 bonus
            # Current habit bonus
            current_bonus = 0.1 if habit.is_current else 0.0
            # Calculate final score
            base_score = (confidence_score * confidence_weight + 
                         recency_score * recency_weight)
            final_score = base_score + frequency_bonus + evidence_bonus + current_bonus
            return min(final_score, 1.0)  # Cap at 1.0
        # Sort habits by ranking score (descending)
        ranked_habits = sorted(habits, key=calculate_ranking_score, reverse=True)
        logger.info(f"Ranked habits with scores: {[calculate_ranking_score(h) for h in ranked_habits[:5]]}")
        return ranked_habits
    def distinguish_current_vs_past_habits(
        self,
        habits: List[BehaviorHabit],
        current_threshold_days: int = 30
    ) -> List[BehaviorHabit]:
        """Distinguish between current and past habits based on recency.
        Args:
            habits: List of habits to categorize
            current_threshold_days: Days threshold for considering a habit current
        Returns:
            List of habits with updated is_current status
        """
        if not habits:
            return []
        current_time = datetime.now()
        cutoff_date = current_time - timedelta(days=current_threshold_days)
        current_habits = []
        past_habits = []
        for habit in habits:
            # Update is_current status based on last observation
            if habit.last_observed >= cutoff_date:
                # Create updated habit with is_current = True
                updated_habit = BehaviorHabit(
                    habit_description=habit.habit_description,
                    frequency_pattern=habit.frequency_pattern,
                    time_context=habit.time_context,
                    confidence_level=habit.confidence_level,
                    supporting_summaries=habit.supporting_summaries,
                    specific_examples=habit.specific_examples,
                    first_observed=habit.first_observed,
                    last_observed=habit.last_observed,
                    is_current=True
                )
                current_habits.append(updated_habit)
            else:
                # Create updated habit with is_current = False
                updated_habit = BehaviorHabit(
                    habit_description=habit.habit_description,
                    frequency_pattern=habit.frequency_pattern,
                    time_context=habit.time_context,
                    confidence_level=habit.confidence_level,
                    supporting_summaries=habit.supporting_summaries,
                    specific_examples=habit.specific_examples,
                    first_observed=habit.first_observed,
                    last_observed=habit.last_observed,
                    is_current=False
                )
                past_habits.append(updated_habit)
        # Return current habits first, then past habits
        categorized_habits = current_habits + past_habits
        logger.info(f"Categorized habits: {len(current_habits)} current, {len(past_habits)} past")
        return categorized_habits
--- a/api/app/core/memory/analytics/implicit_memory/llm_client.py
+++ b/api/app/core/memory/analytics/implicit_memory/llm_client.py
@@ -0,0 +1,321 @@
 """LLM Client Wrapper for Implicit Memory Analysis
 This module provides a specialized LLM client wrapper that integrates with the
 MemoryClientFactory to perform implicit memory analysis tasks including preference
 extraction, personality dimension analysis, interest categorization, and habit detection.
 """
 import logging
 from typing import Any, Dict, List, Optional
 from app.core.memory.analytics.implicit_memory.prompts import (
    get_dimension_analysis_prompt,
    get_habit_analysis_prompt,
    get_interest_analysis_prompt,
    get_preference_analysis_prompt,
 )
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
 from app.schemas.implicit_memory_schema import UserMemorySummary
 from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 # Response Models for LLM Analysis
 class PreferenceAnalysisResponse(BaseModel):
    """Response model for preference analysis."""
    preferences: List[Dict[str, Any]] = Field(default_factory=list)
 class DimensionAnalysisResponse(BaseModel):
    """Response model for dimension analysis."""
    dimensions: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
 class InterestAnalysisResponse(BaseModel):
    """Response model for interest analysis."""
    interest_distribution: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
 class HabitAnalysisResponse(BaseModel):
    """Response model for habit analysis."""
    habits: List[Dict[str, Any]] = Field(default_factory=list)
 class ImplicitMemoryLLMClient:
    """LLM client wrapper for implicit memory analysis.
    This class provides a high-level interface for performing LLM-based analysis
    of user memory summaries to extract preferences, personality dimensions,
    interests, and behavioral habits.
    """
    def __init__(self, db: Session, default_model_id: Optional[str] = None):
        """Initialize the LLM client wrapper.
        Args:
            db: Database session for accessing model configurations
            default_model_id: Default LLM model ID to use if none specified
        """
        self.db = db
        self.default_model_id = default_model_id
        self._client_factory = MemoryClientFactory(db)
        logger.info("ImplicitMemoryLLMClient initialized")
    def _get_llm_client(self, model_id: Optional[str] = None):
        """Get LLM client instance.
        Args:
            model_id: LLM model ID to use, defaults to default_model_id
        Returns:
            LLM client instance
        Raises:
            ValueError: If no model ID is provided and no default is set
            LLMClientException: If client creation fails
        """
        effective_model_id = model_id or self.default_model_id
        if not effective_model_id:
            raise ValueError("No LLM model ID provided and no default model ID set")
        try:
            client = self._client_factory.get_llm_client(effective_model_id)
            logger.debug(f"Created LLM client for model: {effective_model_id}")
            return client
        except Exception as e:
            logger.error(f"Failed to create LLM client for model {effective_model_id}: {e}")
            raise LLMClientException(f"Failed to create LLM client: {e}") from e
    def _prepare_summaries_for_analysis(self, user_summaries: List[UserMemorySummary]) -> List[Dict[str, Any]]:
        """Prepare user memory summaries for LLM analysis.
        Args:
            user_summaries: List of user memory summaries
        Returns:
            List of formatted summary dictionaries
        """
        formatted_summaries = []
        for summary in user_summaries:
            formatted_summary = {
                'summary_id': summary.summary_id,
                'user_content': summary.user_content,
                'timestamp': summary.timestamp.isoformat(),
                'summary_type': summary.summary_type,
                'confidence_score': summary.confidence_score
            }
            formatted_summaries.append(formatted_summary)
        logger.debug(f"Prepared {len(formatted_summaries)} summaries for analysis")
        return formatted_summaries
    async def analyze_preferences(
        self,
        user_summaries: List[UserMemorySummary],
        user_id: str,
        model_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Analyze user preferences from memory summaries.
        Args:
            user_summaries: List of user memory summaries to analyze
            user_id: Target user ID for analysis
            model_id: Optional LLM model ID to use
        Returns:
            Dictionary containing extracted preferences
        Raises:
            LLMClientException: If LLM analysis fails
            ValueError: If input validation fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for preference analysis of user {user_id}")
            return {"preferences": []}
        if not user_id:
            raise ValueError("User ID is required for preference analysis")
        try:
            # Prepare summaries and get prompt
            formatted_summaries = self._prepare_summaries_for_analysis(user_summaries)
            prompt = get_preference_analysis_prompt(formatted_summaries, user_id)
            # Get LLM client and perform analysis
            llm_client = self._get_llm_client(model_id)
            messages = [{"role": "user", "content": prompt}]
            # Use structured output for reliable parsing
            response = await llm_client.response_structured(
                messages=messages,
                response_model=PreferenceAnalysisResponse
            )
            result = response.model_dump()
            logger.info(f"Analyzed preferences for user {user_id}: found {len(result.get('preferences', []))} preferences")
            return result
        except Exception as e:
            logger.error(f"Preference analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Preference analysis failed: {e}") from e
    async def analyze_dimensions(
        self,
        user_summaries: List[UserMemorySummary],
        user_id: str,
        model_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Analyze user personality dimensions from memory summaries.
        Args:
            user_summaries: List of user memory summaries to analyze
            user_id: Target user ID for analysis
            model_id: Optional LLM model ID to use
        Returns:
            Dictionary containing dimension scores and analysis
        Raises:
            LLMClientException: If LLM analysis fails
            ValueError: If input validation fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for dimension analysis of user {user_id}")
            return {"dimensions": {}}
        if not user_id:
            raise ValueError("User ID is required for dimension analysis")
        try:
            # Prepare summaries and get prompt
            formatted_summaries = self._prepare_summaries_for_analysis(user_summaries)
            prompt = get_dimension_analysis_prompt(formatted_summaries, user_id)
            # Get LLM client and perform analysis
            llm_client = self._get_llm_client(model_id)
            messages = [{"role": "user", "content": prompt}]
            # Use structured output for reliable parsing
            response = await llm_client.response_structured(
                messages=messages,
                response_model=DimensionAnalysisResponse
            )
            result = response.model_dump()
            dimensions = result.get('dimensions', {})
            logger.info(f"Analyzed dimensions for user {user_id}: {list(dimensions.keys())}")
            return result
        except Exception as e:
            logger.error(f"Dimension analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Dimension analysis failed: {e}") from e
    async def analyze_interests(
        self,
        user_summaries: List[UserMemorySummary],
        user_id: str,
        model_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Analyze user interest distribution from memory summaries.
        Args:
            user_summaries: List of user memory summaries to analyze
            user_id: Target user ID for analysis
            model_id: Optional LLM model ID to use
        Returns:
            Dictionary containing interest area distribution
        Raises:
            LLMClientException: If LLM analysis fails
            ValueError: If input validation fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for interest analysis of user {user_id}")
            return {"interest_distribution": {}}
        if not user_id:
            raise ValueError("User ID is required for interest analysis")
        try:
            # Prepare summaries and get prompt
            formatted_summaries = self._prepare_summaries_for_analysis(user_summaries)
            prompt = get_interest_analysis_prompt(formatted_summaries, user_id)
            # Get LLM client and perform analysis
            llm_client = self._get_llm_client(model_id)
            messages = [{"role": "user", "content": prompt}]
            # Use structured output for reliable parsing
            response = await llm_client.response_structured(
                messages=messages,
                response_model=InterestAnalysisResponse
            )
            result = response.model_dump()
            interest_dist = result.get('interest_distribution', {})
            logger.info(f"Analyzed interests for user {user_id}: {list(interest_dist.keys())}")
            return result
        except Exception as e:
            logger.error(f"Interest analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Interest analysis failed: {e}") from e
    async def analyze_habits(
        self,
        user_summaries: List[UserMemorySummary],
        user_id: str,
        model_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Analyze user behavioral habits from memory summaries.
        Args:
            user_summaries: List of user memory summaries to analyze
            user_id: Target user ID for analysis
            model_id: Optional LLM model ID to use
        Returns:
            Dictionary containing identified behavioral habits
        Raises:
            LLMClientException: If LLM analysis fails
            ValueError: If input validation fails
        """
        if not user_summaries:
            logger.warning(f"No summaries provided for habit analysis of user {user_id}")
            return {"habits": []}
        if not user_id:
            raise ValueError("User ID is required for habit analysis")
        try:
            # Prepare summaries and get prompt
            formatted_summaries = self._prepare_summaries_for_analysis(user_summaries)
            prompt = get_habit_analysis_prompt(formatted_summaries, user_id)
            # Get LLM client and perform analysis
            llm_client = self._get_llm_client(model_id)
            messages = [{"role": "user", "content": prompt}]
            # Use structured output for reliable parsing
            response = await llm_client.response_structured(
                messages=messages,
                response_model=HabitAnalysisResponse
            )
            result = response.model_dump()
            logger.info(f"Analyzed habits for user {user_id}: found {len(result.get('habits', []))} habits")
            return result
        except Exception as e:
            logger.error(f"Habit analysis failed for user {user_id}: {e}")
            raise LLMClientException(f"Habit analysis failed: {e}") from e
--- a/api/app/core/memory/analytics/implicit_memory/prompts.py
+++ b/api/app/core/memory/analytics/implicit_memory/prompts.py
@@ -0,0 +1,69 @@
 """LLM Prompt Templates for Implicit Memory Analysis
 This module contains prompt rendering functions for analyzing user memory summaries
 to extract preferences, personality dimensions, interests, and behavioral habits.
 """
 import os
 from typing import Any, Dict, List
 from jinja2 import Environment, FileSystemLoader
 # Setup Jinja2 environment
 current_dir = os.path.dirname(os.path.abspath(__file__))
 prompt_dir = os.path.join(current_dir, "prompts")
 prompt_env = Environment(loader=FileSystemLoader(prompt_dir))
 def _render_template(template_name: str, **kwargs) -> str:
    """Helper function to render Jinja2 templates."""
    template = prompt_env.get_template(template_name)
    return template.render(**kwargs)
 def get_preference_analysis_prompt(
    memory_summaries: List[Dict[str, Any]], 
    user_id: str
 ) -> str:
    """Get formatted preference analysis prompt using Jinja2 template."""
    return _render_template(
        "preference_analysis.jinja2",
        memory_summaries=memory_summaries,
        user_id=user_id
    )
 def get_dimension_analysis_prompt(
    memory_summaries: List[Dict[str, Any]], 
    user_id: str
 ) -> str:
    """Get formatted dimension analysis prompt using Jinja2 template."""
    return _render_template(
        "dimension_analysis.jinja2",
        memory_summaries=memory_summaries,
        user_id=user_id
    )
 def get_interest_analysis_prompt(
    memory_summaries: List[Dict[str, Any]], 
    user_id: str
 ) -> str:
    """Get formatted interest analysis prompt using Jinja2 template."""
    return _render_template(
        "interest_analysis.jinja2",
        memory_summaries=memory_summaries,
        user_id=user_id
    )
 def get_habit_analysis_prompt(
    memory_summaries: List[Dict[str, Any]], 
    user_id: str
 ) -> str:
    """Get formatted habit analysis prompt using Jinja2 template."""
    return _render_template(
        "habit_analysis.jinja2",
        memory_summaries=memory_summaries,
        user_id=user_id
    )
--- a/api/app/core/memory/analytics/implicit_memory/prompts/dimension_analysis.jinja2
+++ b/api/app/core/memory/analytics/implicit_memory/prompts/dimension_analysis.jinja2
@@ -0,0 +1,41 @@
 You are an expert personality analyst. Analyze memory summaries to assess the user's personality across four dimensions.
 ## Memory Summaries
 {% for summary in memory_summaries %}
 Summary {{ loop.index }}:
 {{ summary.content or summary.user_content or '' }}
 ---
 {% endfor %}
 ## Target User ID
 {{ user_id }}
 ## Dimensions to Analyze
 1. **Creativity** (0-100%): Creative thinking, artistic interests, innovative ideas
 2. **Aesthetic** (0-100%): Design preferences, visual interests, artistic appreciation
 3. **Technology** (0-100%): Technical discussions, tool usage, programming interests
 4. **Literature** (0-100%): Reading habits, writing style, literary references
 ## Instructions
 1. Analyze the user's content for each dimension
 2. Calculate percentage scores (0-100%)
 ## Output Format
 {
  "dimensions": {
    "creativity": {"percentage": 0-100},
    "aesthetic": {"percentage": 0-100},
    "technology": {"percentage": 0-100},
    "literature": {"percentage": 0-100}
  }
 }
 ## Example
 {
  "dimensions": {
    "creativity": {"percentage": 75},
    "aesthetic": {"percentage": 45},
    "technology": {"percentage": 60},
    "literature": {"percentage": 30}
  }
 }
--- a/api/app/core/memory/analytics/implicit_memory/prompts/habit_analysis.jinja2
+++ b/api/app/core/memory/analytics/implicit_memory/prompts/habit_analysis.jinja2
@@ -0,0 +1,70 @@
 You are an expert at identifying behavioral patterns and habits from memory summaries.
 ## Memory Summaries
 {% for summary in memory_summaries %}
 Summary {{ loop.index }}:
 {{ summary.content or summary.user_content or '' }}
 ---
 {% endfor %}
 ## Target User ID
 {{ user_id }}
 ## Instructions
 1. Identify recurring behavioral patterns mentioned by the SPECIFIED USER
 2. Focus on specific, concrete habits with temporal patterns
 3. For each habit, provide:
   - habit_description: Clear, specific description
   - frequency_pattern: "daily", "weekly", "monthly", "seasonal", "occasional", "event_triggered"
   - time_context: When it typically happens
   - confidence_level: "high", "medium", "low"
   - supporting_summaries: References to evidence
   - specific_examples: Concrete examples from summaries
   - is_current: true if current habit, false if past habit
 4. Only include habits with medium or high confidence
 5. **IMPORTANT: Output language MUST match the input language. If summaries are in Chinese, output in Chinese. If in English, output in English.**
 ## Output Format
 {
  "habits": [
    {
      "habit_description": "string",
      "frequency_pattern": "daily|weekly|monthly|seasonal|occasional|event_triggered",
      "time_context": "string",
      "confidence_level": "high|medium|low",
      "supporting_summaries": ["id1", "id2"],
      "specific_examples": ["example1", "example2"],
      "is_current": true|false
    }
  ]
 }
 ## Example (English input → English output)
 {
  "habits": [
    {
      "habit_description": "drinks coffee every morning",
      "frequency_pattern": "daily",
      "time_context": "morning routine",
      "confidence_level": "high",
      "supporting_summaries": ["s1", "s2"],
      "specific_examples": ["needs coffee to start the day"],
      "is_current": true
    }
  ]
 }
 ## Example (Chinese input → Chinese output)
 {
  "habits": [
    {
      "habit_description": "每天早上喝咖啡",
      "frequency_pattern": "daily",
      "time_context": "早晨日常",
      "confidence_level": "high",
      "supporting_summaries": ["s1", "s2"],
      "specific_examples": ["需要咖啡来开始一天"],
      "is_current": true
    }
  ]
 }
--- a/api/app/core/memory/analytics/implicit_memory/prompts/interest_analysis.jinja2
+++ b/api/app/core/memory/analytics/implicit_memory/prompts/interest_analysis.jinja2
@@ -0,0 +1,54 @@
 You are an expert at analyzing user interests from memory summaries.
 ## Memory Summaries
 {% for summary in memory_summaries %}
 Summary {{ loop.index }}:
 {{ summary.content or summary.user_content or '' }}
 ---
 {% endfor %}
 ## Target User ID
 {{ user_id }}
 ## Interest Categories
 1. **Tech**: Programming, technology, software tools, hardware
 2. **Lifestyle**: Daily routines, health, hobbies, social activities
 3. **Music**: Music preferences, instruments, concerts
 4. **Art**: Visual arts, creative projects, design, aesthetics
 ## Instructions
 1. Categorize the user's interests into the four areas
 2. Calculate percentage distribution (must total 100%)
 3. Provide specific evidence for each interest area
 4. Use "increasing", "decreasing", or "stable" for trending direction
 5. **IMPORTANT: Output language MUST match the input language. If summaries are in Chinese, output in Chinese. If in English, output in English.**
 ## Output Format
 {
  "interest_distribution": {
    "tech": {"percentage": 0-100, "evidence": [], "trending_direction": "increasing|decreasing|stable|null"},
    "lifestyle": {"percentage": 0-100, "evidence": [], "trending_direction": "increasing|decreasing|stable|null"},
    "music": {"percentage": 0-100, "evidence": [], "trending_direction": "increasing|decreasing|stable|null"},
    "art": {"percentage": 0-100, "evidence": [], "trending_direction": "increasing|decreasing|stable|null"}
  }
 }
 ## Example (English input → English output)
 {
  "interest_distribution": {
    "tech": {"percentage": 40, "evidence": ["discusses programming frequently"], "trending_direction": "increasing"},
    "lifestyle": {"percentage": 35, "evidence": ["talks about fitness routine"], "trending_direction": "stable"},
    "music": {"percentage": 15, "evidence": ["mentioned favorite bands"], "trending_direction": "stable"},
    "art": {"percentage": 10, "evidence": ["visited art museum"], "trending_direction": "stable"}
  }
 }
 ## Example (Chinese input → Chinese output)
 {
  "interest_distribution": {
    "tech": {"percentage": 40, "evidence": ["经常讨论编程"], "trending_direction": "increasing"},
    "lifestyle": {"percentage": 35, "evidence": ["谈论健身日常"], "trending_direction": "stable"},
    "music": {"percentage": 15, "evidence": ["提到喜欢的乐队"], "trending_direction": "stable"},
    "art": {"percentage": 10, "evidence": ["参观了艺术博物馆"], "trending_direction": "stable"}
  }
 }
--- a/api/app/core/memory/analytics/implicit_memory/prompts/preference_analysis.jinja2
+++ b/api/app/core/memory/analytics/implicit_memory/prompts/preference_analysis.jinja2
@@ -0,0 +1,47 @@
 You are an expert at analyzing user memory summaries to identify implicit preferences.
 ## Memory Summaries
 {% for summary in memory_summaries %}
 Summary {{ loop.index }}:
 {{ summary.content or summary.user_content or '' }}
 ---
 {% endfor %}
 ## Target User ID
 {{ user_id }}
 ## Instructions
 1. Focus ONLY on the specified user's preferences
 2. Extract SHORT preference tags (1-3 words max), like: "音乐", "咖啡", "科幻", "设计", "古典", "吉他"
 3. DO NOT use long phrases - use short nouns or noun phrases
 4. Only include preferences with confidence_score >= 0.3
 5. **IMPORTANT: Output language MUST match the input language. If summaries are in Chinese, output in Chinese. If in English, output in English.**
 ## Output Format
 {
  "preferences": [
    {
      "tag_name": "short tag",
      "confidence_score": 0.0-1.0,
      "supporting_evidence": ["evidence1", "evidence2"],
      "context_details": "brief context",
      "category": "category or null"
    }
  ]
 }
 ## Example (Chinese input → Chinese output)
 {
  "preferences": [
    {"tag_name": "咖啡", "confidence_score": 0.8, "supporting_evidence": ["每天早上喝咖啡"], "context_details": "日常习惯", "category": "lifestyle"},
    {"tag_name": "古典音乐", "confidence_score": 0.7, "supporting_evidence": ["喜欢听古典"], "context_details": "音乐偏好", "category": "music"}
  ]
 }
 ## Example (English input → English output)
 {
  "preferences": [
    {"tag_name": "coffee", "confidence_score": 0.8, "supporting_evidence": ["drinks coffee every morning"], "context_details": "daily routine", "category": "lifestyle"},
    {"tag_name": "classical music", "confidence_score": 0.7, "supporting_evidence": ["enjoys classical"], "context_details": "music preference", "category": "music"}
  ]
 }
--- a/api/app/repositories/neo4j/memory_summary_repository.py
+++ b/api/app/repositories/neo4j/memory_summary_repository.py
@@ -0,0 +1,273 @@
 # -*- coding: utf-8 -*-
 """Memory Summary Repository Module
 This module provides data access functionality for MemorySummary nodes.
 Classes:
    MemorySummaryRepository: Repository for managing MemorySummary CRUD operations
 """
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 from app.repositories.neo4j.base_neo4j_repository import BaseNeo4jRepository
 from app.repositories.neo4j.neo4j_connector import Neo4jConnector
 class MemorySummaryRepository(BaseNeo4jRepository):
    """Memory Summary Repository
    Manages CRUD operations for MemorySummary nodes.
    Provides methods to query summaries by group_id, user_id, and time ranges.
    Attributes:
        connector: Neo4j connector instance
        node_label: Node label, fixed as "MemorySummary"
    """
    def __init__(self, connector: Neo4jConnector):
        """Initialize memory summary repository
        Args:
            connector: Neo4j connector instance
        """
        super().__init__(connector, "MemorySummary")
    def _map_to_dict(self, node_data: Dict) -> Dict[str, Any]:
        """Map node data to dictionary format
        Args:
            node_data: Node data returned from Neo4j query
        Returns:
            Dict[str, Any]: Memory summary data dictionary
        """
        # Extract node data from query result
        n = node_data.get('n', node_data)
        # Handle datetime fields
        if isinstance(n.get('created_at'), str):
            n['created_at'] = datetime.fromisoformat(n['created_at'])
        return dict(n)
    async def find_by_group_id(
        self, 
        group_id: str, 
        limit: int = 1000,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None
    ) -> List[Dict[str, Any]]:
        """Query memory summaries by group_id
        Args:
            group_id: Group ID to filter by
            limit: Maximum number of results to return
            start_date: Optional start date filter
            end_date: Optional end date filter
        Returns:
            List[Dict[str, Any]]: List of memory summary dictionaries
        """
        query = f"""
        MATCH (n:{self.node_label})
        WHERE n.group_id = $group_id
        """
        params = {"group_id": group_id, "limit": limit}
        # Add date range filters if provided
        if start_date:
            query += " AND n.created_at >= $start_date"
            params["start_date"] = start_date
        if end_date:
            query += " AND n.created_at <= $end_date"
            params["end_date"] = end_date
        query += """
        RETURN n
        ORDER BY n.created_at DESC
        LIMIT $limit
        """
        results = await self.connector.execute_query(query, **params)
        return [self._map_to_dict(r) for r in results]
    async def find_by_user_id(
        self, 
        user_id: str, 
        limit: int = 1000,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None
    ) -> List[Dict[str, Any]]:
        """Query memory summaries by user_id
        Args:
            user_id: User ID to filter by
            limit: Maximum number of results to return
            start_date: Optional start date filter
            end_date: Optional end date filter
        Returns:
            List[Dict[str, Any]]: List of memory summary dictionaries
        """
        query = f"""
        MATCH (n:{self.node_label})
        WHERE n.user_id = $user_id
        """
        params = {"user_id": user_id, "limit": limit}
        # Add date range filters if provided
        if start_date:
            query += " AND n.created_at >= $start_date"
            params["start_date"] = start_date
        if end_date:
            query += " AND n.created_at <= $end_date"
            params["end_date"] = end_date
        query += """
        RETURN n
        ORDER BY n.created_at DESC
        LIMIT $limit
        """
        results = await self.connector.execute_query(query, **params)
        return [self._map_to_dict(r) for r in results]
    async def find_by_group_and_user(
        self,
        group_id: str,
        user_id: str,
        limit: int = 1000,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None
    ) -> List[Dict[str, Any]]:
        """Query memory summaries by both group_id and user_id
        Args:
            group_id: Group ID to filter by
            user_id: User ID to filter by
            limit: Maximum number of results to return
            start_date: Optional start date filter
            end_date: Optional end date filter
        Returns:
            List[Dict[str, Any]]: List of memory summary dictionaries
        """
        query = f"""
        MATCH (n:{self.node_label})
        WHERE n.group_id = $group_id AND n.user_id = $user_id
        """
        params = {"group_id": group_id, "user_id": user_id, "limit": limit}
        # Add date range filters if provided
        if start_date:
            query += " AND n.created_at >= $start_date"
            params["start_date"] = start_date
        if end_date:
            query += " AND n.created_at <= $end_date"
            params["end_date"] = end_date
        query += """
        RETURN n
        ORDER BY n.created_at DESC
        LIMIT $limit
        """
        results = await self.connector.execute_query(query, **params)
        return [self._map_to_dict(r) for r in results]
    async def find_recent_summaries(
        self,
        group_id: str,
        days: int = 7,
        limit: int = 1000
    ) -> List[Dict[str, Any]]:
        """Query recent memory summaries
        Args:
            group_id: Group ID to filter by
            days: Number of recent days to query
            limit: Maximum number of results to return
        Returns:
            List[Dict[str, Any]]: List of memory summary dictionaries
        """
        query = f"""
        MATCH (n:{self.node_label})
        WHERE n.group_id = $group_id
        AND n.created_at >= datetime() - duration({{days: $days}})
        RETURN n
        ORDER BY n.created_at DESC
        LIMIT $limit
        """
        results = await self.connector.execute_query(
            query,
            group_id=group_id,
            days=days,
            limit=limit
        )
        return [self._map_to_dict(r) for r in results]
    async def find_by_content_keywords(
        self,
        group_id: str,
        keywords: List[str],
        limit: int = 100
    ) -> List[Dict[str, Any]]:
        """Query memory summaries by content keywords
        Args:
            group_id: Group ID to filter by
            keywords: List of keywords to search for in content
            limit: Maximum number of results to return
        Returns:
            List[Dict[str, Any]]: List of memory summary dictionaries
        """
        # Build keyword search conditions
        keyword_conditions = []
        params = {"group_id": group_id, "limit": limit}
        for i, keyword in enumerate(keywords):
            keyword_conditions.append(f"toLower(n.content) CONTAINS toLower($keyword_{i})")
            params[f"keyword_{i}"] = keyword
        keyword_filter = " OR ".join(keyword_conditions)
        query = f"""
        MATCH (n:{self.node_label})
        WHERE n.group_id = $group_id
        AND ({keyword_filter})
        RETURN n
        ORDER BY n.created_at DESC
        LIMIT $limit
        """
        results = await self.connector.execute_query(query, **params)
        return [self._map_to_dict(r) for r in results]
    async def get_summary_count_by_group(self, group_id: str) -> int:
        """Get count of memory summaries for a group
        Args:
            group_id: Group ID to count summaries for
        Returns:
            int: Number of memory summaries
        """
        query = f"""
        MATCH (n:{self.node_label})
        WHERE n.group_id = $group_id
        RETURN count(n) as count
        """
        results = await self.connector.execute_query(query, group_id=group_id)
        return results[0]['count'] if results else 0
--- a/api/app/schemas/implicit_memory_schema.py
+++ b/api/app/schemas/implicit_memory_schema.py
@@ -0,0 +1,212 @@
 """Implicit Memory Schemas
 This module defines the Pydantic schemas for the implicit memory system API.
 """
 import datetime
 from enum import Enum
 from typing import Any, Dict, List, Optional
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 class ConfidenceLevel(str, Enum):
    """Confidence levels for analysis results."""
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
 class FrequencyPattern(str, Enum):
    """Frequency patterns for habits."""
    DAILY = "daily"
    WEEKLY = "weekly"
    MONTHLY = "monthly"
    SEASONAL = "seasonal"
    OCCASIONAL = "occasional"
    EVENT_TRIGGERED = "event_triggered"
 # Request Schemas
 class TimeRange(BaseModel):
    """Time range for analysis."""
    start_date: datetime.datetime
    end_date: datetime.datetime
    @field_validator('end_date')
    @classmethod
    def end_date_must_be_after_start_date(cls, v, info):
        if 'start_date' in info.data and v <= info.data['start_date']:
            raise ValueError('end_date must be after start_date')
        return v
 class DateRange(BaseModel):
    """Date range for filtering."""
    start_date: Optional[datetime.datetime] = None
    end_date: Optional[datetime.datetime] = None
    @field_validator('end_date')
    @classmethod
    def end_date_must_be_after_start_date(cls, v, info):
        if v and 'start_date' in info.data and info.data['start_date'] and v <= info.data['start_date']:
            raise ValueError('end_date must be after start_date')
        return v
 class AnalysisConfig(BaseModel):
    """Configuration for analysis operations."""
    llm_model_id: Optional[str] = None
    batch_size: int = 100
    confidence_threshold: float = 0.5
    include_historical_trends: bool = False
    max_conversations: Optional[int] = None
 # Response Schemas
 class PreferenceTagResponse(BaseModel):
    """A user preference tag with detailed context."""
    model_config = ConfigDict(from_attributes=True)
    tag_name: str
    confidence_score: float = Field(ge=0.0, le=1.0)
    supporting_evidence: List[str]
    context_details: str
    created_at: datetime.datetime
    updated_at: datetime.datetime
    conversation_references: List[str]
    category: Optional[str] = None
 class DimensionScoreResponse(BaseModel):
    """Score for a personality dimension."""
    model_config = ConfigDict(from_attributes=True)
    dimension_name: str
    percentage: float = Field(ge=0.0, le=100.0)
    evidence: List[str]
    reasoning: str
    confidence_level: ConfidenceLevel
 class DimensionPortraitResponse(BaseModel):
    """Four-dimension personality portrait."""
    model_config = ConfigDict(from_attributes=True)
    user_id: str
    creativity: DimensionScoreResponse
    aesthetic: DimensionScoreResponse
    technology: DimensionScoreResponse
    literature: DimensionScoreResponse
    analysis_timestamp: datetime.datetime
    total_summaries_analyzed: int
    historical_trends: Optional[List[Dict[str, Any]]] = None
 class InterestCategoryResponse(BaseModel):
    """Interest category with percentage and evidence."""
    model_config = ConfigDict(from_attributes=True)
    category_name: str
    percentage: float = Field(ge=0.0, le=100.0)
    evidence: List[str]
    trending_direction: Optional[str] = None
 class InterestAreaDistributionResponse(BaseModel):
    """Distribution of user interests across four areas."""
    model_config = ConfigDict(from_attributes=True)
    user_id: str
    tech: InterestCategoryResponse
    lifestyle: InterestCategoryResponse
    music: InterestCategoryResponse
    art: InterestCategoryResponse
    analysis_timestamp: datetime.datetime
    total_summaries_analyzed: int
    @property
    def total_percentage(self) -> float:
        """Calculate total percentage across all interest areas."""
        return self.tech.percentage + self.lifestyle.percentage + self.music.percentage + self.art.percentage
 class BehaviorHabitResponse(BaseModel):
    """A behavioral habit identified from conversations."""
    model_config = ConfigDict(from_attributes=True)
    habit_description: str
    frequency_pattern: FrequencyPattern
    time_context: str
    confidence_level: ConfidenceLevel
    supporting_summaries: List[str]
    first_observed: datetime.datetime
    last_observed: datetime.datetime
    is_current: bool = True
    specific_examples: List[str]
 class UserProfileResponse(BaseModel):
    """Comprehensive user profile."""
    model_config = ConfigDict(from_attributes=True)
    user_id: str
    preference_tags: List[PreferenceTagResponse]
    dimension_portrait: DimensionPortraitResponse
    interest_area_distribution: InterestAreaDistributionResponse
    behavior_habits: List[BehaviorHabitResponse]
    profile_version: int
    created_at: datetime.datetime
    updated_at: datetime.datetime
    total_summaries_analyzed: int
    analysis_completeness_score: float = Field(ge=0.0, le=1.0)
 # Internal/Business Logic Schemas
 class MemorySummary(BaseModel):
    """Memory summary from existing storage system."""
    model_config = ConfigDict(from_attributes=True)
    summary_id: str
    content: str
    timestamp: datetime.datetime
    participants: List[str]
    summary_type: str
 class UserMemorySummary(BaseModel):
    """Memory summary filtered for specific user content."""
    model_config = ConfigDict(from_attributes=True)
    summary_id: str
    user_id: str
    user_content: str
    timestamp: datetime.datetime
    confidence_score: float = Field(ge=0.0, le=1.0)
    summary_type: str
 class SummaryAnalysisResult(BaseModel):
    """Result of analyzing memory summaries."""
    model_config = ConfigDict(from_attributes=True)
    user_id: str
    preferences: List[PreferenceTagResponse]
    dimension_evidence: Dict[str, List[str]]
    interest_evidence: Dict[str, List[str]]
    habit_evidence: List[Dict[str, Any]]
    analysis_timestamp: datetime.datetime
    summaries_analyzed: List[str]
 # Aliases for backward compatibility with existing code
 PreferenceTag = PreferenceTagResponse
 DimensionScore = DimensionScoreResponse
 DimensionPortrait = DimensionPortraitResponse
 InterestCategory = InterestCategoryResponse
 InterestAreaDistribution = InterestAreaDistributionResponse
 BehaviorHabit = BehaviorHabitResponse
 UserProfile = UserProfileResponse
--- a/api/app/services/implicit_memory_service.py
+++ b/api/app/services/implicit_memory_service.py
@@ -0,0 +1,385 @@
 """
 Implicit Memory Service
 Main service orchestrating all implicit memory operations. This service coordinates
 profile building, data extraction, and provides high-level methods for analyzing
 user profiles from memory summaries.
 """
 import logging
 from datetime import datetime
 from typing import List, Optional
 from app.core.memory.analytics.implicit_memory.analyzers.dimension_analyzer import (
    DimensionAnalyzer,
 )
 from app.core.memory.analytics.implicit_memory.analyzers.interest_analyzer import (
    InterestAnalyzer,
 )
 from app.core.memory.analytics.implicit_memory.analyzers.preference_analyzer import (
    PreferenceAnalyzer,
 )
 from app.core.memory.analytics.implicit_memory.data_source import MemoryDataSource
 from app.core.memory.analytics.implicit_memory.habit_detector import HabitDetector
 from app.repositories.neo4j.neo4j_connector import Neo4jConnector
 from app.schemas.implicit_memory_schema import (
    BehaviorHabit,
    ConfidenceLevel,
    DateRange,
    DimensionPortrait,
    FrequencyPattern,
    InterestAreaDistribution,
    PreferenceTag,
    TimeRange,
    UserMemorySummary,
 )
 from app.schemas.memory_config_schema import MemoryConfig
 from sqlalchemy.orm import Session
 logger = logging.getLogger(__name__)
 class ImplicitMemoryService:
    """Main service for implicit memory operations."""
    def __init__(
        self,
        db: Session,
        end_user_id: str
    ):
        """Initialize the implicit memory service.
        Args:
            db: Database session
            end_user_id: End user ID to get connected memory configuration
        """
        self.db = db
        self.end_user_id = end_user_id
        # Get connected memory configuration for the user
        self.memory_config = self._get_user_memory_config()
        # Extract LLM model ID from memory config
        llm_model_id = str(self.memory_config.llm_model_id) if self.memory_config.llm_model_id else None
        # Initialize Neo4j connector
        self.neo4j_connector = Neo4jConnector()
        # Initialize core components with LLM model ID
        self.data_source = MemoryDataSource(db, self.neo4j_connector)
        self.preference_analyzer = PreferenceAnalyzer(db, llm_model_id)
        self.dimension_analyzer = DimensionAnalyzer(db, llm_model_id)
        self.interest_analyzer = InterestAnalyzer(db, llm_model_id)
        self.habit_detector = HabitDetector(db, llm_model_id)
        logger.info(f"ImplicitMemoryService initialized for end_user: {end_user_id}")
    def _get_user_memory_config(self) -> MemoryConfig:
        """Get memory configuration for the connected end user.
        Returns:
            MemoryConfig: User's connected memory configuration
        Raises:
            ValueError: If no memory configuration found for user
        """
        try:
            from app.services.memory_agent_service import get_end_user_connected_config
            from app.services.memory_config_service import MemoryConfigService
            # Get user's connected config
            connected_config = get_end_user_connected_config(self.end_user_id, self.db)
            config_id = connected_config.get("memory_config_id")
            if config_id is None:
                raise ValueError(f"No memory configuration found for end_user: {self.end_user_id}")
            # Load the memory configuration
            config_service = MemoryConfigService(self.db)
            memory_config = config_service.load_memory_config(config_id)
            logger.info(f"Loaded memory config {config_id} for end_user: {self.end_user_id}")
            return memory_config
        except Exception as e:
            logger.error(f"Failed to get memory config for end_user {self.end_user_id}: {e}")
            raise ValueError(f"Unable to get memory configuration for end_user {self.end_user_id}: {e}")
    async def extract_user_summaries(
        self,
        user_id: str,
        time_range: Optional[TimeRange] = None,
        limit: Optional[int] = None
    ) -> List[UserMemorySummary]:
        """Extract user-specific memory summaries.
        Args:
            user_id: Target user ID
            time_range: Optional time range to filter summaries
            limit: Optional limit on number of summaries
        Returns:
            List of user-specific memory summaries
        """
        logger.info(f"Extracting user summaries for user {user_id}")
        try:
            summaries = await self.data_source.get_user_summaries(
                user_id=user_id,
                time_range=time_range,
                limit=limit or 1000
            )
            logger.info(f"Extracted {len(summaries)} summaries for user {user_id}")
            return summaries
        except Exception as e:
            logger.error(f"Failed to extract user summaries for user {user_id}: {e}")
            raise
    async def get_preference_tags(
        self,
        user_id: str,
        confidence_threshold: float = 0.5,
        tag_category: Optional[str] = None,
        date_range: Optional[DateRange] = None
    ) -> List[PreferenceTag]:
        """Retrieve user preference tags with filtering.
        Args:
            user_id: Target user ID
            confidence_threshold: Minimum confidence score for tags
            tag_category: Optional category filter
            date_range: Optional date range filter
        Returns:
            List of filtered preference tags
        """
        logger.info(f"Getting preference tags for user {user_id}")
        try:
            # Get user summaries for analysis
            time_range = None
            if date_range:
                time_range = TimeRange(
                    start_date=date_range.start_date or datetime.min,
                    end_date=date_range.end_date or datetime.now()
                )
            user_summaries = await self.extract_user_summaries(
                user_id=user_id,
                time_range=time_range
            )
            if not user_summaries:
                logger.warning(f"No summaries found for user {user_id}")
                return []
            # Analyze preferences
            preference_tags = await self.preference_analyzer.analyze_preferences(
                user_id=user_id,
                user_summaries=user_summaries
            )
            # Apply filters
            filtered_tags = []
            for tag in preference_tags:
                # Filter by confidence threshold
                if tag.confidence_score < confidence_threshold:
                    continue
                # Filter by category if specified
                if tag_category and tag.category != tag_category:
                    continue
                # Filter by date range if specified
                if date_range:
                    if date_range.start_date and tag.created_at < date_range.start_date:
                        continue
                    if date_range.end_date and tag.created_at > date_range.end_date:
                        continue
                filtered_tags.append(tag)
            # Sort by confidence score and recency
            filtered_tags.sort(
                key=lambda x: (x.confidence_score, x.updated_at),
                reverse=True
            )
            logger.info(f"Retrieved {len(filtered_tags)} preference tags for user {user_id}")
            return filtered_tags
        except Exception as e:
            logger.error(f"Failed to get preference tags for user {user_id}: {e}")
            raise
    async def get_dimension_portrait(
        self,
        user_id: str,
        include_history: bool = False
    ) -> DimensionPortrait:
        """Get user's four-dimension personality portrait.
        Args:
            user_id: Target user ID
            include_history: Whether to include historical trends
        Returns:
            User's dimension portrait
        """
        logger.info(f"Getting dimension portrait for user {user_id}")
        try:
            # Get user summaries
            user_summaries = await self.extract_user_summaries(user_id=user_id)
            if not user_summaries:
                logger.warning(f"No summaries found for user {user_id}")
                return self.dimension_analyzer._create_empty_portrait(user_id)
            # Analyze dimensions
            dimension_portrait = await self.dimension_analyzer.analyze_dimensions(
                user_id=user_id,
                user_summaries=user_summaries
            )
            # Include historical trends if requested
            if include_history:
                # In a full implementation, this would retrieve historical data
                # For now, we'll leave historical_trends as None
                pass
            logger.info(f"Retrieved dimension portrait for user {user_id}")
            return dimension_portrait
        except Exception as e:
            logger.error(f"Failed to get dimension portrait for user {user_id}: {e}")
            raise
    async def get_interest_area_distribution(
        self,
        user_id: str,
        include_trends: bool = False
    ) -> InterestAreaDistribution:
        """Get user's interest area distribution across four areas.
        Args:
            user_id: Target user ID
            include_trends: Whether to include trending information
        Returns:
            User's interest area distribution
        """
        logger.info(f"Getting interest area distribution for user {user_id}")
        try:
            # Get user summaries
            user_summaries = await self.extract_user_summaries(user_id=user_id)
            if not user_summaries:
                logger.warning(f"No summaries found for user {user_id}")
                return self.interest_analyzer._create_empty_distribution(user_id)
            # Analyze interests
            interest_distribution = await self.interest_analyzer.analyze_interests(
                user_id=user_id,
                user_summaries=user_summaries
            )
            # Include trends if requested
            if include_trends:
                # In a full implementation, this would calculate trending directions
                # For now, we'll leave trending_direction as None for each category
                pass
            logger.info(f"Retrieved interest area distribution for user {user_id}")
            return interest_distribution
        except Exception as e:
            logger.error(f"Failed to get interest area distribution for user {user_id}: {e}")
            raise
    async def get_behavior_habits(
        self,
        user_id: str,
        confidence_level: Optional[str] = None,
        frequency_pattern: Optional[str] = None,
        time_period: Optional[str] = None
    ) -> List[BehaviorHabit]:
        """Get user's behavioral habits with filtering.
        Args:
            user_id: Target user ID
            confidence_level: Optional confidence level filter ("high", "medium", "low")
            frequency_pattern: Optional frequency pattern filter
            time_period: Optional time period filter ("current", "past")
        Returns:
            List of filtered behavioral habits
        """
        logger.info(f"Getting behavior habits for user {user_id}")
        try:
            # Get user summaries
            user_summaries = await self.extract_user_summaries(user_id=user_id)
            if not user_summaries:
                logger.warning(f"No summaries found for user {user_id}")
                return []
            # Detect habits
            behavior_habits = await self.habit_detector.detect_habits(
                user_id=user_id,
                user_summaries=user_summaries
            )
            # Apply filters
            filtered_habits = []
            for habit in behavior_habits:
                # Filter by confidence level
                if confidence_level:
                    try:
                        target_confidence = ConfidenceLevel(confidence_level.lower())
                        if habit.confidence_level != target_confidence:
                            continue
                    except ValueError:
                        logger.warning(f"Invalid confidence level: {confidence_level}")
                        continue
                # Filter by frequency pattern
                if frequency_pattern:
                    try:
                        target_frequency = FrequencyPattern(frequency_pattern.lower())
                        if habit.frequency_pattern != target_frequency:
                            continue
                    except ValueError:
                        logger.warning(f"Invalid frequency pattern: {frequency_pattern}")
                        continue
                # Filter by time period
                if time_period:
                    if time_period.lower() == "current" and not habit.is_current:
                        continue
                    elif time_period.lower() == "past" and habit.is_current:
                        continue
                filtered_habits.append(habit)
            # Sort by confidence level and recency
            confidence_order = {"high": 3, "medium": 2, "low": 1}
            filtered_habits.sort(
                key=lambda x: (
                    confidence_order.get(x.confidence_level.value, 0),
                    x.last_observed
                ),
                reverse=True
            )
            logger.info(f"Retrieved {len(filtered_habits)} behavior habits for user {user_id}")
            return filtered_habits
        except Exception as e:
            logger.error(f"Failed to get behavior habits for user {user_id}: {e}")
            raise
		`@@ -0,0 +1 @@`
							`"""Analyzers package for implicit memory analysis components."""`