feat(implicit memory): upgrade pydantic v2 compatibility and confidence level handling

- Replace deprecated `.dict()` with `.model_dump(mode='json')` for pydantic v2 compatibility
- Convert confidence level from enum-based strings to numerical values (0-100 scale)
- Add confidence level mapping in controller (high: 85, medium: 50, low: 20)
- Update dimension analyzer to handle both string and numeric confidence inputs
- Refactor habit analyzer confidence level validation logic
- Remove ConfidenceLevel enum import and replace with integer-based approach
- Update memory config validators for numerical confidence level support
- Ensure all implicit memory schemas use model_dump for serialization
- Improve type consistency across memory analytics modules
2026-01-08 17:50:01 +08:00
parent e05f33b286
commit 7167c2002f
7 changed files with 195 additions and 105 deletions
--- a/api/app/core/memory/analytics/implicit_memory/analyzers/dimension_analyzer.py
+++ b/api/app/core/memory/analytics/implicit_memory/analyzers/dimension_analyzer.py
@@ -12,7 +12,6 @@ from typing import Any, Dict, List, Optional
 from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
-    ConfidenceLevel,
    DimensionPortrait,
    DimensionScore,
    UserMemorySummary,
@@ -28,7 +27,7 @@ class DimensionData(BaseModel):
    percentage: float = Field(ge=0.0, le=100.0)
    evidence: List[str] = Field(default_factory=list)
    reasoning: str = ""
-    confidence_level: str = "medium"
+    confidence_level: int = 50  # Default to medium confidence


 class DimensionAnalysisResponse(BaseModel):
@@ -147,8 +146,7 @@ class DimensionAnalyzer:
        percentage = max(0.0, min(100.0, float(percentage)))
        
        # Validate confidence level
-        confidence_level_str = dimension_data.get("confidence_level", "low")
-        confidence_level = self._validate_confidence_level(confidence_level_str)
+        confidence_level = self._validate_confidence_level(dimension_data.get("confidence_level", 50))
        
        # Ensure evidence is not empty
        evidence = dimension_data.get("evidence", [])
@@ -182,32 +180,41 @@ class DimensionAnalyzer:
            percentage=0.0,
            evidence=["Insufficient data for analysis"],
            reasoning=f"No clear evidence found for {dimension_name} dimension",
-            confidence_level=ConfidenceLevel.LOW
+            confidence_level=20  # Low confidence as numerical value
        )
    
-    def _validate_confidence_level(self, confidence_str: str) -> ConfidenceLevel:
-        """Validate and convert confidence level string.
+    def _validate_confidence_level(self, confidence_level) -> int:
+        """Return confidence level as integer, handling both string and numeric inputs.
        
        Args:
-            confidence_str: Confidence level as string
+            confidence_level: Confidence level (string or numeric)
            
        Returns:
-            ConfidenceLevel enum value
+            Confidence level as integer (0-100)
        """
-        if not confidence_str:
-            return ConfidenceLevel.MEDIUM
+        # If it's already a number, return it as int
+        if isinstance(confidence_level, (int, float)):
+            return int(confidence_level)
        
-        confidence_str = str(confidence_str).lower().strip()
+        # If it's a string, convert common values to numbers
+        if isinstance(confidence_level, str):
+            confidence_str = confidence_level.lower().strip()
+            if confidence_str in ["high", "높음"]:
+                return 85
+            elif confidence_str in ["medium", "중간"]:
+                return 50
+            elif confidence_str in ["low", "낮음"]:
+                return 20
+            else:
+                # Try to parse as number
+                try:
+                    return int(float(confidence_str))
+                except ValueError:
+                    logger.warning(f"Unknown confidence level: {confidence_level}, defaulting to medium")
+                    return 50
        
-        if confidence_str in ["high", "높음"]:
-            return ConfidenceLevel.HIGH
-        elif confidence_str in ["medium", "중간"]:
-            return ConfidenceLevel.MEDIUM
-        elif confidence_str in ["low", "낮음"]:
-            return ConfidenceLevel.LOW
-        else:
-            logger.warning(f"Unknown confidence level: {confidence_str}, defaulting to medium")
-            return ConfidenceLevel.MEDIUM
+        # Default fallback
+        return 50
    
    def _create_empty_portrait(self, user_id: str) -> DimensionPortrait:
        """Create an empty dimension portrait when no data is available.
--- a/api/app/core/memory/analytics/implicit_memory/analyzers/habit_analyzer.py
+++ b/api/app/core/memory/analytics/implicit_memory/analyzers/habit_analyzer.py
@@ -6,14 +6,13 @@ similar habits with confidence scoring.
 """

 import logging
-from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional
+from datetime import datetime
+from typing import List, Optional

 from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
    BehaviorHabit,
-    ConfidenceLevel,
    FrequencyPattern,
    UserMemorySummary,
 )
@@ -28,7 +27,7 @@ class HabitData(BaseModel):
    habit_description: str
    frequency_pattern: str
    time_context: str
-    confidence_level: str
+    confidence_level: int = 50  # Default to medium confidence
    supporting_summaries: List[str] = Field(default_factory=list)
    specific_examples: List[str] = Field(default_factory=list)
    is_current: bool = True
@@ -88,7 +87,6 @@ class HabitAnalyzer:
            
            # Convert to BehaviorHabit objects
            behavior_habits = []
-            current_time = datetime.now()
            
            for habit_data in response.get("habits", []):
                try:
@@ -105,8 +103,7 @@ class HabitAnalyzer:
                        habit_description=habit_data.get("habit_description", ""),
                        frequency_pattern=self._validate_frequency_pattern(habit_data.get("frequency_pattern", "occasional")),
                        time_context=habit_data.get("time_context", ""),
-                        confidence_level=self._validate_confidence_level(habit_data.get("confidence_level", "medium")),
-                        supporting_summaries=supporting_summaries,
+                        confidence_level=self._validate_confidence_level(habit_data.get("confidence_level", 50)),
                        specific_examples=specific_examples,
                        first_observed=first_observed,
                        last_observed=last_observed,
@@ -165,26 +162,38 @@ class HabitAnalyzer:
        
        return frequency_mapping.get(frequency_str, FrequencyPattern.OCCASIONAL)
    
-    def _validate_confidence_level(self, confidence_str: str) -> ConfidenceLevel:
-        """Validate and convert confidence level string.
+    def _validate_confidence_level(self, confidence_level) -> int:
+        """Return confidence level as integer, handling both string and numeric inputs.
        
        Args:
-            confidence_str: Confidence level as string
+            confidence_level: Confidence level (string or numeric)
            
        Returns:
-            ConfidenceLevel enum value
+            Confidence level as integer (0-100)
        """
-        confidence_str = confidence_str.lower().strip()
+        # If it's already a number, return it as int
+        if isinstance(confidence_level, (int, float)):
+            return int(confidence_level)
        
-        if confidence_str in ["high", "높음"]:
-            return ConfidenceLevel.HIGH
-        elif confidence_str in ["medium", "중간"]:
-            return ConfidenceLevel.MEDIUM
-        elif confidence_str in ["low", "낮음"]:
-            return ConfidenceLevel.LOW
-        else:
-            logger.warning(f"Unknown confidence level: {confidence_str}, defaulting to medium")
-            return ConfidenceLevel.MEDIUM
+        # If it's a string, convert common values to numbers
+        if isinstance(confidence_level, str):
+            confidence_str = confidence_level.lower().strip()
+            if confidence_str in ["high", "높음"]:
+                return 85
+            elif confidence_str in ["medium", "중간"]:
+                return 50
+            elif confidence_str in ["low", "낮음"]:
+                return 20
+            else:
+                # Try to parse as number
+                try:
+                    return int(float(confidence_str))
+                except ValueError:
+                    logger.warning(f"Unknown confidence level: {confidence_level}, defaulting to medium")
+                    return 50
+        
+        # Default fallback
+        return 50
    
    def _determine_observation_dates(
        self,
@@ -249,7 +258,7 @@ class HabitAnalyzer:
                return False
            
            # Check supporting summaries
-            if not habit.supporting_summaries or len(habit.supporting_summaries) == 0:
+            if not habit.specific_examples or len(habit.specific_examples) == 0:
                return False
            
            # Check specific examples
@@ -389,9 +398,9 @@ class HabitAnalyzer:
        Returns:
            Merged behavioral habit
        """
-        # Combine supporting summaries
-        combined_summaries = list(set(
-            existing_habit.supporting_summaries + new_habit.supporting_summaries
+        # Combine supporting summaries (using specific_examples instead)
+        combined_examples = list(set(
+            existing_habit.specific_examples + new_habit.specific_examples
        ))
        
        # Combine specific examples
@@ -400,8 +409,7 @@ class HabitAnalyzer:
        ))
        
        # Update confidence level (take higher confidence)
-        confidence_levels = [existing_habit.confidence_level, new_habit.confidence_level]
-        new_confidence = max(confidence_levels, key=lambda x: ["low", "medium", "high"].index(x.value))
+        new_confidence = max(existing_habit.confidence_level, new_habit.confidence_level)
        
        # Update observation dates
        first_observed = min(existing_habit.first_observed, new_habit.first_observed)
@@ -420,7 +428,6 @@ class HabitAnalyzer:
            frequency_pattern=existing_habit.frequency_pattern,  # Keep original frequency
            time_context=combined_time_context,
            confidence_level=new_confidence,
-            supporting_summaries=combined_summaries,
            specific_examples=combined_examples,
            first_observed=first_observed,
            last_observed=last_observed,
@@ -437,8 +444,8 @@ class HabitAnalyzer:
            Sorted list of habits
        """
        def priority_score(habit: BehaviorHabit) -> tuple:
-            # Confidence level score (high=3, medium=2, low=1)
-            confidence_score = {"high": 3, "medium": 2, "low": 1}.get(habit.confidence_level.value, 1)
+            # Confidence level score (0-100 scale)
+            confidence_score = habit.confidence_level
            
            # Recency score (more recent = higher score)
            days_since_last = (datetime.now() - habit.last_observed).days
--- a/api/app/core/memory/analytics/implicit_memory/habit_detector.py
+++ b/api/app/core/memory/analytics/implicit_memory/habit_detector.py
@@ -16,7 +16,6 @@ from app.core.memory.analytics.implicit_memory.analyzers.habit_analyzer import (
 from app.core.memory.llm_tools.llm_client import LLMClientException
 from app.schemas.implicit_memory_schema import (
    BehaviorHabit,
-    ConfidenceLevel,
    FrequencyPattern,
    UserMemorySummary,
 )
@@ -116,13 +115,8 @@ class HabitDetector:
        def calculate_ranking_score(habit: BehaviorHabit) -> float:
            """Calculate combined ranking score for a habit."""
            
-            # Confidence score (0.0-1.0)
-            confidence_scores = {
-                ConfidenceLevel.HIGH: 1.0,
-                ConfidenceLevel.MEDIUM: 0.6,
-                ConfidenceLevel.LOW: 0.3
-            }
-            confidence_score = confidence_scores.get(habit.confidence_level, 0.3)
+            # Confidence score (0.0-1.0) - convert from 0-100 scale
+            confidence_score = habit.confidence_level / 100.0
            
            # Recency score (0.0-1.0)
            current_time = datetime.now()
@@ -152,7 +146,7 @@ class HabitDetector:
            frequency_bonus = frequency_bonuses.get(habit.frequency_pattern, 0.0)
            
            # Evidence quality bonus
-            evidence_bonus = min(len(habit.supporting_summaries) / 10.0, 0.1)  # Max 0.1 bonus
+            evidence_bonus = min(len(habit.specific_examples) / 10.0, 0.1)  # Max 0.1 bonus
            
            # Current habit bonus
            current_bonus = 0.1 if habit.is_current else 0.0
@@ -204,7 +198,6 @@ class HabitDetector:
                    frequency_pattern=habit.frequency_pattern,
                    time_context=habit.time_context,
                    confidence_level=habit.confidence_level,
-                    supporting_summaries=habit.supporting_summaries,
                    specific_examples=habit.specific_examples,
                    first_observed=habit.first_observed,
                    last_observed=habit.last_observed,
@@ -218,7 +211,6 @@ class HabitDetector:
                    frequency_pattern=habit.frequency_pattern,
                    time_context=habit.time_context,
                    confidence_level=habit.confidence_level,
-                    supporting_summaries=habit.supporting_summaries,
                    specific_examples=habit.specific_examples,
                    first_observed=habit.first_observed,
                    last_observed=habit.last_observed,