feat(implicit memory): improve pydantic v2 compatibility and switch confidence levels to numeric values

- Replace deprecated `.dict()` with `.model_dump(mode='json')` for pydantic v2 compatibility
- Convert confidence level from enum-based strings to numerical values (0-100 scale)
- Add confidence level mapping in controller (high: 85, medium: 50, low: 20)
- Update dimension analyzer to handle both string and numeric confidence inputs
- Refactor habit analyzer confidence level validation logic
- Remove ConfidenceLevel enum import and replace with integer-based approach
- Update memory config validators for numerical confidence level support
- Ensure all implicit memory schemas use model_dump for serialization
- Improve type consistency across memory analytics modules
This commit is contained in:
Ke Sun
2026-01-08 17:50:01 +08:00
parent e05f33b286
commit 7167c2002f
7 changed files with 195 additions and 105 deletions

View File

@@ -12,7 +12,6 @@ from typing import Any, Dict, List, Optional
from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
from app.core.memory.llm_tools.llm_client import LLMClientException
from app.schemas.implicit_memory_schema import (
ConfidenceLevel,
DimensionPortrait,
DimensionScore,
UserMemorySummary,
@@ -28,7 +27,7 @@ class DimensionData(BaseModel):
percentage: float = Field(ge=0.0, le=100.0)
evidence: List[str] = Field(default_factory=list)
reasoning: str = ""
confidence_level: str = "medium"
confidence_level: int = 50 # Default to medium confidence
class DimensionAnalysisResponse(BaseModel):
@@ -147,8 +146,7 @@ class DimensionAnalyzer:
percentage = max(0.0, min(100.0, float(percentage)))
# Validate confidence level
confidence_level_str = dimension_data.get("confidence_level", "low")
confidence_level = self._validate_confidence_level(confidence_level_str)
confidence_level = self._validate_confidence_level(dimension_data.get("confidence_level", 50))
# Ensure evidence is not empty
evidence = dimension_data.get("evidence", [])
@@ -182,32 +180,41 @@ class DimensionAnalyzer:
percentage=0.0,
evidence=["Insufficient data for analysis"],
reasoning=f"No clear evidence found for {dimension_name} dimension",
confidence_level=ConfidenceLevel.LOW
confidence_level=20 # Low confidence as numerical value
)
def _validate_confidence_level(self, confidence_str: str) -> ConfidenceLevel:
"""Validate and convert confidence level string.
def _validate_confidence_level(self, confidence_level) -> int:
"""Return confidence level as integer, handling both string and numeric inputs.
Args:
confidence_str: Confidence level as string
confidence_level: Confidence level (string or numeric)
Returns:
ConfidenceLevel enum value
Confidence level as integer (0-100)
"""
if not confidence_str:
return ConfidenceLevel.MEDIUM
# If it's already a number, return it as int
if isinstance(confidence_level, (int, float)):
return int(confidence_level)
confidence_str = str(confidence_str).lower().strip()
# If it's a string, convert common values to numbers
if isinstance(confidence_level, str):
confidence_str = confidence_level.lower().strip()
if confidence_str in ["high", "높음"]:
return 85
elif confidence_str in ["medium", "중간"]:
return 50
elif confidence_str in ["low", "낮음"]:
return 20
else:
# Try to parse as number
try:
return int(float(confidence_str))
except ValueError:
logger.warning(f"Unknown confidence level: {confidence_level}, defaulting to medium")
return 50
if confidence_str in ["high", "높음"]:
return ConfidenceLevel.HIGH
elif confidence_str in ["medium", "중간"]:
return ConfidenceLevel.MEDIUM
elif confidence_str in ["low", "낮음"]:
return ConfidenceLevel.LOW
else:
logger.warning(f"Unknown confidence level: {confidence_str}, defaulting to medium")
return ConfidenceLevel.MEDIUM
# Default fallback
return 50
def _create_empty_portrait(self, user_id: str) -> DimensionPortrait:
"""Create an empty dimension portrait when no data is available.

View File

@@ -6,14 +6,13 @@ similar habits with confidence scoring.
"""
import logging
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from datetime import datetime
from typing import List, Optional
from app.core.memory.analytics.implicit_memory.llm_client import ImplicitMemoryLLMClient
from app.core.memory.llm_tools.llm_client import LLMClientException
from app.schemas.implicit_memory_schema import (
BehaviorHabit,
ConfidenceLevel,
FrequencyPattern,
UserMemorySummary,
)
@@ -28,7 +27,7 @@ class HabitData(BaseModel):
habit_description: str
frequency_pattern: str
time_context: str
confidence_level: str
confidence_level: int = 50 # Default to medium confidence
supporting_summaries: List[str] = Field(default_factory=list)
specific_examples: List[str] = Field(default_factory=list)
is_current: bool = True
@@ -88,7 +87,6 @@ class HabitAnalyzer:
# Convert to BehaviorHabit objects
behavior_habits = []
current_time = datetime.now()
for habit_data in response.get("habits", []):
try:
@@ -105,8 +103,7 @@ class HabitAnalyzer:
habit_description=habit_data.get("habit_description", ""),
frequency_pattern=self._validate_frequency_pattern(habit_data.get("frequency_pattern", "occasional")),
time_context=habit_data.get("time_context", ""),
confidence_level=self._validate_confidence_level(habit_data.get("confidence_level", "medium")),
supporting_summaries=supporting_summaries,
confidence_level=self._validate_confidence_level(habit_data.get("confidence_level", 50)),
specific_examples=specific_examples,
first_observed=first_observed,
last_observed=last_observed,
@@ -165,26 +162,38 @@ class HabitAnalyzer:
return frequency_mapping.get(frequency_str, FrequencyPattern.OCCASIONAL)
def _validate_confidence_level(self, confidence_str: str) -> ConfidenceLevel:
"""Validate and convert confidence level string.
def _validate_confidence_level(self, confidence_level) -> int:
"""Return confidence level as integer, handling both string and numeric inputs.
Args:
confidence_str: Confidence level as string
confidence_level: Confidence level (string or numeric)
Returns:
ConfidenceLevel enum value
Confidence level as integer (0-100)
"""
confidence_str = confidence_str.lower().strip()
# If it's already a number, return it as int
if isinstance(confidence_level, (int, float)):
return int(confidence_level)
if confidence_str in ["high", "높음"]:
return ConfidenceLevel.HIGH
elif confidence_str in ["medium", "중간"]:
return ConfidenceLevel.MEDIUM
elif confidence_str in ["low", "낮음"]:
return ConfidenceLevel.LOW
else:
logger.warning(f"Unknown confidence level: {confidence_str}, defaulting to medium")
return ConfidenceLevel.MEDIUM
# If it's a string, convert common values to numbers
if isinstance(confidence_level, str):
confidence_str = confidence_level.lower().strip()
if confidence_str in ["high", "높음"]:
return 85
elif confidence_str in ["medium", "중간"]:
return 50
elif confidence_str in ["low", "낮음"]:
return 20
else:
# Try to parse as number
try:
return int(float(confidence_str))
except ValueError:
logger.warning(f"Unknown confidence level: {confidence_level}, defaulting to medium")
return 50
# Default fallback
return 50
def _determine_observation_dates(
self,
@@ -249,7 +258,7 @@ class HabitAnalyzer:
return False
# Check supporting summaries
if not habit.supporting_summaries or len(habit.supporting_summaries) == 0:
if not habit.specific_examples or len(habit.specific_examples) == 0:
return False
# Check specific examples
@@ -389,9 +398,9 @@ class HabitAnalyzer:
Returns:
Merged behavioral habit
"""
# Combine supporting summaries
combined_summaries = list(set(
existing_habit.supporting_summaries + new_habit.supporting_summaries
# Combine specific examples from both habits (used in place of supporting_summaries)
combined_examples = list(set(
existing_habit.specific_examples + new_habit.specific_examples
))
# Combine specific examples
@@ -400,8 +409,7 @@ class HabitAnalyzer:
))
# Update confidence level (take higher confidence)
confidence_levels = [existing_habit.confidence_level, new_habit.confidence_level]
new_confidence = max(confidence_levels, key=lambda x: ["low", "medium", "high"].index(x.value))
new_confidence = max(existing_habit.confidence_level, new_habit.confidence_level)
# Update observation dates
first_observed = min(existing_habit.first_observed, new_habit.first_observed)
@@ -420,7 +428,6 @@ class HabitAnalyzer:
frequency_pattern=existing_habit.frequency_pattern, # Keep original frequency
time_context=combined_time_context,
confidence_level=new_confidence,
supporting_summaries=combined_summaries,
specific_examples=combined_examples,
first_observed=first_observed,
last_observed=last_observed,
@@ -437,8 +444,8 @@ class HabitAnalyzer:
Sorted list of habits
"""
def priority_score(habit: BehaviorHabit) -> tuple:
# Confidence level score (high=3, medium=2, low=1)
confidence_score = {"high": 3, "medium": 2, "low": 1}.get(habit.confidence_level.value, 1)
# Confidence level score (0-100 scale)
confidence_score = habit.confidence_level
# Recency score (more recent = higher score)
days_since_last = (datetime.now() - habit.last_observed).days

View File

@@ -16,7 +16,6 @@ from app.core.memory.analytics.implicit_memory.analyzers.habit_analyzer import (
from app.core.memory.llm_tools.llm_client import LLMClientException
from app.schemas.implicit_memory_schema import (
BehaviorHabit,
ConfidenceLevel,
FrequencyPattern,
UserMemorySummary,
)
@@ -116,13 +115,8 @@ class HabitDetector:
def calculate_ranking_score(habit: BehaviorHabit) -> float:
"""Calculate combined ranking score for a habit."""
# Confidence score (0.0-1.0)
confidence_scores = {
ConfidenceLevel.HIGH: 1.0,
ConfidenceLevel.MEDIUM: 0.6,
ConfidenceLevel.LOW: 0.3
}
confidence_score = confidence_scores.get(habit.confidence_level, 0.3)
# Confidence score (0.0-1.0) - convert from 0-100 scale
confidence_score = habit.confidence_level / 100.0
# Recency score (0.0-1.0)
current_time = datetime.now()
@@ -152,7 +146,7 @@ class HabitDetector:
frequency_bonus = frequency_bonuses.get(habit.frequency_pattern, 0.0)
# Evidence quality bonus
evidence_bonus = min(len(habit.supporting_summaries) / 10.0, 0.1) # Max 0.1 bonus
evidence_bonus = min(len(habit.specific_examples) / 10.0, 0.1) # Max 0.1 bonus
# Current habit bonus
current_bonus = 0.1 if habit.is_current else 0.0
@@ -204,7 +198,6 @@ class HabitDetector:
frequency_pattern=habit.frequency_pattern,
time_context=habit.time_context,
confidence_level=habit.confidence_level,
supporting_summaries=habit.supporting_summaries,
specific_examples=habit.specific_examples,
first_observed=habit.first_observed,
last_observed=habit.last_observed,
@@ -218,7 +211,6 @@ class HabitDetector:
frequency_pattern=habit.frequency_pattern,
time_context=habit.time_context,
confidence_level=habit.confidence_level,
supporting_summaries=habit.supporting_summaries,
specific_examples=habit.specific_examples,
first_observed=habit.first_observed,
last_observed=habit.last_observed,