feat(implicit-emotions): add Redis resilience and connection pooling

- Replace single Redis client with connection pool for better concurrency and auto-reconnection
- Add graceful degradation when Redis is unavailable (None handling in get_users_needing_refresh)
- Add RedisError exception handling with fallback to process all users on mget failures
- Add type hints (Optional[redis.StrictRedis]) to Redis client parameters
- Add health check and socket timeout configuration to connection pool
- Add logging for Redis connection failures and degradation events
- Reorganize imports alphabetically for consistency across both files
- Update get_sync_redis_client to validate connection with ping() before returning
This commit is contained in:
Ke Sun
2026-03-09 14:12:53 +08:00
parent 476632294f
commit c8065b0c60
2 changed files with 102 additions and 35 deletions

View File

@@ -5,13 +5,15 @@ Implicit Emotions Storage Repository
事务由调用方控制,仓储层只使用 flush/refresh
"""
import logging
from datetime import datetime, date, timezone, timedelta
from typing import Optional, Generator
from sqlalchemy.orm import Session
from sqlalchemy import select, not_, exists
from datetime import date, datetime, timedelta, timezone
from typing import Generator, Optional
import redis
from sqlalchemy import exists, not_, select
from sqlalchemy.orm import Session
from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage
from app.models.end_user_model import EndUser
from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage
logger = logging.getLogger(__name__)
@@ -111,7 +113,7 @@ class ImplicitEmotionsStorageRepository:
logger.error(f"分批获取用户ID失败: offset={offset}, error={e}")
break
def get_users_needing_refresh(self, redis_client, batch_size: int = 100) -> Generator[str, None, None]:
def get_users_needing_refresh(self, redis_client: Optional[redis.StrictRedis], batch_size: int = 100) -> Generator[str, None, None]:
"""分批次获取需要刷新隐性记忆/情绪数据的存量用户ID。
筛选逻辑:
@@ -120,15 +122,28 @@ class ImplicitEmotionsStorageRepository:
- 若 Redis 中无记录(该用户从未写入过记忆),跳过
- 若 last_done > updated_at说明上次刷新后又有新记忆写入需要刷新
- 若 last_done <= updated_at说明已是最新跳过
如果 redis_client 为 None则降级为返回所有用户禁用时间过滤
Args:
redis_client: 同步 redis.StrictRedis 实例(连接 CELERY_BACKEND DB
redis_client: 同步 redis.StrictRedis 实例(连接 CELERY_BACKEND DB,如果为 None 则禁用时间过滤
batch_size: 每批次加载的数量
Yields:
需要刷新的用户ID字符串
"""
from datetime import timezone
from redis.exceptions import RedisError
# 如果 Redis 不可用,降级为处理所有用户
if redis_client is None:
logger.warning(
"Redis 客户端不可用,时间过滤已禁用,将处理所有存量用户"
)
yield from self.get_all_user_ids(batch_size)
return
offset = 0
while True:
try:
@@ -144,7 +159,18 @@ class ImplicitEmotionsStorageRepository:
# 批量获取当前批次所有用户的 last_done 时间戳(一次网络往返)
keys = [f"write_message:last_done:{end_user_id}" for end_user_id, _ in batch]
raw_values = redis_client.mget(keys)
try:
raw_values = redis_client.mget(keys)
except RedisError as e:
logger.error(
f"Redis mget 操作失败: {e},当前批次降级为处理所有用户",
extra={"offset": offset, "batch_size": len(batch)}
)
# Redis 操作失败,降级为返回当前批次所有用户
yield from (end_user_id for end_user_id, _ in batch)
offset += batch_size
continue
for (end_user_id, updated_at), raw in zip(batch, raw_values):
if raw is None:
@@ -190,7 +216,8 @@ class ImplicitEmotionsStorageRepository:
Yields:
用户ID字符串
"""
from sqlalchemy import cast, String as SAString
from sqlalchemy import String as SAString
from sqlalchemy import cast
CST = timezone(timedelta(hours=8))
now_cst = datetime.now(CST)
today_start = now_cst.replace(hour=0, minute=0, second=0, microsecond=0).astimezone(timezone.utc).replace(tzinfo=None)

View File

@@ -1,5 +1,6 @@
import asyncio
import json
import logging
import os
import re
import shutil
@@ -14,29 +15,62 @@ from uuid import UUID
import redis
import requests
from redis.exceptions import RedisError
# 模块级同步 Redis 客户端单例,供 Celery 任务共享使用(避免每次任务新建连接)
logger = logging.getLogger(__name__)
# 模块级同步 Redis 连接池,供 Celery 任务共享使用
# 连接 CELERY_BACKEND DB与 write_message:last_done 时间戳写入保持一致
def _build_sync_redis_client():
# 使用连接池而非单例客户端,提供更好的并发性能和自动重连
_sync_redis_pool: redis.ConnectionPool = None
def _get_or_create_redis_pool() -> redis.ConnectionPool:
"""获取或创建 Redis 连接池(懒初始化)"""
global _sync_redis_pool
if _sync_redis_pool is None:
try:
_sync_redis_pool = redis.ConnectionPool(
host=settings.REDIS_HOST,
port=settings.REDIS_PORT,
db=settings.REDIS_DB_CELERY_BACKEND,
password=settings.REDIS_PASSWORD,
decode_responses=True,
max_connections=10,
socket_connect_timeout=5,
socket_timeout=5,
retry_on_timeout=True,
health_check_interval=30,
)
logger.info("Redis connection pool created for Celery tasks")
except Exception as e:
logger.error(f"Failed to create Redis connection pool: {e}", exc_info=True)
return None
return _sync_redis_pool
def get_sync_redis_client() -> Optional[redis.StrictRedis]:
"""获取同步 Redis 客户端(使用连接池)
使用连接池提供的客户端,支持自动重连和健康检查。
如果 Redis 不可用,返回 None调用方应优雅降级。
Returns:
redis.StrictRedis: Redis 客户端实例,如果连接失败则返回 None
"""
try:
return redis.StrictRedis(
host=settings.REDIS_HOST,
port=settings.REDIS_PORT,
db=settings.REDIS_DB_CELERY_BACKEND,
password=settings.REDIS_PASSWORD,
decode_responses=True,
)
except Exception:
pool = _get_or_create_redis_pool()
if pool is None:
return None
client = redis.StrictRedis(connection_pool=pool)
# 验证连接可用性
client.ping()
return client
except RedisError as e:
logger.error(f"Redis connection failed: {e}", exc_info=True)
return None
except Exception as e:
logger.error(f"Unexpected error getting Redis client: {e}", exc_info=True)
return None
_sync_redis_client: redis.StrictRedis = None
def get_sync_redis_client() -> redis.StrictRedis:
"""获取模块级同步 Redis 客户端(懒初始化单例)"""
global _sync_redis_client
if _sync_redis_client is None:
_sync_redis_client = _build_sync_redis_client()
return _sync_redis_client
# Import a unified Celery instance
from app.celery_app import celery_app
@@ -1117,8 +1151,9 @@ def write_message_task(self, end_user_id: str, message: list[dict], config_id: s
try:
_r = get_sync_redis_client()
if _r is not None:
from datetime import timezone as _tz, timedelta as _td
_CST = _tz(timedelta(hours=8))
from datetime import timedelta as _td
from datetime import timezone as _tz
_CST = _tz(_td(hours=8))
_now_cst = datetime.now(_CST).replace(tzinfo=None).isoformat()
_r.set(
f"write_message:last_done:{end_user_id}",
@@ -2187,12 +2222,15 @@ def update_implicit_emotions_storage(self) -> Dict[str, Any]:
start_time = time.time()
async def _run() -> Dict[str, Any]:
from sqlalchemy import func, select
from app.core.logging_config import get_logger
from app.repositories.implicit_emotions_storage_repository import ImplicitEmotionsStorageRepository
from app.models.implicit_emotions_storage_model import ImplicitEmotionsStorage
from sqlalchemy import select, func
from app.services.implicit_memory_service import ImplicitMemoryService
from app.repositories.implicit_emotions_storage_repository import (
ImplicitEmotionsStorageRepository,
)
from app.services.emotion_analytics_service import EmotionAnalyticsService
from app.services.implicit_memory_service import ImplicitMemoryService
logger = get_logger(__name__)
logger.info("开始执行隐性记忆和情绪数据更新定时任务")
@@ -2476,9 +2514,11 @@ def init_implicit_emotions_for_users(self, end_user_ids: List[str]) -> Dict[str,
async def _run() -> Dict[str, Any]:
from app.core.logging_config import get_logger
from app.repositories.implicit_emotions_storage_repository import ImplicitEmotionsStorageRepository
from app.services.implicit_memory_service import ImplicitMemoryService
from app.repositories.implicit_emotions_storage_repository import (
ImplicitEmotionsStorageRepository,
)
from app.services.emotion_analytics_service import EmotionAnalyticsService
from app.services.implicit_memory_service import ImplicitMemoryService
logger = get_logger(__name__)
logger.info(f"开始按需初始化隐性记忆/情绪数据,候选用户数: {len(end_user_ids)}")