[add] The "update-implicit-emotions-storage" task now compares timestamps to select only the users whose data has been updated.

This commit is contained in:
lanceyq
2026-03-07 16:23:59 +08:00
parent 2612abc9d0
commit c14f067afb
2 changed files with 109 additions and 49 deletions

View File

@@ -111,6 +111,57 @@ class ImplicitEmotionsStorageRepository:
logger.error(f"分批获取用户ID失败: offset={offset}, error={e}")
break
def get_users_needing_refresh(self, redis_client, batch_size: int = 100) -> Generator[str, None, None]:
    """Yield, batch by batch, the IDs of existing users whose data needs a refresh.

    Selection logic:
      - Page through ``implicit_emotions_storage`` reading each row's
        ``end_user_id`` and ``updated_at`` (ordered by ``end_user_id``).
      - Read the timestamp stored in Redis under
        ``write_message:last_done:{end_user_id}``.
      - No Redis record (the user never had a successful write): skip.
      - ``updated_at`` is NULL, or ``last_done > updated_at`` (new memories
        were written after the last refresh): yield the user ID.
      - ``last_done <= updated_at`` (already up to date): skip.

    Both timestamps are reduced to naive UTC before comparison: aware values
    are converted to UTC and stripped of ``tzinfo``; naive values are used
    unchanged (NOTE(review): this assumes naive values are already UTC --
    confirm against the writers of both timestamps).

    Args:
        redis_client: Synchronous Redis client (per the original note, a
            ``redis.StrictRedis`` connected to the CELERY_BACKEND DB). Both
            ``str`` and ``bytes`` replies are accepted, so the client works
            with or without ``decode_responses=True``.
        batch_size: Number of rows loaded per database query.

    Yields:
        The ``end_user_id`` of each user that needs a refresh.

    Notes:
        A timestamp that cannot be parsed or compared is logged as a warning
        and that user is skipped. Any other failure while processing a batch
        (database or Redis) is logged as an error and ends the iteration.
    """
    # Local import keeps the block independent of how the module imports datetime.
    from datetime import datetime, timezone

    def _to_naive_utc(value):
        """Return *value* as a naive datetime, converting aware values to UTC first."""
        if value.tzinfo is not None:
            return value.astimezone(timezone.utc).replace(tzinfo=None)
        return value

    offset = 0
    while True:
        try:
            stmt = (
                select(ImplicitEmotionsStorage.end_user_id, ImplicitEmotionsStorage.updated_at)
                .order_by(ImplicitEmotionsStorage.end_user_id)
                .limit(batch_size)
                .offset(offset)
            )
            batch = self.db.execute(stmt).all()
            if not batch:
                break
            for end_user_id, updated_at in batch:
                raw = redis_client.get(f"write_message:last_done:{end_user_id}")
                if raw is None:
                    # No successful write_message record for this user: skip.
                    continue
                try:
                    # A client created without decode_responses=True returns bytes,
                    # which datetime.fromisoformat() rejects with TypeError.
                    text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
                    last_done = _to_naive_utc(datetime.fromisoformat(text))
                    # Normalise the DB value as well, so a timezone-aware column
                    # does not make the comparison raise.
                    needs_refresh = updated_at is None or last_done > _to_naive_utc(updated_at)
                except Exception as e:
                    # Deliberately best-effort: one bad timestamp must not stop the scan.
                    logger.warning(f"解析 last_done 时间戳失败: end_user_id={end_user_id}, raw={raw}, error={e}")
                    continue
                if needs_refresh:
                    yield end_user_id
            offset += batch_size
        except Exception as e:
            logger.error(f"get_users_needing_refresh 分批查询失败: offset={offset}, error={e}")
            break
def get_new_user_ids_today(self, batch_size: int = 100) -> Generator[str, None, None]:
"""分批次获取当天新增的、尚未初始化隐性记忆和情绪建议数据的用户ID