[add] The "update-implicit-emotions-storage" task uses the timeline to filter the updated data users.
This commit is contained in:
@@ -111,6 +111,57 @@ class ImplicitEmotionsStorageRepository:
|
||||
logger.error(f"分批获取用户ID失败: offset={offset}, error={e}")
|
||||
break
|
||||
|
||||
def get_users_needing_refresh(self, redis_client, batch_size: int = 100) -> Generator[str, None, None]:
|
||||
"""分批次获取需要刷新隐性记忆/情绪数据的存量用户ID。
|
||||
|
||||
筛选逻辑:
|
||||
- 查询 implicit_emotions_storage 中所有用户的 end_user_id 和 updated_at
|
||||
- 从 Redis 读取 write_message:last_done:{end_user_id} 的时间戳
|
||||
- 若 Redis 中无记录(该用户从未写入过记忆),跳过
|
||||
- 若 last_done > updated_at,说明上次刷新后又有新记忆写入,需要刷新
|
||||
- 若 last_done <= updated_at,说明已是最新,跳过
|
||||
|
||||
Args:
|
||||
redis_client: 同步 redis.StrictRedis 实例(连接 CELERY_BACKEND DB)
|
||||
batch_size: 每批次加载的数量
|
||||
|
||||
Yields:
|
||||
需要刷新的用户ID字符串
|
||||
"""
|
||||
from datetime import timezone
|
||||
offset = 0
|
||||
while True:
|
||||
try:
|
||||
stmt = (
|
||||
select(ImplicitEmotionsStorage.end_user_id, ImplicitEmotionsStorage.updated_at)
|
||||
.order_by(ImplicitEmotionsStorage.end_user_id)
|
||||
.limit(batch_size)
|
||||
.offset(offset)
|
||||
)
|
||||
batch = self.db.execute(stmt).all()
|
||||
if not batch:
|
||||
break
|
||||
|
||||
for end_user_id, updated_at in batch:
|
||||
raw = redis_client.get(f"write_message:last_done:{end_user_id}")
|
||||
if raw is None:
|
||||
# 该用户从未有过 write_message 成功记录,跳过
|
||||
continue
|
||||
try:
|
||||
last_done = datetime.fromisoformat(raw)
|
||||
# 统一去掉时区信息做 naive 比较
|
||||
if last_done.tzinfo is not None:
|
||||
last_done = last_done.astimezone(timezone.utc).replace(tzinfo=None)
|
||||
if updated_at is None or last_done > updated_at:
|
||||
yield end_user_id
|
||||
except Exception as e:
|
||||
logger.warning(f"解析 last_done 时间戳失败: end_user_id={end_user_id}, raw={raw}, error={e}")
|
||||
|
||||
offset += batch_size
|
||||
except Exception as e:
|
||||
logger.error(f"get_users_needing_refresh 分批查询失败: offset={offset}, error={e}")
|
||||
break
|
||||
|
||||
def get_new_user_ids_today(self, batch_size: int = 100) -> Generator[str, None, None]:
|
||||
"""分批次获取当天新增的、尚未初始化隐性记忆和情绪建议数据的用户ID
|
||||
|
||||
|
||||
Reference in New Issue
Block a user