Merge pull request #729 from SuanmoSuanyangTechnology/fix/memory-write
fix(memory,task): add Redis fair lock for ordered memory writes
This commit is contained in:
@@ -65,7 +65,7 @@ class OpenAIClient(LLMClient):
|
|||||||
type=type_
|
type=type_
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"OpenAI 客户端初始化完成: type={type_}")
|
logger.debug(f"OpenAI 客户端初始化完成: type={type_}")
|
||||||
|
|
||||||
async def chat(self, messages: List[Dict[str, str]], **kwargs) -> Any:
|
async def chat(self, messages: List[Dict[str, str]], **kwargs) -> Any:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import hashlib
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
@@ -38,12 +37,10 @@ from app.db import get_db, get_db_context
|
|||||||
from app.models import Document, File, Knowledge
|
from app.models import Document, File, Knowledge
|
||||||
from app.models.end_user_model import EndUser
|
from app.models.end_user_model import EndUser
|
||||||
from app.schemas import document_schema, file_schema
|
from app.schemas import document_schema, file_schema
|
||||||
from app.schemas.model_schema import ModelInfo
|
|
||||||
from app.services.memory_agent_service import MemoryAgentService, get_end_user_connected_config
|
from app.services.memory_agent_service import MemoryAgentService, get_end_user_connected_config
|
||||||
from app.services.memory_forget_service import MemoryForgetService
|
from app.services.memory_forget_service import MemoryForgetService
|
||||||
from app.services.memory_perceptual_service import MemoryPerceptualService
|
|
||||||
from app.utils.config_utils import resolve_config_id
|
from app.utils.config_utils import resolve_config_id
|
||||||
from app.utils.redis_lock import RedisLock
|
from app.utils.redis_lock import RedisFairLock
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@@ -1148,8 +1145,28 @@ def write_message_task(
|
|||||||
logger.info(f"[CELERY WRITE] Write completed successfully: {result}")
|
logger.info(f"[CELERY WRITE] Write completed successfully: {result}")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
redis_client = get_sync_redis_client()
|
||||||
|
lock = None
|
||||||
|
if redis_client is not None:
|
||||||
|
lock = RedisFairLock(
|
||||||
|
key=f"memory_write:{end_user_id}",
|
||||||
|
redis_client=redis_client,
|
||||||
|
expire=120,
|
||||||
|
timeout=300,
|
||||||
|
auto_renewal=True,
|
||||||
|
)
|
||||||
|
if not lock.acquire():
|
||||||
|
logger.warning(f"[CELERY WRITE] 获取锁超时,跳过本次写入: end_user_id={end_user_id}")
|
||||||
|
return {
|
||||||
|
"status": "SKIPPED",
|
||||||
|
"error": "acquire lock timeout",
|
||||||
|
"end_user_id": end_user_id,
|
||||||
|
"config_id": str(config_id),
|
||||||
|
"elapsed_time": time.time() - start_time,
|
||||||
|
"task_id": self.request.id,
|
||||||
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 尝试获取现有事件循环,如果不存在则创建新的
|
|
||||||
loop = set_asyncio_event_loop()
|
loop = set_asyncio_event_loop()
|
||||||
|
|
||||||
result = loop.run_until_complete(_run())
|
result = loop.run_until_complete(_run())
|
||||||
@@ -1158,7 +1175,6 @@ def write_message_task(
|
|||||||
logger.info(f"[CELERY WRITE] Task completed successfully "
|
logger.info(f"[CELERY WRITE] Task completed successfully "
|
||||||
f"- elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}")
|
f"- elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}")
|
||||||
|
|
||||||
# 记录该用户最后一次 write_message 成功的时间,供时间轴筛选使用
|
|
||||||
try:
|
try:
|
||||||
_r = get_sync_redis_client()
|
_r = get_sync_redis_client()
|
||||||
if _r is not None:
|
if _r is not None:
|
||||||
@@ -1199,9 +1215,12 @@ def write_message_task(
|
|||||||
"elapsed_time": elapsed_time,
|
"elapsed_time": elapsed_time,
|
||||||
"task_id": self.request.id
|
"task_id": self.request.id
|
||||||
}
|
}
|
||||||
|
finally:
|
||||||
|
if lock is not None:
|
||||||
# unused task
|
try:
|
||||||
|
lock.release()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[CELERY WRITE] 释放锁失败: {e}")
|
||||||
# @celery_app.task(name="app.core.memory.agent.health.check_read_service")
|
# @celery_app.task(name="app.core.memory.agent.health.check_read_service")
|
||||||
# def check_read_service_task() -> Dict[str, str]:
|
# def check_read_service_task() -> Dict[str, str]:
|
||||||
# """Call read_service and write latest status to Redis.
|
# """Call read_service and write latest status to Redis.
|
||||||
@@ -2879,3 +2898,6 @@ def init_community_clustering_for_users(self, end_user_ids: List[str], workspace
|
|||||||
"elapsed_time": time.time() - start_time,
|
"elapsed_time": time.time() - start_time,
|
||||||
"task_id": self.request.id,
|
"task_id": self.request.id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# unused task
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
import redis
|
import redis
|
||||||
import uuid
|
import uuid
|
||||||
import time
|
import time
|
||||||
|
import threading
|
||||||
|
|
||||||
UNLOCK_SCRIPT = """
|
UNLOCK_SCRIPT = """
|
||||||
if redis.call("get", KEYS[1]) == ARGV[1] then
|
if redis.call("get", KEYS[1]) == ARGV[1] then
|
||||||
@@ -10,45 +11,136 @@ else
|
|||||||
end
|
end
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
RENEW_SCRIPT = """
|
||||||
|
if redis.call("get", KEYS[1]) == ARGV[1] then
|
||||||
|
return redis.call("expire", KEYS[1], ARGV[2])
|
||||||
|
else
|
||||||
|
return 0
|
||||||
|
end
|
||||||
|
"""
|
||||||
|
|
||||||
class RedisLock:
|
CLEANUP_DEAD_HEAD_SCRIPT = """
|
||||||
|
local queue_key = KEYS[1]
|
||||||
|
local lock_key = KEYS[2]
|
||||||
|
|
||||||
|
local first = redis.call("lindex", queue_key, 0)
|
||||||
|
if not first then
|
||||||
|
return 0
|
||||||
|
end
|
||||||
|
|
||||||
|
if redis.call("exists", lock_key) == 1 then
|
||||||
|
return 0
|
||||||
|
end
|
||||||
|
|
||||||
|
redis.call("lpop", queue_key)
|
||||||
|
return 1
|
||||||
|
"""
|
||||||
|
|
||||||
|
SAFE_RELEASE_QUEUE_SCRIPT = """
|
||||||
|
local queue_key = KEYS[1]
|
||||||
|
local value = ARGV[1]
|
||||||
|
|
||||||
|
local first = redis.call("lindex", queue_key, 0)
|
||||||
|
if first == value then
|
||||||
|
redis.call("lpop", queue_key)
|
||||||
|
return 1
|
||||||
|
end
|
||||||
|
return 0
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_str(val):
|
||||||
|
"""统一将 Redis 返回值转为 str,兼容 decode_responses=True/False"""
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
if isinstance(val, bytes):
|
||||||
|
return val.decode("utf-8")
|
||||||
|
return str(val)
|
||||||
|
|
||||||
|
|
||||||
|
class RedisFairLock:
|
||||||
def __init__(
    self,
    key: str,
    redis_client: redis.StrictRedis,
    expire: int = 30,
    retry_interval: float = 0.05,
    timeout: float = 600,
    auto_renewal: bool = True
):
    """Prepare a fair (queue-ordered) lock handle; no Redis call is made here.

    Args:
        key: Redis key that represents the lock itself.
        redis_client: Client used for every Redis command of this lock.
        expire: TTL in seconds applied to the lock key when it is set.
        retry_interval: Seconds slept between two acquisition attempts.
        timeout: Maximum number of seconds ``acquire`` keeps trying.
        auto_renewal: Whether a background thread refreshes the TTL while
            the lock is held.
    """
    # Connection and tuning knobs.
    self.redis = redis_client
    self.expire = expire
    self.retry_interval = retry_interval
    self.timeout = timeout
    self.auto_renewal = auto_renewal

    # Redis-side identity: the lock key, its waiter queue, and a token that
    # is unique to this handle so only the owner can unlock or renew.
    self.key = key
    self.queue_key = f"{key}:queue"
    self.value = str(uuid.uuid4())

    # Local state.
    self._locked = False
    self._renew_thread = None
    self._stop_renew = threading.Event()
|
||||||
|
|
||||||
def acquire(self) -> bool:
    """Wait in FIFO order for the lock and take it.

    The caller's token is appended to ``queue_key``; only the waiter whose
    token is at the head of that list attempts ``SET key value NX EX expire``.

    Compared with the previous implementation this fixes two liveness bugs:

    * A waiter used to pop the head whenever the lock key was absent. In
      the short window after a release, two live waiters could evict each
      other, leaving the queue empty and both spinning until ``timeout``.
      A head is now evicted only after it has sat in front of a *free*
      lock for ``head_grace`` seconds (a live head claims a free lock
      within one ``retry_interval``), and only if it is still the head at
      eviction time.
    * A waiter whose entry disappeared from the queue never noticed.
      It now re-enqueues itself (at the tail).

    Returns:
        ``True`` once the lock is held (the renewal thread is started when
        ``auto_renewal`` is set); ``False`` if ``timeout`` seconds elapsed
        first, in which case the caller's queue entry is removed.
    """
    start = time.time()

    # Several missed polls in front of a free lock => the head is presumed dead.
    head_grace = max(1.0, 10 * self.retry_interval)
    idle_head = None   # head last seen waiting in front of a free lock
    idle_since = 0.0   # when that observation started

    # Pops the head only if it is still the expected token and the lock is
    # still free, so a head that changed in the meantime is never removed.
    evict_idle_head = """
    if redis.call("lindex", KEYS[1], 0) == ARGV[1]
       and redis.call("exists", KEYS[2]) == 0 then
        redis.call("lpop", KEYS[1])
        return 1
    end
    return 0
    """

    self.redis.rpush(self.queue_key, self.value)

    while True:
        queued = [_ensure_str(v) for v in self.redis.lrange(self.queue_key, 0, -1)]
        first = queued[0] if queued else None

        if first == self.value:
            idle_head = None
            ok = self.redis.set(self.key, self.value, nx=True, ex=self.expire)
            if ok:
                self._locked = True
                if self.auto_renewal:
                    self._start_renewal()
                return True
        else:
            if self.value not in queued:
                # Our entry was removed by another waiter: rejoin the queue.
                self.redis.rpush(self.queue_key, self.value)

            if first is None or self.redis.exists(self.key):
                # Nothing to watch, or the head is legitimately waiting
                # behind a held lock.
                idle_head = None
            elif first != idle_head:
                idle_head, idle_since = first, time.time()
            elif time.time() - idle_since >= head_grace:
                self.redis.eval(evict_idle_head, 2, self.queue_key, self.key, first)
                idle_head = None

        if time.time() - start > self.timeout:
            self.redis.lrem(self.queue_key, 0, self.value)
            return False

        time.sleep(self.retry_interval)
|
||||||
|
|
||||||
|
def _renewal_loop(self):
|
||||||
|
while not self._stop_renew.is_set():
|
||||||
|
time.sleep(self.expire / 3)
|
||||||
|
if self._stop_renew.is_set():
|
||||||
|
break
|
||||||
|
|
||||||
|
self.redis.eval(
|
||||||
|
RENEW_SCRIPT,
|
||||||
|
1,
|
||||||
|
self.key,
|
||||||
|
self.value,
|
||||||
|
str(self.expire)
|
||||||
|
)
|
||||||
|
|
||||||
|
def _start_renewal(self):
|
||||||
|
self._stop_renew = threading.Event()
|
||||||
|
self._renew_thread = threading.Thread(target=self._renewal_loop, daemon=True)
|
||||||
|
self._renew_thread.start()
|
||||||
|
|
||||||
|
def _stop_renewal(self):
|
||||||
|
self._stop_renew.set()
|
||||||
|
if self._renew_thread:
|
||||||
|
self._renew_thread.join(timeout=1)
|
||||||
|
|
||||||
def release(self):
    """Give the lock back and leave the waiter queue.

    No-op when this handle does not hold the lock. Otherwise the renewal
    thread (if enabled) is stopped, ``UNLOCK_SCRIPT`` is evaluated with this
    handle's token, and ``SAFE_RELEASE_QUEUE_SCRIPT`` removes the token from
    the head of the queue.

    The queue cleanup now runs in a ``finally`` block: previously an error
    raised by the unlock call skipped it and left this handle's token in
    the queue. ``_locked`` is cleared only when both steps succeed, so a
    failed release can be retried.

    Raises:
        Whatever the Redis client raises for either ``eval`` call.
    """
    if not self._locked:
        return

    if self.auto_renewal:
        self._stop_renewal()

    try:
        self.redis.eval(UNLOCK_SCRIPT, 1, self.key, self.value)
    finally:
        # Always try to leave the queue, even if the unlock call failed.
        self.redis.eval(SAFE_RELEASE_QUEUE_SCRIPT, 1, self.queue_key, self.value)

    self._locked = False
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
@@ -59,3 +151,4 @@ class RedisLock:
|
|||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
self.release()
|
self.release()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user