refactor(rate-limit): change rate limiting granularity from tenant to API Key
- Refactor the rate limiting mechanism to limit per API Key instead of per tenant (workspace).
- Update the error code logic and Redis key naming conventions.
- Adjust quota usage statistics to display the QPS of the API Key closest to its limit.
This commit is contained in:
@@ -106,12 +106,11 @@ def require_api_key(
|
|||||||
"error_msg": error_msg
|
"error_msg": error_msg
|
||||||
})
|
})
|
||||||
# 根据错误消息判断限流类型
|
# 根据错误消息判断限流类型
|
||||||
if "QPS" in error_msg:
|
if "Daily" in error_msg:
|
||||||
code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED
|
|
||||||
elif "Daily" in error_msg:
|
|
||||||
code = BizCode.API_KEY_DAILY_LIMIT_EXCEEDED
|
code = BizCode.API_KEY_DAILY_LIMIT_EXCEEDED
|
||||||
elif "Tenant" in error_msg:
|
elif "QPS" in error_msg or "ops rate limit" in error_msg:
|
||||||
code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED # 租户套餐速率超限,同属 QPS 类
|
# "QPS limit exceeded" 和 "API ops rate limit exceeded" 同属 QPS 类
|
||||||
|
code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED
|
||||||
else:
|
else:
|
||||||
code = BizCode.API_KEY_QUOTA_EXCEEDED
|
code = BizCode.API_KEY_QUOTA_EXCEEDED
|
||||||
|
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ from app.i18n.exceptions import QuotaExceededError, InternalServerError
|
|||||||
|
|
||||||
logger = get_auth_logger()
|
logger = get_auth_logger()
|
||||||
|
|
||||||
# Redis key 格式常量,与 RateLimiterService.check_tenant_rate_limit 保持一致
|
# Redis key 格式常量,与 RateLimiterService.check_qps 保持一致(per api_key 独立计数)
|
||||||
TENANT_QPS_REDIS_KEY = "rate_limit:tenant_qps:{tenant_id}"
|
API_KEY_QPS_REDIS_KEY = "rate_limit:qps:{api_key_id}"
|
||||||
|
|
||||||
|
|
||||||
def _get_user_from_kwargs(kwargs: dict):
|
def _get_user_from_kwargs(kwargs: dict):
|
||||||
@@ -595,9 +595,22 @@ async def get_quota_usage(db: Session, tenant_id: UUID) -> dict:
|
|||||||
api_ops_current = 0
|
api_ops_current = 0
|
||||||
try:
|
try:
|
||||||
from app.aioRedis import aio_redis as _aio_redis
|
from app.aioRedis import aio_redis as _aio_redis
|
||||||
|
from app.models.api_key_model import ApiKey
|
||||||
|
from app.models.workspace_model import Workspace
|
||||||
_now = time.time()
|
_now = time.time()
|
||||||
_rk = TENANT_QPS_REDIS_KEY.format(tenant_id=tenant_id)
|
# api_ops_rate_limit 限的是每个 api_key 每秒最高限额
|
||||||
api_ops_current = int(await _aio_redis.zcount(_rk, _now - 1, "+inf") or 0)
|
# 展示当前最接近触发限流的 key 的 QPS(取最大值)
|
||||||
|
api_key_ids = db.query(ApiKey.id).join(
|
||||||
|
Workspace, ApiKey.workspace_id == Workspace.id
|
||||||
|
).filter(
|
||||||
|
Workspace.tenant_id == tenant_id,
|
||||||
|
ApiKey.is_active.is_(True)
|
||||||
|
).all()
|
||||||
|
for (key_id,) in api_key_ids:
|
||||||
|
_rk = API_KEY_QPS_REDIS_KEY.format(api_key_id=key_id)
|
||||||
|
count = int(await _aio_redis.zcount(_rk, _now - 1, "+inf") or 0)
|
||||||
|
if count > api_ops_current:
|
||||||
|
api_ops_current = count
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"获取 api_ops_current 失败,返回 0: {type(e).__name__}: {e}")
|
logger.warning(f"获取 api_ops_current 失败,返回 0: {type(e).__name__}: {e}")
|
||||||
|
|
||||||
|
|||||||
@@ -284,8 +284,8 @@ class RateLimiterService:
|
|||||||
|
|
||||||
async def check_tenant_rate_limit(self, window_id: uuid.UUID, limit: int) -> Tuple[bool, dict]:
|
async def check_tenant_rate_limit(self, window_id: uuid.UUID, limit: int) -> Tuple[bool, dict]:
|
||||||
"""
|
"""
|
||||||
按 window_id(workspace_id)做 1 秒滑动窗口限速。
|
按 window_id(api_key_id)做 1 秒滑动窗口限速。
|
||||||
限制值来自套餐配额 api_ops_rate_limit。
|
限制值来自套餐配额 api_ops_rate_limit,每个 API Key 独立受此上限约束。
|
||||||
只有请求被允许时才计入窗口,超限请求不污染计数。
|
只有请求被允许时才计入窗口,超限请求不污染计数。
|
||||||
"""
|
"""
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|||||||
Reference in New Issue
Block a user