refactor(rate-limit): change rate limiting granularity from tenant to API Key

- Refactor rate limiting mechanism to limit per API Key instead of per tenant (workspace).
- Update error code logic and Redis key naming conventions.
- Adjust quota usage statistics to display the QPS of the API Key closest to its limit.
This commit is contained in:
wwq
2026-04-20 16:13:30 +08:00
parent 48f3d9b105
commit c448cf0660
3 changed files with 23 additions and 11 deletions

View File

@@ -106,12 +106,11 @@ def require_api_key(
"error_msg": error_msg
})
# 根据错误消息判断限流类型
if "QPS" in error_msg:
code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED
elif "Daily" in error_msg:
if "Daily" in error_msg:
code = BizCode.API_KEY_DAILY_LIMIT_EXCEEDED
elif "Tenant" in error_msg:
code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED # 租户套餐速率超限,同属 QPS 类
elif "QPS" in error_msg or "ops rate limit" in error_msg:
# "QPS limit exceeded" 和 "API ops rate limit exceeded" 同属 QPS 类
code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED
else:
code = BizCode.API_KEY_QUOTA_EXCEEDED

View File

@@ -19,8 +19,8 @@ from app.i18n.exceptions import QuotaExceededError, InternalServerError
logger = get_auth_logger()
# Redis key 格式常量,与 RateLimiterService.check_tenant_rate_limit 保持一致
TENANT_QPS_REDIS_KEY = "rate_limit:tenant_qps:{tenant_id}"
# Redis key 格式常量,与 RateLimiterService.check_qps 保持一致(per api_key 独立计数)
API_KEY_QPS_REDIS_KEY = "rate_limit:qps:{api_key_id}"
def _get_user_from_kwargs(kwargs: dict):
@@ -595,9 +595,22 @@ async def get_quota_usage(db: Session, tenant_id: UUID) -> dict:
api_ops_current = 0
try:
from app.aioRedis import aio_redis as _aio_redis
from app.models.api_key_model import ApiKey
from app.models.workspace_model import Workspace
_now = time.time()
_rk = TENANT_QPS_REDIS_KEY.format(tenant_id=tenant_id)
api_ops_current = int(await _aio_redis.zcount(_rk, _now - 1, "+inf") or 0)
# api_ops_rate_limit 限的是每个 api_key 每秒最高限额
# 展示当前最接近触发限流的 key 的 QPS(取最大值)
api_key_ids = db.query(ApiKey.id).join(
Workspace, ApiKey.workspace_id == Workspace.id
).filter(
Workspace.tenant_id == tenant_id,
ApiKey.is_active.is_(True)
).all()
for (key_id,) in api_key_ids:
_rk = API_KEY_QPS_REDIS_KEY.format(api_key_id=key_id)
count = int(await _aio_redis.zcount(_rk, _now - 1, "+inf") or 0)
if count > api_ops_current:
api_ops_current = count
except Exception as e:
logger.warning(f"获取 api_ops_current 失败,返回 0: {type(e).__name__}: {e}")

View File

@@ -284,8 +284,8 @@ class RateLimiterService:
async def check_tenant_rate_limit(self, window_id: uuid.UUID, limit: int) -> Tuple[bool, dict]:
"""
按 window_id(workspace_id)做 1 秒滑动窗口限速。
限制值来自套餐配额 api_ops_rate_limit。
按 window_id(api_key_id)做 1 秒滑动窗口限速。
限制值来自套餐配额 api_ops_rate_limit,每个 API Key 独立受此上限约束。
只有请求被允许时才计入窗口,超限请求不污染计数。
"""
now = time.time()