From c448cf06602c0e72acef2cbedd78f544ffb9e0bd Mon Sep 17 00:00:00 2001 From: wwq Date: Mon, 20 Apr 2026 16:13:30 +0800 Subject: [PATCH] refactor(rate-limit): change rate limiting granularity from tenant to API Key - Refactor rate limiting mechanism to limit per API Key instead of per tenant (workspace). - Update error code logic and Redis key naming conventions. - Adjust quota usage statistics to display the QPS of the API Key closest to its limit. --- api/app/core/api_key_auth.py | 9 ++++----- api/app/core/quota_manager.py | 21 +++++++++++++++++---- api/app/services/api_key_service.py | 4 ++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/api/app/core/api_key_auth.py b/api/app/core/api_key_auth.py index 297e5082..b7cacd21 100644 --- a/api/app/core/api_key_auth.py +++ b/api/app/core/api_key_auth.py @@ -106,12 +106,11 @@ def require_api_key( "error_msg": error_msg }) # 根据错误消息判断限流类型 - if "QPS" in error_msg: - code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED - elif "Daily" in error_msg: + if "Daily" in error_msg: code = BizCode.API_KEY_DAILY_LIMIT_EXCEEDED - elif "Tenant" in error_msg: - code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED # 租户套餐速率超限,同属 QPS 类 + elif "QPS" in error_msg or "ops rate limit" in error_msg: + # "QPS limit exceeded" 和 "API ops rate limit exceeded" 同属 QPS 类 + code = BizCode.API_KEY_QPS_LIMIT_EXCEEDED else: code = BizCode.API_KEY_QUOTA_EXCEEDED diff --git a/api/app/core/quota_manager.py b/api/app/core/quota_manager.py index 43d8fa42..28130dee 100644 --- a/api/app/core/quota_manager.py +++ b/api/app/core/quota_manager.py @@ -19,8 +19,8 @@ from app.i18n.exceptions import QuotaExceededError, InternalServerError logger = get_auth_logger() -# Redis key 格式常量,与 RateLimiterService.check_tenant_rate_limit 保持一致 -TENANT_QPS_REDIS_KEY = "rate_limit:tenant_qps:{tenant_id}" +# Redis key 格式常量,与 RateLimiterService.check_qps 保持一致(per api_key 独立计数) +API_KEY_QPS_REDIS_KEY = "rate_limit:qps:{api_key_id}" def _get_user_from_kwargs(kwargs: dict): @@ -595,9 +595,22 @@ async def get_quota_usage(db: Session, tenant_id: UUID) -> dict: api_ops_current = 0 try: from app.aioRedis import aio_redis as _aio_redis + from app.models.api_key_model import ApiKey + from app.models.workspace_model import Workspace _now = time.time() - _rk = TENANT_QPS_REDIS_KEY.format(tenant_id=tenant_id) - api_ops_current = int(await _aio_redis.zcount(_rk, _now - 1, "+inf") or 0) + # api_ops_rate_limit 限的是每个 api_key 每秒最高限额 + # 展示当前最接近触发限流的 key 的 QPS(取最大值) + api_key_ids = db.query(ApiKey.id).join( + Workspace, ApiKey.workspace_id == Workspace.id + ).filter( + Workspace.tenant_id == tenant_id, + ApiKey.is_active.is_(True) + ).all() + for (key_id,) in api_key_ids: + _rk = API_KEY_QPS_REDIS_KEY.format(api_key_id=key_id) + count = int(await _aio_redis.zcount(_rk, _now - 1, "+inf") or 0) + if count > api_ops_current: + api_ops_current = count except Exception as e: logger.warning(f"获取 api_ops_current 失败,返回 0: {type(e).__name__}: {e}") diff --git a/api/app/services/api_key_service.py b/api/app/services/api_key_service.py index 7b6b1172..4fe9d8b5 100644 --- a/api/app/services/api_key_service.py +++ b/api/app/services/api_key_service.py @@ -284,8 +284,8 @@ class RateLimiterService: async def check_tenant_rate_limit(self, window_id: uuid.UUID, limit: int) -> Tuple[bool, dict]: """ - 按 window_id(workspace_id)做 1 秒滑动窗口限速。 - 限制值来自套餐配额 api_ops_rate_limit。 + 按 window_id(api_key_id)做 1 秒滑动窗口限速。 + 限制值来自套餐配额 api_ops_rate_limit,每个 API Key 独立受此上限约束。 只有请求被允许时才计入窗口,超限请求不污染计数。 """ now = time.time()