Merge branch 'release/v0.3.1' into develop
This commit is contained in:
@@ -51,6 +51,19 @@ class ApiKeyService:
|
||||
if existing:
|
||||
raise BusinessException(f"API Key 名称 {data.name} 已存在", BizCode.API_KEY_DUPLICATE_NAME)
|
||||
|
||||
# 若 rate_limit 超过租户套餐的 api_ops_rate_limit,直接报错
|
||||
from app.models.workspace_model import Workspace
|
||||
from app.core.quota_manager import get_api_ops_rate_limit
|
||||
|
||||
workspace = db.query(Workspace).filter(Workspace.id == workspace_id).first()
|
||||
if workspace:
|
||||
tenant_api_ops_limit = get_api_ops_rate_limit(db, workspace.tenant_id)
|
||||
if tenant_api_ops_limit and data.rate_limit > tenant_api_ops_limit:
|
||||
raise BusinessException(
|
||||
f"API Key QPS 不能超过套餐上限 {tenant_api_ops_limit}",
|
||||
BizCode.BAD_REQUEST
|
||||
)
|
||||
|
||||
# 生成 API Key
|
||||
api_key = generate_api_key(data.type)
|
||||
|
||||
@@ -152,6 +165,20 @@ class ApiKeyService:
|
||||
if existing:
|
||||
raise BusinessException(f"API Key 名称 {data.name} 已存在", BizCode.API_KEY_DUPLICATE_NAME)
|
||||
|
||||
# 若 rate_limit 超过租户套餐的 api_ops_rate_limit,直接报错
|
||||
if data.rate_limit is not None:
|
||||
from app.models.workspace_model import Workspace
|
||||
from app.core.quota_manager import get_api_ops_rate_limit
|
||||
|
||||
workspace = db.query(Workspace).filter(Workspace.id == workspace_id).first()
|
||||
if workspace:
|
||||
tenant_api_ops_limit = get_api_ops_rate_limit(db, workspace.tenant_id)
|
||||
if tenant_api_ops_limit and data.rate_limit > tenant_api_ops_limit:
|
||||
raise BusinessException(
|
||||
f"API Key QPS 不能超过套餐上限 {tenant_api_ops_limit}",
|
||||
BizCode.BAD_REQUEST
|
||||
)
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
ApiKeyRepository.update(db, api_key_id, update_data)
|
||||
db.commit()
|
||||
@@ -248,42 +275,14 @@ class RateLimiterService:
|
||||
def __init__(self):
|
||||
self.redis = aio_redis
|
||||
|
||||
async def check_tenant_rate_limit(self, tenant_id: uuid.UUID, limit: int) -> Tuple[bool, dict]:
|
||||
"""
|
||||
按 tenant_id 做 1 秒滑动窗口限速,限制值来自套餐配额 api_ops_rate_limit
|
||||
"""
|
||||
now = time.time()
|
||||
window_start = now - 1 # 1 秒窗口
|
||||
key = f"rate_limit:tenant_qps:{tenant_id}"
|
||||
|
||||
async with self.redis.pipeline() as pipe:
|
||||
# 清理 1 秒前的旧记录
|
||||
pipe.zremrangebyscore(key, 0, window_start)
|
||||
# 加入当前请求(score=时间戳,member=时间戳+随机数保证唯一)
|
||||
pipe.zadd(key, {f"{now}:{uuid.uuid4().hex}": now})
|
||||
# 统计窗口内请求数
|
||||
pipe.zcard(key)
|
||||
# 设置 key 过期(2 秒后自动清理)
|
||||
pipe.expire(key, 2)
|
||||
results = await pipe.execute()
|
||||
|
||||
current = results[2]
|
||||
remaining = max(0, limit - current)
|
||||
reset_time = int(now) + 1
|
||||
|
||||
return current <= limit, {
|
||||
"limit": limit,
|
||||
"remaining": remaining,
|
||||
"reset": reset_time,
|
||||
}
|
||||
|
||||
async def check_qps(self, api_key_id: uuid.UUID, limit: int) -> Tuple[bool, dict]:
|
||||
"""
|
||||
检查QPS限制
|
||||
"""检查QPS限制
|
||||
|
||||
Returns:
|
||||
(is_allowed, rate_limit_info)
|
||||
"""
|
||||
key = f"rate_limit:qps:{api_key_id}"
|
||||
|
||||
async with self.redis.pipeline() as pipe:
|
||||
pipe.incr(key)
|
||||
pipe.expire(key, 1, nx=True) # 1 秒过期
|
||||
@@ -295,8 +294,9 @@ class RateLimiterService:
|
||||
|
||||
return current <= limit, {
|
||||
"limit": limit,
|
||||
"current": current,
|
||||
"remaining": remaining,
|
||||
"reset": reset_time
|
||||
"reset": reset_time,
|
||||
}
|
||||
|
||||
async def check_daily_requests(
|
||||
@@ -304,7 +304,9 @@ class RateLimiterService:
|
||||
api_key_id: uuid.UUID,
|
||||
limit: int
|
||||
) -> Tuple[bool, dict]:
|
||||
"""检查日调用量限制"""
|
||||
"""检查日调用量限制。
|
||||
使用原子 INCR,先写后判断,极低概率下允许轻微超限(并发场景下可接受)。
|
||||
"""
|
||||
today = datetime.now().strftime("%Y%m%d")
|
||||
key = f"rate_limit:daily:{api_key_id}:{today}"
|
||||
|
||||
@@ -313,6 +315,7 @@ class RateLimiterService:
|
||||
hour=0, minute=0, second=0, microsecond=0
|
||||
)
|
||||
expire_seconds = int((tomorrow_0 - now).total_seconds())
|
||||
reset_time = int(tomorrow_0.timestamp())
|
||||
|
||||
async with self.redis.pipeline() as pipe:
|
||||
pipe.incr(key)
|
||||
@@ -320,36 +323,74 @@ class RateLimiterService:
|
||||
results = await pipe.execute()
|
||||
|
||||
current = results[0]
|
||||
remaining = max(0, limit - current)
|
||||
reset_time = int(tomorrow_0.timestamp())
|
||||
|
||||
return current <= limit, {
|
||||
if current > limit:
|
||||
return False, {
|
||||
"limit": limit,
|
||||
"remaining": 0,
|
||||
"reset": reset_time,
|
||||
}
|
||||
|
||||
return True, {
|
||||
"limit": limit,
|
||||
"remaining": remaining,
|
||||
"reset": reset_time
|
||||
"remaining": max(0, limit - current),
|
||||
"reset": reset_time,
|
||||
}
|
||||
|
||||
async def check_all_limits(
|
||||
self,
|
||||
api_key: ApiKey
|
||||
api_key: ApiKey,
|
||||
db: Optional[Session] = None,
|
||||
) -> Tuple[bool, str, dict]:
|
||||
"""
|
||||
检查所有限制
|
||||
Returns:
|
||||
(is_allowed, error_message, rate_limit_headers)
|
||||
检查所有限制,按以下顺序:
|
||||
1. API Key QPS:取 api_key.rate_limit 与套餐 api_ops_rate_limit 的最小值作为限额
|
||||
2. API Key 日调用量
|
||||
"""
|
||||
# Check QPS
|
||||
qps_ok, qps_info = await self.check_qps(
|
||||
api_key.id,
|
||||
api_key.rate_limit
|
||||
)
|
||||
# 1. 取套餐限额与 api_key 自身限额的最小值
|
||||
effective_limit = api_key.rate_limit
|
||||
if db is not None:
|
||||
try:
|
||||
from app.models.workspace_model import Workspace
|
||||
from app.core.quota_manager import get_api_ops_rate_limit
|
||||
|
||||
cache_key = f"tenant_api_ops_limit:{api_key.workspace_id}"
|
||||
cached = await self.redis.get(cache_key)
|
||||
if cached is not None:
|
||||
try:
|
||||
tenant_limit = int(cached) if cached != "0" else None
|
||||
except (ValueError, TypeError):
|
||||
cached = None
|
||||
tenant_limit = None
|
||||
|
||||
if cached is None:
|
||||
workspace = db.query(Workspace).filter(Workspace.id == api_key.workspace_id).first()
|
||||
if workspace:
|
||||
tenant_limit = get_api_ops_rate_limit(db, workspace.tenant_id)
|
||||
await self.redis.set(cache_key, str(tenant_limit) if tenant_limit else "0", ex=60)
|
||||
else:
|
||||
tenant_limit = None
|
||||
|
||||
if tenant_limit:
|
||||
effective_limit = min(api_key.rate_limit, tenant_limit)
|
||||
except Exception as e:
|
||||
logger.warning(f"获取套餐限额失败,使用 api_key 自身限额: {e}")
|
||||
|
||||
# 用最终有效限额做 QPS 检查
|
||||
qps_ok, qps_info = await self.check_qps(api_key.id, effective_limit)
|
||||
if not qps_ok:
|
||||
return False, "QPS limit exceeded", {
|
||||
# 判断是套餐限额触发还是 api_key 自身限额触发
|
||||
if tenant_limit and effective_limit == tenant_limit and api_key.rate_limit > tenant_limit:
|
||||
error_msg = "Tenant limit exceeded"
|
||||
else:
|
||||
error_msg = "QPS limit exceeded"
|
||||
return False, error_msg, {
|
||||
"X-RateLimit-Limit-QPS": str(qps_info["limit"]),
|
||||
"X-RateLimit-Remaining-QPS": str(qps_info["remaining"]),
|
||||
"X-RateLimit-Reset": str(qps_info["reset"])
|
||||
}
|
||||
|
||||
# 2. 检查日调用量
|
||||
daily_ok, daily_info = await self.check_daily_requests(
|
||||
api_key.id,
|
||||
api_key.daily_request_limit
|
||||
@@ -361,14 +402,13 @@ class RateLimiterService:
|
||||
"X-RateLimit-Reset": str(daily_info["reset"])
|
||||
}
|
||||
|
||||
headers = {
|
||||
return True, "", {
|
||||
"X-RateLimit-Limit-QPS": str(qps_info["limit"]),
|
||||
"X-RateLimit-Remaining-QPS": str(qps_info["remaining"]),
|
||||
"X-RateLimit-Limit-Day": str(daily_info["limit"]),
|
||||
"X-RateLimit-Remaining-Day": str(daily_info["remaining"]),
|
||||
"X-RateLimit-Reset": str(daily_info["reset"])
|
||||
"X-RateLimit-Reset": str(daily_info["reset"]),
|
||||
}
|
||||
return True, "", headers
|
||||
|
||||
|
||||
class ApiKeyAuthService:
|
||||
|
||||
@@ -434,19 +434,37 @@ class AppDslService:
|
||||
def _resolve_model(self, ref: Optional[dict], tenant_id: uuid.UUID, warnings: list) -> Optional[uuid.UUID]:
|
||||
if not ref:
|
||||
return None
|
||||
q = self.db.query(ModelConfig).filter(
|
||||
ModelConfig.tenant_id == tenant_id,
|
||||
ModelConfig.name == ref.get("name"),
|
||||
ModelConfig.is_active.is_(True)
|
||||
)
|
||||
if ref.get("provider"):
|
||||
q = q.filter(ModelConfig.provider == ref["provider"])
|
||||
if ref.get("type"):
|
||||
q = q.filter(ModelConfig.type == ref["type"])
|
||||
m = q.first()
|
||||
if not m:
|
||||
warnings.append(f"模型 '{ref.get('name')}' 未匹配,已置空,请导入后手动配置")
|
||||
return m.id if m else None
|
||||
model_id = ref.get("id")
|
||||
if model_id:
|
||||
try:
|
||||
model_uuid = uuid.UUID(str(model_id))
|
||||
m = self.db.query(ModelConfig).filter(
|
||||
ModelConfig.id == model_uuid,
|
||||
ModelConfig.tenant_id == tenant_id,
|
||||
ModelConfig.is_active.is_(True)
|
||||
).first()
|
||||
if m:
|
||||
return str(m.id)
|
||||
except (ValueError, AttributeError):
|
||||
pass
|
||||
model_name = ref.get("name")
|
||||
if model_name:
|
||||
q = self.db.query(ModelConfig).filter(
|
||||
ModelConfig.tenant_id == tenant_id,
|
||||
ModelConfig.name == model_name,
|
||||
ModelConfig.is_active.is_(True)
|
||||
)
|
||||
if ref.get("provider"):
|
||||
q = q.filter(ModelConfig.provider == ref["provider"])
|
||||
if ref.get("type"):
|
||||
q = q.filter(ModelConfig.type == ref["type"])
|
||||
m = q.first()
|
||||
if m:
|
||||
return str(m.id)
|
||||
warnings.append(f"模型 '{model_name}' 未匹配,已置空,请导入后手动配置")
|
||||
else:
|
||||
warnings.append(f"模型 ID '{model_id}' 未匹配,已置空,请导入后手动配置")
|
||||
return None
|
||||
|
||||
def _resolve_kb(self, ref: Optional[dict], workspace_id: uuid.UUID, warnings: list) -> Optional[str]:
|
||||
if not ref:
|
||||
@@ -587,7 +605,7 @@ class AppDslService:
|
||||
if not kb_id:
|
||||
continue
|
||||
kb_ref = {}
|
||||
if isinstance(kb_id, str) and len(kb_id) >= 36:
|
||||
if isinstance(kb_id, str):
|
||||
try:
|
||||
uuid.UUID(kb_id)
|
||||
kb_ref["id"] = kb_id
|
||||
@@ -601,6 +619,33 @@ class AppDslService:
|
||||
else:
|
||||
warnings.append(f"[{node_label}] 知识库 '{kb_id}' 未匹配,已移除,请导入后手动配置")
|
||||
config["knowledge_bases"] = resolved_kbs
|
||||
elif node_type in (NodeType.LLM.value, NodeType.QUESTION_CLASSIFIER.value, NodeType.PARAMETER_EXTRACTOR.value):
|
||||
model_ref = config.get("model_id")
|
||||
if model_ref:
|
||||
ref_dict = None
|
||||
if isinstance(model_ref, dict):
|
||||
ref_id = model_ref.get("id")
|
||||
ref_name = model_ref.get("name")
|
||||
if ref_id:
|
||||
ref_dict = {"id": ref_id}
|
||||
elif ref_name is not None:
|
||||
ref_dict = {"name": ref_name, "provider": model_ref.get("provider"), "type": model_ref.get("type")}
|
||||
elif isinstance(model_ref, str):
|
||||
try:
|
||||
uuid.UUID(model_ref)
|
||||
ref_dict = {"id": model_ref}
|
||||
except ValueError:
|
||||
ref_dict = {"name": model_ref}
|
||||
if ref_dict:
|
||||
resolved_model_id = self._resolve_model(ref_dict, tenant_id, warnings)
|
||||
if resolved_model_id:
|
||||
config["model_id"] = resolved_model_id
|
||||
else:
|
||||
warnings.append(f"[{node_label}] 模型未匹配,已置空,请导入后手动配置")
|
||||
config["model_id"] = None
|
||||
else:
|
||||
warnings.append(f"[{node_label}] 模型未匹配,已置空,请导入后手动配置")
|
||||
config["model_id"] = None
|
||||
resolved_nodes.append({**node, "config": config})
|
||||
return resolved_nodes
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ from app.models.models_model import ModelConfig
|
||||
from app.schemas.knowledge_schema import KnowledgeCreate, KnowledgeUpdate
|
||||
from app.repositories import knowledge_repository
|
||||
from app.core.logging_config import get_business_logger
|
||||
from app.repositories.model_repository import ModelConfigRepository
|
||||
from app.models.models_model import ModelType
|
||||
|
||||
business_logger = get_business_logger()
|
||||
@@ -78,41 +77,31 @@ def create_knowledge(
|
||||
tenant_id = workspace.tenant_id
|
||||
|
||||
if not knowledge.embedding_id:
|
||||
embedding_models = ModelConfigRepository.get_by_type(
|
||||
db=db, model_types=[ModelType.EMBEDDING], tenant_id=tenant_id, is_active=True
|
||||
)
|
||||
if embedding_models:
|
||||
knowledge.embedding_id = embedding_models[0].id
|
||||
business_logger.debug(f"Auto-bind embedding model: {embedding_models[0].id}")
|
||||
if not workspace.embedding:
|
||||
raise Exception("工作空间未配置 Embedding 模型,请先完善工作空间配置后重试")
|
||||
knowledge.embedding_id = workspace.embedding
|
||||
|
||||
if not knowledge.reranker_id:
|
||||
rerank_models = ModelConfigRepository.get_by_type(
|
||||
db=db, model_types=[ModelType.RERANK], tenant_id=tenant_id, is_active=True
|
||||
)
|
||||
if rerank_models:
|
||||
knowledge.reranker_id = rerank_models[0].id
|
||||
business_logger.debug(f"Auto-bind rerank model: {rerank_models[0].id}")
|
||||
if not workspace.rerank:
|
||||
raise Exception("工作空间未配置 Rerank 模型,请先完善工作空间配置后重试")
|
||||
knowledge.reranker_id = workspace.rerank
|
||||
|
||||
if not knowledge.llm_id:
|
||||
llm_models = ModelConfigRepository.get_by_type(
|
||||
db=db, model_types=[ModelType.LLM, ModelType.CHAT], tenant_id=tenant_id, is_active=True
|
||||
)
|
||||
if llm_models:
|
||||
knowledge.llm_id = llm_models[0].id
|
||||
business_logger.debug(f"Auto-bind llm model: {llm_models[0].id}")
|
||||
if not workspace.llm:
|
||||
raise Exception("工作空间未配置 LLM 模型,请先完善工作空间配置后重试")
|
||||
knowledge.llm_id = workspace.llm
|
||||
|
||||
if not knowledge.image2text_id:
|
||||
image2text_models = db.query(ModelConfig).filter(
|
||||
model = db.query(ModelConfig).filter(
|
||||
ModelConfig.tenant_id == tenant_id,
|
||||
ModelConfig.type.in_([ModelType.CHAT.value]),
|
||||
ModelConfig.type.in_([ModelType.CHAT.value, ModelType.LLM.value]),
|
||||
ModelConfig.capability.contains(["vision"]),
|
||||
ModelConfig.is_active == True,
|
||||
ModelConfig.is_composite == False
|
||||
).order_by(ModelConfig.created_at.desc()).all()
|
||||
if not image2text_models:
|
||||
).order_by(ModelConfig.created_at.desc()).first()
|
||||
if not model:
|
||||
raise Exception("租户下没有可用的视觉模型,创建知识库失败")
|
||||
knowledge.image2text_id = image2text_models[0].id
|
||||
business_logger.debug(f"Auto-bind image2text model: {image2text_models[0].id}")
|
||||
knowledge.image2text_id = model.id
|
||||
business_logger.debug(f"Auto-bind image2text model: {model.id}")
|
||||
|
||||
business_logger.debug(f"Start creating the knowledge base: {knowledge.name}")
|
||||
db_knowledge = knowledge_repository.create_knowledge(
|
||||
|
||||
@@ -1282,7 +1282,7 @@ def get_end_user_connected_config(end_user_id: str, db: Session) -> Dict[str, An
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Successfully retrieved connected config: memory_config_id={memory_config_id}, workspace_id={app.workspace_id}")
|
||||
f"Successfully retrieved connected config: memory_config_id={memory_config_id}, workspace_id={end_user.workspace_id}")
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -125,11 +125,7 @@ class ModelConfigService:
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
is_omni=is_omni,
|
||||
capability=capability,
|
||||
extra_params={
|
||||
"temperature": 0.7,
|
||||
"max_tokens": 100
|
||||
}
|
||||
capability=capability
|
||||
)
|
||||
|
||||
# 根据模型类型选择不同的验证方式
|
||||
@@ -373,6 +369,15 @@ class ModelConfigService:
|
||||
raise BusinessException("模型名称已存在", BizCode.DUPLICATE_NAME)
|
||||
|
||||
model = ModelConfigRepository.update(db, model_id, model_data, tenant_id=tenant_id)
|
||||
|
||||
# 同步更新关联 api_keys 的 capability 和 is_omni
|
||||
if model_data.capability is not None or model_data.is_omni is not None:
|
||||
for api_key in model.api_keys:
|
||||
if model_data.capability is not None:
|
||||
api_key.capability = model_data.capability
|
||||
if model_data.is_omni is not None:
|
||||
api_key.is_omni = model_data.is_omni
|
||||
|
||||
db.commit()
|
||||
db.refresh(model)
|
||||
return model
|
||||
|
||||
Reference in New Issue
Block a user