feat(multimodal): support document image extraction and inline vision processing

Added document image extraction capability for PDF and DOCX files, including page/index metadata and storage integration. Extended `process_files` with `document_image_recognition` flag to conditionally enable vision-based image processing when model supports it. Updated knowledge repository and workflow node logic to enforce status=1 checks. Added PyMuPDF dependency.
This commit is contained in:
Timebomb2018
2026-04-24 11:18:50 +08:00
parent 9fdb952396
commit 767eb5e6f2
13 changed files with 397 additions and 52 deletions

View File

@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session
import uuid
from app.aioRedis import aio_redis_set, aio_redis_get, aio_redis_delete
from app.models import Workspace
from app.models.user_model import User
from app.repositories import user_repository
from app.schemas.user_schema import UserCreate
@@ -74,7 +75,7 @@ def create_initial_superuser(db: Session):
)
def create_user(db: Session, user: UserCreate) -> User:
def create_user(db: Session, user: UserCreate, workspace: Workspace) -> User:
business_logger.info(f"创建用户: {user.username}, email: {user.email}")
try:
@@ -93,24 +94,9 @@ def create_user(db: Session, user: UserCreate) -> User:
business_logger.debug(f"开始创建用户: {user.username}")
hashed_password = get_password_hash(user.password)
# 获取默认租户(第一个活跃租户)
from app.repositories.tenant_repository import TenantRepository
tenant_repo = TenantRepository(db)
tenants = tenant_repo.get_tenants(skip=0, limit=1, is_active=True)
if not tenants:
business_logger.error("系统中没有可用的租户")
raise BusinessException(
"系统配置错误:没有可用的租户",
code=BizCode.TENANT_NOT_FOUND,
context={"username": user.username, "email": user.email}
)
default_tenant = tenants[0]
new_user = user_repository.create_user(
db=db, user=user, hashed_password=hashed_password,
tenant_id=default_tenant.id, is_superuser=False
tenant_id=workspace.tenant_id, is_superuser=False
)
db.commit()