import os import platform import re from datetime import timedelta from urllib.parse import quote from celery import Celery from celery.schedules import crontab from app.core.config import settings from app.core.logging_config import get_logger logger = get_logger(__name__) def _mask_url(url: str) -> str: """隐藏 URL 中的密码部分,适用于 redis:// 和 amqp:// 等协议""" return re.sub(r'(://[^:]*:)[^@]+(@)', r'\1***\2', url) # macOS fork() safety - must be set before any Celery initialization if platform.system() == 'Darwin': os.environ.setdefault('OBJC_DISABLE_INITIALIZE_FORK_SAFETY', 'YES') # 创建 Celery 应用实例 # broker: 优先使用环境变量 CELERY_BROKER_URL(支持 amqp:// 等任意协议), # 未配置则回退到 Redis 方案 # backend: 结果存储(使用 Redis) # NOTE: 不要在 .env 中设置 BROKER_URL / RESULT_BACKEND / CELERY_BROKER / CELERY_BACKEND, # 这些名称会被 Celery CLI 的 Click 框架劫持,详见 docs/celery-env-bug-report.md _broker_url = os.getenv("CELERY_BROKER_URL") or \ f"redis://:{quote(settings.REDIS_PASSWORD)}@{settings.REDIS_HOST}:{settings.REDIS_PORT}/{settings.REDIS_DB_CELERY_BROKER}" _backend_url = f"redis://:{quote(settings.REDIS_PASSWORD)}@{settings.REDIS_HOST}:{settings.REDIS_PORT}/{settings.REDIS_DB_CELERY_BACKEND}" os.environ["CELERY_BROKER_URL"] = _broker_url os.environ["CELERY_RESULT_BACKEND"] = _backend_url # Neutralize legacy Celery env vars that can be hijacked by Celery's CLI/Click # integration and accidentally override our canonical URLs. os.environ.pop("BROKER_URL", None) os.environ.pop("RESULT_BACKEND", None) os.environ.pop("CELERY_BROKER", None) os.environ.pop("CELERY_BACKEND", None) celery_app = Celery( "redbear_tasks", broker=_broker_url, backend=_backend_url, ) logger.info( "Celery app initialized", extra={ "broker": _mask_url(_broker_url), "backend": _mask_url(_backend_url), }, ) # Default queue for unrouted tasks celery_app.conf.task_default_queue = 'memory_tasks' # macOS 兼容性配置 if platform.system() == 'Darwin': os.environ.setdefault('OBJC_DISABLE_INITIALIZE_FORK_SAFETY', 'YES') # Celery 配置 celery_app.conf.update( # 序列化 task_serializer='json', accept_content=['json'], result_serializer='json', # # 时区 # timezone='Asia/Shanghai', # enable_utc=False, # 任务追踪 task_track_started=True, task_ignore_result=False, # 超时设置 task_time_limit=3600, # 60分钟硬超时 task_soft_time_limit=3000, # 50分钟软超时 # Worker 设置 (per-worker settings are in docker-compose command line) worker_prefetch_multiplier=1, # Don't hoard tasks, fairer distribution worker_redirect_stdouts_level='INFO', # stdout/print → INFO instead of WARNING # 结果过期时间 result_expires=3600, # 结果保存1小时 # 任务确认设置 task_acks_late=True, task_reject_on_worker_lost=True, worker_disable_rate_limits=True, # FLower setting worker_send_task_events=True, task_send_sent_event=True, # task routing task_routes={ # Memory tasks → memory_tasks queue (threads worker) 'app.core.memory.agent.read_message_priority': {'queue': 'memory_tasks'}, 'app.core.memory.agent.read_message': {'queue': 'memory_tasks'}, 'app.core.memory.agent.write_message': {'queue': 'memory_tasks'}, # Long-term storage tasks → memory_tasks queue (batched write strategies) 'app.core.memory.agent.long_term_storage.window': {'queue': 'memory_tasks'}, 'app.core.memory.agent.long_term_storage.time': {'queue': 'memory_tasks'}, 'app.core.memory.agent.long_term_storage.aggregate': {'queue': 'memory_tasks'}, # Clustering tasks → memory_tasks queue (使用相同的 worker,避免 macOS fork 问题) 'app.tasks.run_incremental_clustering': {'queue': 'memory_tasks'}, # Metadata extraction → memory_tasks queue 'app.tasks.extract_user_metadata': {'queue': 'memory_tasks'}, # Document tasks → document_tasks queue (prefork worker) 'app.core.rag.tasks.parse_document': {'queue': 'document_tasks'}, 'app.core.rag.tasks.sync_knowledge_for_kb': {'queue': 'document_tasks'}, # GraphRAG tasks → graphrag_tasks queue (独立队列,避免阻塞文档解析) 'app.core.rag.tasks.build_graphrag_for_kb': {'queue': 'graphrag_tasks'}, 'app.core.rag.tasks.build_graphrag_for_document': {'queue': 'graphrag_tasks'}, # Beat/periodic tasks → periodic_tasks queue (dedicated periodic worker) 'app.tasks.workspace_reflection_task': {'queue': 'periodic_tasks'}, 'app.tasks.regenerate_memory_cache': {'queue': 'periodic_tasks'}, 'app.tasks.run_forgetting_cycle_task': {'queue': 'periodic_tasks'}, 'app.tasks.write_all_workspaces_memory_task': {'queue': 'periodic_tasks'}, 'app.tasks.update_implicit_emotions_storage': {'queue': 'periodic_tasks'}, 'app.tasks.init_implicit_emotions_for_users': {'queue': 'periodic_tasks'}, 'app.tasks.init_interest_distribution_for_users': {'queue': 'periodic_tasks'}, 'app.tasks.init_community_clustering_for_users': {'queue': 'periodic_tasks'}, }, ) # 自动发现任务模块 celery_app.autodiscover_tasks(['app']) # Celery Beat schedule for periodic tasks memory_increment_schedule = crontab(hour=settings.MEMORY_INCREMENT_HOUR, minute=settings.MEMORY_INCREMENT_MINUTE) memory_cache_regeneration_schedule = timedelta(hours=settings.MEMORY_CACHE_REGENERATION_HOURS) workspace_reflection_schedule = timedelta(seconds=settings.WORKSPACE_REFLECTION_INTERVAL_SECONDS) forgetting_cycle_schedule = timedelta(hours=settings.FORGETTING_CYCLE_INTERVAL_HOURS) implicit_emotions_update_schedule = crontab( hour=settings.IMPLICIT_EMOTIONS_UPDATE_HOUR, minute=settings.IMPLICIT_EMOTIONS_UPDATE_MINUTE, ) # 构建定时任务配置 beat_schedule_config = { "run-workspace-reflection": { "task": "app.tasks.workspace_reflection_task", "schedule": workspace_reflection_schedule, "args": (), }, "regenerate-memory-cache": { "task": "app.tasks.regenerate_memory_cache", "schedule": memory_cache_regeneration_schedule, "args": (), }, "run-forgetting-cycle": { "task": "app.tasks.run_forgetting_cycle_task", "schedule": forgetting_cycle_schedule, "kwargs": { "config_id": None, # 使用默认配置,可以通过环境变量配置 }, }, "write-all-workspaces-memory": { "task": "app.tasks.write_all_workspaces_memory_task", "schedule": memory_increment_schedule, "args": (), }, "update-implicit-emotions-storage": { "task": "app.tasks.update_implicit_emotions_storage", "schedule": implicit_emotions_update_schedule, "args": (), }, } celery_app.conf.beat_schedule = beat_schedule_config