Fix/develop memory bug (#350)

* 遗漏的历史映射 * 遗漏的历史映射 * fix_timeline_memories * fix_timeline_memories * write_gragp/bug_fix * write_gragp/bug_fix * write_gragp/bug_fix * write_gragp/bug_fix * Multiple independent transactions - single transaction * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * tasks/bug_fix/long * tasks_reflection/bug/fix * tasks_reflection/bug/fix * tasks_reflection/bug/fix * tasks_reflection/bug/fix
2026-02-06 17:37:03 +08:00
parent 320f684354
commit 16cf6eee9b
2 changed files with 190 additions and 161 deletions
--- a/api/app/services/memory_reflection_service.py
+++ b/api/app/services/memory_reflection_service.py
@@ -364,6 +364,13 @@ class MemoryReflectionService:
        reflexion_range_value = config_data.get("reflexion_range")
        if reflexion_range_value is None or reflexion_range_value == "":
            reflexion_range_value = "partial"
        # Map legacy/invalid values to valid enum values; anything unrecognized falls back to "partial"
        reflexion_range_mapping = {
            "retrieval": "partial",  # Map old 'retrieval' to 'partial'
            "partial": "partial",
            "all": "all"
        }
        reflexion_range_value = reflexion_range_mapping.get(reflexion_range_value, "partial")
        reflexion_range = ReflectionRange(reflexion_range_value)
        baseline_value = config_data.get("baseline")
--- a/api/app/tasks.py
+++ b/api/app/tasks.py
@@ -405,7 +405,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
        # 2. sync data
        match db_knowledge.type:
-            case "Web": # Crawl webpages in batches through a web crawler
+            case "Web":  # Crawl webpages in batches through a web crawler
                entry_url = db_knowledge.parser_config.get("entry_url", "")
                max_pages = db_knowledge.parser_config.get("max_pages", 20)
                delay_seconds = db_knowledge.parser_config.get("delay_seconds", 1.0)
@@ -428,19 +428,21 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                        db_file = db.query(File).filter(File.kb_id == db_knowledge.id,
                                                        File.file_url == crawled_document.url).first()
                        if db_file:
-                            if db_file.file_size == crawled_document.content_length: # same
+                            if db_file.file_size == crawled_document.content_length:  # same
                                continue
-                            else: # --update
+                            else:  # --update
                                if crawled_document.content_length:
                                    # 1. update file
                                    db_file.file_name = f"{crawled_document.title}.txt"
-                                    db_file.file_ext=".txt"
+                                    db_file.file_ext = ".txt"
-                                    db_file.file_size=crawled_document.content_length
+                                    db_file.file_size = crawled_document.content_length
                                    db.commit()
                                    db.refresh(db_file)
                                    # Construct a save path：/files/{kb_id}/{parent_id}/{file.id}{file_extension}
-                                    save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id), str(db_knowledge.parent_id))
+                                    save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
-                                    Path(save_dir).mkdir(parents=True, exist_ok=True)  # Ensure that the directory exists
+                                                            str(db_knowledge.parent_id))
                                    Path(save_dir).mkdir(parents=True,
                                                         exist_ok=True)  # Ensure that the directory exists
                                    save_path = os.path.join(save_dir, f"{db_file.id}{db_file.file_ext}")
                                    # update file
                                    if os.path.exists(save_path):
@@ -460,7 +462,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                                        db.refresh(db_document)
                                        # 3. Document parsing, vectorization, and storage
                                        parse_document(file_path=save_path, document_id=db_document.id)
-                        else: # --add
+                        else:  # --add
                            if crawled_document.content_length:
                                # 1. upload file
                                upload_file = file_schema.FileCreate(
@@ -507,8 +509,9 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                                db.commit()
                                # 3. Document parsing, vectorization, and storage
                                parse_document(file_path=save_path, document_id=db_document.id)
-                    db_files = db.query(File).filter(File.kb_id == db_knowledge.id, File.file_url.notin_(file_urls)).all()
+                    db_files = db.query(File).filter(File.kb_id == db_knowledge.id,
-                    if db_files: # --delete
+                                                     File.file_url.notin_(file_urls)).all()
                    if db_files:  # --delete
                        for db_file in db_files:
                            db_document = db.query(Document).filter(Document.kb_id == db_knowledge.id,
                                                                    Document.file_id == db_file.id).first()
@@ -535,7 +538,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
            case "Third-party":  # Integration of knowledge bases from three parties
                yuque_user_id = db_knowledge.parser_config.get("yuque_user_id", "")
                feishu_app_id = db_knowledge.parser_config.get("feishu_app_id", "")
-                if yuque_user_id: # Yuque Knowledge Base
+                if yuque_user_id:  # Yuque Knowledge Base
                    yuque_token = db_knowledge.parser_config.get("yuque_token", "")
                    # Create yuqueAPIClient
                    api_client = YuqueAPIClient(
@@ -571,11 +574,14 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                                else:  # --update
                                    # 1. update file
                                    # Construct a save path：/files/{kb_id}/{parent_id}/{file.id}{file_extension}
-                                    save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id), str(db_knowledge.parent_id))
+                                    save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
-                                    Path(save_dir).mkdir(parents=True, exist_ok=True)  # Ensure that the directory exists
+                                                            str(db_knowledge.parent_id))
                                    Path(save_dir).mkdir(parents=True,
                                                         exist_ok=True)  # Ensure that the directory exists
                                    # download the Yuque document (doc is a YuqueDocInfo) into save_dir
-                                    async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo, save_dir: str):
+                                    async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo,
                                                                      save_dir: str):
                                        async with api_client as client:
                                            file_path = await client.download_document(doc, save_dir)
                                            return file_path
@@ -613,11 +619,13 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                            else:  # --add
                                # 1. update file
                                # Construct a save path：/files/{kb_id}/{parent_id}/{file.id}{file_extension}
-                                save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id), str(db_knowledge.parent_id))
+                                save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
                                                        str(db_knowledge.parent_id))
                                Path(save_dir).mkdir(parents=True, exist_ok=True)  # Ensure that the directory exists
                                # download the Yuque document (doc is a YuqueDocInfo) into save_dir
-                                async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo, save_dir: str):
+                                async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo,
                                                                  save_dir: str):
                                    async with api_client as client:
                                        file_path = await client.download_document(doc, save_dir)
                                        return file_path
@@ -697,7 +705,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                    except Exception as e:
                        print(f"\n\nError during fetch feishu: {e}")
-                if feishu_app_id: # Feishu Knowledge Base
+                if feishu_app_id:  # Feishu Knowledge Base
                    feishu_app_secret = db_knowledge.parser_config.get("feishu_app_secret", "")
                    feishu_folder_token = db_knowledge.parser_config.get("feishu_folder_token", "")
                    # Create feishuAPIClient
@@ -708,11 +716,13 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                    try:
                        # Initialize the set that collects the Feishu file URLs
                        file_urls = set()
                        # Get all files from folder
                        async def async_get_files(api_client: FeishuAPIClient, feishu_folder_token: str):
                            """Enter the API client context and return its listing for the folder token.

                            Calls ``list_all_folder_files`` with ``recursive=True`` and hands the
                            result straight back to the caller.
                            """
                            async with api_client as feishu:
                                return await feishu.list_all_folder_files(feishu_folder_token, recursive=True)
                        files = asyncio.run(async_get_files(api_client, feishu_folder_token))
                        # Filter out folders, only sync documents
                        documents = [f for f in files if f.type in ["doc", "docx", "sheet", "bitable", "file"]]
@@ -728,12 +738,16 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                                    # Construct a save path：/files/{kb_id}/{parent_id}/{file.id}{file_extension}
                                    save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
                                                            str(db_knowledge.parent_id))
-                                    Path(save_dir).mkdir(parents=True, exist_ok=True)  # Ensure that the directory exists
+                                    Path(save_dir).mkdir(parents=True,
                                                         exist_ok=True)  # Ensure that the directory exists
                                    # download document from Feishu FileInfo
-                                    async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo, save_dir: str):
+                                    async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo,
                                                                      save_dir: str):
                                        async with api_client as client:
                                            file_path = await client.download_document(document=doc, save_dir=save_dir)
                                            return file_path
                                    file_path = asyncio.run(async_download_document(api_client, doc, save_dir))
                                    save_path = os.path.join(save_dir, f"{db_file.id}{db_file.file_ext}")
@@ -770,11 +784,14 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                                save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
                                                        str(db_knowledge.parent_id))
                                Path(save_dir).mkdir(parents=True, exist_ok=True)  # Ensure that the directory exists
                                # download document from Feishu FileInfo
-                                async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo, save_dir: str):
+                                async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo,
                                                                  save_dir: str):
                                    async with api_client as client:
                                        file_path = await client.download_document(document=doc, save_dir=save_dir)
                                        return file_path
                                file_path = asyncio.run(async_download_document(api_client, doc, save_dir))
                                # add db_file
                                file_name = os.path.basename(file_path)
@@ -788,7 +805,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
                                    file_ext=file_extension.lower(),
                                    file_size=file_size,
                                    file_url=doc.url,
-                                    created_at = doc.modified_time
+                                    created_at=doc.modified_time
                                )
                                db_file = File(**upload_file.model_dump())
                                db.add(db_file)
@@ -853,7 +870,6 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
            case _:  # General
                print(f"General: No synchronization needed\n")
        result = f"sync knowledge '{db_knowledge.name}' processed successfully."
        return result
    except Exception as e:
@@ -866,8 +882,8 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
@celery_app.task(name="app.core.memory.agent.read_message", bind=True)
-def read_message_task(self, end_user_id: str, message: str, history: List[Dict[str, Any]], search_switch: str, config_id: str, storage_type:str, user_rag_memory_id:str) -> Dict[str, Any]:
+def read_message_task(self, end_user_id: str, message: str, history: List[Dict[str, Any]], search_switch: str,
-
+                      config_id: str, storage_type: str, user_rag_memory_id: str) -> Dict[str, Any]:
    """Celery task to process a read message via MemoryAgentService.
    Args:
@@ -912,7 +928,8 @@ def read_message_task(self, end_user_id: str, message: str, history: List[Dict[s
        db = next(get_db())
        try:
            service = MemoryAgentService()
-            return await service.read_memory(end_user_id, message, history, search_switch, actual_config_id, db, storage_type, user_rag_memory_id)
+            return await service.read_memory(end_user_id, message, history, search_switch, actual_config_id, db,
                                             storage_type, user_rag_memory_id)
        finally:
            db.close()
@@ -964,7 +981,8 @@ def read_message_task(self, end_user_id: str, message: str, history: List[Dict[s
@celery_app.task(name="app.core.memory.agent.write_message", bind=True)
-def write_message_task(self, end_user_id: str, message: str, config_id: str, storage_type:str, user_rag_memory_id:str, language: str = "zh") -> Dict[str, Any]:
+def write_message_task(self, end_user_id: str, message: str, config_id: str, storage_type: str, user_rag_memory_id: str,
                       language: str = "zh") -> Dict[str, Any]:
    """Celery task to process a write message via MemoryAgentService.
    Args:
@@ -984,7 +1002,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
    from app.core.logging_config import get_logger
    logger = get_logger(__name__)
-    logger.info(f"[CELERY WRITE] Starting write task - end_user_id={end_user_id}, config_id={config_id}, storage_type={storage_type}, language={language}")
+    logger.info(
        f"[CELERY WRITE] Starting write task - end_user_id={end_user_id}, config_id={config_id}, storage_type={storage_type}, language={language}")
    start_time = time.time()
    # Convert config_id string to UUID
@@ -992,7 +1011,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
    if config_id:
        try:
            actual_config_id = uuid.UUID(config_id) if isinstance(config_id, str) else config_id
-            logger.info(f"[CELERY WRITE] Converted config_id to UUID: {actual_config_id} (type: {type(actual_config_id).__name__})")
+            logger.info(
                f"[CELERY WRITE] Converted config_id to UUID: {actual_config_id} (type: {type(actual_config_id).__name__})")
        except (ValueError, AttributeError) as e:
            logger.error(f"[CELERY WRITE] Invalid config_id format: {config_id}, error: {e}")
            return {
@@ -1021,9 +1041,11 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
    async def _run() -> str:
        db = next(get_db())
        try:
-            logger.info(f"[CELERY WRITE] Executing MemoryAgentService.write_memory with config_id={actual_config_id} (type: {type(actual_config_id).__name__}), language={language}")
+            logger.info(
                f"[CELERY WRITE] Executing MemoryAgentService.write_memory with config_id={actual_config_id} (type: {type(actual_config_id).__name__}), language={language}")
            service = MemoryAgentService()
-            result = await service.write_memory(end_user_id, message, actual_config_id, db, storage_type, user_rag_memory_id, language)
+            result = await service.write_memory(end_user_id, message, actual_config_id, db, storage_type,
                                                user_rag_memory_id, language)
            logger.info(f"[CELERY WRITE] Write completed successfully: {result}")
            return result
        except Exception as e:
@@ -1053,7 +1075,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
        result = loop.run_until_complete(_run())
        elapsed_time = time.time() - start_time
-        logger.info(f"[CELERY WRITE] Task completed successfully - elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}")
+        logger.info(
            f"[CELERY WRITE] Task completed successfully - elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}")
        return {
            "status": "SUCCESS",
@@ -1072,7 +1095,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
        else:
            detailed_error = str(e)
-        logger.error(f"[CELERY WRITE] Task failed - elapsed_time={elapsed_time:.2f}s, error={detailed_error}", exc_info=True)
+        logger.error(f"[CELERY WRITE] Task failed - elapsed_time={elapsed_time:.2f}s, error={detailed_error}",
                     exc_info=True)
        return {
            "status": "FAILURE",
@@ -1105,6 +1129,7 @@ def reflection_timer_task() -> None:
    """
    reflection_engine()
 # unused task
 # @celery_app.task(name="app.core.memory.agent.health.check_read_service")
 # def check_read_service_task() -> Dict[str, str]:
@@ -1460,7 +1485,6 @@ def regenerate_memory_cache(self) -> Dict[str, Any]:
        }
@celery_app.task(
    name="app.tasks.workspace_reflection_task",
    bind=True,
@@ -1518,15 +1542,16 @@ def workspace_reflection_task(self) -> Dict[str, Any]:
                        workspace_reflection_results = []
                        for data in result['apps_detailed_info']:
-                            if data['data_configs'] == []:
+                            if data['memory_configs'] == []:
                                continue
                            releases = data['releases']
-                            data_configs = data['data_configs']
+                            memory_configs = data['memory_configs']
                            end_users = data['end_users']
-                            for base, config, user in zip(releases, data_configs, end_users):
+                            for base, config, user in zip(releases, memory_configs, end_users):
-                                if str(base['config']) == str(config['config_id']) and str(base['app_id']) == str(user['app_id']):
+                                if str(base['config']) == str(config['config_id']) and str(base['app_id']) == str(
                                        user['app_id']):
                                    # Invoke the reflection service
                                    api_logger.info(f"为用户 {user['id']} 启动反思，config_id: {config['config_id']}")
@@ -1614,8 +1639,6 @@ def workspace_reflection_task(self) -> Dict[str, Any]:
        }
@celery_app.task(
    name="app.tasks.run_forgetting_cycle_task",
    bind=True,
@@ -1692,7 +1715,6 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di
    finally:
        loop.close()
 # =============================================================================
 # Long-term Memory Storage Tasks (Batched Write Strategies)
 # =============================================================================