Fix/develop memory bug (#350)
* 遗漏的历史映射 * 遗漏的历史映射 * fix_timeline_memories * fix_timeline_memories * write_gragp/bug_fix * write_gragp/bug_fix * write_gragp/bug_fix * write_gragp/bug_fix * Multiple independent transactions - single transaction * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * memory_content ->memory_config_id * tasks/bug_fix/long * tasks_reflection/bug/fix * tasks_reflection/bug/fix * tasks_reflection/bug/fix * tasks_reflection/bug/fix
This commit is contained in:
@@ -364,6 +364,13 @@ class MemoryReflectionService:
|
||||
reflexion_range_value = config_data.get("reflexion_range")
|
||||
if reflexion_range_value is None or reflexion_range_value == "":
|
||||
reflexion_range_value = "partial"
|
||||
# Map legacy/invalid values to valid enum values
|
||||
reflexion_range_mapping = {
|
||||
"retrieval": "partial", # Map old 'retrieval' to 'partial'
|
||||
"partial": "partial",
|
||||
"all": "all"
|
||||
}
|
||||
reflexion_range_value = reflexion_range_mapping.get(reflexion_range_value, "partial")
|
||||
reflexion_range = ReflectionRange(reflexion_range_value)
|
||||
|
||||
baseline_value = config_data.get("baseline")
|
||||
|
||||
@@ -434,13 +434,15 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
if crawled_document.content_length:
|
||||
# 1. update file
|
||||
db_file.file_name = f"{crawled_document.title}.txt"
|
||||
db_file.file_ext=".txt"
|
||||
db_file.file_size=crawled_document.content_length
|
||||
db_file.file_ext = ".txt"
|
||||
db_file.file_size = crawled_document.content_length
|
||||
db.commit()
|
||||
db.refresh(db_file)
|
||||
# Construct a save path:/files/{kb_id}/{parent_id}/{file.id}{file_extension}
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id), str(db_knowledge.parent_id))
|
||||
Path(save_dir).mkdir(parents=True, exist_ok=True) # Ensure that the directory exists
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
|
||||
str(db_knowledge.parent_id))
|
||||
Path(save_dir).mkdir(parents=True,
|
||||
exist_ok=True) # Ensure that the directory exists
|
||||
save_path = os.path.join(save_dir, f"{db_file.id}{db_file.file_ext}")
|
||||
# update file
|
||||
if os.path.exists(save_path):
|
||||
@@ -507,7 +509,8 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
db.commit()
|
||||
# 3. Document parsing, vectorization, and storage
|
||||
parse_document(file_path=save_path, document_id=db_document.id)
|
||||
db_files = db.query(File).filter(File.kb_id == db_knowledge.id, File.file_url.notin_(file_urls)).all()
|
||||
db_files = db.query(File).filter(File.kb_id == db_knowledge.id,
|
||||
File.file_url.notin_(file_urls)).all()
|
||||
if db_files: # --delete
|
||||
for db_file in db_files:
|
||||
db_document = db.query(Document).filter(Document.kb_id == db_knowledge.id,
|
||||
@@ -571,11 +574,14 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
else: # --update
|
||||
# 1. update file
|
||||
# Construct a save path:/files/{kb_id}/{parent_id}/{file.id}{file_extension}
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id), str(db_knowledge.parent_id))
|
||||
Path(save_dir).mkdir(parents=True, exist_ok=True) # Ensure that the directory exists
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
|
||||
str(db_knowledge.parent_id))
|
||||
Path(save_dir).mkdir(parents=True,
|
||||
exist_ok=True) # Ensure that the directory exists
|
||||
|
||||
# download document from Yuque YuqueDocInfo
|
||||
async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo, save_dir: str):
|
||||
async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo,
|
||||
save_dir: str):
|
||||
async with api_client as client:
|
||||
file_path = await client.download_document(doc, save_dir)
|
||||
return file_path
|
||||
@@ -613,11 +619,13 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
else: # --add
|
||||
# 1. update file
|
||||
# Construct a save path:/files/{kb_id}/{parent_id}/{file.id}{file_extension}
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id), str(db_knowledge.parent_id))
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
|
||||
str(db_knowledge.parent_id))
|
||||
Path(save_dir).mkdir(parents=True, exist_ok=True) # Ensure that the directory exists
|
||||
|
||||
# download document from Yuque YuqueDocInfo
|
||||
async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo, save_dir: str):
|
||||
async def async_download_document(api_client: YuqueAPIClient, doc: YuqueDocInfo,
|
||||
save_dir: str):
|
||||
async with api_client as client:
|
||||
file_path = await client.download_document(doc, save_dir)
|
||||
return file_path
|
||||
@@ -708,11 +716,13 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
try:
|
||||
# 初始化存储获取飞书 URLs 的集合
|
||||
file_urls = set()
|
||||
|
||||
# Get all files from folder
|
||||
async def async_get_files(api_client: FeishuAPIClient, feishu_folder_token: str):
|
||||
async with api_client as client:
|
||||
files = await client.list_all_folder_files(feishu_folder_token, recursive=True)
|
||||
return files
|
||||
|
||||
files = asyncio.run(async_get_files(api_client, feishu_folder_token))
|
||||
# Filter out folders, only sync documents
|
||||
documents = [f for f in files if f.type in ["doc", "docx", "sheet", "bitable", "file"]]
|
||||
@@ -728,12 +738,16 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
# Construct a save path:/files/{kb_id}/{parent_id}/{file.id}{file_extension}
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
|
||||
str(db_knowledge.parent_id))
|
||||
Path(save_dir).mkdir(parents=True, exist_ok=True) # Ensure that the directory exists
|
||||
Path(save_dir).mkdir(parents=True,
|
||||
exist_ok=True) # Ensure that the directory exists
|
||||
|
||||
# download document from Feishu FileInfo
|
||||
async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo, save_dir: str):
|
||||
async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo,
|
||||
save_dir: str):
|
||||
async with api_client as client:
|
||||
file_path = await client.download_document(document=doc, save_dir=save_dir)
|
||||
return file_path
|
||||
|
||||
file_path = asyncio.run(async_download_document(api_client, doc, save_dir))
|
||||
|
||||
save_path = os.path.join(save_dir, f"{db_file.id}{db_file.file_ext}")
|
||||
@@ -770,11 +784,14 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
save_dir = os.path.join(settings.FILE_PATH, str(db_knowledge.id),
|
||||
str(db_knowledge.parent_id))
|
||||
Path(save_dir).mkdir(parents=True, exist_ok=True) # Ensure that the directory exists
|
||||
|
||||
# download document from Feishu FileInfo
|
||||
async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo, save_dir: str):
|
||||
async def async_download_document(api_client: FeishuAPIClient, doc: FileInfo,
|
||||
save_dir: str):
|
||||
async with api_client as client:
|
||||
file_path = await client.download_document(document=doc, save_dir=save_dir)
|
||||
return file_path
|
||||
|
||||
file_path = asyncio.run(async_download_document(api_client, doc, save_dir))
|
||||
# add db_file
|
||||
file_name = os.path.basename(file_path)
|
||||
@@ -788,7 +805,7 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
file_ext=file_extension.lower(),
|
||||
file_size=file_size,
|
||||
file_url=doc.url,
|
||||
created_at = doc.modified_time
|
||||
created_at=doc.modified_time
|
||||
)
|
||||
db_file = File(**upload_file.model_dump())
|
||||
db.add(db_file)
|
||||
@@ -853,7 +870,6 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
case _: # General
|
||||
print(f"General: No synchronization needed\n")
|
||||
|
||||
|
||||
result = f"sync knowledge '{db_knowledge.name}' processed successfully."
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -866,8 +882,8 @@ def sync_knowledge_for_kb(kb_id: uuid.UUID):
|
||||
|
||||
|
||||
@celery_app.task(name="app.core.memory.agent.read_message", bind=True)
|
||||
def read_message_task(self, end_user_id: str, message: str, history: List[Dict[str, Any]], search_switch: str, config_id: str, storage_type:str, user_rag_memory_id:str) -> Dict[str, Any]:
|
||||
|
||||
def read_message_task(self, end_user_id: str, message: str, history: List[Dict[str, Any]], search_switch: str,
|
||||
config_id: str, storage_type: str, user_rag_memory_id: str) -> Dict[str, Any]:
|
||||
"""Celery task to process a read message via MemoryAgentService.
|
||||
|
||||
Args:
|
||||
@@ -912,7 +928,8 @@ def read_message_task(self, end_user_id: str, message: str, history: List[Dict[s
|
||||
db = next(get_db())
|
||||
try:
|
||||
service = MemoryAgentService()
|
||||
return await service.read_memory(end_user_id, message, history, search_switch, actual_config_id, db, storage_type, user_rag_memory_id)
|
||||
return await service.read_memory(end_user_id, message, history, search_switch, actual_config_id, db,
|
||||
storage_type, user_rag_memory_id)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -964,7 +981,8 @@ def read_message_task(self, end_user_id: str, message: str, history: List[Dict[s
|
||||
|
||||
|
||||
@celery_app.task(name="app.core.memory.agent.write_message", bind=True)
|
||||
def write_message_task(self, end_user_id: str, message: str, config_id: str, storage_type:str, user_rag_memory_id:str, language: str = "zh") -> Dict[str, Any]:
|
||||
def write_message_task(self, end_user_id: str, message: str, config_id: str, storage_type: str, user_rag_memory_id: str,
|
||||
language: str = "zh") -> Dict[str, Any]:
|
||||
"""Celery task to process a write message via MemoryAgentService.
|
||||
|
||||
Args:
|
||||
@@ -984,7 +1002,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
|
||||
from app.core.logging_config import get_logger
|
||||
logger = get_logger(__name__)
|
||||
|
||||
logger.info(f"[CELERY WRITE] Starting write task - end_user_id={end_user_id}, config_id={config_id}, storage_type={storage_type}, language={language}")
|
||||
logger.info(
|
||||
f"[CELERY WRITE] Starting write task - end_user_id={end_user_id}, config_id={config_id}, storage_type={storage_type}, language={language}")
|
||||
start_time = time.time()
|
||||
|
||||
# Convert config_id string to UUID
|
||||
@@ -992,7 +1011,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
|
||||
if config_id:
|
||||
try:
|
||||
actual_config_id = uuid.UUID(config_id) if isinstance(config_id, str) else config_id
|
||||
logger.info(f"[CELERY WRITE] Converted config_id to UUID: {actual_config_id} (type: {type(actual_config_id).__name__})")
|
||||
logger.info(
|
||||
f"[CELERY WRITE] Converted config_id to UUID: {actual_config_id} (type: {type(actual_config_id).__name__})")
|
||||
except (ValueError, AttributeError) as e:
|
||||
logger.error(f"[CELERY WRITE] Invalid config_id format: {config_id}, error: {e}")
|
||||
return {
|
||||
@@ -1021,9 +1041,11 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
|
||||
async def _run() -> str:
|
||||
db = next(get_db())
|
||||
try:
|
||||
logger.info(f"[CELERY WRITE] Executing MemoryAgentService.write_memory with config_id={actual_config_id} (type: {type(actual_config_id).__name__}), language={language}")
|
||||
logger.info(
|
||||
f"[CELERY WRITE] Executing MemoryAgentService.write_memory with config_id={actual_config_id} (type: {type(actual_config_id).__name__}), language={language}")
|
||||
service = MemoryAgentService()
|
||||
result = await service.write_memory(end_user_id, message, actual_config_id, db, storage_type, user_rag_memory_id, language)
|
||||
result = await service.write_memory(end_user_id, message, actual_config_id, db, storage_type,
|
||||
user_rag_memory_id, language)
|
||||
logger.info(f"[CELERY WRITE] Write completed successfully: {result}")
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -1053,7 +1075,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
|
||||
result = loop.run_until_complete(_run())
|
||||
elapsed_time = time.time() - start_time
|
||||
|
||||
logger.info(f"[CELERY WRITE] Task completed successfully - elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}")
|
||||
logger.info(
|
||||
f"[CELERY WRITE] Task completed successfully - elapsed_time={elapsed_time:.2f}s, task_id={self.request.id}")
|
||||
|
||||
return {
|
||||
"status": "SUCCESS",
|
||||
@@ -1072,7 +1095,8 @@ def write_message_task(self, end_user_id: str, message: str, config_id: str, sto
|
||||
else:
|
||||
detailed_error = str(e)
|
||||
|
||||
logger.error(f"[CELERY WRITE] Task failed - elapsed_time={elapsed_time:.2f}s, error={detailed_error}", exc_info=True)
|
||||
logger.error(f"[CELERY WRITE] Task failed - elapsed_time={elapsed_time:.2f}s, error={detailed_error}",
|
||||
exc_info=True)
|
||||
|
||||
return {
|
||||
"status": "FAILURE",
|
||||
@@ -1105,6 +1129,7 @@ def reflection_timer_task() -> None:
|
||||
"""
|
||||
reflection_engine()
|
||||
|
||||
|
||||
# unused task
|
||||
# @celery_app.task(name="app.core.memory.agent.health.check_read_service")
|
||||
# def check_read_service_task() -> Dict[str, str]:
|
||||
@@ -1460,7 +1485,6 @@ def regenerate_memory_cache(self) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
|
||||
@celery_app.task(
|
||||
name="app.tasks.workspace_reflection_task",
|
||||
bind=True,
|
||||
@@ -1518,15 +1542,16 @@ def workspace_reflection_task(self) -> Dict[str, Any]:
|
||||
workspace_reflection_results = []
|
||||
|
||||
for data in result['apps_detailed_info']:
|
||||
if data['data_configs'] == []:
|
||||
if data['memory_configs'] == []:
|
||||
continue
|
||||
|
||||
releases = data['releases']
|
||||
data_configs = data['data_configs']
|
||||
memory_configs = data['memory_configs']
|
||||
end_users = data['end_users']
|
||||
|
||||
for base, config, user in zip(releases, data_configs, end_users):
|
||||
if str(base['config']) == str(config['config_id']) and str(base['app_id']) == str(user['app_id']):
|
||||
for base, config, user in zip(releases, memory_configs, end_users):
|
||||
if str(base['config']) == str(config['config_id']) and str(base['app_id']) == str(
|
||||
user['app_id']):
|
||||
# 调用反思服务
|
||||
api_logger.info(f"为用户 {user['id']} 启动反思,config_id: {config['config_id']}")
|
||||
|
||||
@@ -1614,8 +1639,6 @@ def workspace_reflection_task(self) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@celery_app.task(
|
||||
name="app.tasks.run_forgetting_cycle_task",
|
||||
bind=True,
|
||||
@@ -1692,7 +1715,6 @@ def run_forgetting_cycle_task(self, config_id: Optional[uuid.UUID] = None) -> Di
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Long-term Memory Storage Tasks (Batched Write Strategies)
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user