From ba36ccb21fd42c678f9b3748d7525dd747787c0e Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Wed, 4 Mar 2026 17:46:13 +0800 Subject: [PATCH 1/4] [changes] Hide the user knowledge base and unify the display of memory capacity --- .../memory_dashboard_controller.py | 4 +- api/app/repositories/knowledge_repository.py | 43 +++++++++++++++++ api/app/services/memory_dashboard_service.py | 46 +++++++++++++++++-- api/app/tasks.py | 13 ++++-- 4 files changed, 97 insertions(+), 9 deletions(-) diff --git a/api/app/controllers/memory_dashboard_controller.py b/api/app/controllers/memory_dashboard_controller.py index 475d184e..1b5b45fb 100644 --- a/api/app/controllers/memory_dashboard_controller.py +++ b/api/app/controllers/memory_dashboard_controller.py @@ -606,8 +606,8 @@ async def dashboard_data( # 获取RAG相关数据 try: - # total_memory: 使用 total_chunk(总chunk数) - total_chunk = memory_dashboard_service.get_rag_total_chunk(db, current_user) + # total_memory: sum of chunk_num over documents named '{end_user_id}.txt' for this workspace's end users + total_chunk = memory_dashboard_service.get_rag_user_kb_total_chunk(db, current_user) rag_data["total_memory"] = total_chunk # total_app: 统计当前空间下的所有app数量 diff --git a/api/app/repositories/knowledge_repository.py b/api/app/repositories/knowledge_repository.py index 681d1c10..e3832214 100644 --- a/api/app/repositories/knowledge_repository.py +++ b/api/app/repositories/knowledge_repository.py @@ -211,3 +211,46 @@ def get_total_kb_count_by_workspace(db: Session, workspace_id: uuid.UUID) -> int except Exception as e: db_logger.error(f"Failed to query total knowledge base count: workspace_id={workspace_id} - {str(e)}") raise + + +def get_user_kb_chunk_num_by_workspace(db: Session, workspace_id: uuid.UUID) -> int: + """ + 根据workspace_id查询knowledges表中permission_id='Memory'(用户知识库)的chunk_num总和 + """ + db_logger.debug(f"Query user KB chunk_num by workspace_id: workspace_id={workspace_id}") + + try: + from sqlalchemy import func + result = 
db.query(func.sum(Knowledge.chunk_num)).filter( + Knowledge.workspace_id == workspace_id, + Knowledge.status == 1, + Knowledge.permission_id == "Memory" + ).scalar() + + total = result if result is not None else 0 + db_logger.info(f"User KB chunk_num query successful: workspace_id={workspace_id}, total={total}") + return total + except Exception as e: + db_logger.error(f"Failed to query user KB chunk_num: workspace_id={workspace_id} - {str(e)}") + raise + + +def get_non_user_kb_count_by_workspace(db: Session, workspace_id: uuid.UUID) -> int: + """ + 根据workspace_id查询knowledges表中排除用户知识库(permission_id!='Memory')的数量 + """ + db_logger.debug(f"Query non-user KB count by workspace_id: workspace_id={workspace_id}") + + try: + count = db.query(Knowledge).filter( + Knowledge.workspace_id == workspace_id, + Knowledge.status == 1, + Knowledge.permission_id != "Memory" + ).count() + + db_logger.info(f"Non-user KB count query successful: workspace_id={workspace_id}, count={count}") + return count + except Exception as e: + db_logger.error(f"Failed to query non-user KB count: workspace_id={workspace_id} - {str(e)}") + raise + diff --git a/api/app/services/memory_dashboard_service.py b/api/app/services/memory_dashboard_service.py index 8d6071cc..22752805 100644 --- a/api/app/services/memory_dashboard_service.py +++ b/api/app/services/memory_dashboard_service.py @@ -390,19 +390,59 @@ def get_rag_total_kb( current_user: User ) -> int: """ - 根据当前用户所在的workspace_id查询konwledges表所有不同id的数量 + 根据当前用户所在的workspace_id查询knowledges表中排除用户知识库(permission_id!='Memory')的数量 """ workspace_id = current_user.current_workspace_id - business_logger.info(f"获取RAG总知识库数: workspace_id={workspace_id}, 操作者: {current_user.username}") + business_logger.info(f"获取RAG总知识库数(排除用户知识库): workspace_id={workspace_id}, 操作者: {current_user.username}") try: - total_kb = knowledge_repository.get_total_kb_count_by_workspace(db, workspace_id) + total_kb = knowledge_repository.get_non_user_kb_count_by_workspace(db, workspace_id) 
business_logger.info(f"成功获取RAG总知识库数: {total_kb}") return total_kb except Exception as e: business_logger.error(f"获取RAG总知识库数失败: workspace_id={workspace_id} - {str(e)}") raise + +def get_rag_user_kb_total_chunk( + db: Session, + current_user: User +) -> int: + """ + 根据当前用户所在的workspace_id,从documents表统计所有用户知识库的chunk总数。 + 与 /end_users 接口保持同源:查询 file_name 匹配 end_user_id.txt 的文档 chunk_num 之和。 + """ + workspace_id = current_user.current_workspace_id + business_logger.info(f"获取用户知识库总chunk数(documents表): workspace_id={workspace_id}, 操作者: {current_user.username}") + + try: + from app.models.document_model import Document + from app.models.end_user_model import EndUser + from app.models.app_model import App + from sqlalchemy import func + + # 通过 App 关联取该 workspace 下所有 end_user_id + end_user_ids = [ + str(u.id) for u in db.query(EndUser.id) + .join(App, EndUser.app_id == App.id) + .filter(App.workspace_id == workspace_id) + .all() + ] + if not end_user_ids: + return 0 + + file_names = [f"{uid}.txt" for uid in end_user_ids] + result = db.query(func.sum(Document.chunk_num)).filter( + Document.file_name.in_(file_names) + ).scalar() + + total_chunk = int(result or 0) + business_logger.info(f"成功获取用户知识库总chunk数: {total_chunk}") + return total_chunk + except Exception as e: + business_logger.error(f"获取用户知识库总chunk数失败: workspace_id={workspace_id} - {str(e)}") + raise + def get_current_user_total_chunk( end_user_id: str, db: Session, diff --git a/api/app/tasks.py b/api/app/tasks.py index 299d188b..671a03f4 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -62,7 +62,7 @@ def process_item(item: dict): @celery_app.task(name="app.core.rag.tasks.parse_document") -def parse_document(file_path: str, document_id: uuid.UUID): +def parse_document(file_path: str, document_id: str): """ Document parsing, vectorization, and storage """ @@ -74,6 +74,9 @@ def parse_document(file_path: str, document_id: uuid.UUID): db = next(get_db()) # Manually call the generator db_document = None db_knowledge = 
None + # 确保 document_id 是 UUID 对象 + if not isinstance(document_id, uuid.UUID): + document_id = uuid.UUID(str(document_id)) progress_msg = f"{datetime.now().strftime('%H:%M:%S')} Task has been received.\n" try: db_document = db.query(Document).filter(Document.id == document_id).first() @@ -282,11 +285,13 @@ def parse_document(file_path: str, document_id: uuid.UUID): result = f"parse document '{db_document.file_name}' processed successfully." return result except Exception as e: - if 'db_document' in locals(): - db_document.progress_msg += f"Failed to vectorize and import the parsed document:{str(e)}\n" + if db_document is not None: + db_document.progress_msg = (db_document.progress_msg or "") + f"Failed to vectorize and import the parsed document: {str(e)}\n" db_document.run = 0 db.commit() - result = f"parse document '{db_document.file_name}' failed." + result = f"parse document '{db_document.file_name}' failed." + else: + result = f"parse document '{document_id}' failed: document not found in DB. 
error={str(e)}" return result finally: db.close() From 850d9ee70b098b18d604c6b763e167a855c76fe5 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Wed, 4 Mar 2026 17:46:13 +0800 Subject: [PATCH 2/4] [changes] Hide the user knowledge base and unify the display of memory capacity --- .../memory_dashboard_controller.py | 4 +- api/app/repositories/knowledge_repository.py | 43 +++++++++++++++++ api/app/services/memory_dashboard_service.py | 46 +++++++++++++++++-- api/app/tasks.py | 13 ++++-- 4 files changed, 97 insertions(+), 9 deletions(-) diff --git a/api/app/controllers/memory_dashboard_controller.py b/api/app/controllers/memory_dashboard_controller.py index 475d184e..1b5b45fb 100644 --- a/api/app/controllers/memory_dashboard_controller.py +++ b/api/app/controllers/memory_dashboard_controller.py @@ -606,8 +606,8 @@ async def dashboard_data( # 获取RAG相关数据 try: - # total_memory: 使用 total_chunk(总chunk数) - total_chunk = memory_dashboard_service.get_rag_total_chunk(db, current_user) + # total_memory: sum of chunk_num over documents named '{end_user_id}.txt' for this workspace's end users + total_chunk = memory_dashboard_service.get_rag_user_kb_total_chunk(db, current_user) rag_data["total_memory"] = total_chunk # total_app: 统计当前空间下的所有app数量 diff --git a/api/app/repositories/knowledge_repository.py b/api/app/repositories/knowledge_repository.py index 681d1c10..e3832214 100644 --- a/api/app/repositories/knowledge_repository.py +++ b/api/app/repositories/knowledge_repository.py @@ -211,3 +211,46 @@ def get_total_kb_count_by_workspace(db: Session, workspace_id: uuid.UUID) -> int except Exception as e: db_logger.error(f"Failed to query total knowledge base count: workspace_id={workspace_id} - {str(e)}") raise + + +def get_user_kb_chunk_num_by_workspace(db: Session, workspace_id: uuid.UUID) -> int: + """ + 根据workspace_id查询knowledges表中permission_id='Memory'(用户知识库)的chunk_num总和 + """ + db_logger.debug(f"Query user KB chunk_num by workspace_id: workspace_id={workspace_id}") + + try: + from sqlalchemy import func + 
result = db.query(func.sum(Knowledge.chunk_num)).filter( + Knowledge.workspace_id == workspace_id, + Knowledge.status == 1, + Knowledge.permission_id == "Memory" + ).scalar() + + total = result if result is not None else 0 + db_logger.info(f"User KB chunk_num query successful: workspace_id={workspace_id}, total={total}") + return total + except Exception as e: + db_logger.error(f"Failed to query user KB chunk_num: workspace_id={workspace_id} - {str(e)}") + raise + + +def get_non_user_kb_count_by_workspace(db: Session, workspace_id: uuid.UUID) -> int: + """ + 根据workspace_id查询knowledges表中排除用户知识库(permission_id!='Memory')的数量 + """ + db_logger.debug(f"Query non-user KB count by workspace_id: workspace_id={workspace_id}") + + try: + count = db.query(Knowledge).filter( + Knowledge.workspace_id == workspace_id, + Knowledge.status == 1, + Knowledge.permission_id != "Memory" + ).count() + + db_logger.info(f"Non-user KB count query successful: workspace_id={workspace_id}, count={count}") + return count + except Exception as e: + db_logger.error(f"Failed to query non-user KB count: workspace_id={workspace_id} - {str(e)}") + raise + diff --git a/api/app/services/memory_dashboard_service.py b/api/app/services/memory_dashboard_service.py index 8d6071cc..22752805 100644 --- a/api/app/services/memory_dashboard_service.py +++ b/api/app/services/memory_dashboard_service.py @@ -390,19 +390,59 @@ def get_rag_total_kb( current_user: User ) -> int: """ - 根据当前用户所在的workspace_id查询konwledges表所有不同id的数量 + 根据当前用户所在的workspace_id查询knowledges表中排除用户知识库(permission_id!='Memory')的数量 """ workspace_id = current_user.current_workspace_id - business_logger.info(f"获取RAG总知识库数: workspace_id={workspace_id}, 操作者: {current_user.username}") + business_logger.info(f"获取RAG总知识库数(排除用户知识库): workspace_id={workspace_id}, 操作者: {current_user.username}") try: - total_kb = knowledge_repository.get_total_kb_count_by_workspace(db, workspace_id) + total_kb = knowledge_repository.get_non_user_kb_count_by_workspace(db, 
workspace_id) business_logger.info(f"成功获取RAG总知识库数: {total_kb}") return total_kb except Exception as e: business_logger.error(f"获取RAG总知识库数失败: workspace_id={workspace_id} - {str(e)}") raise + +def get_rag_user_kb_total_chunk( + db: Session, + current_user: User +) -> int: + """ + 根据当前用户所在的workspace_id,从documents表统计所有用户知识库的chunk总数。 + 与 /end_users 接口保持同源:查询 file_name 匹配 end_user_id.txt 的文档 chunk_num 之和。 + """ + workspace_id = current_user.current_workspace_id + business_logger.info(f"获取用户知识库总chunk数(documents表): workspace_id={workspace_id}, 操作者: {current_user.username}") + + try: + from app.models.document_model import Document + from app.models.end_user_model import EndUser + from app.models.app_model import App + from sqlalchemy import func + + # 通过 App 关联取该 workspace 下所有 end_user_id + end_user_ids = [ + str(u.id) for u in db.query(EndUser.id) + .join(App, EndUser.app_id == App.id) + .filter(App.workspace_id == workspace_id) + .all() + ] + if not end_user_ids: + return 0 + + file_names = [f"{uid}.txt" for uid in end_user_ids] + result = db.query(func.sum(Document.chunk_num)).filter( + Document.file_name.in_(file_names) + ).scalar() + + total_chunk = int(result or 0) + business_logger.info(f"成功获取用户知识库总chunk数: {total_chunk}") + return total_chunk + except Exception as e: + business_logger.error(f"获取用户知识库总chunk数失败: workspace_id={workspace_id} - {str(e)}") + raise + def get_current_user_total_chunk( end_user_id: str, db: Session, diff --git a/api/app/tasks.py b/api/app/tasks.py index 093f081f..4f7bfacc 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -62,7 +62,7 @@ def process_item(item: dict): @celery_app.task(name="app.core.rag.tasks.parse_document") -def parse_document(file_path: str, document_id: uuid.UUID): +def parse_document(file_path: str, document_id: str): """ Document parsing, vectorization, and storage """ @@ -74,6 +74,9 @@ def parse_document(file_path: str, document_id: uuid.UUID): db = next(get_db()) # Manually call the generator db_document = None 
db_knowledge = None + # 确保 document_id 是 UUID 对象 + if not isinstance(document_id, uuid.UUID): + document_id = uuid.UUID(str(document_id)) progress_msg = f"{datetime.now().strftime('%H:%M:%S')} Task has been received.\n" try: db_document = db.query(Document).filter(Document.id == document_id).first() @@ -286,11 +289,13 @@ def parse_document(file_path: str, document_id: uuid.UUID): result = f"parse document '{db_document.file_name}' processed successfully." return result except Exception as e: - if 'db_document' in locals(): - db_document.progress_msg += f"Failed to vectorize and import the parsed document:{str(e)}\n" + if db_document is not None: + db_document.progress_msg = (db_document.progress_msg or "") + f"Failed to vectorize and import the parsed document: {str(e)}\n" db_document.run = 0 db.commit() - result = f"parse document '{db_document.file_name}' failed." + result = f"parse document '{db_document.file_name}' failed." + else: + result = f"parse document '{document_id}' failed: document not found in DB. error={str(e)}" return result finally: db.close() From 420f391f3c242ea4d485e44bcd5fa25ad6df9e29 Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Wed, 4 Mar 2026 18:01:56 +0800 Subject: [PATCH 3/4] [fix] Fixed tuple unpacking and moved UUID conversion into the try block. 
--- api/app/services/memory_dashboard_service.py | 2 +- api/app/tasks.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/app/services/memory_dashboard_service.py b/api/app/services/memory_dashboard_service.py index 22752805..05aed57e 100644 --- a/api/app/services/memory_dashboard_service.py +++ b/api/app/services/memory_dashboard_service.py @@ -423,7 +423,7 @@ def get_rag_user_kb_total_chunk( # 通过 App 关联取该 workspace 下所有 end_user_id end_user_ids = [ - str(u.id) for u in db.query(EndUser.id) + str(eid) for (eid,) in db.query(EndUser.id) .join(App, EndUser.app_id == App.id) .filter(App.workspace_id == workspace_id) .all() diff --git a/api/app/tasks.py b/api/app/tasks.py index 4f7bfacc..2846071a 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -74,11 +74,11 @@ def parse_document(file_path: str, document_id: str): db = next(get_db()) # Manually call the generator db_document = None db_knowledge = None - # 确保 document_id 是 UUID 对象 - if not isinstance(document_id, uuid.UUID): - document_id = uuid.UUID(str(document_id)) progress_msg = f"{datetime.now().strftime('%H:%M:%S')} Task has been received.\n" try: + # 确保 document_id 是 UUID 对象 + if not isinstance(document_id, uuid.UUID): + document_id = uuid.UUID(str(document_id)) db_document = db.query(Document).filter(Document.id == document_id).first() db_knowledge = db.query(Knowledge).filter(Knowledge.id == db_document.kb_id).first() # 1. 
Document parsing & segmentation From 647a9788657e1ccea2e1b620d1141c2af28cf58e Mon Sep 17 00:00:00 2001 From: lanceyq <1982376970@qq.com> Date: Wed, 4 Mar 2026 19:07:40 +0800 Subject: [PATCH 4/4] [fix] Restore task: revert parse_document document_id/UUID and error-handling changes in api/app/tasks.py --- api/app/tasks.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/api/app/tasks.py b/api/app/tasks.py index 2846071a..093f081f 100644 --- a/api/app/tasks.py +++ b/api/app/tasks.py @@ -62,7 +62,7 @@ def process_item(item: dict): @celery_app.task(name="app.core.rag.tasks.parse_document") -def parse_document(file_path: str, document_id: str): +def parse_document(file_path: str, document_id: uuid.UUID): """ Document parsing, vectorization, and storage """ @@ -76,9 +76,6 @@ def parse_document(file_path: str, document_id: str): db_knowledge = None progress_msg = f"{datetime.now().strftime('%H:%M:%S')} Task has been received.\n" try: - # 确保 document_id 是 UUID 对象 - if not isinstance(document_id, uuid.UUID): - document_id = uuid.UUID(str(document_id)) db_document = db.query(Document).filter(Document.id == document_id).first() db_knowledge = db.query(Knowledge).filter(Knowledge.id == db_document.kb_id).first() # 1. Document parsing & segmentation @@ -289,13 +286,11 @@ def parse_document(file_path: str, document_id: str): result = f"parse document '{db_document.file_name}' processed successfully." return result except Exception as e: - if db_document is not None: - db_document.progress_msg = (db_document.progress_msg or "") + f"Failed to vectorize and import the parsed document: {str(e)}\n" + if 'db_document' in locals(): + db_document.progress_msg += f"Failed to vectorize and import the parsed document:{str(e)}\n" db_document.run = 0 db.commit() - result = f"parse document '{db_document.file_name}' failed." - else: - result = f"parse document '{document_id}' failed: document not found in DB. error={str(e)}" return result finally: db.close()