From d1757796ad4299557aca92f8d908860cfe024de0 Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Tue, 30 Dec 2025 16:33:06 +0800 Subject: [PATCH 01/18] =?UTF-8?q?Agent=E5=BA=94=E7=94=A8=E4=B8=AD=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E7=9F=A5=E8=AF=86=E5=BA=93=E7=9A=84=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E5=AD=97=E6=AE=B5=EF=BC=88=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E3=80=81=E5=8F=8D=E6=80=9D=E7=BB=99=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E5=80=BC=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 | 2 +- api/app/schemas/app_schema.py | 1 + api/app/schemas/memory_reflection_schemas.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 b/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 index 200f2667..e649897a 100644 --- a/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 @@ -86,5 +86,5 @@ - **quality_assessment**: quality_assessment=true时输出评估对象,否则为null(注意:- summary输出的结果不允许含有(expired_at设为2024-01-01T00:00:00Z)等原数据字段以及涉及需要修改的字段以及内容) - **memory_verify**: memory_verify=true时输出隐私检测对象,否则为null - (注意:- summary输出的结果不允许含有(expired_at设为2024-01-01T00:00:00Z)等原数据字段以及涉及需要修改的字段以及内容) + (注意:- summary输出的结果不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true\memory_verify=false)等原数据字段以及涉及需要修改的字段以及内容) 模式参考:{{ json_schema }} \ No newline at end of file diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index de0a4c53..81cd704d 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -32,6 +32,7 @@ class KnowledgeRetrievalConfig(BaseModel): ) reranker_id: Optional[str] = Field(default=None, description="多知识库结果融合的模型ID") reranker_top_k: int = Field(default=10, ge=0, le=1024, description="多知识库结果融合的模型参数") + use_graph: bool = Field(default=False, description="是否使用图搜索") class ToolConfig(BaseModel): diff --git a/api/app/schemas/memory_reflection_schemas.py b/api/app/schemas/memory_reflection_schemas.py index ada92cf2..860f1ef1 100644 --- a/api/app/schemas/memory_reflection_schemas.py +++ b/api/app/schemas/memory_reflection_schemas.py @@ -12,8 +12,8 @@ class Memory_Reflection(BaseModel): config_id: Optional[int] = None reflection_enabled: bool reflection_period_in_hours: str - reflexion_range: str - baseline: str + reflexion_range: Optional[str] = "partial" + baseline: Optional[str] = "TIME" reflection_model_id: str memory_verify: bool quality_assessment: bool From 7377abe884934d41f31e8e80497f42026e6d299d Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Tue, 30 Dec 2025 18:11:25 +0800 Subject: [PATCH 02/18] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 index 99476c82..db4d679f 100644 --- a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 @@ -111,7 +111,8 @@ - 隐私保护优先: 所有输出记录必须完成隐私脱敏 - 脱敏变更记录: 隐私脱敏变更也必须在change字段中记录{% endif %} - 不可修改数据: 数据被判定为正确时不可修改,无数据可输出时为空 -- 输出的结果reflexion字段中的reason字段和solution不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true)等原数据字段以及涉及需要修改的字段以及内容 +- 输出的结果reflexion字段中的reason字段和solution不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true、memory_verify=false)等原数据字段以及涉及需要修改的字段以及内容, + ,如果是FACT,只记录事实冲突相关的数据;如果是TIME,只记录时间冲突相关的数据;如果是HYBRID,则记录所有冲突相关的数据 **变更记录格式**: ```json From 4e7382027107b3b4ba7b7d5539c9913c902e3f9f Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Tue, 30 Dec 2025 19:03:06 +0800 Subject: [PATCH 03/18] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 index db4d679f..91b067ee 100644 --- a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 @@ -159,7 +159,8 @@ "conflict": true }, "reflexion": { - "reason": "该冲突类型的原因分析", + "reason": "该冲突类型的原因分析,如果是FACT就是存在事实冲突,分析该冲突原因,如果是TIME就是存在时间冲突,分析该冲突原因,如果是HYBRID,可以输出存在时间与事实的混合冲突再添加上原因分析, + 不可以随意分配冲突类型以及原因", "solution": "该冲突类型的解决方案" }, "resolved": { From 3aa52cc676362f236e75f06bcda70f8e51b1368d Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Tue, 30 Dec 2025 19:25:40 +0800 Subject: [PATCH 04/18] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../reflection_engine/example/example.json | 48 ++++--------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/api/app/core/memory/storage_services/reflection_engine/example/example.json b/api/app/core/memory/storage_services/reflection_engine/example/example.json index 18a2b185..09429829 100644 --- a/api/app/core/memory/storage_services/reflection_engine/example/example.json +++ b/api/app/core/memory/storage_services/reflection_engine/example/example.json @@ -2,52 +2,39 @@ "memory_verify": { "source_data": [ { - "statement_name": "用户是2023年春天去北京工作的。", - "statement_id": "62beac695b1346f4871740a45db88782" + "statement_name": "我是 2023 年春天去北京工作的,后来基本一直都在北京上班,也没怎么换过城市。不过后来公司调整,2024 年上半年我被调到上海待了差不多半年,那段时间每天都是在上海办公室打卡。当时入职资料用的还是我之前的身份信息,身份证号是 11010119950308123X,银行卡是 6222023847595898,这些一直没变。对了,其实我 从 2023 年开始就一直在北京生活,从来没有长期离开过北京,上海那段更多算是远程配合。" }, { - "statement_name": "用户后来基本一直都在北京上班。", - "statement_id": "4cba5ac08b674d7fb1e2ae634d2b8f0b" + "statement_name": "用户后来基本一直都在北京上班。" }, { - "statement_name": "用户从2023年开始就一直在北京生活。", - "statement_id": "e612a44da4db483993c350df7c97a1a1" + "statement_name": "用户从2023年开始就一直在北京生活。" }, { - "statement_name": "用户从来没有长期离开过北京。", - "statement_id": "b3c787a2e33c49f7981accabbbb4538a" + "statement_name": "用户从来没有长期离开过北京。" }, { - "statement_name": "由于公司调整,用户在2024年上半年被调到上海待了差不多半年。", - "statement_id": "64cde4230cb24a4da726e7db9e7aa616" + "statement_name": "由于公司调整,用户在2024年上半年被调到上海待了差不多半年。" }, { - "statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。", - "statement_id": "8b1b12e23b844b8088dfeb67da6ad669" + "statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。" }, { - "statement_name": "用户在入职时使用的身份信息是之前的,身份证号为11010119950308123X。", - "statement_id": "030afd362e9b4110b139e68e5d3e7143" + "statement_name": "用户在入职时使用的身份信息是之前的,身份证号为11010119950308123X。" }, { - "statement_name": "用户的银行卡号是6222023847595898。", - "statement_id": "6c7567cd1f3c478bb42d1b65383e6f2f" + "statement_name": "用户的银行卡号是6222023847595898。" }, { - "statement_name": "用户的身份信息和银行卡信息一直没变。", - "statement_id": "b3ca618e1e204b83bebd70e75cf2073f" + "statement_name": "用户的身份信息和银行卡信息一直没变。" }, { - "statement_name": "用户认为在上海的那段时间更多算是远程配合。", - "statement_id": "150af89d2c154e6eb41ff1a91e37f962" + "statement_name": "用户认为在上海的那段时间更多算是远程配合。" } ], "databasets": [ { "entity1_name": "Person", - "description": "表示人类个体的通用类型", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "用户", "entity2": { "description": "叙述者,讲述个人工作与生活经历的个体", "name": "用户" @@ -55,9 +42,6 @@ }, { "entity1_name": "用户", - "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "身份信息", "entity2": { "description": "用于个人身份识别的数据", "name": "身份信息" @@ -65,9 +49,6 @@ }, { "entity1_name": "用户", - "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "6222023847595898", "entity2": { "description": "用户的银行卡号码", "name": "6222023847595898" @@ -76,33 +57,24 @@ { "entity1_name": "用户", "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "上海办公室", "entity2": { "entity_idx": 1, "aliases": ["上海办"], - "description": "位于上海的工作办公场所", "name": "上海办公室" } }, { "entity1_name": "用户", "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "北京", "entity2": { "aliases": ["京", "京城", "北平"], - "description": "中国的首都城市,用户主要工作和生活所在地", "name": "北京" } }, { "entity1_name": "11010119950308123X", "description": "具体的身份证号码值", - "statement_id": "030afd362e9b4110b139e68e5d3e7143", - "entity2_name": "身份证号", "entity2": { - "description": "中华人民共和国公民的身份号码", "name": "身份证号" } } From a09d3be31085572494f4a73366af1d49c81c8054 Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Tue, 30 Dec 2025 19:46:48 +0800 Subject: [PATCH 05/18] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../reflection_engine/self_reflexion.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py index 97f51fb9..e9fb8855 100644 --- a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py +++ b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py @@ -387,7 +387,7 @@ class ReflectionEngine: result_data['memory_verifies'] = memory_verifies result_data['quality_assessments'] = quality_assessments conflicts_found='' - + REMOVE_KEYS = {"created_at", "expired_at","relationship","predicate","statement_id","id","statement_id","relationship_statement_id"} # Clearn conflict_data,And memory_verify和quality_assessment cleaned_conflict_data = [] for item in conflict_data: @@ -396,7 +396,23 @@ class ReflectionEngine: 'conflict': item['conflict'] } cleaned_conflict_data.append(cleaned_item) - + cleaned_conflict_data_=[] + for item in conflict_data: + cleaned_data = [] + for row in item.get("data", []): + # 删除 created_at / expired_at + cleaned_row = { + k: v + for k, v in row.items() + if k not in REMOVE_KEYS + } + cleaned_data.append(cleaned_row) + cleaned_item = { + "data": cleaned_data, + "conflict": item.get("conflict"), + } + cleaned_conflict_data_.append(cleaned_item) + print(cleaned_conflict_data_) # 3. 解决冲突 solved_data = await self._resolve_conflicts(cleaned_conflict_data, source_data) if not solved_data: From 07bcb54ed3320742cab0e469096899a0c2c1f031 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Wed, 31 Dec 2025 08:54:36 +0800 Subject: [PATCH 06/18] [fix]entity_resolution.py:199: SyntaxWarning: invalid escape sequence '\d' --- api/app/core/rag/graphrag/entity_resolution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/app/core/rag/graphrag/entity_resolution.py b/api/app/core/rag/graphrag/entity_resolution.py index d648e5a1..20af8818 100644 --- a/api/app/core/rag/graphrag/entity_resolution.py +++ b/api/app/core/rag/graphrag/entity_resolution.py @@ -196,7 +196,7 @@ class EntityResolution(Extractor): ans_list = [] records = [r.strip() for r in results.split(record_delimiter)] for record in records: - pattern_int = f"{re.escape(entity_index_delimiter)}(\d+){re.escape(entity_index_delimiter)}" + pattern_int = fr"{re.escape(entity_index_delimiter)}(\d+){re.escape(entity_index_delimiter)}" match_int = re.search(pattern_int, record) res_int = int(str(match_int.group(1) if match_int else '0')) if res_int > records_length: From c78dc1fd4719cbb543b57b7cf69a6c397a083416 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Wed, 31 Dec 2025 09:51:00 +0800 Subject: [PATCH 07/18] [fix]parsed excel document error:float division by zero --- api/app/core/rag/app/naive.py | 1 + .../core/rag/deepdoc/parser/excel_parser.py | 77 +++++++++++++++---- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/api/app/core/rag/app/naive.py b/api/app/core/rag/app/naive.py index 6d6b933a..23f0c4ba 100644 --- a/api/app/core/rag/app/naive.py +++ b/api/app/core/rag/app/naive.py @@ -672,6 +672,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, excel_parser = ExcelParser() if parser_config.get("html4excel"): sections = [(_, "") for _ in excel_parser.html(binary, 12) if _] + parser_config["chunk_token_num"] = 0 else: sections = [(_, "") for _ in excel_parser(binary) if _] parser_config["chunk_token_num"] = 12800 diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py index f7601ee3..a161f4ca 100644 --- a/api/app/core/rag/deepdoc/parser/excel_parser.py +++ b/api/app/core/rag/deepdoc/parser/excel_parser.py @@ -5,6 +5,7 @@ from io import BytesIO import pandas as pd from openpyxl import Workbook, load_workbook +from PIL import Image from app.core.rag.nlp import find_codec @@ -28,7 +29,7 @@ class RAGExcelParser: try: file_like_object.seek(0) - df = pd.read_csv(file_like_object) + df = pd.read_csv(file_like_object, on_bad_lines='skip') return RAGExcelParser._dataframe_to_workbook(df) except Exception as e_csv: @@ -42,14 +43,12 @@ class RAGExcelParser: file_like_object.seek(0) try: dfs = pd.read_excel(file_like_object, sheet_name=None) - if isinstance(dfs, dict): - dfs = next(iter(dfs.values())) return RAGExcelParser._dataframe_to_workbook(dfs) except Exception as ex: logging.info(f"pandas with default engine load error: {ex}, try calamine instead") file_like_object.seek(0) df = pd.read_excel(file_like_object, engine="calamine") - print(df) + print("lxc1") return RAGExcelParser._dataframe_to_workbook(df) except Exception as e_pandas: raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") @@ -68,7 +67,6 @@ class RAGExcelParser: # if contains multiple sheets use _dataframes_to_workbook if isinstance(df, dict) and len(df) > 1: return RAGExcelParser._dataframes_to_workbook(df) - df = RAGExcelParser._clean_dataframe(df) wb = Workbook() ws = wb.active @@ -80,15 +78,14 @@ class RAGExcelParser: for row_num, row in enumerate(df.values, 2): for col_num, value in enumerate(row, 1): ws.cell(row=row_num, column=col_num, value=value) - return wb - + @staticmethod def _dataframes_to_workbook(dfs: dict): wb = Workbook() default_sheet = wb.active wb.remove(default_sheet) - + for sheet_name, df in dfs.items(): df = RAGExcelParser._clean_dataframe(df) ws = wb.create_sheet(title=sheet_name) @@ -99,6 +96,52 @@ class RAGExcelParser: ws.cell(row=row_num, column=col_num, value=value) return wb + @staticmethod + def _extract_images_from_worksheet(ws, sheetname=None): + """ + Extract images from a worksheet and enrich them with vision-based descriptions. + + Returns: List[dict] + """ + images = getattr(ws, "_images", []) + if not images: + return [] + + raw_items = [] + + for img in images: + try: + img_bytes = img._data() + pil_img = Image.open(BytesIO(img_bytes)).convert("RGB") + + anchor = img.anchor + if hasattr(anchor, "_from") and hasattr(anchor, "_to"): + r1, c1 = anchor._from.row + 1, anchor._from.col + 1 + r2, c2 = anchor._to.row + 1, anchor._to.col + 1 + if r1 == r2 and c1 == c2: + span = "single_cell" + else: + span = "multi_cell" + else: + r1, c1 = anchor._from.row + 1, anchor._from.col + 1 + r2, c2 = r1, c1 + span = "single_cell" + + item = { + "sheet": sheetname or ws.title, + "image": pil_img, + "image_description": "", + "row_from": r1, + "col_from": c1, + "row_to": r2, + "col_to": c2, + "span_type": span, + } + raw_items.append(item) + except Exception: + continue + return raw_items + def html(self, fnm, chunk_rows=256): from html import escape @@ -131,7 +174,7 @@ class RAGExcelParser: tb = "" tb += f"" tb += tb_rows_0 - for r in list(rows[1 + chunk_i * chunk_rows : min(1 + (chunk_i + 1) * chunk_rows, len(rows))]): + for r in list(rows[1 + chunk_i * chunk_rows: min(1 + (chunk_i + 1) * chunk_rows, len(rows))]): tb += "" for i, c in enumerate(r): if c.value is None: @@ -154,7 +197,7 @@ class RAGExcelParser: except Exception as e: logging.warning(f"Parse spreadsheet error: {e}, trying to interpret as CSV file") file_like_object.seek(0) - df = pd.read_csv(file_like_object) + df = pd.read_csv(file_like_object, on_bad_lines='skip') df = df.replace(r"^\s*$", "", regex=True) return df.to_markdown(index=False) @@ -192,14 +235,14 @@ class RAGExcelParser: if fnm.split(".")[-1].lower().find("xls") >= 0: wb = RAGExcelParser._load_excel_to_workbook(BytesIO(binary)) total = 0 - + for sheetname in wb.sheetnames: - try: - ws = wb[sheetname] - total += len(list(ws.rows)) - except Exception as e: - logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}") - continue + try: + ws = wb[sheetname] + total += len(list(ws.rows)) + except Exception as e: + logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}") + continue return total if fnm.split(".")[-1].lower() in ["csv", "txt"]: From 6b0ee1b74a079f32d5482b52ae8a0836b19ef917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=B0=E6=9C=88?= Date: Wed, 31 Dec 2025 02:20:45 +0000 Subject: [PATCH 08/18] Merge #88 into develop from fix/develop_kj_knowledge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 提示词优化 * fix/develop_kj_knowledge: (5 commits squashed) - Agent应用中添加知识库的配置字段(提示词修改、反思给默认值) - 提示词优化 - 提示词优化 - 提示词优化 - 提示词优化 Signed-off-by: aliyun8644380055 Reviewed-by: aliyun6762716068 Merged-by: aliyun6762716068 CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/88 --- .../reflection_engine/example/example.json | 48 ++++--------------- .../reflection_engine/self_reflexion.py | 20 +++++++- .../utils/prompt/prompts/reflexion.jinja2 | 6 ++- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/api/app/core/memory/storage_services/reflection_engine/example/example.json b/api/app/core/memory/storage_services/reflection_engine/example/example.json index 18a2b185..09429829 100644 --- a/api/app/core/memory/storage_services/reflection_engine/example/example.json +++ b/api/app/core/memory/storage_services/reflection_engine/example/example.json @@ -2,52 +2,39 @@ "memory_verify": { "source_data": [ { - "statement_name": "用户是2023年春天去北京工作的。", - "statement_id": "62beac695b1346f4871740a45db88782" + "statement_name": "我是 2023 年春天去北京工作的,后来基本一直都在北京上班,也没怎么换过城市。不过后来公司调整,2024 年上半年我被调到上海待了差不多半年,那段时间每天都是在上海办公室打卡。当时入职资料用的还是我之前的身份信息,身份证号是 11010119950308123X,银行卡是 6222023847595898,这些一直没变。对了,其实我 从 2023 年开始就一直在北京生活,从来没有长期离开过北京,上海那段更多算是远程配合。" }, { - "statement_name": "用户后来基本一直都在北京上班。", - "statement_id": "4cba5ac08b674d7fb1e2ae634d2b8f0b" + "statement_name": "用户后来基本一直都在北京上班。" }, { - "statement_name": "用户从2023年开始就一直在北京生活。", - "statement_id": "e612a44da4db483993c350df7c97a1a1" + "statement_name": "用户从2023年开始就一直在北京生活。" }, { - "statement_name": "用户从来没有长期离开过北京。", - "statement_id": "b3c787a2e33c49f7981accabbbb4538a" + "statement_name": "用户从来没有长期离开过北京。" }, { - "statement_name": "由于公司调整,用户在2024年上半年被调到上海待了差不多半年。", - "statement_id": "64cde4230cb24a4da726e7db9e7aa616" + "statement_name": "由于公司调整,用户在2024年上半年被调到上海待了差不多半年。" }, { - "statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。", - "statement_id": "8b1b12e23b844b8088dfeb67da6ad669" + "statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。" }, { - "statement_name": "用户在入职时使用的身份信息是之前的,身份证号为11010119950308123X。", - "statement_id": "030afd362e9b4110b139e68e5d3e7143" + "statement_name": "用户在入职时使用的身份信息是之前的,身份证号为11010119950308123X。" }, { - "statement_name": "用户的银行卡号是6222023847595898。", - "statement_id": "6c7567cd1f3c478bb42d1b65383e6f2f" + "statement_name": "用户的银行卡号是6222023847595898。" }, { - "statement_name": "用户的身份信息和银行卡信息一直没变。", - "statement_id": "b3ca618e1e204b83bebd70e75cf2073f" + "statement_name": "用户的身份信息和银行卡信息一直没变。" }, { - "statement_name": "用户认为在上海的那段时间更多算是远程配合。", - "statement_id": "150af89d2c154e6eb41ff1a91e37f962" + "statement_name": "用户认为在上海的那段时间更多算是远程配合。" } ], "databasets": [ { "entity1_name": "Person", - "description": "表示人类个体的通用类型", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "用户", "entity2": { "description": "叙述者,讲述个人工作与生活经历的个体", "name": "用户" @@ -55,9 +42,6 @@ }, { "entity1_name": "用户", - "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "身份信息", "entity2": { "description": "用于个人身份识别的数据", "name": "身份信息" @@ -65,9 +49,6 @@ }, { "entity1_name": "用户", - "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "6222023847595898", "entity2": { "description": "用户的银行卡号码", "name": "6222023847595898" @@ -76,33 +57,24 @@ { "entity1_name": "用户", "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "上海办公室", "entity2": { "entity_idx": 1, "aliases": ["上海办"], - "description": "位于上海的工作办公场所", "name": "上海办公室" } }, { "entity1_name": "用户", "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "北京", "entity2": { "aliases": ["京", "京城", "北平"], - "description": "中国的首都城市,用户主要工作和生活所在地", "name": "北京" } }, { "entity1_name": "11010119950308123X", "description": "具体的身份证号码值", - "statement_id": "030afd362e9b4110b139e68e5d3e7143", - "entity2_name": "身份证号", "entity2": { - "description": "中华人民共和国公民的身份号码", "name": "身份证号" } } diff --git a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py index 97f51fb9..e9fb8855 100644 --- a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py +++ b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py @@ -387,7 +387,7 @@ class ReflectionEngine: result_data['memory_verifies'] = memory_verifies result_data['quality_assessments'] = quality_assessments conflicts_found='' - + REMOVE_KEYS = {"created_at", "expired_at","relationship","predicate","statement_id","id","statement_id","relationship_statement_id"} # Clearn conflict_data,And memory_verify和quality_assessment cleaned_conflict_data = [] for item in conflict_data: @@ -396,7 +396,23 @@ class ReflectionEngine: 'conflict': item['conflict'] } cleaned_conflict_data.append(cleaned_item) - + cleaned_conflict_data_=[] + for item in conflict_data: + cleaned_data = [] + for row in item.get("data", []): + # 删除 created_at / expired_at + cleaned_row = { + k: v + for k, v in row.items() + if k not in REMOVE_KEYS + } + cleaned_data.append(cleaned_row) + cleaned_item = { + "data": cleaned_data, + "conflict": item.get("conflict"), + } + cleaned_conflict_data_.append(cleaned_item) + print(cleaned_conflict_data_) # 3. 解决冲突 solved_data = await self._resolve_conflicts(cleaned_conflict_data, source_data) if not solved_data: diff --git a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 index 99476c82..91b067ee 100644 --- a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 @@ -111,7 +111,8 @@ - 隐私保护优先: 所有输出记录必须完成隐私脱敏 - 脱敏变更记录: 隐私脱敏变更也必须在change字段中记录{% endif %} - 不可修改数据: 数据被判定为正确时不可修改,无数据可输出时为空 -- 输出的结果reflexion字段中的reason字段和solution不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true)等原数据字段以及涉及需要修改的字段以及内容 +- 输出的结果reflexion字段中的reason字段和solution不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true、memory_verify=false)等原数据字段以及涉及需要修改的字段以及内容, + ,如果是FACT,只记录事实冲突相关的数据;如果是TIME,只记录时间冲突相关的数据;如果是HYBRID,则记录所有冲突相关的数据 **变更记录格式**: ```json @@ -158,7 +159,8 @@ "conflict": true }, "reflexion": { - "reason": "该冲突类型的原因分析", + "reason": "该冲突类型的原因分析,如果是FACT就是存在事实冲突,分析该冲突原因,如果是TIME就是存在时间冲突,分析该冲突原因,如果是HYBRID,可以输出存在时间与事实的混合冲突再添加上原因分析, + 不可以随意分配冲突类型以及原因", "solution": "该冲突类型的解决方案" }, "resolved": { From 8d6e773a10f30f7f189baeb51a855c041f126de7 Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Wed, 31 Dec 2025 11:21:12 +0800 Subject: [PATCH 09/18] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 index 91b067ee..897681b0 100644 --- a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 @@ -9,9 +9,7 @@ ## 任务目标 作为数据冲突解决专家,分析冲突原因,按类型分组处理,为每种冲突生成独立解决方案。 - **数据关系**: statement_databasets中的statement_id对应data中的记录,statement_created_at为用户输入时间。 - **处理模式**: - memory_verify=false: 仅处理数据冲突 - memory_verify=true: 处理数据冲突 + 隐私脱敏 @@ -160,8 +158,8 @@ }, "reflexion": { "reason": "该冲突类型的原因分析,如果是FACT就是存在事实冲突,分析该冲突原因,如果是TIME就是存在时间冲突,分析该冲突原因,如果是HYBRID,可以输出存在时间与事实的混合冲突再添加上原因分析, - 不可以随意分配冲突类型以及原因", - "solution": "该冲突类型的解决方案" + 不可以随意分配冲突类型以及原因,不允许输出字段比如(statement、description、entity1_name、entity2_name、name、memory_verify、expired_at设为2024)等类似这种", + "solution": "该冲突类型的解决方案(不允许输出字段比如(statement、description、entity1_name、entity2_name、name、memory_verify、expired_at设为2024)等类似这种)" }, "resolved": { "original_memory_id": "被设为失效的记忆id", From b96a63fb2201219c02a401ad9087856ee2112515 Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Wed, 31 Dec 2025 11:24:30 +0800 Subject: [PATCH 10/18] =?UTF-8?q?=E8=A1=A8=E7=9A=84=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E5=80=BC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/validators/memory_config_validators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/api/app/core/validators/memory_config_validators.py b/api/app/core/validators/memory_config_validators.py index 84c5b20c..eb2aaad8 100644 --- a/api/app/core/validators/memory_config_validators.py +++ b/api/app/core/validators/memory_config_validators.py @@ -197,6 +197,10 @@ def validate_embedding_model( embedding_id, "embedding", db, tenant_id, required=True, config_id=config_id, workspace_id=workspace_id ) + print(100*'-') + print(embedding_uuid) + print(_) + print(100*'-') if embedding_uuid is None: raise InvalidConfigError( From 4f9b090b34e7c6a769e06a2811eb1c417b7f072a Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Wed, 31 Dec 2025 11:28:37 +0800 Subject: [PATCH 11/18] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 index 897681b0..d6e67714 100644 --- a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 @@ -182,4 +182,5 @@ - **resolved.change**: 包含详细变更信息 - 无需修改的冲突类型resolved为null - 与baseline不匹配的冲突类型不包含在results中 -模式参考: {{ json_schema }} \ No newline at end of file +模式参考: {{ json_schema }} + From 8985d136357e654a56bf23c386d8510c0ca2fd7f Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Wed, 31 Dec 2025 11:33:35 +0800 Subject: [PATCH 12/18] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 index d6e67714..ed3aad32 100644 --- a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 @@ -158,8 +158,8 @@ }, "reflexion": { "reason": "该冲突类型的原因分析,如果是FACT就是存在事实冲突,分析该冲突原因,如果是TIME就是存在时间冲突,分析该冲突原因,如果是HYBRID,可以输出存在时间与事实的混合冲突再添加上原因分析, - 不可以随意分配冲突类型以及原因,不允许输出字段比如(statement、description、entity1_name、entity2_name、name、memory_verify、expired_at设为2024)等类似这种", - "solution": "该冲突类型的解决方案(不允许输出字段比如(statement、description、entity1_name、entity2_name、name、memory_verify、expired_at设为2024)等类似这种)" + 不可以随意分配冲突类型以及原因,不允许输出字段比如(statement、description、entity1_name、entity2_name、name、memory_verify、expired_at、conflict)等类似这种", + "solution": "该冲突类型的解决方案(不允许输出字段比如(statement、description、entity1_name、entity2_name、name、memory_verify、expired_at、conflict)等类似这种)" }, "resolved": { "original_memory_id": "被设为失效的记忆id", From f529525fbdd667ff920cf3ca354f81fc110ffdaf Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 31 Dec 2025 11:34:21 +0800 Subject: [PATCH 13/18] [bugfix] model_parameters JSON serializable --- api/app/services/multi_agent_service.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/api/app/services/multi_agent_service.py b/api/app/services/multi_agent_service.py index f4bc1e40..aaa32b26 100644 --- a/api/app/services/multi_agent_service.py +++ b/api/app/services/multi_agent_service.py @@ -290,12 +290,22 @@ class MultiAgentService: else: execution_config_data = convert_uuids_to_str(data.execution_config.model_dump()) + # 处理 model_parameters(可能是 None、字典或 Pydantic 模型) + if data.model_parameters is None: + model_parameters_data = None + elif isinstance(data.model_parameters, dict): + # 过滤掉值为 None 的字段 + model_parameters_data = {k: v for k, v in data.model_parameters.items() if v is not None} + else: + # 过滤掉值为 None 的字段 + model_parameters_data = {k: v for k, v in data.model_parameters.model_dump().items() if v is not None} + config = MultiAgentConfig( app_id=app_id, master_agent_id=data.master_agent_id, master_agent_name=data.master_agent_name, default_model_config_id=data.default_model_config_id, - model_parameters=data.model_parameters, + model_parameters=model_parameters_data, orchestration_mode=data.orchestration_mode, sub_agents=sub_agents_data, # routing_rules=routing_rules_data, From f5a057ddc555cf0e7e33cfe1f22cd98ec048ec0c Mon Sep 17 00:00:00 2001 From: lixinyue <2569494688@qq.com> Date: Wed, 31 Dec 2025 11:39:30 +0800 Subject: [PATCH 14/18] =?UTF-8?q?data=5Fconfig=E6=95=B0=E6=8D=AE=E5=BA=93?= =?UTF-8?q?=E9=BB=98=E8=AE=A4=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/app/models/data_config_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/app/models/data_config_model.py b/api/app/models/data_config_model.py index 870d46b2..2914432e 100644 --- a/api/app/models/data_config_model.py +++ b/api/app/models/data_config_model.py @@ -51,8 +51,8 @@ class DataConfig(Base): # 自我反思配置 enable_self_reflexion = Column(Boolean, default=False, comment="是否启用自我反思") iteration_period = Column(String, default="3", comment="反思迭代周期") - reflexion_range = Column(String, default="retrieval", comment="反思范围:部分/全部") - baseline = Column(String, default="time", comment="基线:时间/事实/时间和事实") + reflexion_range = Column(String, default="partial", comment="反思范围:部分/全部") + baseline = Column(String, default="TIME", comment="基线:时间/事实/时间和事实") reflection_model_id = Column(String, nullable=True, comment="反思模型ID") memory_verify = Column(Boolean, default=True, comment="记忆验证") quality_assessment = Column(Boolean, default=True, comment="质量评估") From 8081c15d1107c23f965532979a9147702429c8ab Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 31 Dec 2025 12:05:42 +0800 Subject: [PATCH 15/18] [bugfix] model_parameters params --- api/app/services/master_agent_router.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/api/app/services/master_agent_router.py b/api/app/services/master_agent_router.py index 04c0daa2..5ff580f8 100644 --- a/api/app/services/master_agent_router.py +++ b/api/app/services/master_agent_router.py @@ -361,14 +361,14 @@ class MasterAgentRouter: "model_name": api_key_config.model_name } ) - temperature = 0.3 # 决策任务使用较低温度 - max_tokens = 1000 - if self.model_parameters: - temperature = self.model_parameters.temperature - max_tokens = self.model_parameters.max_tokens + # temperature = 0.3 # 决策任务使用较低温度 + # max_tokens = 1000 + # if self.model_parameters: + # temperature = self.model_parameters["temperature"] + # max_tokens = self.model_parameters["max_tokens"] - extra_params = {"temperature": temperature, - "max_tokens":max_tokens + extra_params = {"temperature": self.model_parameters.get("temperature", 0.3), + "max_tokens":self.model_parameters.get("max_tokens", 1000) } # 创建 RedBearModelConfig model_config = RedBearModelConfig( From e08e761319d69aa62d0de897da7d0eb823c8b652 Mon Sep 17 00:00:00 2001 From: mengyonghao <1533512157@qq.com> Date: Wed, 31 Dec 2025 12:06:50 +0800 Subject: [PATCH 16/18] fix(workflow): adapt node ID regex to support symbols --- api/app/core/workflow/nodes/end/node.py | 2 +- api/app/templates/workflows/simple_qa/template.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/api/app/core/workflow/nodes/end/node.py b/api/app/core/workflow/nodes/end/node.py index efc62dc5..65bb6cb5 100644 --- a/api/app/core/workflow/nodes/end/node.py +++ b/api/app/core/workflow/nodes/end/node.py @@ -61,7 +61,7 @@ class EndNode(BaseNode): 引用的节点 ID 列表 """ # 匹配 {{node_id.xxx}} 格式 - pattern = r'\{\{([a-zA-Z0-9_]+)\.[a-zA-Z0-9_]+\}\}' + pattern = r'\{\{([a-zA-Z0-9_-]+)\.[a-zA-Z0-9_]+\}\}' matches = re.findall(pattern, template) return list(set(matches)) # 去重 diff --git a/api/app/templates/workflows/simple_qa/template.yml b/api/app/templates/workflows/simple_qa/template.yml index dc3a7c70..0843744d 100644 --- a/api/app/templates/workflows/simple_qa/template.yml +++ b/api/app/templates/workflows/simple_qa/template.yml @@ -41,8 +41,6 @@ nodes: - 使用友好、礼貌的语气 - 适当使用格式化(如列表、段落)提高可读性 - - role: user - content: "{{sys.message}}" model_id: null temperature: 0.7 From 742d54342b03ceffcf2f5e10a594511b110caf63 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Wed, 31 Dec 2025 12:58:30 +0800 Subject: [PATCH 17/18] [fix]parsed excel document error:float division by zero --- .../core/rag/deepdoc/parser/excel_parser.py | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py index a161f4ca..d66a21a8 100644 --- a/api/app/core/rag/deepdoc/parser/excel_parser.py +++ b/api/app/core/rag/deepdoc/parser/excel_parser.py @@ -48,7 +48,6 @@ class RAGExcelParser: logging.info(f"pandas with default engine load error: {ex}, try calamine instead") file_like_object.seek(0) df = pd.read_excel(file_like_object, engine="calamine") - print("lxc1") return RAGExcelParser._dataframe_to_workbook(df) except Exception as e_pandas: raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") @@ -215,19 +214,35 @@ class RAGExcelParser: continue if not rows: continue + # 获取表头 ti = list(rows[0]) - for r in list(rows[1:]): - fields = [] - for i, c in enumerate(r): - if not c.value: - continue - t = str(ti[i].value) if i < len(ti) else "" - t += (":" if t else "") + str(c.value) - fields.append(t) - line = "; ".join(fields) - if sheetname.lower().find("sheet") < 0: - line += " ——" + sheetname - res.append(line) + header_fields = [] + for cell in ti: + if cell.value: # 只添加有值的表头 + header_fields.append(str(cell.value)) + + # 如果有数据行,处理数据行;否则只处理表头 + data_rows = rows[1:] + if data_rows: + for r in data_rows: + fields = [] + for i, c in enumerate(r): + if not c.value: + continue + t = str(ti[i].value) if i < len(ti) else "" + t += (":" if t else "") + str(c.value) + fields.append(t) + line = "; ".join(fields) + if sheetname.lower().find("sheet") < 0: + line += " ——" + sheetname + res.append(line) + else: + # 只有表头的情况 + if header_fields: + line = "; ".join(header_fields) + if sheetname.lower().find("sheet") < 0: + line += " ——" + sheetname + res.append(line) return res @staticmethod From c6cd2e583962aeb2d6ceeac5b456bb31d8ebc7b1 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Sun, 4 Jan 2026 15:18:44 +0800 Subject: [PATCH 18/18] [fix]get knowledge graph entity types --- api/app/controllers/knowledge_controller.py | 100 ++++++++++---------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/api/app/controllers/knowledge_controller.py b/api/app/controllers/knowledge_controller.py index bca69050..deed5723 100644 --- a/api/app/controllers/knowledge_controller.py +++ b/api/app/controllers/knowledge_controller.py @@ -1,26 +1,28 @@ -from typing import Optional import datetime import json +from typing import Optional import uuid + from fastapi import APIRouter, Depends, HTTPException, status, Query from sqlalchemy import or_ from sqlalchemy.orm import Session +from app.celery_app import celery_app +from app.core.logging_config import get_api_logger +from app.core.rag.common import settings +from app.core.rag.llm.chat_model import Base +from app.core.rag.nlp import rag_tokenizer, search +from app.core.rag.prompts.generator import graph_entity_types +from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory +from app.core.response_utils import success from app.db import get_db from app.dependencies import get_current_user from app.models.user_model import User from app.models import knowledge_model, document_model, file_model from app.schemas import knowledge_schema from app.schemas.response_schema import ApiResponse -from app.core.response_utils import success from app.services import knowledge_service, document_service -from app.core.rag.llm.chat_model import Base -from app.core.rag.prompts.generator import graph_entity_types -from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory -from app.core.logging_config import get_api_logger -from app.core.rag.nlp import rag_tokenizer, search -from app.core.rag.common import settings -from app.celery_app import celery_app +from app.services.model_service import ModelConfigService # Obtain a dedicated API logger api_logger = get_api_logger() @@ -47,6 +49,45 @@ def get_parser_types(): return success(msg="Successfully obtained the knowledge parser type", data=list(knowledge_model.ParserType)) +@router.get("/knowledge_graph_entity_types", response_model=ApiResponse) +async def get_knowledge_graph_entity_types( + llm_id: uuid.UUID, + scenario: str, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """ + get knowledge graph entity types based on llm_id + """ + api_logger.info(f"Obtain details of the knowledge graph: llm_id={llm_id}, username: {current_user.username}") + + try: + # 1. Check whether the model exists + api_logger.debug(f"Check whether the model exists: {llm_id}") + config = ModelConfigService.get_model_by_id(db=db, model_id=llm_id) + + if not config: + api_logger.warning( + f"The model does not exist or you do not have permission to access it: llm_id={llm_id}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The model does not exist or you do not have permission to access it" + ) + # 2. Prepare to configure chat_mdl information + chat_model = Base( + key=config.api_keys[0].api_key, + model_name=config.api_keys[0].model_name, + base_url=config.api_keys[0].api_base + ) + response = graph_entity_types(chat_model, scenario) + return success(data=response, msg="Successfully obtained knowledge graph entity types") + except HTTPException: + raise + except Exception as e: + api_logger.error(f"get knowledge graph entity types failed: llm_id={llm_id} - {str(e)}") + raise + + @router.get("/knowledges", response_model=ApiResponse) async def get_knowledges( parent_id: Optional[uuid.UUID] = Query(None, description="parent folder id"), @@ -379,7 +420,7 @@ async def delete_knowledge_graph( current_user: User = Depends(get_current_user) ): """ - Soft-delete knowledge graph + delete knowledge graph """ api_logger.info(f"Request to delete knowledge graph: knowledge_id={knowledge_id}, username: {current_user.username}") @@ -442,42 +483,3 @@ async def rebuild_knowledge_graph( except Exception as e: api_logger.error(f"Failed to rebuild knowledge graph: knowledge_id={knowledge_id} - {str(e)}") raise - - -@router.get("/{knowledge_id}/knowledge_graph_entity_types", response_model=ApiResponse) -async def get_knowledge_graph_entity_types( - knowledge_id: uuid.UUID, - scenario: str, - db: Session = Depends(get_db), - current_user: User = Depends(get_current_user) -): - """ - get knowledge graph entity types based on knowledge_id - """ - api_logger.info(f"Obtain details of the knowledge graph: knowledge_id={knowledge_id}, username: {current_user.username}") - - try: - # 1. Check whether the knowledge base exists - api_logger.debug(f"Check whether the knowledge base exists: {knowledge_id}") - db_knowledge = knowledge_service.get_knowledge_by_id(db, knowledge_id=knowledge_id, current_user=current_user) - - if not db_knowledge: - api_logger.warning( - f"The knowledge base does not exist or you do not have permission to access it: knowledge_id={knowledge_id}") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The knowledge base does not exist or you do not have permission to access it" - ) - # 2. Prepare to configure chat_mdl information - chat_model = Base( - key=db_knowledge.llm.api_keys[0].api_key, - model_name=db_knowledge.llm.api_keys[0].model_name, - base_url=db_knowledge.llm.api_keys[0].api_base - ) - response = graph_entity_types(chat_model, scenario) - return success(data=response, msg="Successfully obtained knowledge graph entity types") - except HTTPException: - raise - except Exception as e: - api_logger.error(f"get knowledge graph entity types failed: knowledge_id={knowledge_id} - {str(e)}") - raise
{sheetname}