From 8ea243c572c65746acc3422d7e4c6f8d0344738d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=B0=E6=9C=88?= Date: Tue, 30 Dec 2025 08:37:37 +0000 Subject: [PATCH 1/8] Merge #87 into develop from fix/develop_kj_knowledge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent应用中添加知识库的配置字段(提示词修改、反思给默认值) * fix/develop_kj_knowledge: (1 commits squashed) - Agent应用中添加知识库的配置字段(提示词修改、反思给默认值) Signed-off-by: aliyun8644380055 Reviewed-by: aliyun6762716068 Merged-by: aliyun6762716068 CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/87 --- api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 | 2 +- api/app/schemas/app_schema.py | 1 + api/app/schemas/memory_reflection_schemas.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 b/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 index 200f2667..e649897a 100644 --- a/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/evaluate.jinja2 @@ -86,5 +86,5 @@ - **quality_assessment**: quality_assessment=true时输出评估对象,否则为null(注意:- summary输出的结果不允许含有(expired_at设为2024-01-01T00:00:00Z)等原数据字段以及涉及需要修改的字段以及内容) - **memory_verify**: memory_verify=true时输出隐私检测对象,否则为null - (注意:- summary输出的结果不允许含有(expired_at设为2024-01-01T00:00:00Z)等原数据字段以及涉及需要修改的字段以及内容) + (注意:- summary输出的结果不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true\memory_verify=false)等原数据字段以及涉及需要修改的字段以及内容) 模式参考:{{ json_schema }} \ No newline at end of file diff --git a/api/app/schemas/app_schema.py b/api/app/schemas/app_schema.py index de0a4c53..81cd704d 100644 --- a/api/app/schemas/app_schema.py +++ b/api/app/schemas/app_schema.py @@ -32,6 +32,7 @@ class KnowledgeRetrievalConfig(BaseModel): ) reranker_id: Optional[str] = Field(default=None, description="多知识库结果融合的模型ID") reranker_top_k: int = Field(default=10, ge=0, le=1024, description="多知识库结果融合的模型参数") + use_graph: bool = Field(default=False, description="是否使用图搜索") class ToolConfig(BaseModel): diff --git a/api/app/schemas/memory_reflection_schemas.py b/api/app/schemas/memory_reflection_schemas.py index ada92cf2..860f1ef1 100644 --- a/api/app/schemas/memory_reflection_schemas.py +++ b/api/app/schemas/memory_reflection_schemas.py @@ -12,8 +12,8 @@ class Memory_Reflection(BaseModel): config_id: Optional[int] = None reflection_enabled: bool reflection_period_in_hours: str - reflexion_range: str - baseline: str + reflexion_range: Optional[str] = "partial" + baseline: Optional[str] = "TIME" reflection_model_id: str memory_verify: bool quality_assessment: bool From 82b9925448f684f0768d7e7072d13eaa3f1fcd2a Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Tue, 30 Dec 2025 17:44:37 +0800 Subject: [PATCH 2/8] [fix]document:pandas.read_excel error: Missing optional dependency 'python-calamine'. --- api/app/models/document_model.py | 5 ++++- api/app/models/knowledge_model.py | 5 ++++- api/pyproject.toml | 2 ++ api/requirements.txt | 2 ++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/api/app/models/document_model.py b/api/app/models/document_model.py index db9280c6..fb43d44d 100644 --- a/api/app/models/document_model.py +++ b/api/app/models/document_model.py @@ -26,6 +26,7 @@ class Document(Base): "html4excel": False, "graphrag": { "use_graphrag": False, + "scene_name": "", "entity_types": [ "organization", "person", @@ -33,7 +34,9 @@ class Document(Base): "event", "category" ], - "method": "general" + "method": "general", + "resolution": True, + "community": True } }, comment="default parser config") chunk_num = Column(Integer, default=0, comment="chunk num") diff --git a/api/app/models/knowledge_model.py b/api/app/models/knowledge_model.py index 6d3465f9..8f0909d3 100644 --- a/api/app/models/knowledge_model.py +++ b/api/app/models/knowledge_model.py @@ -65,6 +65,7 @@ class Knowledge(Base): "html4excel": False, "graphrag": { "use_graphrag": False, + "scene_name": "", "entity_types": [ "organization", "person", @@ -72,7 +73,9 @@ class Knowledge(Base): "event", "category" ], - "method": "general" + "method": "general", + "resolution": True, + "community": True } }, comment="default parser config") diff --git a/api/pyproject.toml b/api/pyproject.toml index 901858e6..2dcc706d 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -135,6 +135,8 @@ dependencies = [ "graspologic==3.4.5.dev2", "markdown-to-json==2.1.1", "valkey==6.0.2", + "python-calamine>=0.4.0", + "xlrd==2.0.2" ] [tool.pytest.ini_options] diff --git a/api/requirements.txt b/api/requirements.txt index 5530a9e3..99252e09 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -129,3 +129,5 @@ editdistance==0.8.1 graspologic==3.4.5.dev2 markdown-to-json==2.1.1 valkey==6.0.2 +python-calamine>=0.4.0 +xlrd==2.0.2 From 909c536b473f554d1bab52fea7b9685bbaa950ca Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Tue, 30 Dec 2025 18:20:34 +0800 Subject: [PATCH 3/8] [fix]parsed excel document error:float division by zero --- api/app/core/rag/deepdoc/parser/excel_parser.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py index b6e1e4a1..856155f1 100644 --- a/api/app/core/rag/deepdoc/parser/excel_parser.py +++ b/api/app/core/rag/deepdoc/parser/excel_parser.py @@ -52,13 +52,19 @@ class RAGExcelParser: raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") @staticmethod - def _clean_dataframe(df: pd.DataFrame): + def _clean_dataframe(df): def clean_string(s): if isinstance(s, str): return ILLEGAL_CHARACTERS_RE.sub(" ", s) return s - return df.apply(lambda col: col.map(clean_string)) + # 处理单 DataFrame 或字典(多 Sheet) + if isinstance(df, dict): + return {sheet: RAGExcelParser._clean_dataframe(sheet_df) for sheet, sheet_df in df.items()} + elif isinstance(df, pd.DataFrame): + return df.apply(lambda col: col.map(clean_string)) + else: + raise ValueError(f"Unsupported type for cleaning: {type(df)}") @staticmethod def _dataframe_to_workbook(df): From 775d36b16b6fa0566ebd50bb8d1284752c4625d1 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Tue, 30 Dec 2025 19:05:29 +0800 Subject: [PATCH 4/8] [fix]parsed excel document error:float division by zero --- api/app/core/rag/deepdoc/parser/excel_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py index 856155f1..07e39f4a 100644 --- a/api/app/core/rag/deepdoc/parser/excel_parser.py +++ b/api/app/core/rag/deepdoc/parser/excel_parser.py @@ -69,7 +69,8 @@ class RAGExcelParser: @staticmethod def _dataframe_to_workbook(df): # if contains multiple sheets use _dataframes_to_workbook - if isinstance(df, dict) and len(df) > 1: + # if isinstance(df, dict) and len(df) > 1: + if isinstance(df, dict): return RAGExcelParser._dataframes_to_workbook(df) df = RAGExcelParser._clean_dataframe(df) From 37f72f919f51f692fe6fc7be2a611f05b000b902 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Tue, 30 Dec 2025 19:31:54 +0800 Subject: [PATCH 5/8] [fix]parsed excel document error:float division by zero --- api/app/core/rag/deepdoc/parser/excel_parser.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py index 07e39f4a..f7601ee3 100644 --- a/api/app/core/rag/deepdoc/parser/excel_parser.py +++ b/api/app/core/rag/deepdoc/parser/excel_parser.py @@ -42,35 +42,31 @@ class RAGExcelParser: file_like_object.seek(0) try: dfs = pd.read_excel(file_like_object, sheet_name=None) + if isinstance(dfs, dict): + dfs = next(iter(dfs.values())) return RAGExcelParser._dataframe_to_workbook(dfs) except Exception as ex: logging.info(f"pandas with default engine load error: {ex}, try calamine instead") file_like_object.seek(0) df = pd.read_excel(file_like_object, engine="calamine") + print(df) return RAGExcelParser._dataframe_to_workbook(df) except Exception as e_pandas: raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") @staticmethod - def _clean_dataframe(df): + def _clean_dataframe(df: pd.DataFrame): def clean_string(s): if isinstance(s, str): return ILLEGAL_CHARACTERS_RE.sub(" ", s) return s - # 处理单 DataFrame 或字典(多 Sheet) - if isinstance(df, dict): - return {sheet: RAGExcelParser._clean_dataframe(sheet_df) for sheet, sheet_df in df.items()} - elif isinstance(df, pd.DataFrame): - return df.apply(lambda col: col.map(clean_string)) - else: - raise ValueError(f"Unsupported type for cleaning: {type(df)}") + return df.apply(lambda col: col.map(clean_string)) @staticmethod def _dataframe_to_workbook(df): # if contains multiple sheets use _dataframes_to_workbook - # if isinstance(df, dict) and len(df) > 1: - if isinstance(df, dict): + if isinstance(df, dict) and len(df) > 1: return RAGExcelParser._dataframes_to_workbook(df) df = RAGExcelParser._clean_dataframe(df) From 07bcb54ed3320742cab0e469096899a0c2c1f031 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Wed, 31 Dec 2025 08:54:36 +0800 Subject: [PATCH 6/8] [fix]entity_resolution.py:199: SyntaxWarning: invalid escape sequence '\d' --- api/app/core/rag/graphrag/entity_resolution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/app/core/rag/graphrag/entity_resolution.py b/api/app/core/rag/graphrag/entity_resolution.py index d648e5a1..20af8818 100644 --- a/api/app/core/rag/graphrag/entity_resolution.py +++ b/api/app/core/rag/graphrag/entity_resolution.py @@ -196,7 +196,7 @@ class EntityResolution(Extractor): ans_list = [] records = [r.strip() for r in results.split(record_delimiter)] for record in records: - pattern_int = f"{re.escape(entity_index_delimiter)}(\d+){re.escape(entity_index_delimiter)}" + pattern_int = fr"{re.escape(entity_index_delimiter)}(\d+){re.escape(entity_index_delimiter)}" match_int = re.search(pattern_int, record) res_int = int(str(match_int.group(1) if match_int else '0')) if res_int > records_length: From c78dc1fd4719cbb543b57b7cf69a6c397a083416 Mon Sep 17 00:00:00 2001 From: lixiangcheng1 Date: Wed, 31 Dec 2025 09:51:00 +0800 Subject: [PATCH 7/8] [fix]parsed excel document error:float division by zero --- api/app/core/rag/app/naive.py | 1 + .../core/rag/deepdoc/parser/excel_parser.py | 77 +++++++++++++++---- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/api/app/core/rag/app/naive.py b/api/app/core/rag/app/naive.py index 6d6b933a..23f0c4ba 100644 --- a/api/app/core/rag/app/naive.py +++ b/api/app/core/rag/app/naive.py @@ -672,6 +672,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, excel_parser = ExcelParser() if parser_config.get("html4excel"): sections = [(_, "") for _ in excel_parser.html(binary, 12) if _] + parser_config["chunk_token_num"] = 0 else: sections = [(_, "") for _ in excel_parser(binary) if _] parser_config["chunk_token_num"] = 12800 diff --git a/api/app/core/rag/deepdoc/parser/excel_parser.py b/api/app/core/rag/deepdoc/parser/excel_parser.py index f7601ee3..a161f4ca 100644 --- a/api/app/core/rag/deepdoc/parser/excel_parser.py +++ b/api/app/core/rag/deepdoc/parser/excel_parser.py @@ -5,6 +5,7 @@ from io import BytesIO import pandas as pd from openpyxl import Workbook, load_workbook +from PIL import Image from app.core.rag.nlp import find_codec @@ -28,7 +29,7 @@ class RAGExcelParser: try: file_like_object.seek(0) - df = pd.read_csv(file_like_object) + df = pd.read_csv(file_like_object, on_bad_lines='skip') return RAGExcelParser._dataframe_to_workbook(df) except Exception as e_csv: @@ -42,14 +43,12 @@ class RAGExcelParser: file_like_object.seek(0) try: dfs = pd.read_excel(file_like_object, sheet_name=None) - if isinstance(dfs, dict): - dfs = next(iter(dfs.values())) return RAGExcelParser._dataframe_to_workbook(dfs) except Exception as ex: logging.info(f"pandas with default engine load error: {ex}, try calamine instead") file_like_object.seek(0) df = pd.read_excel(file_like_object, engine="calamine") - print(df) + print("lxc1") return RAGExcelParser._dataframe_to_workbook(df) except Exception as e_pandas: raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") @@ -68,7 +67,6 @@ class RAGExcelParser: # if contains multiple sheets use _dataframes_to_workbook if isinstance(df, dict) and len(df) > 1: return RAGExcelParser._dataframes_to_workbook(df) - df = RAGExcelParser._clean_dataframe(df) wb = Workbook() ws = wb.active @@ -80,15 +78,14 @@ class RAGExcelParser: for row_num, row in enumerate(df.values, 2): for col_num, value in enumerate(row, 1): ws.cell(row=row_num, column=col_num, value=value) - return wb - + @staticmethod def _dataframes_to_workbook(dfs: dict): wb = Workbook() default_sheet = wb.active wb.remove(default_sheet) - + for sheet_name, df in dfs.items(): df = RAGExcelParser._clean_dataframe(df) ws = wb.create_sheet(title=sheet_name) @@ -99,6 +96,52 @@ class RAGExcelParser: ws.cell(row=row_num, column=col_num, value=value) return wb + @staticmethod + def _extract_images_from_worksheet(ws, sheetname=None): + """ + Extract images from a worksheet and enrich them with vision-based descriptions. + + Returns: List[dict] + """ + images = getattr(ws, "_images", []) + if not images: + return [] + + raw_items = [] + + for img in images: + try: + img_bytes = img._data() + pil_img = Image.open(BytesIO(img_bytes)).convert("RGB") + + anchor = img.anchor + if hasattr(anchor, "_from") and hasattr(anchor, "_to"): + r1, c1 = anchor._from.row + 1, anchor._from.col + 1 + r2, c2 = anchor._to.row + 1, anchor._to.col + 1 + if r1 == r2 and c1 == c2: + span = "single_cell" + else: + span = "multi_cell" + else: + r1, c1 = anchor._from.row + 1, anchor._from.col + 1 + r2, c2 = r1, c1 + span = "single_cell" + + item = { + "sheet": sheetname or ws.title, + "image": pil_img, + "image_description": "", + "row_from": r1, + "col_from": c1, + "row_to": r2, + "col_to": c2, + "span_type": span, + } + raw_items.append(item) + except Exception: + continue + return raw_items + def html(self, fnm, chunk_rows=256): from html import escape @@ -131,7 +174,7 @@ class RAGExcelParser: tb = "" tb += f"" tb += tb_rows_0 - for r in list(rows[1 + chunk_i * chunk_rows : min(1 + (chunk_i + 1) * chunk_rows, len(rows))]): + for r in list(rows[1 + chunk_i * chunk_rows: min(1 + (chunk_i + 1) * chunk_rows, len(rows))]): tb += "" for i, c in enumerate(r): if c.value is None: @@ -154,7 +197,7 @@ class RAGExcelParser: except Exception as e: logging.warning(f"Parse spreadsheet error: {e}, trying to interpret as CSV file") file_like_object.seek(0) - df = pd.read_csv(file_like_object) + df = pd.read_csv(file_like_object, on_bad_lines='skip') df = df.replace(r"^\s*$", "", regex=True) return df.to_markdown(index=False) @@ -192,14 +235,14 @@ class RAGExcelParser: if fnm.split(".")[-1].lower().find("xls") >= 0: wb = RAGExcelParser._load_excel_to_workbook(BytesIO(binary)) total = 0 - + for sheetname in wb.sheetnames: - try: - ws = wb[sheetname] - total += len(list(ws.rows)) - except Exception as e: - logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}") - continue + try: + ws = wb[sheetname] + total += len(list(ws.rows)) + except Exception as e: + logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}") + continue return total if fnm.split(".")[-1].lower() in ["csv", "txt"]: From 6b0ee1b74a079f32d5482b52ae8a0836b19ef917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=B0=E6=9C=88?= Date: Wed, 31 Dec 2025 02:20:45 +0000 Subject: [PATCH 8/8] Merge #88 into develop from fix/develop_kj_knowledge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 提示词优化 * fix/develop_kj_knowledge: (5 commits squashed) - Agent应用中添加知识库的配置字段(提示词修改、反思给默认值) - 提示词优化 - 提示词优化 - 提示词优化 - 提示词优化 Signed-off-by: aliyun8644380055 Reviewed-by: aliyun6762716068 Merged-by: aliyun6762716068 CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/88 --- .../reflection_engine/example/example.json | 48 ++++--------------- .../reflection_engine/self_reflexion.py | 20 +++++++- .../utils/prompt/prompts/reflexion.jinja2 | 6 ++- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/api/app/core/memory/storage_services/reflection_engine/example/example.json b/api/app/core/memory/storage_services/reflection_engine/example/example.json index 18a2b185..09429829 100644 --- a/api/app/core/memory/storage_services/reflection_engine/example/example.json +++ b/api/app/core/memory/storage_services/reflection_engine/example/example.json @@ -2,52 +2,39 @@ "memory_verify": { "source_data": [ { - "statement_name": "用户是2023年春天去北京工作的。", - "statement_id": "62beac695b1346f4871740a45db88782" + "statement_name": "我是 2023 年春天去北京工作的,后来基本一直都在北京上班,也没怎么换过城市。不过后来公司调整,2024 年上半年我被调到上海待了差不多半年,那段时间每天都是在上海办公室打卡。当时入职资料用的还是我之前的身份信息,身份证号是 11010119950308123X,银行卡是 6222023847595898,这些一直没变。对了,其实我 从 2023 年开始就一直在北京生活,从来没有长期离开过北京,上海那段更多算是远程配合。" }, { - "statement_name": "用户后来基本一直都在北京上班。", - "statement_id": "4cba5ac08b674d7fb1e2ae634d2b8f0b" + "statement_name": "用户后来基本一直都在北京上班。" }, { - "statement_name": "用户从2023年开始就一直在北京生活。", - "statement_id": "e612a44da4db483993c350df7c97a1a1" + "statement_name": "用户从2023年开始就一直在北京生活。" }, { - "statement_name": "用户从来没有长期离开过北京。", - "statement_id": "b3c787a2e33c49f7981accabbbb4538a" + "statement_name": "用户从来没有长期离开过北京。" }, { - "statement_name": "由于公司调整,用户在2024年上半年被调到上海待了差不多半年。", - "statement_id": "64cde4230cb24a4da726e7db9e7aa616" + "statement_name": "由于公司调整,用户在2024年上半年被调到上海待了差不多半年。" }, { - "statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。", - "statement_id": "8b1b12e23b844b8088dfeb67da6ad669" + "statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。" }, { - "statement_name": "用户在入职时使用的身份信息是之前的,身份证号为11010119950308123X。", - "statement_id": "030afd362e9b4110b139e68e5d3e7143" + "statement_name": "用户在入职时使用的身份信息是之前的,身份证号为11010119950308123X。" }, { - "statement_name": "用户的银行卡号是6222023847595898。", - "statement_id": "6c7567cd1f3c478bb42d1b65383e6f2f" + "statement_name": "用户的银行卡号是6222023847595898。" }, { - "statement_name": "用户的身份信息和银行卡信息一直没变。", - "statement_id": "b3ca618e1e204b83bebd70e75cf2073f" + "statement_name": "用户的身份信息和银行卡信息一直没变。" }, { - "statement_name": "用户认为在上海的那段时间更多算是远程配合。", - "statement_id": "150af89d2c154e6eb41ff1a91e37f962" + "statement_name": "用户认为在上海的那段时间更多算是远程配合。" } ], "databasets": [ { "entity1_name": "Person", - "description": "表示人类个体的通用类型", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "用户", "entity2": { "description": "叙述者,讲述个人工作与生活经历的个体", "name": "用户" @@ -55,9 +42,6 @@ }, { "entity1_name": "用户", - "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "身份信息", "entity2": { "description": "用于个人身份识别的数据", "name": "身份信息" @@ -65,9 +49,6 @@ }, { "entity1_name": "用户", - "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "6222023847595898", "entity2": { "description": "用户的银行卡号码", "name": "6222023847595898" @@ -76,33 +57,24 @@ { "entity1_name": "用户", "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "上海办公室", "entity2": { "entity_idx": 1, "aliases": ["上海办"], - "description": "位于上海的工作办公场所", "name": "上海办公室" } }, { "entity1_name": "用户", "description": "叙述者,讲述个人工作与生活经历的个体", - "statement_id": "62beac695b1346f4871740a45db88782", - "entity2_name": "北京", "entity2": { "aliases": ["京", "京城", "北平"], - "description": "中国的首都城市,用户主要工作和生活所在地", "name": "北京" } }, { "entity1_name": "11010119950308123X", "description": "具体的身份证号码值", - "statement_id": "030afd362e9b4110b139e68e5d3e7143", - "entity2_name": "身份证号", "entity2": { - "description": "中华人民共和国公民的身份号码", "name": "身份证号" } } diff --git a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py index 97f51fb9..e9fb8855 100644 --- a/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py +++ b/api/app/core/memory/storage_services/reflection_engine/self_reflexion.py @@ -387,7 +387,7 @@ class ReflectionEngine: result_data['memory_verifies'] = memory_verifies result_data['quality_assessments'] = quality_assessments conflicts_found='' - + REMOVE_KEYS = {"created_at", "expired_at","relationship","predicate","statement_id","id","statement_id","relationship_statement_id"} # Clearn conflict_data,And memory_verify和quality_assessment cleaned_conflict_data = [] for item in conflict_data: @@ -396,7 +396,23 @@ class ReflectionEngine: 'conflict': item['conflict'] } cleaned_conflict_data.append(cleaned_item) - + cleaned_conflict_data_=[] + for item in conflict_data: + cleaned_data = [] + for row in item.get("data", []): + # 删除 created_at / expired_at + cleaned_row = { + k: v + for k, v in row.items() + if k not in REMOVE_KEYS + } + cleaned_data.append(cleaned_row) + cleaned_item = { + "data": cleaned_data, + "conflict": item.get("conflict"), + } + cleaned_conflict_data_.append(cleaned_item) + print(cleaned_conflict_data_) # 3. 解决冲突 solved_data = await self._resolve_conflicts(cleaned_conflict_data, source_data) if not solved_data: diff --git a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 index 99476c82..91b067ee 100644 --- a/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 +++ b/api/app/core/memory/utils/prompt/prompts/reflexion.jinja2 @@ -111,7 +111,8 @@ - 隐私保护优先: 所有输出记录必须完成隐私脱敏 - 脱敏变更记录: 隐私脱敏变更也必须在change字段中记录{% endif %} - 不可修改数据: 数据被判定为正确时不可修改,无数据可输出时为空 -- 输出的结果reflexion字段中的reason字段和solution不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true)等原数据字段以及涉及需要修改的字段以及内容 +- 输出的结果reflexion字段中的reason字段和solution不允许含有(expired_at设为2024-01-01T00:00:00Z、memory_verify=true、memory_verify=false)等原数据字段以及涉及需要修改的字段以及内容, + ,如果是FACT,只记录事实冲突相关的数据;如果是TIME,只记录时间冲突相关的数据;如果是HYBRID,则记录所有冲突相关的数据 **变更记录格式**: ```json @@ -158,7 +159,8 @@ "conflict": true }, "reflexion": { - "reason": "该冲突类型的原因分析", + "reason": "该冲突类型的原因分析,如果是FACT就是存在事实冲突,分析该冲突原因,如果是TIME就是存在时间冲突,分析该冲突原因,如果是HYBRID,可以输出存在时间与事实的混合冲突再添加上原因分析, + 不可以随意分配冲突类型以及原因", "solution": "该冲突类型的解决方案" }, "resolved": {
{sheetname}