Merge #37 into develop from fix/memory_reflection
反思输出输入格式统一
* fix/memory_reflection: (60 commits squashed)
- 新增反思功能(功能配置接口+反思celery后台检测反思的迭代周期)
- 新增反思功能(功能配置接口+反思celery后台检测反思的迭代周期)
- 新增反思功能(检测代码/规范化程序)
- 新增反思功能(检测代码/规范化程序)
- 新增反思功能(检测代码/规范化程序)
- 新增反思功能(检测代码/规范化程序)
- 新增反思功能(检测代码/规范化程序)
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- 反思优化
- Merge branch develop into fix/memory_reflection (Conflict resolved online)
# Conflicts:
# api/app/controllers/memory_reflection_controller.py
# api/app/schemas/memory_reflection_schemas.py
- 反思优化
- Merge remote-tracking branch 'origin/fix/memory_reflection' into fix/memory_reflection
- 统一输出
- 统一输出
- 统一输出
- Merge branch develop into fix/memory_reflection (Conflict resolved online)
# Conflicts:
# api/app/controllers/memory_reflection_controller.py
- 统一输出
- Merge remote-tracking branch 'origin/fix/memory_reflection' into fix/memory_reflection
- 统一输出
- 反思速度提升,从4分钟优化成1分10-40秒
- 反思速度提升,从4分钟优化成1分10-40秒
- 反思速度提升,从4分钟优化成1分10-40秒
- Merge branch develop into fix/memory_reflection (Conflict resolved online)
# Conflicts:
# api/app/core/memory/storage_services/reflection_engine/self_reflexion.py
- 反思速度提升,从4分钟优化成1分10-40秒
- Merge remote-tracking branch 'origin/fix/memory_reflection' into fix/memory_reflection
# Conflicts:
#	api/app/core/memory/storage_services/reflection_engine/self_reflexion.py
- 更新 self_reflexion.py
- 反思图谱添加边的修改
- Merge remote-tracking branch 'origin/fix/memory_reflection' into fix/memory_reflection
# Conflicts:
#	api/app/core/memory/storage_services/reflection_engine/self_reflexion.py
- 反思图谱添加边的修改
- 反思图谱添加边的修改
- 反思图谱添加边的修改
- 反思图谱添加边的修改
- 反思图谱添加边的修改
- update
# Conflicts:
# api/app/core/memory/storage_services/reflection_engine/self_reflexion.py
# api/app/core/memory/utils/prompt/prompts/reflexion.jinja2
- 反思BUG修复
- Merge remote-tracking branch 'origin/fix/memory_reflection' into fix/memory_reflection
- 反思BUG修复
- Merge branch develop into fix/memory_reflection (Conflict resolved online)
# Conflicts:
# api/app/core/memory/storage_services/reflection_engine/self_reflexion.py
- 反思BUG修复
- Merge remote-tracking branch 'origin/fix/memory_reflection' into fix/memory_reflection
- 反思输出输入格式统一
- Merge branch develop into fix/memory_reflection (Conflict resolved online)
# Conflicts:
# api/app/core/memory/utils/prompt/template_render.py
- 反思优化提示词,提升速度,删除多余LOG日志
- Merge remote-tracking branch 'origin/fix/memory_reflection' into fix/memory_reflection
Signed-off-by: aliyun8644380055 <accounts_68c0f5d519f260d93ee2997e@mail.teambition.com>
Reviewed-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com>
Merged-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com>
CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/37
This commit is contained in:
@@ -50,9 +50,7 @@
|
|||||||
"entity2_name": "用户",
|
"entity2_name": "用户",
|
||||||
"entity2": {
|
"entity2": {
|
||||||
"description": "叙述者,讲述个人工作与生活经历的个体",
|
"description": "叙述者,讲述个人工作与生活经历的个体",
|
||||||
"statement_id": "62beac695b1346f4871740a45db88782",
|
"name": "用户"
|
||||||
"name": "用户",
|
|
||||||
"id": "3d3896797b334572a80d57590026063d"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -62,9 +60,7 @@
|
|||||||
"entity2_name": "身份信息",
|
"entity2_name": "身份信息",
|
||||||
"entity2": {
|
"entity2": {
|
||||||
"description": "用于个人身份识别的数据",
|
"description": "用于个人身份识别的数据",
|
||||||
"statement_id": "030afd362e9b4110b139e68e5d3e7143",
|
"name": "身份信息"
|
||||||
"name": "身份信息",
|
|
||||||
"id": "aa766a517e82490599a9b3af54cfd933"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -74,9 +70,7 @@
|
|||||||
"entity2_name": "6222023847595898",
|
"entity2_name": "6222023847595898",
|
||||||
"entity2": {
|
"entity2": {
|
||||||
"description": "用户的银行卡号码",
|
"description": "用户的银行卡号码",
|
||||||
"statement_id": "6c7567cd1f3c478bb42d1b65383e6f2f",
|
"name": "6222023847595898"
|
||||||
"name": "6222023847595898",
|
|
||||||
"id": "610ba361918f4e68a65ce6ad06e5c7a0"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -88,9 +82,7 @@
|
|||||||
"entity_idx": 1,
|
"entity_idx": 1,
|
||||||
"aliases": ["上海办"],
|
"aliases": ["上海办"],
|
||||||
"description": "位于上海的工作办公场所",
|
"description": "位于上海的工作办公场所",
|
||||||
"statement_id": "8b1b12e23b844b8088dfeb67da6ad669",
|
"name": "上海办公室"
|
||||||
"name": "上海办公室",
|
|
||||||
"id": "fb702ef695c14e14af3e56786bc8815b"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -101,9 +93,7 @@
|
|||||||
"entity2": {
|
"entity2": {
|
||||||
"aliases": ["京", "京城", "北平"],
|
"aliases": ["京", "京城", "北平"],
|
||||||
"description": "中国的首都城市,用户主要工作和生活所在地",
|
"description": "中国的首都城市,用户主要工作和生活所在地",
|
||||||
"statement_id": "62beac695b1346f4871740a45db88782",
|
"name": "北京"
|
||||||
"name": "北京",
|
|
||||||
"id": "81b2d1a571bb46a08a2d7a1e87efb945"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -113,9 +103,7 @@
|
|||||||
"entity2_name": "身份证号",
|
"entity2_name": "身份证号",
|
||||||
"entity2": {
|
"entity2": {
|
||||||
"description": "中华人民共和国公民的身份号码",
|
"description": "中华人民共和国公民的身份号码",
|
||||||
"statement_id": "030afd362e9b4110b139e68e5d3e7143",
|
"name": "身份证号"
|
||||||
"name": "身份证号",
|
|
||||||
"id": "3e5f920645b2404fadb0e9ff60d1306e"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -239,8 +239,6 @@ class ReflectionEngine:
|
|||||||
# # 检查是否真的有冲突
|
# # 检查是否真的有冲突
|
||||||
conflicts_found=''
|
conflicts_found=''
|
||||||
|
|
||||||
# 记录冲突数据
|
|
||||||
await self._log_data("conflict", conflict_data)
|
|
||||||
conflicts_found=''
|
conflicts_found=''
|
||||||
# 3. 解决冲突
|
# 3. 解决冲突
|
||||||
solved_data = await self._resolve_conflicts(conflict_data, statement_databasets)
|
solved_data = await self._resolve_conflicts(conflict_data, statement_databasets)
|
||||||
@@ -258,8 +256,6 @@ class ReflectionEngine:
|
|||||||
conflicts_resolved = len(solved_data)
|
conflicts_resolved = len(solved_data)
|
||||||
logging.info(f"解决了 {conflicts_resolved} 个冲突")
|
logging.info(f"解决了 {conflicts_resolved} 个冲突")
|
||||||
|
|
||||||
# 记录解决方案
|
|
||||||
await self._log_data("solved_data", solved_data)
|
|
||||||
|
|
||||||
# 4. 应用反思结果(更新记忆库)
|
# 4. 应用反思结果(更新记忆库)
|
||||||
memories_updated=await self._apply_reflection_results(solved_data)
|
memories_updated=await self._apply_reflection_results(solved_data)
|
||||||
@@ -360,14 +356,7 @@ class ReflectionEngine:
|
|||||||
memory_verifies.append(item['memory_verify'])
|
memory_verifies.append(item['memory_verify'])
|
||||||
result_data['memory_verifies'] = memory_verifies
|
result_data['memory_verifies'] = memory_verifies
|
||||||
result_data['quality_assessments'] = quality_assessments
|
result_data['quality_assessments'] = quality_assessments
|
||||||
|
conflicts_found=''
|
||||||
# 检查是否真的有冲突
|
|
||||||
has_conflict = conflict_data[0].get('conflict', False)
|
|
||||||
conflicts_found = len(conflict_data[0]['data']) if has_conflict else 0
|
|
||||||
logging.info(f"冲突状态: {has_conflict}, 发现 {conflicts_found} 个冲突")
|
|
||||||
|
|
||||||
# 记录冲突数据
|
|
||||||
await self._log_data("conflict", conflict_data)
|
|
||||||
|
|
||||||
# Clearn conflict_data,And memory_verify和quality_assessment
|
# Clearn conflict_data,And memory_verify和quality_assessment
|
||||||
cleaned_conflict_data = []
|
cleaned_conflict_data = []
|
||||||
@@ -377,6 +366,7 @@ class ReflectionEngine:
|
|||||||
'conflict': item['conflict']
|
'conflict': item['conflict']
|
||||||
}
|
}
|
||||||
cleaned_conflict_data.append(cleaned_item)
|
cleaned_conflict_data.append(cleaned_item)
|
||||||
|
|
||||||
# 3. 解决冲突
|
# 3. 解决冲突
|
||||||
solved_data = await self._resolve_conflicts(cleaned_conflict_data, source_data)
|
solved_data = await self._resolve_conflicts(cleaned_conflict_data, source_data)
|
||||||
if not solved_data:
|
if not solved_data:
|
||||||
@@ -615,26 +605,7 @@ class ReflectionEngine:
|
|||||||
success_count = await neo4j_data(changes)
|
success_count = await neo4j_data(changes)
|
||||||
return success_count
|
return success_count
|
||||||
|
|
||||||
async def _log_data(self, label: str, data: Any) -> None:
|
|
||||||
"""
|
|
||||||
记录数据到文件
|
|
||||||
|
|
||||||
Args:
|
|
||||||
label: 数据标签
|
|
||||||
data: 要记录的数据
|
|
||||||
"""
|
|
||||||
|
|
||||||
def _write():
|
|
||||||
try:
|
|
||||||
with open("reflexion_data.json", "a", encoding="utf-8") as f:
|
|
||||||
f.write(f"### {label} ###\n")
|
|
||||||
json.dump(data, f, ensure_ascii=False, indent=4)
|
|
||||||
f.write("\n\n")
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"记录数据失败: {e}")
|
|
||||||
|
|
||||||
# 在后台线程中执行写入,避免阻塞事件循环
|
|
||||||
await asyncio.to_thread(_write)
|
|
||||||
|
|
||||||
# 基于时间的反思方法
|
# 基于时间的反思方法
|
||||||
async def time_based_reflection(
|
async def time_based_reflection(
|
||||||
@@ -723,4 +694,3 @@ class ReflectionEngine:
|
|||||||
raise ValueError(f"未知的反思基线: {self.config.baseline}")
|
raise ValueError(f"未知的反思基线: {self.config.baseline}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -17,10 +17,12 @@
|
|||||||
- **日期属性冲突**: 同一人的生日等单值属性出现多值
|
- **日期属性冲突**: 同一人的生日等单值属性出现多值
|
||||||
- **先后约束违反**: 存在A→B约束但t(A)>t(B)(如入学>毕业)
|
- **先后约束违反**: 存在A→B约束但t(A)>t(B)(如入学>毕业)
|
||||||
- **互斥重叠**: 同一时间出现在不同地点等互斥事件
|
- **互斥重叠**: 同一时间出现在不同地点等互斥事件
|
||||||
|
- **隐私审核**: 存在隐私信息也作为冲突输出当{{ memory_verify }}是true的时候
|
||||||
### 事实冲突
|
### 事实冲突
|
||||||
- **属性互斥**: 同一实体的相反属性(喜欢↔不喜欢)
|
- **属性互斥**: 同一实体的相反属性(喜欢↔不喜欢)
|
||||||
- **关系矛盾**: 同一实体在相同语境下的不同关系描述
|
- **关系矛盾**: 同一实体在相同语境下的不同关系描述
|
||||||
- **身份冲突**: 同一实体被赋予不同类型或角色
|
- **身份冲突**: 同一实体被赋予不同类型或角色
|
||||||
|
- **隐私审核**: 存在隐私信息也作为冲突输出当{{ memory_verify }}是true的时候
|
||||||
### 混合冲突
|
### 混合冲突
|
||||||
检测所有逻辑不一致或相互矛盾的记录。
|
检测所有逻辑不一致或相互矛盾的记录。
|
||||||
**检测原则**:
|
**检测原则**:
|
||||||
|
|||||||
@@ -171,7 +171,6 @@
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**输出要求**:
|
**输出要求**:
|
||||||
- 只输出JSON,不添加解释文本
|
- 只输出JSON,不添加解释文本
|
||||||
- 使用标准双引号,必要时转义
|
- 使用标准双引号,必要时转义
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from typing import List, Dict, Any
|
|||||||
prompt_dir = os.path.join(os.path.dirname(__file__), "prompts")
|
prompt_dir = os.path.join(os.path.dirname(__file__), "prompts")
|
||||||
prompt_env = Environment(loader=FileSystemLoader(prompt_dir))
|
prompt_env = Environment(loader=FileSystemLoader(prompt_dir))
|
||||||
|
|
||||||
async def render_evaluate_prompt(evaluate_data: List[Any], schema: Dict[str, Any],
|
async def render_evaluate_prompt(evaluate_data: List[Any], schema: Any,
|
||||||
baseline: str = "TIME",
|
baseline: str = "TIME",
|
||||||
memory_verify: bool = False,quality_assessment:bool = False,
|
memory_verify: bool = False,quality_assessment:bool = False,
|
||||||
statement_databasets: List[str] = [],language_type:str = "zh") -> str:
|
statement_databasets: List[str] = [],language_type:str = "zh") -> str:
|
||||||
@@ -16,7 +16,7 @@ async def render_evaluate_prompt(evaluate_data: List[Any], schema: Dict[str, Any
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
evaluate_data: The data to evaluate
|
evaluate_data: The data to evaluate
|
||||||
schema: The JSON schema to use for the output.
|
schema: The Pydantic model class or JSON schema to use for the output.
|
||||||
baseline: The baseline type for conflict detection (TIME/FACT/TIME-FACT)
|
baseline: The baseline type for conflict detection (TIME/FACT/TIME-FACT)
|
||||||
memory_verify: Whether to enable memory verification for privacy detection
|
memory_verify: Whether to enable memory verification for privacy detection
|
||||||
|
|
||||||
@@ -25,9 +25,17 @@ async def render_evaluate_prompt(evaluate_data: List[Any], schema: Dict[str, Any
|
|||||||
"""
|
"""
|
||||||
template = prompt_env.get_template("evaluate.jinja2")
|
template = prompt_env.get_template("evaluate.jinja2")
|
||||||
|
|
||||||
|
# Convert Pydantic model to JSON schema if needed
|
||||||
|
if hasattr(schema, 'model_json_schema'):
|
||||||
|
json_schema = schema.model_json_schema()
|
||||||
|
elif hasattr(schema, 'schema'):
|
||||||
|
json_schema = schema.schema()
|
||||||
|
else:
|
||||||
|
json_schema = schema
|
||||||
|
|
||||||
rendered_prompt = template.render(
|
rendered_prompt = template.render(
|
||||||
evaluate_data=evaluate_data,
|
evaluate_data=evaluate_data,
|
||||||
json_schema=schema,
|
json_schema=json_schema,
|
||||||
baseline=baseline,
|
baseline=baseline,
|
||||||
memory_verify=memory_verify,
|
memory_verify=memory_verify,
|
||||||
quality_assessment=quality_assessment,
|
quality_assessment=quality_assessment,
|
||||||
@@ -36,14 +44,15 @@ async def render_evaluate_prompt(evaluate_data: List[Any], schema: Dict[str, Any
|
|||||||
)
|
)
|
||||||
return rendered_prompt
|
return rendered_prompt
|
||||||
|
|
||||||
async def render_reflexion_prompt(data: Dict[str, Any], schema: Dict[str, Any], baseline: str, memory_verify: bool = False,
|
|
||||||
|
async def render_reflexion_prompt(data: Dict[str, Any], schema: Any, baseline: str, memory_verify: bool = False,
|
||||||
statement_databasets: List[str] = [],language_type:str = "zh") -> str:
|
statement_databasets: List[str] = [],language_type:str = "zh") -> str:
|
||||||
"""
|
"""
|
||||||
Renders the reflexion prompt using the reflexion_optimized.jinja2 template.
|
Renders the reflexion prompt using the reflexion_optimized.jinja2 template.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data: The data to reflex on.
|
data: The data to reflex on.
|
||||||
schema: The JSON schema to use for the output.
|
schema: The Pydantic model class or JSON schema to use for the output.
|
||||||
baseline: The baseline type for conflict resolution.
|
baseline: The baseline type for conflict resolution.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@@ -51,7 +60,15 @@ async def render_reflexion_prompt(data: Dict[str, Any], schema: Dict[str, Any],
|
|||||||
"""
|
"""
|
||||||
template = prompt_env.get_template("reflexion.jinja2")
|
template = prompt_env.get_template("reflexion.jinja2")
|
||||||
|
|
||||||
rendered_prompt = template.render(data=data, json_schema=schema,
|
# Convert Pydantic model to JSON schema if needed
|
||||||
|
if hasattr(schema, 'model_json_schema'):
|
||||||
|
json_schema = schema.model_json_schema()
|
||||||
|
elif hasattr(schema, 'schema'):
|
||||||
|
json_schema = schema.schema()
|
||||||
|
else:
|
||||||
|
json_schema = schema
|
||||||
|
|
||||||
|
rendered_prompt = template.render(data=data, json_schema=json_schema,
|
||||||
baseline=baseline,memory_verify=memory_verify,
|
baseline=baseline,memory_verify=memory_verify,
|
||||||
statement_databasets=statement_databasets,language_type=language_type)
|
statement_databasets=statement_databasets,language_type=language_type)
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ class BaseDataSchema(BaseModel):
|
|||||||
# 保持原有必需字段为可选,以兼容不同数据源
|
# 保持原有必需字段为可选,以兼容不同数据源
|
||||||
id: Optional[str] = Field(None, description="The unique identifier for the data entry.")
|
id: Optional[str] = Field(None, description="The unique identifier for the data entry.")
|
||||||
statement: Optional[str] = Field(None, description="The statement text.")
|
statement: Optional[str] = Field(None, description="The statement text.")
|
||||||
created_at: str = Field(..., description="The creation timestamp in ISO 8601 format.")
|
created_at: Optional[str] = Field(None, description="The creation timestamp in ISO 8601 format.")
|
||||||
expired_at: Optional[str] = Field(None, description="The expiration timestamp in ISO 8601 format.")
|
expired_at: Optional[str] = Field(None, description="The expiration timestamp in ISO 8601 format.")
|
||||||
description: Optional[str] = Field(None, description="The description of the data entry.")
|
description: Optional[str] = Field(None, description="The description of the data entry.")
|
||||||
|
|
||||||
@@ -46,6 +46,14 @@ class BaseDataSchema(BaseModel):
|
|||||||
relationship: Optional[Union[str, Dict[str, Any]]] = Field(None, description="The relationship object or string.")
|
relationship: Optional[Union[str, Dict[str, Any]]] = Field(None, description="The relationship object or string.")
|
||||||
entity2: Optional[Dict[str, Any]] = Field(None, description="The second entity object.")
|
entity2: Optional[Dict[str, Any]] = Field(None, description="The second entity object.")
|
||||||
|
|
||||||
|
@model_validator(mode="before")
|
||||||
|
def _set_default_created_at(cls, v):
|
||||||
|
"""Set default created_at if missing"""
|
||||||
|
if isinstance(v, dict) and v.get("created_at") is None:
|
||||||
|
from datetime import datetime
|
||||||
|
v["created_at"] = datetime.now().isoformat()
|
||||||
|
return v
|
||||||
|
|
||||||
|
|
||||||
class QualityAssessmentSchema(BaseModel):
|
class QualityAssessmentSchema(BaseModel):
|
||||||
"""Schema for memory quality assessment results."""
|
"""Schema for memory quality assessment results."""
|
||||||
|
|||||||
Reference in New Issue
Block a user