Merge branch 'refs/heads/develop' into feature/20251219_xjn

This commit is contained in:
谢俊男
2026-01-04 15:35:57 +08:00
13 changed files with 200 additions and 138 deletions

View File

@@ -1,26 +1,28 @@
from typing import Optional
import datetime import datetime
import json import json
from typing import Optional
import uuid import uuid
from fastapi import APIRouter, Depends, HTTPException, status, Query from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy import or_ from sqlalchemy import or_
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.celery_app import celery_app
from app.core.logging_config import get_api_logger
from app.core.rag.common import settings
from app.core.rag.llm.chat_model import Base
from app.core.rag.nlp import rag_tokenizer, search
from app.core.rag.prompts.generator import graph_entity_types
from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory
from app.core.response_utils import success
from app.db import get_db from app.db import get_db
from app.dependencies import get_current_user from app.dependencies import get_current_user
from app.models.user_model import User from app.models.user_model import User
from app.models import knowledge_model, document_model, file_model from app.models import knowledge_model, document_model, file_model
from app.schemas import knowledge_schema from app.schemas import knowledge_schema
from app.schemas.response_schema import ApiResponse from app.schemas.response_schema import ApiResponse
from app.core.response_utils import success
from app.services import knowledge_service, document_service from app.services import knowledge_service, document_service
from app.core.rag.llm.chat_model import Base from app.services.model_service import ModelConfigService
from app.core.rag.prompts.generator import graph_entity_types
from app.core.rag.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory
from app.core.logging_config import get_api_logger
from app.core.rag.nlp import rag_tokenizer, search
from app.core.rag.common import settings
from app.celery_app import celery_app
# Obtain a dedicated API logger # Obtain a dedicated API logger
api_logger = get_api_logger() api_logger = get_api_logger()
@@ -47,6 +49,45 @@ def get_parser_types():
return success(msg="Successfully obtained the knowledge parser type", data=list(knowledge_model.ParserType)) return success(msg="Successfully obtained the knowledge parser type", data=list(knowledge_model.ParserType))
@router.get("/knowledge_graph_entity_types", response_model=ApiResponse)
async def get_knowledge_graph_entity_types(
llm_id: uuid.UUID,
scenario: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""
get knowledge graph entity types based on llm_id
"""
api_logger.info(f"Obtain details of the knowledge graph: llm_id={llm_id}, username: {current_user.username}")
try:
# 1. Check whether the model exists
api_logger.debug(f"Check whether the model exists: {llm_id}")
config = ModelConfigService.get_model_by_id(db=db, model_id=llm_id)
if not config:
api_logger.warning(
f"The model does not exist or you do not have permission to access it: llm_id={llm_id}")
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="The model does not exist or you do not have permission to access it"
)
# 2. Prepare to configure chat_mdl information
chat_model = Base(
key=config.api_keys[0].api_key,
model_name=config.api_keys[0].model_name,
base_url=config.api_keys[0].api_base
)
response = graph_entity_types(chat_model, scenario)
return success(data=response, msg="Successfully obtained knowledge graph entity types")
except HTTPException:
raise
except Exception as e:
api_logger.error(f"get knowledge graph entity types failed: llm_id={llm_id} - {str(e)}")
raise
@router.get("/knowledges", response_model=ApiResponse) @router.get("/knowledges", response_model=ApiResponse)
async def get_knowledges( async def get_knowledges(
parent_id: Optional[uuid.UUID] = Query(None, description="parent folder id"), parent_id: Optional[uuid.UUID] = Query(None, description="parent folder id"),
@@ -379,7 +420,7 @@ async def delete_knowledge_graph(
current_user: User = Depends(get_current_user) current_user: User = Depends(get_current_user)
): ):
""" """
Soft-delete knowledge graph delete knowledge graph
""" """
api_logger.info(f"Request to delete knowledge graph: knowledge_id={knowledge_id}, username: {current_user.username}") api_logger.info(f"Request to delete knowledge graph: knowledge_id={knowledge_id}, username: {current_user.username}")
@@ -442,42 +483,3 @@ async def rebuild_knowledge_graph(
except Exception as e: except Exception as e:
api_logger.error(f"Failed to rebuild knowledge graph: knowledge_id={knowledge_id} - {str(e)}") api_logger.error(f"Failed to rebuild knowledge graph: knowledge_id={knowledge_id} - {str(e)}")
raise raise
@router.get("/{knowledge_id}/knowledge_graph_entity_types", response_model=ApiResponse)
async def get_knowledge_graph_entity_types(
knowledge_id: uuid.UUID,
scenario: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""
get knowledge graph entity types based on knowledge_id
"""
api_logger.info(f"Obtain details of the knowledge graph: knowledge_id={knowledge_id}, username: {current_user.username}")
try:
# 1. Check whether the knowledge base exists
api_logger.debug(f"Check whether the knowledge base exists: {knowledge_id}")
db_knowledge = knowledge_service.get_knowledge_by_id(db, knowledge_id=knowledge_id, current_user=current_user)
if not db_knowledge:
api_logger.warning(
f"The knowledge base does not exist or you do not have permission to access it: knowledge_id={knowledge_id}")
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="The knowledge base does not exist or you do not have permission to access it"
)
# 2. Prepare to configure chat_mdl information
chat_model = Base(
key=db_knowledge.llm.api_keys[0].api_key,
model_name=db_knowledge.llm.api_keys[0].model_name,
base_url=db_knowledge.llm.api_keys[0].api_base
)
response = graph_entity_types(chat_model, scenario)
return success(data=response, msg="Successfully obtained knowledge graph entity types")
except HTTPException:
raise
except Exception as e:
api_logger.error(f"get knowledge graph entity types failed: knowledge_id={knowledge_id} - {str(e)}")
raise

View File

@@ -2,52 +2,39 @@
"memory_verify": { "memory_verify": {
"source_data": [ "source_data": [
{ {
"statement_name": "用户是2023年春天去北京工作的。", "statement_name": "我是 2023 年春天去北京工作的后来基本一直都在北京上班也没怎么换过城市。不过后来公司调整2024 年上半年我被调到上海待了差不多半年,那段时间每天都是在上海办公室打卡。当时入职资料用的还是我之前的身份信息,身份证号是 11010119950308123X银行卡是 6222023847595898这些一直没变。对了其实我 从 2023 年开始就一直在北京生活,从来没有长期离开过北京,上海那段更多算是远程配合。"
"statement_id": "62beac695b1346f4871740a45db88782"
}, },
{ {
"statement_name": "用户后来基本一直都在北京上班。", "statement_name": "用户后来基本一直都在北京上班。"
"statement_id": "4cba5ac08b674d7fb1e2ae634d2b8f0b"
}, },
{ {
"statement_name": "用户从2023年开始就一直在北京生活。", "statement_name": "用户从2023年开始就一直在北京生活。"
"statement_id": "e612a44da4db483993c350df7c97a1a1"
}, },
{ {
"statement_name": "用户从来没有长期离开过北京。", "statement_name": "用户从来没有长期离开过北京。"
"statement_id": "b3c787a2e33c49f7981accabbbb4538a"
}, },
{ {
"statement_name": "由于公司调整用户在2024年上半年被调到上海待了差不多半年。", "statement_name": "由于公司调整用户在2024年上半年被调到上海待了差不多半年。"
"statement_id": "64cde4230cb24a4da726e7db9e7aa616"
}, },
{ {
"statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。", "statement_name": "用户在被调到上海期间每天都是在上海办公室打卡。"
"statement_id": "8b1b12e23b844b8088dfeb67da6ad669"
}, },
{ {
"statement_name": "用户在入职时使用的身份信息是之前的身份证号为11010119950308123X。", "statement_name": "用户在入职时使用的身份信息是之前的身份证号为11010119950308123X。"
"statement_id": "030afd362e9b4110b139e68e5d3e7143"
}, },
{ {
"statement_name": "用户的银行卡号是6222023847595898。", "statement_name": "用户的银行卡号是6222023847595898。"
"statement_id": "6c7567cd1f3c478bb42d1b65383e6f2f"
}, },
{ {
"statement_name": "用户的身份信息和银行卡信息一直没变。", "statement_name": "用户的身份信息和银行卡信息一直没变。"
"statement_id": "b3ca618e1e204b83bebd70e75cf2073f"
}, },
{ {
"statement_name": "用户认为在上海的那段时间更多算是远程配合。", "statement_name": "用户认为在上海的那段时间更多算是远程配合。"
"statement_id": "150af89d2c154e6eb41ff1a91e37f962"
} }
], ],
"databasets": [ "databasets": [
{ {
"entity1_name": "Person", "entity1_name": "Person",
"description": "表示人类个体的通用类型",
"statement_id": "62beac695b1346f4871740a45db88782",
"entity2_name": "用户",
"entity2": { "entity2": {
"description": "叙述者,讲述个人工作与生活经历的个体", "description": "叙述者,讲述个人工作与生活经历的个体",
"name": "用户" "name": "用户"
@@ -55,9 +42,6 @@
}, },
{ {
"entity1_name": "用户", "entity1_name": "用户",
"description": "叙述者,讲述个人工作与生活经历的个体",
"statement_id": "62beac695b1346f4871740a45db88782",
"entity2_name": "身份信息",
"entity2": { "entity2": {
"description": "用于个人身份识别的数据", "description": "用于个人身份识别的数据",
"name": "身份信息" "name": "身份信息"
@@ -65,9 +49,6 @@
}, },
{ {
"entity1_name": "用户", "entity1_name": "用户",
"description": "叙述者,讲述个人工作与生活经历的个体",
"statement_id": "62beac695b1346f4871740a45db88782",
"entity2_name": "6222023847595898",
"entity2": { "entity2": {
"description": "用户的银行卡号码", "description": "用户的银行卡号码",
"name": "6222023847595898" "name": "6222023847595898"
@@ -76,33 +57,24 @@
{ {
"entity1_name": "用户", "entity1_name": "用户",
"description": "叙述者,讲述个人工作与生活经历的个体", "description": "叙述者,讲述个人工作与生活经历的个体",
"statement_id": "62beac695b1346f4871740a45db88782",
"entity2_name": "上海办公室",
"entity2": { "entity2": {
"entity_idx": 1, "entity_idx": 1,
"aliases": ["上海办"], "aliases": ["上海办"],
"description": "位于上海的工作办公场所",
"name": "上海办公室" "name": "上海办公室"
} }
}, },
{ {
"entity1_name": "用户", "entity1_name": "用户",
"description": "叙述者,讲述个人工作与生活经历的个体", "description": "叙述者,讲述个人工作与生活经历的个体",
"statement_id": "62beac695b1346f4871740a45db88782",
"entity2_name": "北京",
"entity2": { "entity2": {
"aliases": ["京", "京城", "北平"], "aliases": ["京", "京城", "北平"],
"description": "中国的首都城市,用户主要工作和生活所在地",
"name": "北京" "name": "北京"
} }
}, },
{ {
"entity1_name": "11010119950308123X", "entity1_name": "11010119950308123X",
"description": "具体的身份证号码值", "description": "具体的身份证号码值",
"statement_id": "030afd362e9b4110b139e68e5d3e7143",
"entity2_name": "身份证号",
"entity2": { "entity2": {
"description": "中华人民共和国公民的身份号码",
"name": "身份证号" "name": "身份证号"
} }
} }

View File

@@ -387,7 +387,7 @@ class ReflectionEngine:
result_data['memory_verifies'] = memory_verifies result_data['memory_verifies'] = memory_verifies
result_data['quality_assessments'] = quality_assessments result_data['quality_assessments'] = quality_assessments
conflicts_found='' conflicts_found=''
REMOVE_KEYS = {"created_at", "expired_at","relationship","predicate","statement_id","id","statement_id","relationship_statement_id"}
# Clearn conflict_dataAnd memory_verify和quality_assessment # Clearn conflict_dataAnd memory_verify和quality_assessment
cleaned_conflict_data = [] cleaned_conflict_data = []
for item in conflict_data: for item in conflict_data:
@@ -396,7 +396,23 @@ class ReflectionEngine:
'conflict': item['conflict'] 'conflict': item['conflict']
} }
cleaned_conflict_data.append(cleaned_item) cleaned_conflict_data.append(cleaned_item)
cleaned_conflict_data_=[]
for item in conflict_data:
cleaned_data = []
for row in item.get("data", []):
# 删除 created_at / expired_at
cleaned_row = {
k: v
for k, v in row.items()
if k not in REMOVE_KEYS
}
cleaned_data.append(cleaned_row)
cleaned_item = {
"data": cleaned_data,
"conflict": item.get("conflict"),
}
cleaned_conflict_data_.append(cleaned_item)
print(cleaned_conflict_data_)
# 3. 解决冲突 # 3. 解决冲突
solved_data = await self._resolve_conflicts(cleaned_conflict_data, source_data) solved_data = await self._resolve_conflicts(cleaned_conflict_data, source_data)
if not solved_data: if not solved_data:

View File

@@ -9,9 +9,7 @@
## 任务目标 ## 任务目标
作为数据冲突解决专家,分析冲突原因,按类型分组处理,为每种冲突生成独立解决方案。 作为数据冲突解决专家,分析冲突原因,按类型分组处理,为每种冲突生成独立解决方案。
**数据关系**: statement_databasets中的statement_id对应data中的记录statement_created_at为用户输入时间。 **数据关系**: statement_databasets中的statement_id对应data中的记录statement_created_at为用户输入时间。
**处理模式**: **处理模式**:
- memory_verify=false: 仅处理数据冲突 - memory_verify=false: 仅处理数据冲突
- memory_verify=true: 处理数据冲突 + 隐私脱敏 - memory_verify=true: 处理数据冲突 + 隐私脱敏
@@ -111,7 +109,8 @@
- 隐私保护优先: 所有输出记录必须完成隐私脱敏 - 隐私保护优先: 所有输出记录必须完成隐私脱敏
- 脱敏变更记录: 隐私脱敏变更也必须在change字段中记录{% endif %} - 脱敏变更记录: 隐私脱敏变更也必须在change字段中记录{% endif %}
- 不可修改数据: 数据被判定为正确时不可修改,无数据可输出时为空 - 不可修改数据: 数据被判定为正确时不可修改,无数据可输出时为空
- 输出的结果reflexion字段中的reason字段和solution不允许含有expired_at设为2024-01-01T00:00:00Z、memory_verify=true)等原数据字段以及涉及需要修改的字段以及内容 - 输出的结果reflexion字段中的reason字段和solution不允许含有expired_at设为2024-01-01T00:00:00Z、memory_verify=true、memory_verify=false)等原数据字段以及涉及需要修改的字段以及内容
如果是FACT只记录事实冲突相关的数据如果是TIME只记录时间冲突相关的数据如果是HYBRID则记录所有冲突相关的数据
**变更记录格式**: **变更记录格式**:
```json ```json
@@ -158,8 +157,9 @@
"conflict": true "conflict": true
}, },
"reflexion": { "reflexion": {
"reason": "该冲突类型的原因分析", "reason": "该冲突类型的原因分析如果是FACT就是存在事实冲突分析该冲突原因如果是TIME就是存在时间冲突分析该冲突原因如果是HYBRID可以输出存在时间与事实的混合冲突再添加上原因分析
"solution": "该冲突类型的解决方案" 不可以随意分配冲突类型以及原因不允许输出字段比如statement、description、entity1_name、entity2_name、name、memory_verify、expired_at、conflict等类似这种",
"solution": "该冲突类型的解决方案不允许输出字段比如statement、description、entity1_name、entity2_name、name、memory_verify、expired_at、conflict等类似这种"
}, },
"resolved": { "resolved": {
"original_memory_id": "被设为失效的记忆id", "original_memory_id": "被设为失效的记忆id",
@@ -182,4 +182,5 @@
- **resolved.change**: 包含详细变更信息 - **resolved.change**: 包含详细变更信息
- 无需修改的冲突类型resolved为null - 无需修改的冲突类型resolved为null
- 与baseline不匹配的冲突类型不包含在results中 - 与baseline不匹配的冲突类型不包含在results中
模式参考: {{ json_schema }} 模式参考: {{ json_schema }}

View File

@@ -672,6 +672,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
excel_parser = ExcelParser() excel_parser = ExcelParser()
if parser_config.get("html4excel"): if parser_config.get("html4excel"):
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _] sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
parser_config["chunk_token_num"] = 0
else: else:
sections = [(_, "") for _ in excel_parser(binary) if _] sections = [(_, "") for _ in excel_parser(binary) if _]
parser_config["chunk_token_num"] = 12800 parser_config["chunk_token_num"] = 12800

View File

@@ -5,6 +5,7 @@ from io import BytesIO
import pandas as pd import pandas as pd
from openpyxl import Workbook, load_workbook from openpyxl import Workbook, load_workbook
from PIL import Image
from app.core.rag.nlp import find_codec from app.core.rag.nlp import find_codec
@@ -28,7 +29,7 @@ class RAGExcelParser:
try: try:
file_like_object.seek(0) file_like_object.seek(0)
df = pd.read_csv(file_like_object) df = pd.read_csv(file_like_object, on_bad_lines='skip')
return RAGExcelParser._dataframe_to_workbook(df) return RAGExcelParser._dataframe_to_workbook(df)
except Exception as e_csv: except Exception as e_csv:
@@ -42,14 +43,11 @@ class RAGExcelParser:
file_like_object.seek(0) file_like_object.seek(0)
try: try:
dfs = pd.read_excel(file_like_object, sheet_name=None) dfs = pd.read_excel(file_like_object, sheet_name=None)
if isinstance(dfs, dict):
dfs = next(iter(dfs.values()))
return RAGExcelParser._dataframe_to_workbook(dfs) return RAGExcelParser._dataframe_to_workbook(dfs)
except Exception as ex: except Exception as ex:
logging.info(f"pandas with default engine load error: {ex}, try calamine instead") logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
file_like_object.seek(0) file_like_object.seek(0)
df = pd.read_excel(file_like_object, engine="calamine") df = pd.read_excel(file_like_object, engine="calamine")
print(df)
return RAGExcelParser._dataframe_to_workbook(df) return RAGExcelParser._dataframe_to_workbook(df)
except Exception as e_pandas: except Exception as e_pandas:
raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}") raise Exception(f"pandas.read_excel error: {e_pandas}, original openpyxl error: {e}")
@@ -68,7 +66,6 @@ class RAGExcelParser:
# if contains multiple sheets use _dataframes_to_workbook # if contains multiple sheets use _dataframes_to_workbook
if isinstance(df, dict) and len(df) > 1: if isinstance(df, dict) and len(df) > 1:
return RAGExcelParser._dataframes_to_workbook(df) return RAGExcelParser._dataframes_to_workbook(df)
df = RAGExcelParser._clean_dataframe(df) df = RAGExcelParser._clean_dataframe(df)
wb = Workbook() wb = Workbook()
ws = wb.active ws = wb.active
@@ -80,15 +77,14 @@ class RAGExcelParser:
for row_num, row in enumerate(df.values, 2): for row_num, row in enumerate(df.values, 2):
for col_num, value in enumerate(row, 1): for col_num, value in enumerate(row, 1):
ws.cell(row=row_num, column=col_num, value=value) ws.cell(row=row_num, column=col_num, value=value)
return wb return wb
@staticmethod @staticmethod
def _dataframes_to_workbook(dfs: dict): def _dataframes_to_workbook(dfs: dict):
wb = Workbook() wb = Workbook()
default_sheet = wb.active default_sheet = wb.active
wb.remove(default_sheet) wb.remove(default_sheet)
for sheet_name, df in dfs.items(): for sheet_name, df in dfs.items():
df = RAGExcelParser._clean_dataframe(df) df = RAGExcelParser._clean_dataframe(df)
ws = wb.create_sheet(title=sheet_name) ws = wb.create_sheet(title=sheet_name)
@@ -99,6 +95,52 @@ class RAGExcelParser:
ws.cell(row=row_num, column=col_num, value=value) ws.cell(row=row_num, column=col_num, value=value)
return wb return wb
@staticmethod
def _extract_images_from_worksheet(ws, sheetname=None):
"""
Extract images from a worksheet and enrich them with vision-based descriptions.
Returns: List[dict]
"""
images = getattr(ws, "_images", [])
if not images:
return []
raw_items = []
for img in images:
try:
img_bytes = img._data()
pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
anchor = img.anchor
if hasattr(anchor, "_from") and hasattr(anchor, "_to"):
r1, c1 = anchor._from.row + 1, anchor._from.col + 1
r2, c2 = anchor._to.row + 1, anchor._to.col + 1
if r1 == r2 and c1 == c2:
span = "single_cell"
else:
span = "multi_cell"
else:
r1, c1 = anchor._from.row + 1, anchor._from.col + 1
r2, c2 = r1, c1
span = "single_cell"
item = {
"sheet": sheetname or ws.title,
"image": pil_img,
"image_description": "",
"row_from": r1,
"col_from": c1,
"row_to": r2,
"col_to": c2,
"span_type": span,
}
raw_items.append(item)
except Exception:
continue
return raw_items
def html(self, fnm, chunk_rows=256): def html(self, fnm, chunk_rows=256):
from html import escape from html import escape
@@ -131,7 +173,7 @@ class RAGExcelParser:
tb = "" tb = ""
tb += f"<table><caption>{sheetname}</caption>" tb += f"<table><caption>{sheetname}</caption>"
tb += tb_rows_0 tb += tb_rows_0
for r in list(rows[1 + chunk_i * chunk_rows : min(1 + (chunk_i + 1) * chunk_rows, len(rows))]): for r in list(rows[1 + chunk_i * chunk_rows: min(1 + (chunk_i + 1) * chunk_rows, len(rows))]):
tb += "<tr>" tb += "<tr>"
for i, c in enumerate(r): for i, c in enumerate(r):
if c.value is None: if c.value is None:
@@ -154,7 +196,7 @@ class RAGExcelParser:
except Exception as e: except Exception as e:
logging.warning(f"Parse spreadsheet error: {e}, trying to interpret as CSV file") logging.warning(f"Parse spreadsheet error: {e}, trying to interpret as CSV file")
file_like_object.seek(0) file_like_object.seek(0)
df = pd.read_csv(file_like_object) df = pd.read_csv(file_like_object, on_bad_lines='skip')
df = df.replace(r"^\s*$", "", regex=True) df = df.replace(r"^\s*$", "", regex=True)
return df.to_markdown(index=False) return df.to_markdown(index=False)
@@ -172,19 +214,35 @@ class RAGExcelParser:
continue continue
if not rows: if not rows:
continue continue
# 获取表头
ti = list(rows[0]) ti = list(rows[0])
for r in list(rows[1:]): header_fields = []
fields = [] for cell in ti:
for i, c in enumerate(r): if cell.value: # 只添加有值的表头
if not c.value: header_fields.append(str(cell.value))
continue
t = str(ti[i].value) if i < len(ti) else "" # 如果有数据行,处理数据行;否则只处理表头
t += ("" if t else "") + str(c.value) data_rows = rows[1:]
fields.append(t) if data_rows:
line = "; ".join(fields) for r in data_rows:
if sheetname.lower().find("sheet") < 0: fields = []
line += " ——" + sheetname for i, c in enumerate(r):
res.append(line) if not c.value:
continue
t = str(ti[i].value) if i < len(ti) else ""
t += ("" if t else "") + str(c.value)
fields.append(t)
line = "; ".join(fields)
if sheetname.lower().find("sheet") < 0:
line += " ——" + sheetname
res.append(line)
else:
# 只有表头的情况
if header_fields:
line = "; ".join(header_fields)
if sheetname.lower().find("sheet") < 0:
line += " ——" + sheetname
res.append(line)
return res return res
@staticmethod @staticmethod
@@ -192,14 +250,14 @@ class RAGExcelParser:
if fnm.split(".")[-1].lower().find("xls") >= 0: if fnm.split(".")[-1].lower().find("xls") >= 0:
wb = RAGExcelParser._load_excel_to_workbook(BytesIO(binary)) wb = RAGExcelParser._load_excel_to_workbook(BytesIO(binary))
total = 0 total = 0
for sheetname in wb.sheetnames: for sheetname in wb.sheetnames:
try: try:
ws = wb[sheetname] ws = wb[sheetname]
total += len(list(ws.rows)) total += len(list(ws.rows))
except Exception as e: except Exception as e:
logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}") logging.warning(f"Skip sheet '{sheetname}' due to rows access error: {e}")
continue continue
return total return total
if fnm.split(".")[-1].lower() in ["csv", "txt"]: if fnm.split(".")[-1].lower() in ["csv", "txt"]:

View File

@@ -196,7 +196,7 @@ class EntityResolution(Extractor):
ans_list = [] ans_list = []
records = [r.strip() for r in results.split(record_delimiter)] records = [r.strip() for r in results.split(record_delimiter)]
for record in records: for record in records:
pattern_int = f"{re.escape(entity_index_delimiter)}(\d+){re.escape(entity_index_delimiter)}" pattern_int = fr"{re.escape(entity_index_delimiter)}(\d+){re.escape(entity_index_delimiter)}"
match_int = re.search(pattern_int, record) match_int = re.search(pattern_int, record)
res_int = int(str(match_int.group(1) if match_int else '0')) res_int = int(str(match_int.group(1) if match_int else '0'))
if res_int > records_length: if res_int > records_length:

View File

@@ -197,6 +197,10 @@ def validate_embedding_model(
embedding_id, "embedding", db, tenant_id, required=True, embedding_id, "embedding", db, tenant_id, required=True,
config_id=config_id, workspace_id=workspace_id config_id=config_id, workspace_id=workspace_id
) )
print(100*'-')
print(embedding_uuid)
print(_)
print(100*'-')
if embedding_uuid is None: if embedding_uuid is None:
raise InvalidConfigError( raise InvalidConfigError(

View File

@@ -61,7 +61,7 @@ class EndNode(BaseNode):
引用的节点 ID 列表 引用的节点 ID 列表
""" """
# 匹配 {{node_id.xxx}} 格式 # 匹配 {{node_id.xxx}} 格式
pattern = r'\{\{([a-zA-Z0-9_]+)\.[a-zA-Z0-9_]+\}\}' pattern = r'\{\{([a-zA-Z0-9_-]+)\.[a-zA-Z0-9_]+\}\}'
matches = re.findall(pattern, template) matches = re.findall(pattern, template)
return list(set(matches)) # 去重 return list(set(matches)) # 去重

View File

@@ -51,8 +51,8 @@ class DataConfig(Base):
# 自我反思配置 # 自我反思配置
enable_self_reflexion = Column(Boolean, default=False, comment="是否启用自我反思") enable_self_reflexion = Column(Boolean, default=False, comment="是否启用自我反思")
iteration_period = Column(String, default="3", comment="反思迭代周期") iteration_period = Column(String, default="3", comment="反思迭代周期")
reflexion_range = Column(String, default="retrieval", comment="反思范围:部分/全部") reflexion_range = Column(String, default="partial", comment="反思范围:部分/全部")
baseline = Column(String, default="time", comment="基线:时间/事实/时间和事实") baseline = Column(String, default="TIME", comment="基线:时间/事实/时间和事实")
reflection_model_id = Column(String, nullable=True, comment="反思模型ID") reflection_model_id = Column(String, nullable=True, comment="反思模型ID")
memory_verify = Column(Boolean, default=True, comment="记忆验证") memory_verify = Column(Boolean, default=True, comment="记忆验证")
quality_assessment = Column(Boolean, default=True, comment="质量评估") quality_assessment = Column(Boolean, default=True, comment="质量评估")

View File

@@ -361,14 +361,14 @@ class MasterAgentRouter:
"model_name": api_key_config.model_name "model_name": api_key_config.model_name
} }
) )
temperature = 0.3 # 决策任务使用较低温度 # temperature = 0.3 # 决策任务使用较低温度
max_tokens = 1000 # max_tokens = 1000
if self.model_parameters: # if self.model_parameters:
temperature = self.model_parameters.temperature # temperature = self.model_parameters["temperature"]
max_tokens = self.model_parameters.max_tokens # max_tokens = self.model_parameters["max_tokens"]
extra_params = {"temperature": temperature, extra_params = {"temperature": self.model_parameters.get("temperature", 0.3),
"max_tokens":max_tokens "max_tokens":self.model_parameters.get("max_tokens", 1000)
} }
# 创建 RedBearModelConfig # 创建 RedBearModelConfig
model_config = RedBearModelConfig( model_config = RedBearModelConfig(

View File

@@ -290,12 +290,22 @@ class MultiAgentService:
else: else:
execution_config_data = convert_uuids_to_str(data.execution_config.model_dump()) execution_config_data = convert_uuids_to_str(data.execution_config.model_dump())
# 处理 model_parameters可能是 None、字典或 Pydantic 模型)
if data.model_parameters is None:
model_parameters_data = None
elif isinstance(data.model_parameters, dict):
# 过滤掉值为 None 的字段
model_parameters_data = {k: v for k, v in data.model_parameters.items() if v is not None}
else:
# 过滤掉值为 None 的字段
model_parameters_data = {k: v for k, v in data.model_parameters.model_dump().items() if v is not None}
config = MultiAgentConfig( config = MultiAgentConfig(
app_id=app_id, app_id=app_id,
master_agent_id=data.master_agent_id, master_agent_id=data.master_agent_id,
master_agent_name=data.master_agent_name, master_agent_name=data.master_agent_name,
default_model_config_id=data.default_model_config_id, default_model_config_id=data.default_model_config_id,
model_parameters=data.model_parameters, model_parameters=model_parameters_data,
orchestration_mode=data.orchestration_mode, orchestration_mode=data.orchestration_mode,
sub_agents=sub_agents_data, sub_agents=sub_agents_data,
# routing_rules=routing_rules_data, # routing_rules=routing_rules_data,

View File

@@ -41,8 +41,6 @@ nodes:
- 使用友好、礼貌的语气 - 使用友好、礼貌的语气
- 适当使用格式化(如列表、段落)提高可读性 - 适当使用格式化(如列表、段落)提高可读性
- role: user
content: "{{sys.message}}"
model_id: null model_id: null
temperature: 0.7 temperature: 0.7