Merge remote-tracking branch 'origin/release/v0.2.9' into develop

This commit is contained in:
Ke Sun
2026-03-31 19:16:13 +08:00
55 changed files with 1482 additions and 570 deletions

View File

@@ -141,13 +141,13 @@ class AppChatService:
# 如果是新会话且有开场白,作为第一条 assistant 消息写入数据库
is_new_conversation = len(history) == 0
if is_new_conversation:
opening = self.agent_service._get_opening_statement(features_config, True, variables)
opening, suggested_questions = self.agent_service._get_opening_statement(features_config, True, variables)
if opening:
self.conversation_service.add_message(
conversation_id=conversation_id,
role="assistant",
content=opening,
meta_data={}
meta_data={"suggested_questions": suggested_questions}
)
# 重新加载历史(包含刚写入的开场白)
history = await self.conversation_service.get_conversation_history(
@@ -378,13 +378,13 @@ class AppChatService:
# 如果是新会话且有开场白,作为第一条 assistant 消息写入数据库
is_new_conversation = len(history) == 0
if is_new_conversation:
opening = self.agent_service._get_opening_statement(features_config, True, variables)
opening, suggested_questions = self.agent_service._get_opening_statement(features_config, True, variables)
if opening:
self.conversation_service.add_message(
conversation_id=conversation_id,
role="assistant",
content=opening,
meta_data={}
meta_data={"suggested_questions": suggested_questions}
)
# 重新加载历史(包含刚写入的开场白)
history = await self.conversation_service.get_conversation_history(

View File

@@ -0,0 +1,128 @@
"""应用日志服务层"""
import uuid
from typing import Optional, Tuple
from datetime import datetime
from sqlalchemy.orm import Session
from app.core.logging_config import get_business_logger
from app.models.conversation_model import Conversation, Message
from app.repositories.conversation_repository import ConversationRepository, MessageRepository
logger = get_business_logger()
class AppLogService:
    """Service layer for reading an application's conversation logs."""

    def __init__(self, db: Session):
        # Keep the session and build one repository per entity on top of it.
        self.db = db
        self.conversation_repository = ConversationRepository(db)
        self.message_repository = MessageRepository(db)

    def list_conversations(
        self,
        app_id: uuid.UUID,
        workspace_id: uuid.UUID,
        page: int = 1,
        pagesize: int = 20,
        is_draft: Optional[bool] = None,
    ) -> Tuple[list[Conversation], int]:
        """
        Return one page of conversations recorded for an application.

        Args:
            app_id: Application ID.
            workspace_id: Workspace ID.
            page: Page number (starting from 1).
            pagesize: Number of conversations per page.
            is_draft: Draft-conversation filter; None means no filtering.

        Returns:
            Tuple[list[Conversation], int]: (conversations, total count),
            exactly as produced by the conversation repository.
        """
        app_id_text = str(app_id)

        request_fields = {
            "app_id": app_id_text,
            "workspace_id": str(workspace_id),
            "page": page,
            "pagesize": pagesize,
            "is_draft": is_draft
        }
        logger.info("查询应用日志会话列表", extra=request_fields)

        # The filtering and paging themselves are delegated to the repository.
        page_items, total_count = self.conversation_repository.list_app_conversations(
            app_id=app_id,
            workspace_id=workspace_id,
            is_draft=is_draft,
            page=page,
            pagesize=pagesize
        )

        outcome_fields = {
            "app_id": app_id_text,
            "total": total_count,
            "returned": len(page_items)
        }
        logger.info("查询应用日志会话列表成功", extra=outcome_fields)

        return page_items, total_count

    def get_conversation_detail(
        self,
        app_id: uuid.UUID,
        conversation_id: uuid.UUID,
        workspace_id: uuid.UUID
    ) -> Conversation:
        """
        Return a single conversation with its messages attached.

        Args:
            app_id: Application ID.
            conversation_id: Conversation ID.
            workspace_id: Workspace ID.

        Returns:
            Conversation: The conversation object, with ``messages`` set to
            the messages fetched for ``conversation_id``.

        Raises:
            ResourceNotFoundException: Per the original documentation, when
                the conversation does not exist. NOTE(review): presumably
                raised by the repository lookup; this method performs no
                check of its own -- confirm.
        """
        app_id_text = str(app_id)
        conversation_id_text = str(conversation_id)

        logger.info(
            "查询应用日志会话详情",
            extra={
                "app_id": app_id_text,
                "conversation_id": conversation_id_text,
                "workspace_id": str(workspace_id)
            }
        )

        # Look the conversation up scoped to this app and workspace.
        found = self.conversation_repository.get_conversation_for_app_log(
            conversation_id=conversation_id,
            app_id=app_id,
            workspace_id=workspace_id
        )

        # Fetch the conversation's messages.
        # NOTE(review): the original comment says chronological order;
        # the ordering is decided by the repository -- confirm.
        message_rows = self.message_repository.get_messages_by_conversation(
            conversation_id=conversation_id
        )

        # Attach the messages to the conversation object before returning it.
        found.messages = message_rows

        logger.info(
            "查询应用日志会话详情成功",
            extra={
                "app_id": app_id_text,
                "conversation_id": conversation_id_text,
                "message_count": len(message_rows)
            }
        )

        return found

View File

@@ -1084,7 +1084,6 @@ class AppService:
if not exists:
cleaned["memory_config_id"] = None
cleaned.pop("memory_content", None)
cleaned["enabled"] = False
return cleaned
exists = self.db.query(
@@ -1096,7 +1095,6 @@ class AppService:
if not exists:
cleaned["memory_config_id"] = None
cleaned.pop("memory_content", None)
cleaned["enabled"] = False
return cleaned
@@ -1684,15 +1682,15 @@ class AppService:
return config.config_id
def _update_endusers_memory_config_by_workspace(
def _update_endusers_memory_config_by_app(
self,
workspace_id: uuid.UUID,
app_id: uuid.UUID,
memory_config_id: uuid.UUID
) -> int:
"""批量更新应用下所有终端用户的 memory_config_id
Args:
workspace_id: 工作空间ID
app_id: 应用ID
memory_config_id: 新的记忆配置ID
Returns:
@@ -1701,8 +1699,8 @@ class AppService:
from app.repositories.end_user_repository import EndUserRepository
repo = EndUserRepository(self.db)
updated_count = repo.batch_update_memory_config_id_by_workspace(
workspace_id=workspace_id,
updated_count = repo.batch_update_memory_config_id_by_app(
app_id=app_id,
memory_config_id=memory_config_id
)
@@ -1753,12 +1751,16 @@ class AppService:
miss_params = []
if agent_cfg.default_model_config_id is None:
miss_params.append("model config")
miss_params.append("模型配置")
if agent_cfg.memory.get("enabled") and not agent_cfg.memory.get("memory_config_id"):
miss_params.append("memory config")
miss_params.append("记忆配置")
if miss_params:
raise BusinessException(f"{', '.join(miss_params)} is required")
raise BusinessException(
f"应用发布失败:检测到以下必要配置尚未完成:{', '.join(miss_params)}。请返回应用编辑页面完成相关配置后再尝试发布。",
BizCode.CONFIG_MISSING,
context={"missing_params": miss_params},
)
config = {
"system_prompt": agent_cfg.system_prompt,
@@ -1877,8 +1879,8 @@ class AppService:
if memory_config_id:
app = self.db.query(App).filter(App.id == app_id).first()
if app:
updated_count = self._update_endusers_memory_config_by_workspace(
app.workspace_id, memory_config_id
updated_count = self._update_endusers_memory_config_by_app(
app_id, memory_config_id
)
logger.info(
f"发布时更新终端用户记忆配置: app_id={app_id}, workspace_id={app.workspace_id}, "
@@ -2014,7 +2016,7 @@ class AppService:
if memory_config_id:
updated_count = self._update_endusers_memory_config_by_workspace(app.workspace_id, memory_config_id)
updated_count = self._update_endusers_memory_config_by_app(app_id, memory_config_id)
logger.info(
f"回滚时更新终端用户记忆配置: app_id={app_id}, version={version}, "
f"memory_config_id={memory_config_id}, updated_count={updated_count}"

View File

@@ -214,7 +214,7 @@ class ConversationService:
conversation.message_count += 1
if conversation.message_count == 1 and role == "user":
if conversation.message_count <= 2 and role == "user":
conversation.title = (
content[:50] + ("..." if len(content) > 50 else "")
)

View File

@@ -448,15 +448,16 @@ class AgentRunService:
features_config: Dict[str, Any],
is_new_conversation: bool,
variables: Optional[Dict[str, Any]] = None
) -> Optional[str]:
) -> tuple[Any, Any]:
"""首轮对话时返回开场白文本(支持变量替换),否则返回 None"""
if not is_new_conversation:
return None
return None, None
opening = features_config.get("opening_statement", {})
if not (isinstance(opening, dict) and opening.get("enabled") and opening.get("statement")):
return None
return None, None
statement = opening["statement"]
suggested_questions = opening["suggested_questions"]
# 如果有变量,进行替换(仅支持 {{var_name}} 格式)
if variables:
@@ -464,7 +465,7 @@ class AgentRunService:
placeholder = f"{{{{{var_name}}}}}"
statement = statement.replace(placeholder, str(var_value))
return statement
return statement, suggested_questions
@staticmethod
def _filter_citations(
@@ -598,13 +599,16 @@ class AgentRunService:
# 5. 处理会话ID创建或验证新会话时写入开场白
is_new_conversation = not conversation_id
opening = self._get_opening_statement(features_config, is_new_conversation, variables)
opening, suggested_questions = None, None
if not sub_agent:
opening, suggested_questions = self._get_opening_statement(features_config, is_new_conversation, variables)
conversation_id = await self._ensure_conversation(
conversation_id=conversation_id,
app_id=agent_config.app_id,
workspace_id=workspace_id,
user_id=user_id,
opening_statement=opening
opening_statement=opening,
suggested_questions=suggested_questions
)
model_info = ModelInfo(
@@ -839,14 +843,17 @@ class AgentRunService:
# 5. 处理会话ID创建或验证新会话时写入开场白
is_new_conversation = not conversation_id
opening = self._get_opening_statement(features_config, is_new_conversation, variables)
opening, suggested_questions = None, None
if not sub_agent:
opening, suggested_questions = self._get_opening_statement(features_config, is_new_conversation, variables)
conversation_id = await self._ensure_conversation(
conversation_id=conversation_id,
app_id=agent_config.app_id,
workspace_id=workspace_id,
user_id=user_id,
sub_agent=sub_agent,
opening_statement=opening
opening_statement=opening,
suggested_questions=suggested_questions
)
model_info = ModelInfo(
@@ -1050,7 +1057,8 @@ class AgentRunService:
workspace_id: uuid.UUID,
user_id: Optional[str],
sub_agent: bool = False,
opening_statement: Optional[str] = None
opening_statement: Optional[str] = None,
suggested_questions: Optional[List[str]] = None
) -> str:
"""确保会话存在(创建或验证)
@@ -1061,6 +1069,7 @@ class AgentRunService:
user_id: 用户ID
sub_agent: 是否为子代理
opening_statement: 开场白(新会话时作为第一条消息写入)
suggested_questions: 预设问题列表
Returns:
str: 会话ID
@@ -1104,7 +1113,7 @@ class AgentRunService:
conversation_id=uuid.UUID(new_conv_id),
role="assistant",
content=opening_statement,
meta_data={}
meta_data={"suggested_questions": suggested_questions}
)
logger.debug(f"已保存开场白到会话 {new_conv_id}")

View File

@@ -204,30 +204,35 @@ class MemoryForgetService:
end_user_id: str,
forgetting_threshold: float,
min_days_since_access: int,
limit: int = 20
) -> list[Dict[str, Any]]:
page: Optional[int] = None,
pagesize: Optional[int] = None
) -> Dict[str, Any]:
"""
获取待遗忘节点列表
查询满足遗忘条件的节点(激活值低于阈值且最后访问时间超过最小天数)
查询满足遗忘条件的节点(激活值低于阈值且最后访问时间超过最小天数)。支持分页查询。
Args:
connector: Neo4j 连接器
end_user_id: 组ID
forgetting_threshold: 遗忘阈值
min_days_since_access: 最小未访问天数
limit: 返回节点数量限制
page: 页码可选从1开始
pagesize: 每页数量(可选)
Returns:
list: 待遗忘节点列表
dict: 包含待遗忘节点列表和分页信息的字典
- items: 待遗忘节点列表
- page: 分页信息(分页时)
"""
from datetime import timedelta
# 计算最小访问时间ISO 8601 格式字符串,使用 UTC 时区)
min_access_time = datetime.now(timezone.utc) - timedelta(days=min_days_since_access)
min_access_time_str = min_access_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
query = """
# 基础查询(用于获取总数)
count_query = """
MATCH (n)
WHERE (n:Statement OR n:ExtractedEntity OR n:MemorySummary)
AND n.end_user_id = $end_user_id
@@ -235,10 +240,22 @@ class MemoryForgetService:
AND n.activation_value < $threshold
AND n.last_access_time IS NOT NULL
AND datetime(n.last_access_time) < datetime($min_access_time_str)
RETURN
RETURN count(n) as total
"""
# 数据查询
data_query = """
MATCH (n)
WHERE (n:Statement OR n:ExtractedEntity OR n:MemorySummary)
AND n.end_user_id = $end_user_id
AND n.activation_value IS NOT NULL
AND n.activation_value < $threshold
AND n.last_access_time IS NOT NULL
AND datetime(n.last_access_time) < datetime($min_access_time_str)
RETURN
elementId(n) as node_id,
labels(n)[0] as node_type,
CASE
CASE
WHEN n:Statement THEN n.statement
WHEN n:ExtractedEntity THEN n.name
WHEN n:MemorySummary THEN n.content
@@ -247,18 +264,32 @@ class MemoryForgetService:
n.activation_value as activation_value,
n.last_access_time as last_access_time
ORDER BY n.activation_value ASC
LIMIT $limit
"""
# 如果启用分页,添加 SKIP 和 LIMIT
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
data_query += " SKIP $skip LIMIT $limit"
params = {
'end_user_id': end_user_id,
'threshold': forgetting_threshold,
'min_access_time_str': min_access_time_str,
'limit': limit
'min_access_time_str': min_access_time_str
}
results = await connector.execute_query(query, **params)
# 获取总数(分页时需要)
total = 0
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
count_results = await connector.execute_query(count_query, **params)
if count_results:
total = count_results[0]['total']
# 添加分页参数
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
params['skip'] = (page - 1) * pagesize
params['limit'] = pagesize
results = await connector.execute_query(data_query, **params)
pending_nodes = []
for result in results:
# 将节点类型标签转换为小写
@@ -267,7 +298,7 @@ class MemoryForgetService:
node_type_label = 'entity'
elif node_type_label == 'memorysummary':
node_type_label = 'summary'
# 将 Neo4j DateTime 对象转换为时间戳(毫秒)
last_access_time = result['last_access_time']
last_access_dt = convert_neo4j_datetime_to_python(last_access_time)
@@ -278,7 +309,7 @@ class MemoryForgetService:
last_access_timestamp = int(last_access_dt.timestamp() * 1000)
else:
last_access_timestamp = 0
pending_nodes.append({
'node_id': str(result['node_id']),
'node_type': node_type_label,
@@ -286,8 +317,20 @@ class MemoryForgetService:
'activation_value': result['activation_value'],
'last_access_time': last_access_timestamp
})
return pending_nodes
# 构建返回结果
result: Dict[str, Any] = {'items': pending_nodes}
# 如果启用分页,添加分页信息
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
result['page'] = {
'page': page,
'pagesize': pagesize,
'total': total,
'hasnext': (page * pagesize) < total
}
return result
async def trigger_forgetting_cycle(
self,
@@ -636,7 +679,7 @@ class MemoryForgetService:
api_logger.error(f"获取历史趋势数据失败: {str(e)}")
# 失败时返回空列表,不影响主流程
# 获取待遗忘节点列表前20个满足遗忘条件的节点
# 获取待遗忘节点列表
pending_nodes = []
try:
if end_user_id:
@@ -652,8 +695,7 @@ class MemoryForgetService:
connector=connector,
end_user_id=end_user_id,
forgetting_threshold=forgetting_threshold,
min_days_since_access=int(min_days),
limit=20
min_days_since_access=int(min_days)
)
api_logger.info(f"成功获取 {len(pending_nodes)} 个待遗忘节点")
@@ -661,24 +703,79 @@ class MemoryForgetService:
except Exception as e:
api_logger.error(f"获取待遗忘节点失败: {str(e)}")
# 失败时返回空列表,不影响主流程
# 构建统计信息
# 构建统计信息(不包含 pending_nodes已分离到独立接口
stats = {
'activation_metrics': activation_metrics,
'node_distribution': node_distribution,
'recent_trends': recent_trends,
'pending_nodes': pending_nodes,
'timestamp': int(datetime.now().timestamp() * 1000)
}
api_logger.info(
f"成功获取遗忘引擎统计: total_nodes={stats['activation_metrics']['total_nodes']}, "
f"low_activation_nodes={stats['activation_metrics']['low_activation_nodes']}, "
f"trend_days={len(recent_trends)}, pending_nodes={len(pending_nodes)}"
f"trend_days={len(recent_trends)}"
)
return stats
async def get_pending_nodes(
self,
db: Session,
end_user_id: str,
config_id: Optional[UUID] = None,
page: int = 1,
pagesize: int = 10
) -> Dict[str, Any]:
"""
获取待遗忘节点列表(独立分页接口)
查询满足遗忘条件的节点(激活值低于阈值且最后访问时间超过最小天数)。
Args:
db: 数据库会话
end_user_id: 组ID必填
config_id: 配置ID可选用于获取遗忘阈值
page: 页码从1开始默认1
pagesize: 每页数量默认10
Returns:
dict: 包含待遗忘节点列表和分页信息的字典
- items: 待遗忘节点列表
- page: 分页信息
"""
# 获取遗忘引擎组件
_, _, forgetting_scheduler, config = await self._get_forgetting_components(db, config_id)
connector = forgetting_scheduler.connector
forgetting_threshold = config['forgetting_threshold']
# 验证 min_days_since_access 配置值
min_days = config.get('min_days_since_access')
if min_days is None or not isinstance(min_days, (int, float)) or min_days < 0:
api_logger.warning(
f"min_days_since_access 配置无效: {min_days}, 使用默认值 7"
)
min_days = 7
# 调用内部方法获取分页数据
pending_nodes_result = await self._get_pending_forgetting_nodes(
connector=connector,
end_user_id=end_user_id,
forgetting_threshold=forgetting_threshold,
min_days_since_access=int(min_days),
page=page,
pagesize=pagesize
)
api_logger.info(
f"成功获取待遗忘节点列表: end_user_id={end_user_id}, "
f"page={page}, pagesize={pagesize}, total={pending_nodes_result.get('page', {}).get('total', 0)}"
)
return pending_nodes_result
async def get_forgetting_curve(
self,
db: Session,

View File

@@ -12,6 +12,9 @@ import base64
import csv
import io
import json
import re
import olefile
import struct
import zipfile
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
@@ -602,31 +605,75 @@ class MultimodalService:
try:
word_file = io.BytesIO(file_content)
doc = Document(word_file)
return '\n'.join(p.text for p in doc.paragraphs)
text_lines = []
for p in doc.paragraphs:
text = p.text.strip()
if text:
text_lines.append(text)
for table in doc.tables:
for row in table.rows:
for cell in row.cells:
text = cell.text.strip()
if text:
text_lines.append(text)
full_text = "\n".join(text_lines)
return full_text.strip() or "[docx 文件无文本内容]"
except Exception as e:
logger.error(f"提取 docx 文本失败: {e}")
logger.error(f"提取 docx 文本失败: {str(e)}", exc_info=True)
return f"[docx 提取失败: {str(e)}]"
# 旧版 .docOLE2 格式)
# 旧版 .docOLE2/CFB 格式),按 Word Binary Format 规范解析 piece table
try:
import olefile
ole = olefile.OleFileIO(io.BytesIO(file_content))
if not ole.exists('WordDocument'):
return "[doc 提取失败: 未找到 WordDocument 流]"
# 读取 WordDocument 流,提取可见 ASCII/Unicode 文本
stream = ole.openstream('WordDocument').read()
# Word Binary Format: 文本在流中以 UTF-16-LE 编码存储
# 简单提取:过滤出可打印字符段
try:
text = stream.decode('utf-16-le', errors='ignore')
except Exception:
text = stream.decode('latin-1', errors='ignore')
# 过滤控制字符,保留可打印内容
import re
text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
text = re.sub(r' +', ' ', text).strip()
word_stream = ole.openstream('WordDocument').read()
# FIB offset 0xA bit9 决定使用 0Table 还是 1Table
fib_flags = struct.unpack_from('<H', word_stream, 0xA)[0]
table_name = '1Table' if (fib_flags & 0x0200) else '0Table'
table_stream = ole.openstream(table_name).read()
# 从 FIB 读取 fcClx/lcbClx 定位 piece table
fc_clx, lcb_clx = struct.unpack_from("<II", word_stream, 0x1A2)
clx = table_stream[fc_clx: fc_clx + lcb_clx]
# 解析 CLX找到 PlcPcdpiece table
i, plc_pcd = 0, None
while i < len(clx):
clxt = clx[i]
if clxt == 0x01:
i += 3 + struct.unpack_from('<H', clx, i + 1)[0]
elif clxt == 0x02:
cb = struct.unpack_from('<I', clx, i + 1)[0]
plc_pcd = clx[i + 5: i + 5 + cb]
break
else:
break
if plc_pcd is None:
raise ValueError("PlcPcd not found")
# PlcPcd: (n+1) 个 CP4字节+ n 个 PCD8字节
n_pieces = (len(plc_pcd) - 4) // 12
cp_array = [struct.unpack_from('<I', plc_pcd, k * 4)[0] for k in range(n_pieces + 1)]
parts = []
for k in range(n_pieces):
fc_value = struct.unpack_from('<I', plc_pcd, (n_pieces + 1) * 4 + k * 8 + 2)[0]
is_ansi = bool(fc_value & 0x40000000)
fc = fc_value & 0x3FFFFFFF
char_count = cp_array[k + 1] - cp_array[k]
if is_ansi:
parts.append(word_stream[fc: fc + char_count].decode('cp1252', errors='replace'))
else:
parts.append(word_stream[fc: fc + char_count * 2].decode('utf-16-le', errors='replace'))
ole.close()
return text
result = re.sub(r'[\x00-\x1f\x7f]', '', ''.join(parts))
return result.strip()
except Exception as e:
logger.error(f"提取 doc 文本失败: {e}")
return f"[doc 提取失败: {str(e)}]"

View File

@@ -138,7 +138,7 @@ class TenantService:
except Exception as e:
business_logger.error(f"删除租户失败: {str(e)}")
raise BusinessException(f"删除租户失败: {str(e)}", code=BizCode.DB_ERROR)
raise BusinessException(f"删除租户失败{str(e)}", code=BizCode.DB_ERROR)
# 租户用户管理
def get_tenant_users(
@@ -147,6 +147,7 @@ class TenantService:
skip: int = 0,
limit: int = 100,
is_active: Optional[bool] = None,
is_superuser: Optional[bool] = None,
search: Optional[str] = None
) -> List[UserModel]:
"""获取租户下的用户列表"""
@@ -155,6 +156,7 @@ class TenantService:
skip=skip,
limit=limit,
is_active=is_active,
is_superuser=is_superuser,
search=search
)
@@ -162,12 +164,14 @@ class TenantService:
self,
tenant_id: uuid.UUID,
is_active: Optional[bool] = None,
is_superuser: Optional[bool] = None,
search: Optional[str] = None
) -> int:
"""统计租户下的用户数量"""
return self.user_repo.count_users_by_tenant(
tenant_id=tenant_id,
is_active=is_active,
is_superuser=is_superuser,
search=search
)

View File

@@ -472,6 +472,21 @@ class UserMemoryService:
# 定义允许更新的字段白名单
allowed_fields = {'other_name', 'aliases', 'meta_data'}
# 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中
_user_placeholder_names = {'用户', '', 'User', 'I'}
# 过滤 other_name不允许设置为占位名称
if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names:
logger.warning(f"拒绝将占位名称 '{update_data['other_name']}' 设置为 other_name")
del update_data['other_name']
# 过滤 aliases移除占位名称和非字符串值
if 'aliases' in update_data and update_data['aliases']:
update_data['aliases'] = [
a for a in update_data['aliases']
if isinstance(a, str) and a.strip() and a.strip() not in _user_placeholder_names
]
# 检查是否更新了 aliases 字段
aliases_updated = 'aliases' in update_data and update_data['aliases'] != end_user_info_record.aliases