Merge remote-tracking branch 'origin/release/v0.2.9' into develop
This commit is contained in:
@@ -141,13 +141,13 @@ class AppChatService:
|
||||
# 如果是新会话且有开场白,作为第一条 assistant 消息写入数据库
|
||||
is_new_conversation = len(history) == 0
|
||||
if is_new_conversation:
|
||||
opening = self.agent_service._get_opening_statement(features_config, True, variables)
|
||||
opening, suggested_questions = self.agent_service._get_opening_statement(features_config, True, variables)
|
||||
if opening:
|
||||
self.conversation_service.add_message(
|
||||
conversation_id=conversation_id,
|
||||
role="assistant",
|
||||
content=opening,
|
||||
meta_data={}
|
||||
meta_data={"suggested_questions": suggested_questions}
|
||||
)
|
||||
# 重新加载历史(包含刚写入的开场白)
|
||||
history = await self.conversation_service.get_conversation_history(
|
||||
@@ -378,13 +378,13 @@ class AppChatService:
|
||||
# 如果是新会话且有开场白,作为第一条 assistant 消息写入数据库
|
||||
is_new_conversation = len(history) == 0
|
||||
if is_new_conversation:
|
||||
opening = self.agent_service._get_opening_statement(features_config, True, variables)
|
||||
opening, suggested_questions = self.agent_service._get_opening_statement(features_config, True, variables)
|
||||
if opening:
|
||||
self.conversation_service.add_message(
|
||||
conversation_id=conversation_id,
|
||||
role="assistant",
|
||||
content=opening,
|
||||
meta_data={}
|
||||
meta_data={"suggested_questions": suggested_questions}
|
||||
)
|
||||
# 重新加载历史(包含刚写入的开场白)
|
||||
history = await self.conversation_service.get_conversation_history(
|
||||
|
||||
128
api/app/services/app_log_service.py
Normal file
128
api/app/services/app_log_service.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""应用日志服务层"""
|
||||
import uuid
|
||||
from typing import Optional, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.logging_config import get_business_logger
|
||||
from app.models.conversation_model import Conversation, Message
|
||||
from app.repositories.conversation_repository import ConversationRepository, MessageRepository
|
||||
|
||||
logger = get_business_logger()
|
||||
|
||||
|
||||
class AppLogService:
    """Service layer for browsing an application's conversation logs.

    Wraps a ``ConversationRepository`` and a ``MessageRepository`` that share
    the database session passed to the constructor, and logs every query
    through the module-level business logger.
    """

    def __init__(self, db: Session):
        """Store the session and build the two repositories on top of it."""
        self.db = db
        self.conversation_repository = ConversationRepository(db)
        self.message_repository = MessageRepository(db)

    def list_conversations(
        self,
        app_id: uuid.UUID,
        workspace_id: uuid.UUID,
        page: int = 1,
        pagesize: int = 20,
        is_draft: Optional[bool] = None,
    ) -> Tuple[list[Conversation], int]:
        """Return one page of conversations recorded for an application.

        Args:
            app_id: Application ID.
            workspace_id: Workspace ID.
            page: Page number (starting at 1).
            pagesize: Number of conversations per page.
            is_draft: Draft-conversation filter; ``None`` means no filtering.

        Returns:
            A ``(conversations, total)`` tuple exactly as produced by
            ``ConversationRepository.list_app_conversations``.
        """
        app_id_text = str(app_id)
        request_fields = {
            "app_id": app_id_text,
            "workspace_id": str(workspace_id),
            "page": page,
            "pagesize": pagesize,
            "is_draft": is_draft
        }
        logger.info("查询应用日志会话列表", extra=request_fields)

        # The repository performs the filtering and pagination.
        page_rows, total_count = self.conversation_repository.list_app_conversations(
            app_id=app_id,
            workspace_id=workspace_id,
            is_draft=is_draft,
            page=page,
            pagesize=pagesize
        )

        outcome_fields = {
            "app_id": app_id_text,
            "total": total_count,
            "returned": len(page_rows)
        }
        logger.info("查询应用日志会话列表成功", extra=outcome_fields)

        return page_rows, total_count

    def get_conversation_detail(
        self,
        app_id: uuid.UUID,
        conversation_id: uuid.UUID,
        workspace_id: uuid.UUID
    ) -> Conversation:
        """Return a conversation with its messages attached.

        Args:
            app_id: Application ID.
            conversation_id: Conversation ID.
            workspace_id: Workspace ID.

        Returns:
            The conversation object, with its ``messages`` attribute set to
            the list returned by the message repository.

        Raises:
            ResourceNotFoundException: When the conversation does not exist.
                NOTE(review): nothing in this method raises it directly;
                presumably it comes from the repository lookup — confirm.
        """
        app_id_text = str(app_id)
        conversation_id_text = str(conversation_id)
        logger.info(
            "查询应用日志会话详情",
            extra={
                "app_id": app_id_text,
                "conversation_id": conversation_id_text,
                "workspace_id": str(workspace_id)
            }
        )

        # Look the conversation up first, scoped by app and workspace.
        found = self.conversation_repository.get_conversation_for_app_log(
            conversation_id=conversation_id,
            app_id=app_id,
            workspace_id=workspace_id
        )

        # Fetch the messages (the original comment says they come back in
        # ascending time order) and attach them to the conversation object.
        message_rows = self.message_repository.get_messages_by_conversation(
            conversation_id=conversation_id
        )
        found.messages = message_rows

        logger.info(
            "查询应用日志会话详情成功",
            extra={
                "app_id": app_id_text,
                "conversation_id": conversation_id_text,
                "message_count": len(message_rows)
            }
        )

        return found
|
||||
@@ -1084,7 +1084,6 @@ class AppService:
|
||||
if not exists:
|
||||
cleaned["memory_config_id"] = None
|
||||
cleaned.pop("memory_content", None)
|
||||
cleaned["enabled"] = False
|
||||
return cleaned
|
||||
|
||||
exists = self.db.query(
|
||||
@@ -1096,7 +1095,6 @@ class AppService:
|
||||
if not exists:
|
||||
cleaned["memory_config_id"] = None
|
||||
cleaned.pop("memory_content", None)
|
||||
cleaned["enabled"] = False
|
||||
|
||||
return cleaned
|
||||
|
||||
@@ -1684,15 +1682,15 @@ class AppService:
|
||||
|
||||
return config.config_id
|
||||
|
||||
def _update_endusers_memory_config_by_workspace(
|
||||
def _update_endusers_memory_config_by_app(
|
||||
self,
|
||||
workspace_id: uuid.UUID,
|
||||
app_id: uuid.UUID,
|
||||
memory_config_id: uuid.UUID
|
||||
) -> int:
|
||||
"""批量更新应用下所有终端用户的 memory_config_id
|
||||
|
||||
Args:
|
||||
workspace_id: 工作空间ID
|
||||
app_id: 应用ID
|
||||
memory_config_id: 新的记忆配置ID
|
||||
|
||||
Returns:
|
||||
@@ -1701,8 +1699,8 @@ class AppService:
|
||||
from app.repositories.end_user_repository import EndUserRepository
|
||||
|
||||
repo = EndUserRepository(self.db)
|
||||
updated_count = repo.batch_update_memory_config_id_by_workspace(
|
||||
workspace_id=workspace_id,
|
||||
updated_count = repo.batch_update_memory_config_id_by_app(
|
||||
app_id=app_id,
|
||||
memory_config_id=memory_config_id
|
||||
)
|
||||
|
||||
@@ -1753,12 +1751,16 @@ class AppService:
|
||||
|
||||
miss_params = []
|
||||
if agent_cfg.default_model_config_id is None:
|
||||
miss_params.append("model config")
|
||||
miss_params.append("模型配置")
|
||||
|
||||
if agent_cfg.memory.get("enabled") and not agent_cfg.memory.get("memory_config_id"):
|
||||
miss_params.append("memory config")
|
||||
miss_params.append("记忆配置")
|
||||
if miss_params:
|
||||
raise BusinessException(f"{', '.join(miss_params)} is required")
|
||||
raise BusinessException(
|
||||
f"应用发布失败:检测到以下必要配置尚未完成:{', '.join(miss_params)}。请返回应用编辑页面完成相关配置后再尝试发布。",
|
||||
BizCode.CONFIG_MISSING,
|
||||
context={"missing_params": miss_params},
|
||||
)
|
||||
|
||||
config = {
|
||||
"system_prompt": agent_cfg.system_prompt,
|
||||
@@ -1877,8 +1879,8 @@ class AppService:
|
||||
if memory_config_id:
|
||||
app = self.db.query(App).filter(App.id == app_id).first()
|
||||
if app:
|
||||
updated_count = self._update_endusers_memory_config_by_workspace(
|
||||
app.workspace_id, memory_config_id
|
||||
updated_count = self._update_endusers_memory_config_by_app(
|
||||
app_id, memory_config_id
|
||||
)
|
||||
logger.info(
|
||||
f"发布时更新终端用户记忆配置: app_id={app_id}, workspace_id={app.workspace_id}, "
|
||||
@@ -2014,7 +2016,7 @@ class AppService:
|
||||
|
||||
if memory_config_id:
|
||||
|
||||
updated_count = self._update_endusers_memory_config_by_workspace(app.workspace_id, memory_config_id)
|
||||
updated_count = self._update_endusers_memory_config_by_app(app_id, memory_config_id)
|
||||
logger.info(
|
||||
f"回滚时更新终端用户记忆配置: app_id={app_id}, version={version}, "
|
||||
f"memory_config_id={memory_config_id}, updated_count={updated_count}"
|
||||
|
||||
@@ -214,7 +214,7 @@ class ConversationService:
|
||||
|
||||
conversation.message_count += 1
|
||||
|
||||
if conversation.message_count == 1 and role == "user":
|
||||
if conversation.message_count <= 2 and role == "user":
|
||||
conversation.title = (
|
||||
content[:50] + ("..." if len(content) > 50 else "")
|
||||
)
|
||||
|
||||
@@ -448,15 +448,16 @@ class AgentRunService:
|
||||
features_config: Dict[str, Any],
|
||||
is_new_conversation: bool,
|
||||
variables: Optional[Dict[str, Any]] = None
|
||||
) -> Optional[str]:
|
||||
) -> tuple[Any, Any]:
|
||||
"""首轮对话时返回开场白文本(支持变量替换),否则返回 None"""
|
||||
if not is_new_conversation:
|
||||
return None
|
||||
return None, None
|
||||
opening = features_config.get("opening_statement", {})
|
||||
if not (isinstance(opening, dict) and opening.get("enabled") and opening.get("statement")):
|
||||
return None
|
||||
return None, None
|
||||
|
||||
statement = opening["statement"]
|
||||
suggested_questions = opening["suggested_questions"]
|
||||
|
||||
# 如果有变量,进行替换(仅支持 {{var_name}} 格式)
|
||||
if variables:
|
||||
@@ -464,7 +465,7 @@ class AgentRunService:
|
||||
placeholder = f"{{{{{var_name}}}}}"
|
||||
statement = statement.replace(placeholder, str(var_value))
|
||||
|
||||
return statement
|
||||
return statement, suggested_questions
|
||||
|
||||
@staticmethod
|
||||
def _filter_citations(
|
||||
@@ -598,13 +599,16 @@ class AgentRunService:
|
||||
|
||||
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
||||
is_new_conversation = not conversation_id
|
||||
opening = self._get_opening_statement(features_config, is_new_conversation, variables)
|
||||
opening, suggested_questions = None, None
|
||||
if not sub_agent:
|
||||
opening, suggested_questions = self._get_opening_statement(features_config, is_new_conversation, variables)
|
||||
conversation_id = await self._ensure_conversation(
|
||||
conversation_id=conversation_id,
|
||||
app_id=agent_config.app_id,
|
||||
workspace_id=workspace_id,
|
||||
user_id=user_id,
|
||||
opening_statement=opening
|
||||
opening_statement=opening,
|
||||
suggested_questions=suggested_questions
|
||||
)
|
||||
|
||||
model_info = ModelInfo(
|
||||
@@ -839,14 +843,17 @@ class AgentRunService:
|
||||
|
||||
# 5. 处理会话ID(创建或验证),新会话时写入开场白
|
||||
is_new_conversation = not conversation_id
|
||||
opening = self._get_opening_statement(features_config, is_new_conversation, variables)
|
||||
opening, suggested_questions = None, None
|
||||
if not sub_agent:
|
||||
opening, suggested_questions = self._get_opening_statement(features_config, is_new_conversation, variables)
|
||||
conversation_id = await self._ensure_conversation(
|
||||
conversation_id=conversation_id,
|
||||
app_id=agent_config.app_id,
|
||||
workspace_id=workspace_id,
|
||||
user_id=user_id,
|
||||
sub_agent=sub_agent,
|
||||
opening_statement=opening
|
||||
opening_statement=opening,
|
||||
suggested_questions=suggested_questions
|
||||
)
|
||||
|
||||
model_info = ModelInfo(
|
||||
@@ -1050,7 +1057,8 @@ class AgentRunService:
|
||||
workspace_id: uuid.UUID,
|
||||
user_id: Optional[str],
|
||||
sub_agent: bool = False,
|
||||
opening_statement: Optional[str] = None
|
||||
opening_statement: Optional[str] = None,
|
||||
suggested_questions: Optional[List[str]] = None
|
||||
) -> str:
|
||||
"""确保会话存在(创建或验证)
|
||||
|
||||
@@ -1061,6 +1069,7 @@ class AgentRunService:
|
||||
user_id: 用户ID
|
||||
sub_agent: 是否为子代理
|
||||
opening_statement: 开场白(新会话时作为第一条消息写入)
|
||||
suggested_questions: 预设问题列表
|
||||
|
||||
Returns:
|
||||
str: 会话ID
|
||||
@@ -1104,7 +1113,7 @@ class AgentRunService:
|
||||
conversation_id=uuid.UUID(new_conv_id),
|
||||
role="assistant",
|
||||
content=opening_statement,
|
||||
meta_data={}
|
||||
meta_data={"suggested_questions": suggested_questions}
|
||||
)
|
||||
logger.debug(f"已保存开场白到会话 {new_conv_id}")
|
||||
|
||||
|
||||
@@ -204,30 +204,35 @@ class MemoryForgetService:
|
||||
end_user_id: str,
|
||||
forgetting_threshold: float,
|
||||
min_days_since_access: int,
|
||||
limit: int = 20
|
||||
) -> list[Dict[str, Any]]:
|
||||
page: Optional[int] = None,
|
||||
pagesize: Optional[int] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
获取待遗忘节点列表
|
||||
|
||||
查询满足遗忘条件的节点(激活值低于阈值且最后访问时间超过最小天数)
|
||||
|
||||
|
||||
查询满足遗忘条件的节点(激活值低于阈值且最后访问时间超过最小天数)。支持分页查询。
|
||||
|
||||
Args:
|
||||
connector: Neo4j 连接器
|
||||
end_user_id: 组ID
|
||||
forgetting_threshold: 遗忘阈值
|
||||
min_days_since_access: 最小未访问天数
|
||||
limit: 返回节点数量限制
|
||||
|
||||
page: 页码(可选,从1开始)
|
||||
pagesize: 每页数量(可选)
|
||||
|
||||
Returns:
|
||||
list: 待遗忘节点列表
|
||||
dict: 包含待遗忘节点列表和分页信息的字典
|
||||
- items: 待遗忘节点列表
|
||||
- page: 分页信息(分页时)
|
||||
"""
|
||||
from datetime import timedelta
|
||||
|
||||
|
||||
# 计算最小访问时间(ISO 8601 格式字符串,使用 UTC 时区)
|
||||
min_access_time = datetime.now(timezone.utc) - timedelta(days=min_days_since_access)
|
||||
min_access_time_str = min_access_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
|
||||
|
||||
query = """
|
||||
|
||||
# 基础查询(用于获取总数)
|
||||
count_query = """
|
||||
MATCH (n)
|
||||
WHERE (n:Statement OR n:ExtractedEntity OR n:MemorySummary)
|
||||
AND n.end_user_id = $end_user_id
|
||||
@@ -235,10 +240,22 @@ class MemoryForgetService:
|
||||
AND n.activation_value < $threshold
|
||||
AND n.last_access_time IS NOT NULL
|
||||
AND datetime(n.last_access_time) < datetime($min_access_time_str)
|
||||
RETURN
|
||||
RETURN count(n) as total
|
||||
"""
|
||||
|
||||
# 数据查询
|
||||
data_query = """
|
||||
MATCH (n)
|
||||
WHERE (n:Statement OR n:ExtractedEntity OR n:MemorySummary)
|
||||
AND n.end_user_id = $end_user_id
|
||||
AND n.activation_value IS NOT NULL
|
||||
AND n.activation_value < $threshold
|
||||
AND n.last_access_time IS NOT NULL
|
||||
AND datetime(n.last_access_time) < datetime($min_access_time_str)
|
||||
RETURN
|
||||
elementId(n) as node_id,
|
||||
labels(n)[0] as node_type,
|
||||
CASE
|
||||
CASE
|
||||
WHEN n:Statement THEN n.statement
|
||||
WHEN n:ExtractedEntity THEN n.name
|
||||
WHEN n:MemorySummary THEN n.content
|
||||
@@ -247,18 +264,32 @@ class MemoryForgetService:
|
||||
n.activation_value as activation_value,
|
||||
n.last_access_time as last_access_time
|
||||
ORDER BY n.activation_value ASC
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
|
||||
# 如果启用分页,添加 SKIP 和 LIMIT
|
||||
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
|
||||
data_query += " SKIP $skip LIMIT $limit"
|
||||
|
||||
params = {
|
||||
'end_user_id': end_user_id,
|
||||
'threshold': forgetting_threshold,
|
||||
'min_access_time_str': min_access_time_str,
|
||||
'limit': limit
|
||||
'min_access_time_str': min_access_time_str
|
||||
}
|
||||
|
||||
results = await connector.execute_query(query, **params)
|
||||
|
||||
|
||||
# 获取总数(分页时需要)
|
||||
total = 0
|
||||
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
|
||||
count_results = await connector.execute_query(count_query, **params)
|
||||
if count_results:
|
||||
total = count_results[0]['total']
|
||||
|
||||
# 添加分页参数
|
||||
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
|
||||
params['skip'] = (page - 1) * pagesize
|
||||
params['limit'] = pagesize
|
||||
|
||||
results = await connector.execute_query(data_query, **params)
|
||||
|
||||
pending_nodes = []
|
||||
for result in results:
|
||||
# 将节点类型标签转换为小写
|
||||
@@ -267,7 +298,7 @@ class MemoryForgetService:
|
||||
node_type_label = 'entity'
|
||||
elif node_type_label == 'memorysummary':
|
||||
node_type_label = 'summary'
|
||||
|
||||
|
||||
# 将 Neo4j DateTime 对象转换为时间戳(毫秒)
|
||||
last_access_time = result['last_access_time']
|
||||
last_access_dt = convert_neo4j_datetime_to_python(last_access_time)
|
||||
@@ -278,7 +309,7 @@ class MemoryForgetService:
|
||||
last_access_timestamp = int(last_access_dt.timestamp() * 1000)
|
||||
else:
|
||||
last_access_timestamp = 0
|
||||
|
||||
|
||||
pending_nodes.append({
|
||||
'node_id': str(result['node_id']),
|
||||
'node_type': node_type_label,
|
||||
@@ -286,8 +317,20 @@ class MemoryForgetService:
|
||||
'activation_value': result['activation_value'],
|
||||
'last_access_time': last_access_timestamp
|
||||
})
|
||||
|
||||
return pending_nodes
|
||||
|
||||
# 构建返回结果
|
||||
result: Dict[str, Any] = {'items': pending_nodes}
|
||||
|
||||
# 如果启用分页,添加分页信息
|
||||
if page is not None and pagesize is not None and page > 0 and pagesize > 0:
|
||||
result['page'] = {
|
||||
'page': page,
|
||||
'pagesize': pagesize,
|
||||
'total': total,
|
||||
'hasnext': (page * pagesize) < total
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
async def trigger_forgetting_cycle(
|
||||
self,
|
||||
@@ -636,7 +679,7 @@ class MemoryForgetService:
|
||||
api_logger.error(f"获取历史趋势数据失败: {str(e)}")
|
||||
# 失败时返回空列表,不影响主流程
|
||||
|
||||
# 获取待遗忘节点列表(前20个满足遗忘条件的节点)
|
||||
# 获取待遗忘节点列表
|
||||
pending_nodes = []
|
||||
try:
|
||||
if end_user_id:
|
||||
@@ -652,8 +695,7 @@ class MemoryForgetService:
|
||||
connector=connector,
|
||||
end_user_id=end_user_id,
|
||||
forgetting_threshold=forgetting_threshold,
|
||||
min_days_since_access=int(min_days),
|
||||
limit=20
|
||||
min_days_since_access=int(min_days)
|
||||
)
|
||||
|
||||
api_logger.info(f"成功获取 {len(pending_nodes)} 个待遗忘节点")
|
||||
@@ -661,24 +703,79 @@ class MemoryForgetService:
|
||||
except Exception as e:
|
||||
api_logger.error(f"获取待遗忘节点失败: {str(e)}")
|
||||
# 失败时返回空列表,不影响主流程
|
||||
|
||||
# 构建统计信息
|
||||
|
||||
# 构建统计信息(不包含 pending_nodes,已分离到独立接口)
|
||||
stats = {
|
||||
'activation_metrics': activation_metrics,
|
||||
'node_distribution': node_distribution,
|
||||
'recent_trends': recent_trends,
|
||||
'pending_nodes': pending_nodes,
|
||||
'timestamp': int(datetime.now().timestamp() * 1000)
|
||||
}
|
||||
|
||||
|
||||
api_logger.info(
|
||||
f"成功获取遗忘引擎统计: total_nodes={stats['activation_metrics']['total_nodes']}, "
|
||||
f"low_activation_nodes={stats['activation_metrics']['low_activation_nodes']}, "
|
||||
f"trend_days={len(recent_trends)}, pending_nodes={len(pending_nodes)}"
|
||||
f"trend_days={len(recent_trends)}"
|
||||
)
|
||||
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
async def get_pending_nodes(
    self,
    db: Session,
    end_user_id: str,
    config_id: Optional[UUID] = None,
    page: int = 1,
    pagesize: int = 10
) -> Dict[str, Any]:
    """Return a page of nodes that are pending forgetting.

    Loads the forgetting components for ``config_id``, validates the
    configured ``min_days_since_access`` (falling back to 7 when it is
    missing, not a number, or negative), and delegates the query to
    ``_get_pending_forgetting_nodes``.

    Args:
        db: Database session.
        end_user_id: Group ID (required).
        config_id: Optional configuration ID used to load the forgetting
            threshold.
        page: Page number (starting at 1, default 1).
        pagesize: Number of nodes per page (default 10).

    Returns:
        The dictionary produced by ``_get_pending_forgetting_nodes``; it is
        read here with ``.get('page', {}).get('total', 0)``, so a ``page``
        entry carrying ``total`` is expected when pagination is applied.
    """
    # Only the scheduler (for its connector) and the config are needed here.
    components = await self._get_forgetting_components(db, config_id)
    _, _, scheduler, settings = components

    neo4j_connector = scheduler.connector
    threshold = settings['forgetting_threshold']

    # Accept only a non-negative number; "not x < 0" (rather than "x >= 0")
    # keeps the original treatment of values that compare False either way.
    days = settings.get('min_days_since_access')
    days_usable = isinstance(days, (int, float)) and not days < 0
    if not days_usable:
        api_logger.warning(
            f"min_days_since_access 配置无效: {days}, 使用默认值 7"
        )
        days = 7

    # The internal helper runs the paginated query.
    page_payload = await self._get_pending_forgetting_nodes(
        connector=neo4j_connector,
        end_user_id=end_user_id,
        forgetting_threshold=threshold,
        min_days_since_access=int(days),
        page=page,
        pagesize=pagesize
    )

    reported_total = page_payload.get('page', {}).get('total', 0)
    api_logger.info(
        f"成功获取待遗忘节点列表: end_user_id={end_user_id}, "
        f"page={page}, pagesize={pagesize}, total={reported_total}"
    )

    return page_payload
|
||||
|
||||
async def get_forgetting_curve(
|
||||
self,
|
||||
db: Session,
|
||||
|
||||
@@ -12,6 +12,9 @@ import base64
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import olefile
|
||||
import struct
|
||||
import zipfile
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Dict, Any, Optional
|
||||
@@ -602,31 +605,75 @@ class MultimodalService:
|
||||
try:
|
||||
word_file = io.BytesIO(file_content)
|
||||
doc = Document(word_file)
|
||||
return '\n'.join(p.text for p in doc.paragraphs)
|
||||
text_lines = []
|
||||
for p in doc.paragraphs:
|
||||
text = p.text.strip()
|
||||
if text:
|
||||
text_lines.append(text)
|
||||
|
||||
for table in doc.tables:
|
||||
for row in table.rows:
|
||||
for cell in row.cells:
|
||||
text = cell.text.strip()
|
||||
if text:
|
||||
text_lines.append(text)
|
||||
|
||||
full_text = "\n".join(text_lines)
|
||||
return full_text.strip() or "[docx 文件无文本内容]"
|
||||
except Exception as e:
|
||||
logger.error(f"提取 docx 文本失败: {e}")
|
||||
logger.error(f"提取 docx 文本失败: {str(e)}", exc_info=True)
|
||||
return f"[docx 提取失败: {str(e)}]"
|
||||
|
||||
# 旧版 .doc(OLE2 格式)
|
||||
# 旧版 .doc(OLE2/CFB 格式),按 Word Binary Format 规范解析 piece table
|
||||
try:
|
||||
import olefile
|
||||
ole = olefile.OleFileIO(io.BytesIO(file_content))
|
||||
if not ole.exists('WordDocument'):
|
||||
return "[doc 提取失败: 未找到 WordDocument 流]"
|
||||
# 读取 WordDocument 流,提取可见 ASCII/Unicode 文本
|
||||
stream = ole.openstream('WordDocument').read()
|
||||
# Word Binary Format: 文本在流中以 UTF-16-LE 编码存储
|
||||
# 简单提取:过滤出可打印字符段
|
||||
try:
|
||||
text = stream.decode('utf-16-le', errors='ignore')
|
||||
except Exception:
|
||||
text = stream.decode('latin-1', errors='ignore')
|
||||
# 过滤控制字符,保留可打印内容
|
||||
import re
|
||||
text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
|
||||
text = re.sub(r' +', ' ', text).strip()
|
||||
word_stream = ole.openstream('WordDocument').read()
|
||||
|
||||
# FIB offset 0xA bit9 决定使用 0Table 还是 1Table
|
||||
fib_flags = struct.unpack_from('<H', word_stream, 0xA)[0]
|
||||
table_name = '1Table' if (fib_flags & 0x0200) else '0Table'
|
||||
table_stream = ole.openstream(table_name).read()
|
||||
|
||||
# 从 FIB 读取 fcClx/lcbClx 定位 piece table
|
||||
fc_clx, lcb_clx = struct.unpack_from("<II", word_stream, 0x1A2)
|
||||
clx = table_stream[fc_clx: fc_clx + lcb_clx]
|
||||
|
||||
# 解析 CLX,找到 PlcPcd(piece table)
|
||||
i, plc_pcd = 0, None
|
||||
while i < len(clx):
|
||||
clxt = clx[i]
|
||||
if clxt == 0x01:
|
||||
i += 3 + struct.unpack_from('<H', clx, i + 1)[0]
|
||||
elif clxt == 0x02:
|
||||
cb = struct.unpack_from('<I', clx, i + 1)[0]
|
||||
plc_pcd = clx[i + 5: i + 5 + cb]
|
||||
break
|
||||
else:
|
||||
break
|
||||
|
||||
if plc_pcd is None:
|
||||
raise ValueError("PlcPcd not found")
|
||||
|
||||
# PlcPcd: (n+1) 个 CP(4字节)+ n 个 PCD(8字节)
|
||||
n_pieces = (len(plc_pcd) - 4) // 12
|
||||
cp_array = [struct.unpack_from('<I', plc_pcd, k * 4)[0] for k in range(n_pieces + 1)]
|
||||
|
||||
parts = []
|
||||
for k in range(n_pieces):
|
||||
fc_value = struct.unpack_from('<I', plc_pcd, (n_pieces + 1) * 4 + k * 8 + 2)[0]
|
||||
is_ansi = bool(fc_value & 0x40000000)
|
||||
fc = fc_value & 0x3FFFFFFF
|
||||
char_count = cp_array[k + 1] - cp_array[k]
|
||||
|
||||
if is_ansi:
|
||||
parts.append(word_stream[fc: fc + char_count].decode('cp1252', errors='replace'))
|
||||
else:
|
||||
parts.append(word_stream[fc: fc + char_count * 2].decode('utf-16-le', errors='replace'))
|
||||
|
||||
ole.close()
|
||||
return text
|
||||
result = re.sub(r'[\x00-\x1f\x7f]', '', ''.join(parts))
|
||||
return result.strip()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"提取 doc 文本失败: {e}")
|
||||
return f"[doc 提取失败: {str(e)}]"
|
||||
|
||||
@@ -138,7 +138,7 @@ class TenantService:
|
||||
|
||||
except Exception as e:
|
||||
business_logger.error(f"删除租户失败: {str(e)}")
|
||||
raise BusinessException(f"删除租户失败: {str(e)}", code=BizCode.DB_ERROR)
|
||||
raise BusinessException(f"删除租户失败:{str(e)}", code=BizCode.DB_ERROR)
|
||||
|
||||
# 租户用户管理
|
||||
def get_tenant_users(
|
||||
@@ -147,6 +147,7 @@ class TenantService:
|
||||
skip: int = 0,
|
||||
limit: int = 100,
|
||||
is_active: Optional[bool] = None,
|
||||
is_superuser: Optional[bool] = None,
|
||||
search: Optional[str] = None
|
||||
) -> List[UserModel]:
|
||||
"""获取租户下的用户列表"""
|
||||
@@ -155,6 +156,7 @@ class TenantService:
|
||||
skip=skip,
|
||||
limit=limit,
|
||||
is_active=is_active,
|
||||
is_superuser=is_superuser,
|
||||
search=search
|
||||
)
|
||||
|
||||
@@ -162,12 +164,14 @@ class TenantService:
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
is_active: Optional[bool] = None,
|
||||
is_superuser: Optional[bool] = None,
|
||||
search: Optional[str] = None
|
||||
) -> int:
|
||||
"""统计租户下的用户数量"""
|
||||
return self.user_repo.count_users_by_tenant(
|
||||
tenant_id=tenant_id,
|
||||
is_active=is_active,
|
||||
is_superuser=is_superuser,
|
||||
search=search
|
||||
)
|
||||
|
||||
|
||||
@@ -472,6 +472,21 @@ class UserMemoryService:
|
||||
# 定义允许更新的字段白名单
|
||||
allowed_fields = {'other_name', 'aliases', 'meta_data'}
|
||||
|
||||
# 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中
|
||||
_user_placeholder_names = {'用户', '我', 'User', 'I'}
|
||||
|
||||
# 过滤 other_name:不允许设置为占位名称
|
||||
if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names:
|
||||
logger.warning(f"拒绝将占位名称 '{update_data['other_name']}' 设置为 other_name")
|
||||
del update_data['other_name']
|
||||
|
||||
# 过滤 aliases:移除占位名称和非字符串值
|
||||
if 'aliases' in update_data and update_data['aliases']:
|
||||
update_data['aliases'] = [
|
||||
a for a in update_data['aliases']
|
||||
if isinstance(a, str) and a.strip() and a.strip() not in _user_placeholder_names
|
||||
]
|
||||
|
||||
# 检查是否更新了 aliases 字段
|
||||
aliases_updated = 'aliases' in update_data and update_data['aliases'] != end_user_info_record.aliases
|
||||
|
||||
|
||||
Reference in New Issue
Block a user