Merge remote-tracking branch 'origin/develop' into refactor/memory-config-management

This commit is contained in:
Ke Sun
2025-12-22 11:37:08 +08:00
119 changed files with 18212 additions and 2208 deletions

View File

@@ -20,6 +20,11 @@ from .data_config_model import DataConfig
from .multi_agent_model import MultiAgentConfig, AgentInvocation
from .workflow_model import WorkflowConfig, WorkflowExecution, WorkflowNodeExecution
from .retrieval_info import RetrievalInfo
from .prompt_optimizer_model import PromptOptimizerSession, PromptOptimizerSessionHistory
from .tool_model import (
ToolConfig, BuiltinToolConfig, CustomToolConfig, MCPToolConfig,
ToolExecution, ToolType, ToolStatus, AuthType, ExecutionStatus
)
__all__ = [
"Tenants",
@@ -54,5 +59,17 @@ __all__ = [
"WorkflowConfig",
"WorkflowExecution",
"WorkflowNodeExecution",
"RetrievalInfo"
"RetrievalInfo",
"PromptOptimizerSession",
"PromptOptimizerSessionHistory",
"RetrievalInfo",
"ToolConfig",
"BuiltinToolConfig",
"CustomToolConfig",
"MCPToolConfig",
"ToolExecution",
"ToolType",
"ToolStatus",
"AuthType",
"ExecutionStatus"
]

View File

@@ -1,5 +1,4 @@
import datetime
import uuid
from sqlalchemy import Column, String, Boolean, DateTime, Integer, Float
from sqlalchemy.dialects.postgresql import UUID
from app.db import Base
@@ -11,50 +10,53 @@ class DataConfig(Base):
# 主键
config_id = Column(Integer, primary_key=True, autoincrement=True, comment="配置ID")
# 基本信息
config_name = Column(String, nullable=False, comment="配置名称")
config_desc = Column(String, nullable=True, comment="配置描述")
# 组织信息
workspace_id = Column(UUID(as_uuid=True), nullable=True, comment="工作空间ID")
group_id = Column(String, nullable=True, comment="组ID")
user_id = Column(String, nullable=True, comment="用户ID")
apply_id = Column(String, nullable=True, comment="应用ID")
# 模型选择从workspace继承
llm_id = Column(String, nullable=True, comment="LLM模型配置ID")
embedding_id = Column(String, nullable=True, comment="嵌入模型配置ID")
rerank_id = Column(String, nullable=True, comment="重排序模型配置ID")
llm = Column(String, nullable=True, comment="LLM模型配置ID")
# 记忆萃取引擎配置
enable_llm_dedup_blockwise = Column(Boolean, default=True, comment="启用LLM决策去重")
enable_llm_disambiguation = Column(Boolean, default=True, comment="启用LLM决策消歧")
deep_retrieval = Column(Boolean, default=True, comment="深度检索开关")
# 阈值配置 (0-1 之间的浮点数)
t_type_strict = Column(Float, default=0.8, comment="类型严格阈值")
t_name_strict = Column(Float, default=0.8, comment="名称严格阈值")
t_overall = Column(Float, default=0.8, comment="综合阈值")
# 状态配置
state = Column(Boolean, default=False, comment="配置使用状态")
# 分块策略
chunker_strategy = Column(String, default="RecursiveChunker", comment="分块策略")
# 剪枝配置
pruning_enabled = Column(Boolean, default=False, comment="是否启动智能语义剪枝")
pruning_scene = Column(String, nullable=True, comment="智能剪枝场景education/online_service/outbound")
pruning_threshold = Column(Float, nullable=True, comment="智能语义剪枝阈值0-0.9")
# 自我反思配置
enable_self_reflexion = Column(Boolean, default=False, comment="是否启用自我反思")
iteration_period = Column(String, default="3", comment="反思迭代周期")
reflexion_range = Column(String, default="retrieval", comment="反思范围:部分/全部")
baseline = Column(String, default="time", comment="基线:时间/事实/时间和事实")
reflection_model_id = Column(String, nullable=True, comment="反思模型ID")
memory_verify = Column(Boolean, default=True, comment="记忆验证")
quality_assessment = Column(Boolean, default=True, comment="质量评估")
# 遗忘引擎配置
statement_granularity = Column(Integer, default=2, comment="陈述提取颗粒度,挡位 1/2/3")
include_dialogue_context = Column(Boolean, default=False, comment="是否包含对话上下文")
@@ -63,6 +65,13 @@ class DataConfig(Base):
lambda_mem = Column("lambda_mem", Float, default=0.5, comment="遗忘率0-1 小数")
offset = Column("offset", Float, default=0.0, comment="偏移度0-1 小数")
# 情绪引擎配置
emotion_enabled = Column(Boolean, default=True, comment="是否启用情绪提取")
emotion_model_id = Column(String, nullable=True, comment="情绪分析专用模型ID")
emotion_extract_keywords = Column(Boolean, default=True, comment="是否提取情绪关键词")
emotion_min_intensity = Column(Float, default=0.1, comment="最小情绪强度阈值")
emotion_enable_subject = Column(Boolean, default=True, comment="是否启用主体分类")
# 时间戳
created_at = Column(DateTime, default=datetime.datetime.now, comment="创建时间")
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now, comment="更新时间")

View File

@@ -16,7 +16,26 @@ class Document(Base):
file_size = Column(Integer, default=0, comment="file size(byte)")
file_meta = Column(JSON, nullable=False, default={})
parser_id = Column(String, index=True, nullable=False, comment="default parser ID")
parser_config = Column(JSON, nullable=False, default={"layout_recognize": "DeepDOC", "chunk_token_num": 128, "delimiter": "\n"}, comment="default parser config")
parser_config = Column(JSON, nullable=False,
default={
"layout_recognize": "DeepDOC",
"chunk_token_num": 128,
"delimiter": "\n",
"auto_keywords": 0,
"auto_questions": 0,
"html4excel": False,
"graphrag": {
"use_graphrag": False,
"entity_types": [
"organization",
"person",
"geo",
"event",
"category",
],
"method": "general",
}
}, comment="default parser config")
chunk_num = Column(Integer, default=0, comment="chunk num")
progress = Column(Float, default=0)
progress_msg = Column(String, default="", comment="process message")

View File

@@ -14,6 +14,7 @@ class EndUser(Base):
other_id = Column(String, nullable=True) # Store original user_id
other_name = Column(String, default="", nullable=False)
other_address = Column(String, default="", nullable=False)
reflection_time = Column(DateTime, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.now)
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)

View File

@@ -56,7 +56,25 @@ class Knowledge(Base):
chunk_num = Column(Integer, default=0, comment="chunk num")
parser_id = Column(String, index=True, default="naive", comment="default parser ID")
parser_config = Column(JSON, nullable=False,
default={"layout_recognize": "DeepDOC", "chunk_token_num": 128, "delimiter": "\n"},
default={
"layout_recognize": "DeepDOC",
"chunk_token_num": 128,
"delimiter": "\n",
"auto_keywords": 0,
"auto_questions": 0,
"html4excel": False,
"graphrag": {
"use_graphrag": False,
"entity_types": [
"organization",
"person",
"geo",
"event",
"category",
],
"method": "general",
}
},
comment="default parser config")
status = Column(Integer, index=True, default=1, comment="is it validate(0: disable, 1: enable, 2:Soft-delete)")
created_at = Column(DateTime, default=datetime.datetime.now)

View File

@@ -15,6 +15,25 @@ class ModelType(StrEnum):
EMBEDDING = "embedding"
RERANK = "rerank"
@classmethod
def from_str(cls, value: str) -> "ModelType":
"""
Get a ModelType enum instance from a string value.
Args:
value (str): The string representation of the model type.
Returns:
ModelType: The corresponding ModelType enum object.
Raises:
ValueError: If the given value does not match any ModelType.
"""
try:
return cls(value)
except ValueError:
raise ValueError(f"Invalid ModelType: {value}")
class ModelProvider(StrEnum):
"""模型提供商枚举"""

View File

@@ -0,0 +1,130 @@
import datetime
import uuid
from enum import StrEnum
from sqlalchemy import Column, ForeignKey, Text, DateTime, String, Index
from sqlalchemy.dialects.postgresql import UUID
from app.db import Base
class RoleType(StrEnum):
    """Message roles used in prompt optimization conversations.

    Standardizes the role identifiers for messages kept in the prompt
    optimization session history. Because this is a ``StrEnum``, each member
    compares equal to its raw string value.

    Attributes:
        SYSTEM (str): System-level instructions or prompts ("system").
        USER (str): Messages originating from the end user ("user").
        ASSISTANT (str): Messages generated by the AI assistant ("assistant").
    """

    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
class PromptOptimizerSession(Base):
    """Registry row for a single prompt optimization session.

    Each row holds session-level metadata only: the owning tenant, the user
    who started the session, and the creation time. No message content is
    stored here; messages are kept in ``prompt_opt_session_history``.

    Table:
        prompt_opt_session_list

    Columns:
        id (UUID): Primary key, generated with ``uuid.uuid4``.
        tenant_id (UUID): Required foreign key to ``tenants.id``.
        user_id (UUID): Required foreign key to ``users.id``.
        created_at (DateTime): Indexed creation timestamp; defaults to
            ``datetime.datetime.now`` (a naive timestamp).
    """

    __tablename__ = "prompt_opt_session_list"

    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        index=True,
        default=uuid.uuid4,
        comment="Session ID",
    )
    tenant_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id"),
        comment="Tenant ID",
        nullable=False,
    )
    # app_id = Column(UUID(as_uuid=True), ForeignKey("apps.id"), nullable=False, comment="Application ID")
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id"),
        comment="User ID",
        nullable=False,
    )
    created_at = Column(
        DateTime,
        index=True,
        default=datetime.datetime.now,
        comment="Creation Time",
    )
class PromptOptimizerSessionHistory(Base):
    """Message history of a prompt optimization session.

    Each row is one message belonging to a session; together the rows for a
    session form its conversation history.

    Table:
        prompt_opt_session_history

    Columns:
        id (UUID): Primary key, generated with ``uuid.uuid4``.
        tenant_id (UUID): Required foreign key to ``tenants.id``.
        session_id (UUID): Required foreign key to
            ``prompt_opt_session_list.id``; groups messages into a session.
        user_id (UUID): Required foreign key to ``users.id``.
        role (String): Role of the message sender. Presumably one of the
            ``RoleType`` values (system / user / assistant); the column
            itself is a free-form string and does not enforce this.
        content (Text): Message content.
        created_at (DateTime): Indexed creation timestamp; defaults to
            ``datetime.datetime.now`` (a naive timestamp).

    Indexes:
        ix_prompt_opt_session_history_session_user_created:
            composite index on (session_id, user_id, created_at).

    Notes:
        - There is currently no ``prompt`` column; its definition is
          commented out below.
    """

    __tablename__ = "prompt_opt_session_history"
    __table_args__ = (
        Index(
            "ix_prompt_opt_session_history_session_user_created",
            "session_id",
            "user_id",
            "created_at"
        ),
    )

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
    tenant_id = Column(UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=False, comment="Tenant ID")
    # app_id = Column(UUID(as_uuid=True), ForeignKey("apps.id"), nullable=False, comment="Application ID")
    session_id = Column(
        UUID(as_uuid=True),
        ForeignKey("prompt_opt_session_list.id"),
        nullable=False,
        comment="Session ID",
    )
    user_id = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False, comment="User ID")
    role = Column(String, nullable=False, comment="Message Role")
    content = Column(Text, nullable=False, comment="Message Content")
    # prompt = Column(Text, nullable=False, comment="Prompt")
    created_at = Column(DateTime, default=datetime.datetime.now, comment="Creation Time", index=True)

View File

@@ -21,3 +21,6 @@ class Tenants(Base):
# Relationship to workspaces owned by the tenant
owned_workspaces = relationship("Workspace", back_populates="tenant")
# Relationship to tool configs owned by the tenant
tool_configs = relationship("ToolConfig", back_populates="tenant")

View File

@@ -0,0 +1,226 @@
"""工具管理相关数据模型"""
import uuid
from datetime import datetime
from enum import StrEnum
from sqlalchemy import Column, String, Text, DateTime, JSON, ForeignKey, Integer, Float
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship
from app.db import Base
class ToolType(StrEnum):
    """Tool type enumeration: builtin, custom, or MCP."""

    BUILTIN = "builtin"
    CUSTOM = "custom"
    MCP = "mcp"
class ToolStatus(StrEnum):
    """Tool status enumeration."""

    ACTIVE = "active"
    INACTIVE = "inactive"
    ERROR = "error"
    LOADING = "loading"
class AuthType(StrEnum):
    """Authentication type enumeration."""

    NONE = "none"
    API_KEY = "api_key"
    BEARER_TOKEN = "bearer_token"
class ExecutionStatus(StrEnum):
    """Execution status enumeration."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    TIMEOUT = "timeout"
class ToolConfig(Base):
    """Base tool configuration model (table ``tool_configs``).

    Holds the fields common to every tool: name, type, owning tenant, status,
    a free-form JSON configuration, version/tags metadata and timestamps.
    Execution records hang off ``executions`` and are deleted together with
    the config (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "tool_configs"

    id = Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
    name = Column(String(255), index=True, nullable=False)
    description = Column(Text)
    tool_type = Column(String(50), index=True, nullable=False)
    # Every tool config must belong to a tenant.
    tenant_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id"),
        index=True,
        nullable=False,
    )
    # Tool status; new rows start as inactive.
    status = Column(
        String(50),
        index=True,
        nullable=False,
        default=ToolStatus.INACTIVE.value,
    )

    # Tool-specific configuration, stored as JSON.
    config_data = Column(JSON, default=dict)

    # Metadata.
    version = Column(String(50), default="1.0.0")
    tags = Column(JSON, default=list)  # list of tags

    # Timestamps.
    created_at = Column(DateTime, nullable=False, default=datetime.now)
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        onupdate=datetime.now,
    )

    # Relationships.
    tenant = relationship("Tenants", back_populates="tool_configs")
    executions = relationship(
        "ToolExecution",
        back_populates="tool_config",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug representation of this tool config."""
        return (
            f"<ToolConfig(id={self.id}, name={self.name}, "
            f"type={self.tool_type}, status={self.status})>"
        )
class BuiltinToolConfig(Base):
    """Built-in tool configuration model (table ``builtin_tool_configs``).

    The primary key doubles as a foreign key to ``tool_configs.id``, so each
    row extends exactly one base ``ToolConfig`` row.
    """

    __tablename__ = "builtin_tool_configs"

    id = Column(
        UUID(as_uuid=True),
        ForeignKey("tool_configs.id"),
        primary_key=True,
    )
    tool_class = Column(String(255), nullable=False)  # tool class name
    parameters = Column(JSON, default=dict)  # tool parameter settings

    # Relationship to the shared base configuration row.
    base_config = relationship("ToolConfig", foreign_keys=[id])

    def __repr__(self):
        """Return a short debug representation of this built-in tool config."""
        return (
            f"<BuiltinToolConfig(id={self.id}, "
            f"tool_class={self.tool_class})>"
        )
class CustomToolConfig(Base):
    """Custom tool configuration model (table ``custom_tool_configs``).

    The primary key doubles as a foreign key to ``tool_configs.id``. Stores
    the OpenAPI schema (URL and/or content), authentication settings and
    basic API call settings for a user-defined tool.
    """

    __tablename__ = "custom_tool_configs"

    id = Column(
        UUID(as_uuid=True),
        ForeignKey("tool_configs.id"),
        primary_key=True,
    )
    schema_url = Column(String(1000))  # OpenAPI schema URL
    schema_content = Column(JSON)  # OpenAPI schema content

    # Authentication settings.
    auth_type = Column(
        String(50),
        nullable=False,
        default=AuthType.NONE.value,
    )
    # Auth settings; the original note says "stored encrypted", but this
    # column is plain JSON. TODO confirm where encryption happens.
    auth_config = Column(JSON, default=dict)

    # API settings.
    base_url = Column(String(1000))  # API base URL
    timeout = Column(Integer, default=30)  # timeout in seconds

    # Relationship to the shared base configuration row.
    base_config = relationship("ToolConfig", foreign_keys=[id])

    def __repr__(self):
        """Return a short debug representation of this custom tool config."""
        return (
            f"<CustomToolConfig(id={self.id}, "
            f"auth_type={self.auth_type})>"
        )
class MCPToolConfig(Base):
    """MCP tool configuration model (table ``mcp_tool_configs``).

    The primary key doubles as a foreign key to ``tool_configs.id``. Stores
    the MCP server URL, connection settings, the latest health-check result
    and the list of tools the server exposes.
    """

    __tablename__ = "mcp_tool_configs"

    id = Column(
        UUID(as_uuid=True),
        ForeignKey("tool_configs.id"),
        primary_key=True,
    )
    server_url = Column(String(1000), nullable=False)  # MCP server URL
    connection_config = Column(JSON, default=dict)  # connection settings

    # Service status.
    last_health_check = Column(DateTime)
    health_status = Column(String(50), default="unknown")
    error_message = Column(Text)

    # List of available tools.
    available_tools = Column(JSON, default=list)

    # Relationship to the shared base configuration row.
    base_config = relationship("ToolConfig", foreign_keys=[id])

    def __repr__(self):
        """Return a short debug representation of this MCP tool config."""
        return (
            f"<MCPToolConfig(id={self.id}, "
            f"server_url={self.server_url})>"
        )
class ToolExecution(Base):
    """Tool execution record model (table ``tool_executions``).

    One row per tool invocation: which tool config ran, its status, input and
    output payloads, timing, optional token usage, and the user/workspace the
    execution belongs to. ``started_at`` has no default and is required, so
    callers must supply it.
    """

    __tablename__ = "tool_executions"

    id = Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
    tool_config_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tool_configs.id"),
        index=True,
        nullable=False,
    )

    # Execution info.
    # Execution ID; can be used to correlate with workflows and similar.
    execution_id = Column(String(255), index=True, nullable=False)
    status = Column(
        String(50),
        index=True,
        nullable=False,
        default=ExecutionStatus.PENDING.value,
    )

    # Input / output.
    input_data = Column(JSON)  # input parameters
    output_data = Column(JSON)  # output result
    error_message = Column(Text)  # error message

    # Performance metrics.
    started_at = Column(DateTime, index=True, nullable=False)
    completed_at = Column(DateTime)
    execution_time = Column(Float)  # execution time in seconds

    # Token usage, if applicable.
    token_usage = Column(JSON)

    # User / workspace info.
    user_id = Column(UUID(as_uuid=True), ForeignKey("users.id"), index=True)
    workspace_id = Column(
        UUID(as_uuid=True),
        ForeignKey("workspaces.id"),
        index=True,
        nullable=False,
    )

    # Relationships.
    tool_config = relationship("ToolConfig", back_populates="executions")
    user = relationship("User")
    workspace = relationship("Workspace")

    def __repr__(self):
        """Return a short debug representation of this execution record."""
        return (
            f"<ToolExecution(id={self.id}, status={self.status}, "
            f"tool={self.tool_config_id})>"
        )
# class ToolDependency(Base):
# """工具依赖关系模型"""
# __tablename__ = "tool_dependencies"
#
# id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# tool_id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), nullable=False)
# depends_on_tool_id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), nullable=False)
#
# # 依赖类型和版本要求
# dependency_type = Column(String(50), default="required") # required, optional
# version_constraint = Column(String(100)) # 版本约束,如 ">=1.0.0"
#
# # 时间戳
# created_at = Column(DateTime, default=datetime.now, nullable=False)
#
# # 关联关系
# tool = relationship("ToolConfig", foreign_keys=[tool_id])
# depends_on_tool = relationship("ToolConfig", foreign_keys=[depends_on_tool_id])
#
# def __repr__(self):
# return f"<ToolDependency(tool={self.tool_id}, depends_on={self.depends_on_tool_id})>"
# class PluginConfig(Base):
# """插件配置模型"""
# __tablename__ = "plugin_configs"
#
# id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# name = Column(String(255), nullable=False, unique=True, index=True)
# description = Column(Text)
#
# # 插件信息
# plugin_path = Column(String(1000), nullable=False) # 插件文件路径
# entry_point = Column(String(255), nullable=False) # 入口点
# version = Column(String(50), default="1.0.0")
#
# # 状态
# is_enabled = Column(Boolean, default=True, nullable=False)
# is_loaded = Column(Boolean, default=False, nullable=False)
# load_error = Column(Text) # 加载错误信息
#
# # 配置
# config_schema = Column(JSON) # 配置schema
# config_data = Column(JSON, default=dict) # 配置数据
#
# # 依赖
# dependencies = Column(JSON, default=list) # 依赖的其他插件
#
# # 时间戳
# created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
# updated_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
# last_loaded_at = Column(DateTime)
#
# def __repr__(self):
# return f"<PluginConfig(id={self.id}, name={self.name}, version={self.version})>"

View File

@@ -1,7 +1,7 @@
import datetime
from enum import StrEnum
import uuid
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Boolean
from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship
from app.db import Base