Merge remote-tracking branch 'origin/develop' into refactor/memory-config-management

2025-12-22 11:37:08 +08:00
parent 1e3ba39150 5097fed067
commit 258b88276f
119 changed files with 18212 additions and 2208 deletions
--- a/api/app/models/init.py
+++ b/api/app/models/init.py
@@ -20,6 +20,11 @@ from .data_config_model import DataConfig
 from .multi_agent_model import MultiAgentConfig, AgentInvocation
 from .workflow_model import WorkflowConfig, WorkflowExecution, WorkflowNodeExecution
 from .retrieval_info import RetrievalInfo
+from .prompt_optimizer_model import PromptOptimizerSession, PromptOptimizerSessionHistory
+from .tool_model import (
+    ToolConfig, BuiltinToolConfig, CustomToolConfig, MCPToolConfig,
+    ToolExecution, ToolType, ToolStatus, AuthType, ExecutionStatus
+)

 __all__ = [
    "Tenants",
@@ -54,5 +59,18 @@ __all__ = [
    "WorkflowConfig",
    "WorkflowExecution",
    "WorkflowNodeExecution",
-    "RetrievalInfo"
+    "RetrievalInfo",
+    # Prompt optimizer models
+    "PromptOptimizerSession",
+    "PromptOptimizerSessionHistory",
+    # Tool management models
+    "ToolConfig",
+    "BuiltinToolConfig",
+    "CustomToolConfig",
+    "MCPToolConfig",
+    "ToolExecution",
+    "ToolType",
+    "ToolStatus",
+    "AuthType",
+    "ExecutionStatus"
 ]
--- a/api/app/models/data_config_model.py
+++ b/api/app/models/data_config_model.py
@@ -1,5 +1,4 @@
 import datetime
-import uuid
 from sqlalchemy import Column, String, Boolean, DateTime, Integer, Float
 from sqlalchemy.dialects.postgresql import UUID
 from app.db import Base
@@ -11,50 +10,53 @@ class DataConfig(Base):

    # 主键
    config_id = Column(Integer, primary_key=True, autoincrement=True, comment="配置ID")
-    
+
    # 基本信息
    config_name = Column(String, nullable=False, comment="配置名称")
    config_desc = Column(String, nullable=True, comment="配置描述")
-    
+
    # 组织信息
    workspace_id = Column(UUID(as_uuid=True), nullable=True, comment="工作空间ID")
    group_id = Column(String, nullable=True, comment="组ID")
    user_id = Column(String, nullable=True, comment="用户ID")
    apply_id = Column(String, nullable=True, comment="应用ID")
-    
+
    # 模型选择（从workspace继承）
    llm_id = Column(String, nullable=True, comment="LLM模型配置ID")
    embedding_id = Column(String, nullable=True, comment="嵌入模型配置ID")
    rerank_id = Column(String, nullable=True, comment="重排序模型配置ID")
    llm = Column(String, nullable=True, comment="LLM模型配置ID")
-    
+
    # 记忆萃取引擎配置
    enable_llm_dedup_blockwise = Column(Boolean, default=True, comment="启用LLM决策去重")
    enable_llm_disambiguation = Column(Boolean, default=True, comment="启用LLM决策消歧")
    deep_retrieval = Column(Boolean, default=True, comment="深度检索开关")
-    
+
    # 阈值配置 (0-1 之间的浮点数)
    t_type_strict = Column(Float, default=0.8, comment="类型严格阈值")
    t_name_strict = Column(Float, default=0.8, comment="名称严格阈值")
    t_overall = Column(Float, default=0.8, comment="综合阈值")
-    
+
    # 状态配置
    state = Column(Boolean, default=False, comment="配置使用状态")
-    
+
    # 分块策略
    chunker_strategy = Column(String, default="RecursiveChunker", comment="分块策略")
-    
+
    # 剪枝配置
    pruning_enabled = Column(Boolean, default=False, comment="是否启动智能语义剪枝")
    pruning_scene = Column(String, nullable=True, comment="智能剪枝场景：education/online_service/outbound")
    pruning_threshold = Column(Float, nullable=True, comment="智能语义剪枝阈值（0-0.9）")
-    
+
    # 自我反思配置
    enable_self_reflexion = Column(Boolean, default=False, comment="是否启用自我反思")
    iteration_period = Column(String, default="3", comment="反思迭代周期")
    reflexion_range = Column(String, default="retrieval", comment="反思范围：部分/全部")
    baseline = Column(String, default="time", comment="基线：时间/事实/时间和事实")
-    
+    reflection_model_id = Column(String, nullable=True, comment="反思模型ID")
+    memory_verify = Column(Boolean, default=True, comment="记忆验证")
+    quality_assessment = Column(Boolean, default=True, comment="质量评估")
+
    # 遗忘引擎配置
    statement_granularity = Column(Integer, default=2, comment="陈述提取颗粒度，挡位 1/2/3")
    include_dialogue_context = Column(Boolean, default=False, comment="是否包含对话上下文")
@@ -63,6 +65,13 @@ class DataConfig(Base):
    lambda_mem = Column("lambda_mem", Float, default=0.5, comment="遗忘率，0-1 小数")
    offset = Column("offset", Float, default=0.0, comment="偏移度，0-1 小数")
    
+    # 情绪引擎配置
+    emotion_enabled = Column(Boolean, default=True, comment="是否启用情绪提取")
+    emotion_model_id = Column(String, nullable=True, comment="情绪分析专用模型ID")
+    emotion_extract_keywords = Column(Boolean, default=True, comment="是否提取情绪关键词")
+    emotion_min_intensity = Column(Float, default=0.1, comment="最小情绪强度阈值")
+    emotion_enable_subject = Column(Boolean, default=True, comment="是否启用主体分类")
+    
    # 时间戳
    created_at = Column(DateTime, default=datetime.datetime.now, comment="创建时间")
    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now, comment="更新时间")
--- a/api/app/models/document_model.py
+++ b/api/app/models/document_model.py
@@ -16,7 +16,26 @@ class Document(Base):
    file_size = Column(Integer, default=0, comment="file size(byte)")
    file_meta = Column(JSON, nullable=False, default={})
    parser_id = Column(String, index=True, nullable=False, comment="default parser ID")
-    parser_config = Column(JSON, nullable=False, default={"layout_recognize": "DeepDOC", "chunk_token_num": 128, "delimiter": "\n"}, comment="default parser config")
+    parser_config = Column(JSON, nullable=False,
+                           default=lambda: {  # callable => fresh dict per row, not one shared mutable default
+                               "layout_recognize": "DeepDOC",
+                               "chunk_token_num": 128,
+                               "delimiter": "\n",
+                               "auto_keywords": 0,
+                               "auto_questions": 0,
+                               "html4excel": False,
+                               "graphrag": {
+                                    "use_graphrag": False,
+                                    "entity_types": [
+                                        "organization",
+                                        "person",
+                                        "geo",
+                                        "event",
+                                        "category",
+                                    ],
+                                    "method": "general",
+                                }
+                           }, comment="default parser config")
    chunk_num = Column(Integer, default=0, comment="chunk num")
    progress = Column(Float, default=0)
    progress_msg = Column(String, default="", comment="process message")
--- a/api/app/models/end_user_model.py
+++ b/api/app/models/end_user_model.py
@@ -14,6 +14,7 @@ class EndUser(Base):
    other_id = Column(String, nullable=True)  # Store original user_id
    other_name = Column(String, default="", nullable=False)
    other_address = Column(String, default="", nullable=False)
+    reflection_time = Column(DateTime, nullable=True)
    created_at = Column(DateTime, default=datetime.datetime.now)
    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)

--- a/api/app/models/knowledge_model.py
+++ b/api/app/models/knowledge_model.py
@@ -56,7 +56,25 @@ class Knowledge(Base):
    chunk_num = Column(Integer, default=0, comment="chunk num")
    parser_id = Column(String, index=True, default="naive", comment="default parser ID")
    parser_config = Column(JSON, nullable=False,
-                           default={"layout_recognize": "DeepDOC", "chunk_token_num": 128, "delimiter": "\n"},
+                           default=lambda: {  # callable => fresh dict per row, not one shared mutable default
+                               "layout_recognize": "DeepDOC",
+                               "chunk_token_num": 128,
+                               "delimiter": "\n",
+                               "auto_keywords": 0,
+                               "auto_questions": 0,
+                               "html4excel": False,
+                               "graphrag": {
+                                    "use_graphrag": False,
+                                    "entity_types": [
+                                        "organization",
+                                        "person",
+                                        "geo",
+                                        "event",
+                                        "category",
+                                    ],
+                                    "method": "general",
+                                }
+                           },
                           comment="default parser config")
    status = Column(Integer, index=True, default=1, comment="is it validate(0: disable, 1: enable, 2:Soft-delete)")
    created_at = Column(DateTime, default=datetime.datetime.now)
--- a/api/app/models/models_model.py
+++ b/api/app/models/models_model.py
@@ -15,6 +15,25 @@ class ModelType(StrEnum):
    EMBEDDING = "embedding"
    RERANK = "rerank"

+    @classmethod
+    def from_str(cls, value: str) -> "ModelType":
+        """
+        Get a ModelType enum instance from a string value.
+
+        Args:
+            value (str): The string representation of the model type.
+
+        Returns:
+            ModelType: The corresponding ModelType enum object.
+
+        Raises:
+            ValueError: If `value` matches no member (chained to the enum's own error).
+        """
+        try:
+            return cls(value)
+        except ValueError as exc:
+            raise ValueError(f"Invalid ModelType: {value}") from exc
+

 class ModelProvider(StrEnum):
    """模型提供商枚举"""
--- a/api/app/models/prompt_optimizer_model.py
+++ b/api/app/models/prompt_optimizer_model.py
@@ -0,0 +1,130 @@
+import datetime
+import uuid
+from enum import StrEnum
+
+from sqlalchemy import Column, ForeignKey, Text, DateTime, String, Index
+from sqlalchemy.dialects.postgresql import UUID
+
+from app.db import Base
+
+
+class RoleType(StrEnum):
+    """
+    Enumeration of message roles used in prompt optimization conversations.
+
+    Members are `StrEnum` values, so each compares equal to its lowercase
+    string ("system", "user", "assistant"). NOTE(review): the history table's
+    `role` column is a plain String, so the schema does not enforce this enum.
+
+    Attributes:
+        SYSTEM (str): Represents system-level instructions or prompts that
+            define the behavior or constraints of the assistant.
+        USER (str): Represents messages originating from the end user.
+        ASSISTANT (str): Represents messages generated by the AI assistant.
+    """
+    SYSTEM = "system"
+    USER = "user"
+    ASSISTANT = "assistant"
+
+
+class PromptOptimizerSession(Base):
+    """
+    Prompt Optimization Session Registry.
+
+    This table records high-level metadata for prompt optimization sessions.
+    Each record represents a single logical session initiated by a user
+    under a specific tenant.
+
+    The session acts as a container for multiple conversation messages
+    stored in the session history table.
+
+    Table Name:
+        prompt_opt_session_list
+
+    Columns:
+        id (UUID):
+            Primary key, generated with `uuid.uuid4`; groups conversation history.
+        tenant_id (UUID):
+            Foreign key referencing `tenants.id`.
+            Identifies the tenant under which the session is created.
+        user_id (UUID):
+            Foreign key referencing `users.id`.
+            Identifies the user who initiated the session.
+        created_at (DateTime):
+            Indexed creation timestamp; defaults to naive local `datetime.now()`.
+
+    Design Notes:
+        - This table intentionally does not store message content
+        - Message-level data is stored in `prompt_opt_session_history`
+        - Enables efficient session listing and pagination
+    """
+    __tablename__ = "prompt_opt_session_list"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True, comment="Session ID")
+    tenant_id = Column(UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=False, comment="Tenant ID")
+    # app_id = Column(UUID(as_uuid=True), ForeignKey("apps.id"), nullable=False, comment="Application ID")
+    user_id = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False, comment="User ID")
+
+    created_at = Column(DateTime, default=datetime.datetime.now, comment="Creation Time", index=True)
+
+
+class PromptOptimizerSessionHistory(Base):
+    """
+    Prompt Optimization Session Message History.
+
+    This table stores the complete conversational history of a prompt
+    optimization session, including system prompts, user inputs, and
+    assistant responses.
+
+    Each record represents a single message within a session, preserving
+    the chronological order of interactions.
+
+    Table Name:
+        prompt_opt_session_history
+
+    Columns:
+        id (UUID):
+            Primary key. Unique identifier for the message record.
+        tenant_id (UUID):
+            Foreign key referencing `tenants.id`.
+            Identifies the tenant under which the session operates.
+        session_id (UUID):
+            Foreign key referencing `prompt_opt_session_list.id`.
+        user_id (UUID):
+            Foreign key referencing `users.id`.
+            Identifies the user associated with the session.
+        role (String):
+            Role of the message sender (e.g., system, user, assistant).
+        content (Text):
+            Raw message content generated or provided during the session.
+        prompt (Text):
+            Not stored at present: the column is commented out in this model.
+        created_at (DateTime):
+            Indexed creation timestamp; defaults to naive local `datetime.now()`.
+
+    Design Notes:
+        - Supports full conversation replay and audit
+        - Enables prompt evolution tracking over time
+        - Indexed on created_at and on (session_id, user_id, created_at)
+    """
+    __tablename__ = "prompt_opt_session_history"
+
+    __table_args__ = (
+        Index(
+            "ix_prompt_opt_session_history_session_user_created",
+            "session_id",
+            "user_id",
+            "created_at"
+        ),
+    )
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
+    tenant_id = Column(UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=False, comment="Tenant ID")
+    # app_id = Column(UUID(as_uuid=True), ForeignKey("apps.id"), nullable=False, comment="Application ID")
+    session_id = Column(UUID(as_uuid=True), ForeignKey("prompt_opt_session_list.id"),nullable=False, comment="Session ID")
+    user_id = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False, comment="User ID")
+    role = Column(String, nullable=False, comment="Message Role")
+    content = Column(Text, nullable=False, comment="Message Content")
+    # prompt = Column(Text, nullable=False, comment="Prompt")
+
+    created_at = Column(DateTime, default=datetime.datetime.now, comment="Creation Time", index=True)
--- a/api/app/models/tenant_model.py
+++ b/api/app/models/tenant_model.py
@@ -21,3 +21,6 @@ class Tenants(Base):
    
    # Relationship to workspaces owned by the tenant
    owned_workspaces = relationship("Workspace", back_populates="tenant")
+    
+    # Relationship to tool configs owned by the tenant
+    tool_configs = relationship("ToolConfig", back_populates="tenant")
--- a/api/app/models/tool_model.py
+++ b/api/app/models/tool_model.py
@@ -0,0 +1,226 @@
+"""Data models for tool management."""
+import uuid
+from datetime import datetime
+from enum import StrEnum
+
+from sqlalchemy import Column, String, Text, DateTime, JSON, ForeignKey, Integer, Float
+from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.orm import relationship
+
+from app.db import Base
+
+
+class ToolType(StrEnum):
+    """Tool type enumeration."""
+    BUILTIN = "builtin"
+    CUSTOM = "custom"
+    MCP = "mcp"
+
+
+class ToolStatus(StrEnum):
+    """Tool status enumeration."""
+    ACTIVE = "active"
+    INACTIVE = "inactive"
+    ERROR = "error"
+    LOADING = "loading"
+
+
+class AuthType(StrEnum):
+    """Authentication type enumeration."""
+    NONE = "none"
+    API_KEY = "api_key"
+    BEARER_TOKEN = "bearer_token"
+
+
+class ExecutionStatus(StrEnum):
+    """Execution status enumeration."""
+    PENDING = "pending"
+    RUNNING = "running"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    TIMEOUT = "timeout"
+
+
+class ToolConfig(Base):
+    """Base model for tool configuration."""
+    __tablename__ = "tool_configs"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    name = Column(String(255), nullable=False, index=True)
+    description = Column(Text)
+    tool_type = Column(String(50), nullable=False, index=True)
+    tenant_id = Column(UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=False, index=True)  # must belong to a tenant
+    status = Column(String(50), default=ToolStatus.INACTIVE.value, nullable=False, index=True)  # tool status
+    
+    # Tool-specific configuration (stored as JSON)
+    config_data = Column(JSON, default=dict)
+    
+    # Metadata
+    version = Column(String(50), default="1.0.0")
+    tags = Column(JSON, default=list)  # list of tags
+    
+    # Timestamps
+    created_at = Column(DateTime, default=datetime.now, nullable=False)
+    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now, nullable=False)
+    
+    # Relationships
+    tenant = relationship("Tenants", back_populates="tool_configs")
+    executions = relationship("ToolExecution", back_populates="tool_config", cascade="all, delete-orphan")
+    
+    def __repr__(self):
+        return f"<ToolConfig(id={self.id}, name={self.name}, type={self.tool_type}, status={self.status})>"
+
+
+class BuiltinToolConfig(Base):
+    """Built-in tool configuration model."""
+    __tablename__ = "builtin_tool_configs"
+
+    id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), primary_key=True)
+    tool_class = Column(String(255), nullable=False)  # tool class name
+    parameters = Column(JSON, default=dict)  # tool parameter configuration
+    
+    # Relationships
+    base_config = relationship("ToolConfig", foreign_keys=[id])
+    
+    def __repr__(self):
+        return f"<BuiltinToolConfig(id={self.id}, tool_class={self.tool_class})>"
+
+
+class CustomToolConfig(Base):
+    """Custom tool configuration model."""
+    __tablename__ = "custom_tool_configs"
+
+    id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), primary_key=True)
+    schema_url = Column(String(1000))  # OpenAPI schema URL
+    schema_content = Column(JSON)  # OpenAPI schema content
+    
+    # Authentication configuration
+    auth_type = Column(String(50), default=AuthType.NONE.value, nullable=False)
+    auth_config = Column(JSON, default=dict)  # auth settings; original note says "stored encrypted" - NOTE(review): no encryption is visible in this model, confirm it happens upstream
+    
+    # API configuration
+    base_url = Column(String(1000))  # API base URL
+    timeout = Column(Integer, default=30)  # timeout (seconds)
+    
+    # Relationships
+    base_config = relationship("ToolConfig", foreign_keys=[id])
+    
+    def __repr__(self):
+        return f"<CustomToolConfig(id={self.id}, auth_type={self.auth_type})>"
+
+
+class MCPToolConfig(Base):
+    """MCP tool configuration model."""
+    __tablename__ = "mcp_tool_configs"
+
+    id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), primary_key=True)
+    server_url = Column(String(1000), nullable=False)  # MCP server URL
+    connection_config = Column(JSON, default=dict)  # connection configuration
+    
+    # Service status
+    last_health_check = Column(DateTime)
+    health_status = Column(String(50), default="unknown")
+    error_message = Column(Text)
+    
+    # List of available tools
+    available_tools = Column(JSON, default=list)
+    
+    # Relationships
+    base_config = relationship("ToolConfig", foreign_keys=[id])
+    
+    def __repr__(self):
+        return f"<MCPToolConfig(id={self.id}, server_url={self.server_url})>"
+
+
+class ToolExecution(Base):
+    """Tool execution record model."""
+    __tablename__ = "tool_executions"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    tool_config_id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), nullable=False, index=True)
+    
+    # Execution info
+    execution_id = Column(String(255), nullable=False, index=True)  # execution ID (can be used to link to workflows, etc.)
+    status = Column(String(50), default=ExecutionStatus.PENDING.value, nullable=False, index=True)
+    
+    # Input / output
+    input_data = Column(JSON)  # input parameters
+    output_data = Column(JSON)  # output result
+    error_message = Column(Text)  # error message
+    
+    # Performance metrics
+    started_at = Column(DateTime, nullable=False, index=True)
+    completed_at = Column(DateTime)
+    execution_time = Column(Float)  # execution time (seconds)
+    
+    # Token usage (if applicable)
+    token_usage = Column(JSON)
+    
+    # User info
+    user_id = Column(UUID(as_uuid=True), ForeignKey("users.id"), index=True)
+    workspace_id = Column(UUID(as_uuid=True), ForeignKey("workspaces.id"), nullable=False, index=True)
+    
+    # Relationships
+    tool_config = relationship("ToolConfig", back_populates="executions")
+    user = relationship("User")
+    workspace = relationship("Workspace")
+    
+    def __repr__(self):
+        return f"<ToolExecution(id={self.id}, status={self.status}, tool={self.tool_config_id})>"
+
+
+# class ToolDependency(Base):
+#     """工具依赖关系模型"""
+#     __tablename__ = "tool_dependencies"
+#
+#     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+#     tool_id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), nullable=False)
+#     depends_on_tool_id = Column(UUID(as_uuid=True), ForeignKey("tool_configs.id"), nullable=False)
+#
+#     # 依赖类型和版本要求
+#     dependency_type = Column(String(50), default="required")  # required, optional
+#     version_constraint = Column(String(100))  # 版本约束，如 ">=1.0.0"
+#
+#     # 时间戳
+#     created_at = Column(DateTime, default=datetime.now, nullable=False)
+#
+#     # 关联关系
+#     tool = relationship("ToolConfig", foreign_keys=[tool_id])
+#     depends_on_tool = relationship("ToolConfig", foreign_keys=[depends_on_tool_id])
+#
+#     def __repr__(self):
+#         return f"<ToolDependency(tool={self.tool_id}, depends_on={self.depends_on_tool_id})>"
+
+
+# class PluginConfig(Base):
+#     """插件配置模型"""
+#     __tablename__ = "plugin_configs"
+#
+#     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+#     name = Column(String(255), nullable=False, unique=True, index=True)
+#     description = Column(Text)
+#
+#     # 插件信息
+#     plugin_path = Column(String(1000), nullable=False)  # 插件文件路径
+#     entry_point = Column(String(255), nullable=False)  # 入口点
+#     version = Column(String(50), default="1.0.0")
+#
+#     # 状态
+#     is_enabled = Column(Boolean, default=True, nullable=False)
+#     is_loaded = Column(Boolean, default=False, nullable=False)
+#     load_error = Column(Text)  # 加载错误信息
+#
+#     # 配置
+#     config_schema = Column(JSON)  # 配置schema
+#     config_data = Column(JSON, default=dict)  # 配置数据
+#
+#     # 依赖
+#     dependencies = Column(JSON, default=list)  # 依赖的其他插件
+#
+#     # 时间戳
+#     created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
+#     updated_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
+#     last_loaded_at = Column(DateTime)
+#
+#     def __repr__(self):
+#         return f"<PluginConfig(id={self.id}, name={self.name}, version={self.version})>"
--- a/api/app/models/workspace_model.py
+++ b/api/app/models/workspace_model.py
@@ -1,7 +1,7 @@
 import datetime
 from enum import StrEnum
 import uuid
-from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Boolean
+from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean
 from sqlalchemy.dialects.postgresql import UUID
 from sqlalchemy.orm import relationship
 from app.db import Base