Merge #21 into develop from feature/emotion-engine

feature/情绪引擎 * feature/emotion-engine: (7 commits squashed) - [feature]Emotion Engine Development - [feature]Emotion Engine Development - Merge branch 'feature/emotion-engine' of codeup.aliyun.com:redbearai/python/redbear-mem-open into feature/emotion-engine - [fix]1.Fix the front-end files;2.Cache Management Deletion;3.Delete "check_code.py" - [fix]1.Fix the front-end files;2.Cache Management Deletion;3.Delete "check_code.py" - Merge branch 'feature/emotion-engine' of codeup.aliyun.com:redbearai/python/redbear-mem-open into feature/emotion-engine - [fix]fix vite.config.ts Signed-off-by: 乐力齐 <accounts_690c7b0af9007d7e338af636@mail.teambition.com> Commented-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com> Commented-by: 乐力齐 <accounts_690c7b0af9007d7e338af636@mail.teambition.com> Reviewed-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com> Merged-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com> CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/21
2025-12-20 07:02:46 +00:00
parent 1f0bb1f8af
commit 1f4524c28c
23 changed files with 2453 additions and 67 deletions
--- a/api/app/core/memory/agent/langgraph_graph/write_graph.py
+++ b/api/app/core/memory/agent/langgraph_graph/write_graph.py
@@ -38,14 +38,53 @@ async def make_write_graph(user_id, tools, apply_id, group_id, config_id=None):
        messages = state["messages"]
        last_message = messages[-1]

-        result = await data_type_tool.ainvoke({
-            "context": last_message[1] if isinstance(last_message, tuple) else last_message.content
-        })
-        result=json.loads( result)
+        # 调用 Data_type_differentiation 工具
+        try:
+            raw_result = await data_type_tool.ainvoke({
+                "context": last_message[1] if isinstance(last_message, tuple) else last_message.content
+            })
+            
+            # MCP工具返回的是列表格式，需要提取内容
+            logger.debug(f"Data_type_differentiation raw result type: {type(raw_result)}, value: {raw_result}")
+            
+            # 处理不同的返回格式
+            if isinstance(raw_result, list) and len(raw_result) > 0:
+                # MCP工具返回格式: [{"type": "text", "text": "..."}]
+                result_text = raw_result[0].get("text", "{}") if isinstance(raw_result[0], dict) else str(raw_result[0])
+            elif isinstance(raw_result, str):
+                result_text = raw_result
+            else:
+                result_text = str(raw_result)
+            
+            # 解析JSON字符串
+            try:
+                result = json.loads(result_text)
+            except json.JSONDecodeError as je:
+                logger.error(f"Failed to parse result as JSON: {result_text}, error: {je}")
+                return {"messages": [AIMessage(content=json.dumps({
+                    "status": "error",
+                    "message": f"Invalid JSON response from Data_type_differentiation: {str(je)}"
+                }))]}
+            
+            # 检查是否有错误
+            if isinstance(result, dict) and result.get("type") == "error":
+                error_msg = result.get("message", "Unknown error in Data_type_differentiation")
+                logger.error(f"Data_type_differentiation 返回错误: {error_msg}")
+                return {"messages": [AIMessage(content=json.dumps({
+                    "status": "error",
+                    "message": error_msg
+                }))]}
+                
+        except Exception as e:
+            logger.error(f"调用 Data_type_differentiation 失败: {e}", exc_info=True)
+            return {"messages": [AIMessage(content=json.dumps({
+                "status": "error",
+                "message": f"Data type differentiation failed: {str(e)}"
+            }))]}

        # 调用 Data_write，传递 config_id
        write_params = {
-            "content": result["context"],
+            "content": result.get("context", last_message.content if hasattr(last_message, 'content') else str(last_message)),
            "apply_id": apply_id,
            "group_id": group_id,
            "user_id": user_id
@@ -56,14 +95,22 @@ async def make_write_graph(user_id, tools, apply_id, group_id, config_id=None):
            write_params["config_id"] = config_id
            logger.debug(f"传递 config_id 到 Data_write: {config_id}")
        
-        write_result = await data_write_tool.ainvoke(write_params)
+        try:
+            write_result = await data_write_tool.ainvoke(write_params)

-        if isinstance(write_result, dict):
-            content = write_result.get("data", str(write_result))
-        else:
-            content = str(write_result)
-        logger.info("写入内容: %s", content)
-        return {"messages": [AIMessage(content=content)]}
+            if isinstance(write_result, dict):
+                content = write_result.get("data", str(write_result))
+            else:
+                content = str(write_result)
+            logger.info("写入内容: %s", content)
+            return {"messages": [AIMessage(content=content)]}
+            
+        except Exception as e:
+            logger.error(f"调用 Data_write 失败: {e}", exc_info=True)
+            return {"messages": [AIMessage(content=json.dumps({
+                "status": "error",
+                "message": f"Data write failed: {str(e)}"
+            }))]}

    workflow = StateGraph(WriteState)
    workflow.add_node("content_input", call_model)
--- a/api/app/core/memory/agent/utils/write_tools.py
+++ b/api/app/core/memory/agent/utils/write_tools.py
@@ -39,6 +39,17 @@ async def write(content: str, user_id: str, apply_id: str, group_id: str, ref_id
        ref_id: 参考ID，默认为 "wyl20251027"
        config_id: 配置ID，用于标记数据处理配置
    """
+    # 如果提供了config_id，重新加载配置
+    if config_id:
+        from app.core.memory.utils.config.definitions import reload_configuration_from_database
+        logger.info(f"Reloading configuration for config_id: {config_id}")
+        config_loaded = reload_configuration_from_database(config_id)
+        if not config_loaded:
+            error_msg = f"Failed to load configuration for config_id: {config_id}"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+        logger.info(f"Configuration reloaded successfully for config_id: {config_id}")
+    
    logger.info("=== MemSci Knowledge Extraction Pipeline ===")
    logger.info(f"Using model: {config_defs.SELECTED_LLM_NAME}")
    logger.info(f"Using LLM ID: {config_defs.SELECTED_LLM_ID}")
--- a/api/app/core/memory/models/emotion_models.py
+++ b/api/app/core/memory/models/emotion_models.py
@@ -0,0 +1,85 @@
+"""Emotion extraction models for LLM structured output.
+
+This module contains Pydantic models for emotion extraction from statements,
+designed to be used with LLM structured output capabilities.
+
+Classes:
+    EmotionExtraction: Model for emotion extraction results from statements
+"""
+
+from pydantic import BaseModel, Field, field_validator
+from typing import List, Optional
+
+
+class EmotionExtraction(BaseModel):
+    """Emotion extraction result model for LLM structured output.
+    
+    This model represents the structured emotion information extracted from
+    a statement using LLM. It includes emotion type, intensity, keywords,
+    subject classification, and optional target.
+    
+    Attributes:
+        emotion_type: Type of emotion (joy/sadness/anger/fear/surprise/neutral)
+        emotion_intensity: Intensity of emotion (0.0-1.0)
+        emotion_keywords: List of emotion keywords from the statement (max 3)
+        emotion_subject: Subject of emotion (self/other/object)
+        emotion_target: Optional target of emotion (person or object name)
+    """
+    
+    emotion_type: str = Field(
+        ..., 
+        description="Emotion type: joy/sadness/anger/fear/surprise/neutral"
+    )
+    emotion_intensity: float = Field(
+        ..., 
+        ge=0.0, 
+        le=1.0,
+        description="Emotion intensity from 0.0 to 1.0"
+    )
+    emotion_keywords: List[str] = Field(
+        default_factory=list,
+        description="Emotion keywords extracted from the statement (max 3)"
+    )
+    emotion_subject: str = Field(
+        ...,
+        description="Emotion subject: self/other/object"
+    )
+    emotion_target: Optional[str] = Field(
+        None,
+        description="Emotion target: person or object name"
+    )
+    
+    @field_validator('emotion_type')
+    @classmethod
+    def validate_emotion_type(cls, v):
+        """Validate emotion type is one of the valid values."""
+        valid_types = ['joy', 'sadness', 'anger', 'fear', 'surprise', 'neutral']
+        if v not in valid_types:
+            raise ValueError(f"emotion_type must be one of {valid_types}, got {v}")
+        return v
+    
+    @field_validator('emotion_subject')
+    @classmethod
+    def validate_emotion_subject(cls, v):
+        """Validate emotion subject is one of the valid values."""
+        valid_subjects = ['self', 'other', 'object']
+        if v not in valid_subjects:
+            raise ValueError(f"emotion_subject must be one of {valid_subjects}, got {v}")
+        return v
+    
+    @field_validator('emotion_keywords')
+    @classmethod
+    def validate_emotion_keywords(cls, v):
+        """Validate and limit emotion keywords to max 3 items."""
+        if not isinstance(v, list):
+            return []
+        # Limit to max 3 keywords
+        return v[:3]
+    
+    @field_validator('emotion_intensity')
+    @classmethod
+    def validate_emotion_intensity(cls, v):
+        """Validate emotion intensity is within valid range."""
+        if not (0.0 <= v <= 1.0):
+            raise ValueError(f"emotion_intensity must be between 0.0 and 1.0, got {v}")
+        return v
--- a/api/app/core/memory/models/graph_models.py
+++ b/api/app/core/memory/models/graph_models.py
@@ -215,24 +215,58 @@ class StatementNode(Node):
    Attributes:
        chunk_id: ID of the parent chunk this statement belongs to
        stmt_type: Type of the statement (from ontology)
-        temporal_info: Temporal information extracted from the statement
        statement: The actual statement text content
-        connect_strength: Classification of connection strength ('Strong' or 'Weak')
+        emotion_intensity: Optional emotion intensity (0.0-1.0) - displayed on node
+        emotion_target: Optional emotion target (person or object name)
+        emotion_subject: Optional emotion subject (self/other/object)
+        emotion_type: Optional emotion type (joy/sadness/anger/fear/surprise/neutral)
+        emotion_keywords: Optional list of emotion keywords (max 3)
+        temporal_info: Temporal information extracted from the statement
        valid_at: Optional start date of temporal validity
        invalid_at: Optional end date of temporal validity
        statement_embedding: Optional embedding vector for the statement
        chunk_embedding: Optional embedding vector for the parent chunk
+        connect_strength: Classification of connection strength ('Strong' or 'Weak')
        config_id: Configuration ID used to process this statement
    """
+    # Core fields
    chunk_id: str = Field(..., description="ID of the parent chunk")
    stmt_type: str = Field(..., description="Type of the statement")
-    temporal_info: TemporalInfo = Field(..., description="Temporal information")
    statement: str = Field(..., description="The statement text content")
-    connect_strength: str = Field(..., description="Strong VS Weak classification of this statement")
+    
+    # Emotion fields (emotion_intensity comes first because it is the value displayed on the node)
+    emotion_intensity: Optional[float] = Field(
+        None, 
+        ge=0.0, 
+        le=1.0,
+        description="Emotion intensity: 0.0-1.0 (displayed on node)"
+    )
+    emotion_target: Optional[str] = Field(
+        None,
+        description="Emotion target: person or object name"
+    )
+    emotion_subject: Optional[str] = Field(
+        None,
+        description="Emotion subject: self/other/object"
+    )
+    emotion_type: Optional[str] = Field(
+        None, 
+        description="Emotion type: joy/sadness/anger/fear/surprise/neutral"
+    )
+    emotion_keywords: Optional[List[str]] = Field(
+        default_factory=list,
+        description="Emotion keywords list, max 3 items"
+    )
+    
+    # Temporal fields
+    temporal_info: TemporalInfo = Field(..., description="Temporal information")
    valid_at: Optional[datetime] = Field(None, description="Temporal validity start")
    invalid_at: Optional[datetime] = Field(None, description="Temporal validity end")
+    
+    # Embedding and other fields
    statement_embedding: Optional[List[float]] = Field(None, description="Statement embedding vector")
    chunk_embedding: Optional[List[float]] = Field(None, description="Chunk embedding vector")
+    connect_strength: str = Field(..., description="Strong VS Weak classification of this statement")
    config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this statement (integer or string)")
    
    @field_validator('valid_at', 'invalid_at', mode='before')
@@ -240,6 +274,39 @@ class StatementNode(Node):
    def validate_datetime(cls, v):
        """使用通用的历史日期解析函数"""
        return parse_historical_datetime(v)
+    
+    @field_validator('emotion_type', mode='before')
+    @classmethod
+    def validate_emotion_type(cls, v):
+        """Validate emotion type is one of the valid values"""
+        if v is None:
+            return v
+        valid_types = ['joy', 'sadness', 'anger', 'fear', 'surprise', 'neutral']
+        if v not in valid_types:
+            raise ValueError(f"emotion_type must be one of {valid_types}, got {v}")
+        return v
+    
+    @field_validator('emotion_subject', mode='before')
+    @classmethod
+    def validate_emotion_subject(cls, v):
+        """Validate emotion subject is one of the valid values"""
+        if v is None:
+            return v
+        valid_subjects = ['self', 'other', 'object']
+        if v not in valid_subjects:
+            raise ValueError(f"emotion_subject must be one of {valid_subjects}, got {v}")
+        return v
+    
+    @field_validator('emotion_keywords', mode='before')
+    @classmethod
+    def validate_emotion_keywords(cls, v):
+        """Coerce emotion keywords to a list (None or non-list becomes []) and truncate it to at most 3 items"""
+        if v is None:
+            return []
+        if not isinstance(v, list):
+            return []
+        # Limit to max 3 keywords
+        return v[:3]


 class ChunkNode(Node):
--- a/api/app/core/memory/models/message_models.py
+++ b/api/app/core/memory/models/message_models.py
@@ -64,6 +64,11 @@ class Statement(BaseModel):
        connect_strength: Optional connection strength ('Strong' or 'Weak')
        temporal_validity: Optional temporal validity range
        triplet_extraction_info: Optional triplet extraction results
+        emotion_type: Optional emotion type (joy/sadness/anger/fear/surprise/neutral)
+        emotion_intensity: Optional emotion intensity (0.0-1.0)
+        emotion_keywords: Optional list of emotion keywords
+        emotion_subject: Optional emotion subject (self/other/object)
+        emotion_target: Optional emotion target (person or object name)
    """
    id: str = Field(default_factory=lambda: uuid4().hex, description="A unique identifier for the statement.")
    chunk_id: str = Field(..., description="ID of the parent chunk this statement belongs to.")
@@ -80,6 +85,12 @@ class Statement(BaseModel):
    triplet_extraction_info: Optional[TripletExtractionResponse] = Field(
        None, description="The triplet extraction information of the statement."
    )
+    # Emotion fields
+    emotion_type: Optional[str] = Field(None, description="Emotion type: joy/sadness/anger/fear/surprise/neutral")
+    emotion_intensity: Optional[float] = Field(None, ge=0.0, le=1.0, description="Emotion intensity: 0.0-1.0")
+    emotion_keywords: Optional[List[str]] = Field(default_factory=list, description="Emotion keywords, max 3")
+    emotion_subject: Optional[str] = Field(None, description="Emotion subject: self/other/object")
+    emotion_target: Optional[str] = Field(None, description="Emotion target: person or object name")


 class ConversationContext(BaseModel):
--- a/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py
+++ b/api/app/core/memory/storage_services/extraction_engine/deduplication/entity_dedup_llm.py
@@ -480,7 +480,6 @@ async def llm_dedup_entities_iterative_blocks( # 迭代分块并发 LLM 去重
    - global_redirect: dict losing_id -> canonical_id accumulated across rounds
    - records: textual logs including per-round/per-block summaries and per-pair decisions
    """
-    import asyncio
    import random
    # 初始化全局日志和全局ID映射（存储所有轮次的结果）
    records: List[str] = []
--- a/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
+++ b/api/app/core/memory/storage_services/extraction_engine/extraction_orchestrator.py
@@ -35,7 +35,6 @@ from app.core.memory.models.graph_models import (
 from app.core.memory.utils.data.ontology import TemporalInfo
 from app.core.memory.models.variate_config import (
    ExtractionPipelineConfig,
-    StatementExtractionConfig,
 )
 from app.core.memory.llm_tools.openai_client import LLMClient
 from app.core.memory.llm_tools.openai_embedder import OpenAIEmbedderClient
@@ -53,7 +52,6 @@ from app.core.memory.storage_services.extraction_engine.knowledge_extraction.tem
 )
 from app.core.memory.storage_services.extraction_engine.knowledge_extraction.embedding_generation import (
    embedding_generation,
-    embedding_generation_all,
    generate_entity_embeddings_from_triplets,
 )
 from app.core.memory.storage_services.extraction_engine.deduplication.two_stage_dedup import (
@@ -179,24 +177,12 @@ class ExtractionOrchestrator:
                    all_statements_list.extend(chunk.statements)
            total_statements = len(all_statements_list)

-            # 🔥 陈述句提取完成后，立即发送知识抽取完成消息
-            if self.progress_callback:
-                extraction_stats = {
-                    "statements_count": total_statements,
-                    "entities_count": 0,  # 暂时为0，后续会更新
-                    "triplets_count": 0,  # 暂时为0，后续会更新
-                    "temporal_ranges_count": 0,  # 暂时为0，后续会更新
-                }
-                await self.progress_callback("knowledge_extraction_complete", "知识抽取完成", extraction_stats)
-                
-                # 🔥 立即发送下一阶段的开始消息，让前端知道进入了创建节点和边阶段
-                await self.progress_callback("creating_nodes_edges", "正在创建节点和边...")
-
-            # 步骤 2: 并行执行三元组提取、时间信息提取和基础嵌入生成（后台静默执行）
-            logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取和嵌入生成（后台静默执行）")
+            # 步骤 2: 并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成
+            logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取、情绪提取和嵌入生成")
            (
                triplet_maps,
                temporal_maps,
+                emotion_maps,
                statement_embedding_maps,
                chunk_embedding_maps,
                dialog_embeddings,
@@ -225,6 +211,7 @@ class ExtractionOrchestrator:
                dialog_data_list,
                temporal_maps,
                triplet_maps,
+                emotion_maps,
                statement_embedding_maps,
                chunk_embedding_maps,
                dialog_embeddings,
@@ -552,9 +539,108 @@ class ExtractionOrchestrator:

        return temporal_maps

+    async def _extract_emotions(
+        self, dialog_data_list: List[DialogData]
+    ) -> List[Dict[str, Any]]:
+        """
+        从对话中提取情绪信息（优化版：全局陈述句级并行）
+
+        Args:
+            dialog_data_list: 对话数据列表
+
+        Returns:
+            情绪信息映射列表，每个对话对应一个字典
+        """
+        logger.info("开始情绪信息提取（全局陈述句级并行）")
+
+        # 收集所有陈述句及其配置
+        all_statements = []
+        statement_metadata = []  # (dialog_idx, statement_id)
+        
+        # 获取第一个对话的config_id来加载配置
+        config_id = None
+        if dialog_data_list and hasattr(dialog_data_list[0], 'config_id'):
+            config_id = dialog_data_list[0].config_id
+        
+        # 加载DataConfig
+        data_config = None
+        if config_id:
+            try:
+                from app.db import SessionLocal
+                from app.repositories.data_config_repository import DataConfigRepository
+                
+                db = SessionLocal()
+                try:
+                    data_config = DataConfigRepository.get_by_id(db, config_id)
+                finally:
+                    db.close()
+                    
+                if data_config and not data_config.emotion_enabled:
+                    logger.info("情绪提取已在配置中禁用，跳过情绪提取")
+                    return [{} for _ in dialog_data_list]
+                    
+            except Exception as e:
+                logger.warning(f"加载DataConfig失败: {e}，将跳过情绪提取")
+                return [{} for _ in dialog_data_list]
+        else:
+            logger.info("未找到config_id，跳过情绪提取")
+            return [{} for _ in dialog_data_list]
+        
+        # 如果配置未启用情绪提取，直接返回空映射
+        if not data_config or not data_config.emotion_enabled:
+            logger.info("情绪提取未启用，跳过")
+            return [{} for _ in dialog_data_list]
+        
+        # 收集所有陈述句
+        for d_idx, dialog in enumerate(dialog_data_list):
+            for chunk in dialog.chunks:
+                for statement in chunk.statements:
+                    all_statements.append((statement, data_config))
+                    statement_metadata.append((d_idx, statement.id))
+
+        logger.info(f"收集到 {len(all_statements)} 个陈述句，开始全局并行提取情绪")
+
+        # 初始化情绪提取服务
+        from app.services.emotion_extraction_service import EmotionExtractionService
+        emotion_service = EmotionExtractionService(
+            llm_id=data_config.emotion_model_id if data_config.emotion_model_id else None
+        )
+
+        # 全局并行处理所有陈述句
+        async def extract_for_statement(stmt_data):
+            statement, config = stmt_data
+            try:
+                return await emotion_service.extract_emotion(statement.statement, config)
+            except Exception as e:
+                logger.error(f"陈述句 {statement.id} 情绪提取失败: {e}")
+                return None
+
+        tasks = [extract_for_statement(stmt_data) for stmt_data in all_statements]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # 将结果组织成对话级别的映射
+        emotion_maps = [{} for _ in dialog_data_list]
+        successful_extractions = 0
+        
+        for i, result in enumerate(results):
+            d_idx, stmt_id = statement_metadata[i]
+            if isinstance(result, Exception):
+                logger.error(f"陈述句处理异常: {result}")
+                emotion_maps[d_idx][stmt_id] = None
+            else:
+                emotion_maps[d_idx][stmt_id] = result
+                if result is not None:
+                    successful_extractions += 1
+
+        # 统计提取结果
+        logger.info(f"情绪信息提取完成，共成功提取 {successful_extractions}/{len(all_statements)} 个情绪")
+
+        return emotion_maps
+
    async def _parallel_extract_and_embed(
        self, dialog_data_list: List[DialogData]
    ) -> Tuple[
+        List[Dict[str, Any]],
        List[Dict[str, Any]],
        List[Dict[str, Any]],
        List[Dict[str, List[float]]],
@@ -562,35 +648,39 @@ class ExtractionOrchestrator:
        List[List[float]],
    ]:
        """
-        并行执行三元组提取、时间信息提取和基础嵌入生成
+        并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成

-        这三个任务都依赖陈述句提取的结果，但彼此独立，可以并行执行：
+        这四个任务都依赖陈述句提取的结果，但彼此独立，可以并行执行：
        - 三元组提取：从陈述句中提取实体和关系
        - 时间信息提取：从陈述句中提取时间范围
+        - 情绪提取：从陈述句中提取情绪信息
        - 嵌入生成：为陈述句、分块和对话生成向量（不依赖三元组）

        Args:
            dialog_data_list: 对话数据列表

        Returns:
-            五个列表的元组：
+            六个列表的元组：
            - 三元组映射列表
            - 时间信息映射列表
+            - 情绪映射列表
            - 陈述句嵌入映射列表
            - 分块嵌入映射列表
            - 对话嵌入列表
        """
-        logger.info("并行执行：三元组提取 + 时间信息提取 + 基础嵌入生成")
+        logger.info("并行执行：三元组提取 + 时间信息提取 + 情绪提取 + 基础嵌入生成")

-        # 创建三个并行任务
+        # 创建四个并行任务
        triplet_task = self._extract_triplets(dialog_data_list)
        temporal_task = self._extract_temporal(dialog_data_list)
+        emotion_task = self._extract_emotions(dialog_data_list)
        embedding_task = self._generate_basic_embeddings(dialog_data_list)

        # 并行执行
        results = await asyncio.gather(
            triplet_task,
            temporal_task,
+            emotion_task,
            embedding_task,
            return_exceptions=True
        )
@@ -598,19 +688,21 @@ class ExtractionOrchestrator:
        # 解包结果
        triplet_maps = results[0] if not isinstance(results[0], Exception) else [{} for _ in dialog_data_list]
        temporal_maps = results[1] if not isinstance(results[1], Exception) else [{} for _ in dialog_data_list]
+        emotion_maps = results[2] if not isinstance(results[2], Exception) else [{} for _ in dialog_data_list]
        
-        if isinstance(results[2], Exception):
-            logger.error(f"基础嵌入生成失败: {results[2]}")
+        if isinstance(results[3], Exception):
+            logger.error(f"基础嵌入生成失败: {results[3]}")
            statement_embedding_maps = [{} for _ in dialog_data_list]
            chunk_embedding_maps = [{} for _ in dialog_data_list]
            dialog_embeddings = [[] for _ in dialog_data_list]
        else:
-            statement_embedding_maps, chunk_embedding_maps, dialog_embeddings = results[2]
+            statement_embedding_maps, chunk_embedding_maps, dialog_embeddings = results[3]

        logger.info("并行任务执行完成")
        return (
            triplet_maps,
            temporal_maps,
+            emotion_maps,
            statement_embedding_maps,
            chunk_embedding_maps,
            dialog_embeddings,
@@ -727,6 +819,7 @@ class ExtractionOrchestrator:
        dialog_data_list: List[DialogData],
        temporal_maps: List[Dict[str, Any]],
        triplet_maps: List[Dict[str, Any]],
+        emotion_maps: List[Dict[str, Any]],
        statement_embedding_maps: List[Dict[str, List[float]]],
        chunk_embedding_maps: List[Dict[str, List[float]]],
        dialog_embeddings: List[List[float]],
@@ -738,6 +831,7 @@ class ExtractionOrchestrator:
            dialog_data_list: 对话数据列表
            temporal_maps: 时间信息映射列表
            triplet_maps: 三元组映射列表
+            emotion_maps: 情绪信息映射列表
            statement_embedding_maps: 陈述句嵌入映射列表
            chunk_embedding_maps: 分块嵌入映射列表
            dialog_embeddings: 对话嵌入列表
@@ -752,6 +846,7 @@ class ExtractionOrchestrator:
        if (
            len(temporal_maps) != expected_length
            or len(triplet_maps) != expected_length
+            or len(emotion_maps) != expected_length
            or len(statement_embedding_maps) != expected_length
            or len(chunk_embedding_maps) != expected_length
            or len(dialog_embeddings) != expected_length
@@ -759,6 +854,7 @@ class ExtractionOrchestrator:
            logger.warning(
                f"数据大小不匹配 - 对话: {len(dialog_data_list)}, "
                f"时间映射: {len(temporal_maps)}, 三元组映射: {len(triplet_maps)}, "
+                f"情绪映射: {len(emotion_maps)}, "
                f"陈述句嵌入: {len(statement_embedding_maps)}, "
                f"分块嵌入: {len(chunk_embedding_maps)}, "
                f"对话嵌入: {len(dialog_embeddings)}"
@@ -767,6 +863,7 @@ class ExtractionOrchestrator:
        total_statements = 0
        assigned_temporal = 0
        assigned_triplets = 0
+        assigned_emotions = 0
        assigned_statement_embeddings = 0
        assigned_chunk_embeddings = 0
        assigned_dialog_embeddings = 0
@@ -774,12 +871,13 @@ class ExtractionOrchestrator:
        # 处理每个对话
        for i, dialog_data in enumerate(dialog_data_list):
            # 检查是否有缺失的数据
-            if i >= len(temporal_maps) or i >= len(triplet_maps):
+            if i >= len(temporal_maps) or i >= len(triplet_maps) or i >= len(emotion_maps):
                logger.warning(f"对话 {dialog_data.id} 缺少提取数据，跳过赋值")
                continue

            temporal_map = temporal_maps[i]
            triplet_map = triplet_maps[i]
+            emotion_map = emotion_maps[i]
            statement_embedding_map = statement_embedding_maps[i] if i < len(statement_embedding_maps) else {}
            chunk_embedding_map = chunk_embedding_maps[i] if i < len(chunk_embedding_maps) else {}
            dialog_embedding = dialog_embeddings[i] if i < len(dialog_embeddings) else []
@@ -810,6 +908,18 @@ class ExtractionOrchestrator:
                        statement.triplet_extraction_info = triplet_map[statement.id]
                        assigned_triplets += 1

+                    # 赋值情绪信息
+                    if statement.id in emotion_map:
+                        emotion_data = emotion_map[statement.id]
+                        if emotion_data is not None:
+                            # 将EmotionExtraction对象的字段赋值到Statement
+                            statement.emotion_type = emotion_data.emotion_type
+                            statement.emotion_intensity = emotion_data.emotion_intensity
+                            statement.emotion_keywords = emotion_data.emotion_keywords
+                            statement.emotion_subject = emotion_data.emotion_subject
+                            statement.emotion_target = emotion_data.emotion_target
+                            assigned_emotions += 1
+
                    # 赋值陈述句嵌入
                    if statement.id in statement_embedding_map:
                        statement.statement_embedding = statement_embedding_map[statement.id]
@@ -818,6 +928,7 @@ class ExtractionOrchestrator:
        logger.info(
            f"数据赋值完成 - 总陈述句: {total_statements}, "
            f"时间信息: {assigned_temporal}, 三元组: {assigned_triplets}, "
+            f"情绪信息: {assigned_emotions}, "
            f"陈述句嵌入: {assigned_statement_embeddings}, "
            f"分块嵌入: {assigned_chunk_embeddings}, "
            f"对话嵌入: {assigned_dialog_embeddings}"
@@ -927,6 +1038,12 @@ class ExtractionOrchestrator:
                        created_at=dialog_data.created_at,
                        expired_at=dialog_data.expired_at,
                        config_id=dialog_data.config_id if hasattr(dialog_data, 'config_id') else None,
+                        # Emotion fields
+                        emotion_type=getattr(statement, 'emotion_type', None),
+                        emotion_intensity=getattr(statement, 'emotion_intensity', None),
+                        emotion_keywords=getattr(statement, 'emotion_keywords', None),
+                        emotion_subject=getattr(statement, 'emotion_subject', None),
+                        emotion_target=getattr(statement, 'emotion_target', None),
                    )
                    statement_nodes.append(statement_node)

@@ -1333,7 +1450,7 @@ class ExtractionOrchestrator:
                        if match:
                            entity1_name = match.group(1).strip()
                            entity1_type = match.group(2)
-                            entity2_name = match.group(3).strip()
+                            match.group(3).strip()
                            entity2_type = match.group(4)
                            
                            # 提取置信度和原因
@@ -1646,7 +1763,6 @@ async def get_chunked_dialogs(
    """
    import json
    import re
-    import os
    
    # 加载测试数据
    testdata_path = os.path.join(os.path.dirname(__file__), "../../data", "testdata.json")
@@ -1822,7 +1938,6 @@ async def get_chunked_dialogs_with_preprocessing(
    Returns:
        带 chunks 的 DialogData 列表
    """
-    import os
    print("\n=== 完整数据处理流程（包含预处理）===")

    if input_data_path is None:
--- a/api/app/core/memory/utils/config/overrides.py
+++ b/api/app/core/memory/utils/config/overrides.py
@@ -28,7 +28,6 @@
 """
 import os
 import json
-import socket
 from typing import Optional, Dict, Any, Literal

 NetworkMode = Literal['internal', 'external']
@@ -105,7 +104,6 @@ def _make_pgsql_conn() -> Optional[object]:

    try:
        import psycopg2  # type: ignore
-        from psycopg2.extras import RealDictCursor  # type: ignore

        port = int(port_str) if port_str else 5432
        conn = psycopg2.connect(
@@ -193,7 +191,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
        # config_id 在数据库中是 Integer 类型，需要转换
        try:
            config_id_int = int(config_id)
-        except (ValueError, TypeError) as e:
+        except (ValueError, TypeError):
            try:
                pass
            except Exception:
@@ -207,7 +205,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
            "       statement_granularity, include_dialogue_context, max_context, "
            "       \"offset\" AS offset, lambda_time, lambda_mem, "
            "       pruning_enabled, pruning_scene, pruning_threshold, "
-            "       llm_id, embedding_id "
+            "       llm_id, embedding_id, rerank_id "
            "FROM data_config WHERE config_id = %s LIMIT 1"
        )
        cur.execute(sql, (config_id_int,))
@@ -222,7 +220,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
            pass
        
        return row if row else None
-    except Exception as e:
+    except Exception:
        pass
        return None
    finally:
@@ -325,7 +323,7 @@ def _apply_overrides_from_db_row(
            _set_if_present(selections, tk, db_row, tk, str)
        
        # 特殊处理 UUID 字段，确保转换为字符串格式
-        for uuid_field in ("llm_id", "embedding_id"):
+        for uuid_field in ("llm_id", "embedding_id", "rerank_id"):
            if uuid_field in db_row and db_row.get(uuid_field) is not None:
                try:
                    value = db_row.get(uuid_field)
@@ -370,7 +368,7 @@ def _apply_overrides_from_db_row(
            pass

        return runtime_cfg
-    except Exception as e:
+    except Exception:
        pass
        return runtime_cfg

@@ -460,7 +458,7 @@ def apply_runtime_overrides_with_config_id(
        
        updated_cfg = _apply_overrides_from_db_row(runtime_cfg, db_row, selected_cid, "config_id")
        return updated_cfg, True
-    except Exception as e:
+    except Exception:
        pass
        return runtime_cfg, False

@@ -570,7 +568,7 @@ def load_unified_config(
        try:
            with open(runtime_config_path, "r", encoding="utf-8") as f:
                runtime_cfg = json.load(f)
-        except (FileNotFoundError, json.JSONDecodeError) as e:
+        except (FileNotFoundError, json.JSONDecodeError):
            runtime_cfg = {"selections": {}}
        
        # 步骤 2: 尝试从 dbrun.json 读取 config_id 并应用数据库配置（最高优先级）
@@ -603,7 +601,7 @@ def load_unified_config(
                        pass
        return runtime_cfg
        
-    except Exception as e:
+    except Exception:
        return {"selections": {}}


--- a/api/app/core/memory/utils/prompt/prompt_utils.py
+++ b/api/app/core/memory/utils/prompt/prompt_utils.py
@@ -238,3 +238,81 @@ async def render_memory_summary_prompt(
        'json_schema': 'MemorySummaryResponse.schema'
    })
    return rendered_prompt
+
+async def render_emotion_extraction_prompt(
+    statement: str,
+    extract_keywords: bool,
+    enable_subject: bool
+) -> str:
+    """
+    Build the emotion-extraction prompt from the extract_emotion.jinja2 template.
+
+    Args:
+        statement: Text of the statement whose emotion should be analysed
+        extract_keywords: Passed to the template to toggle keyword extraction
+        enable_subject: Passed to the template to toggle subject/target classification
+
+    Returns:
+        The rendered prompt text
+    """
+    template_name = "extract_emotion.jinja2"
+    # Variables exposed to the template.
+    render_context = {
+        'statement': statement,
+        'extract_keywords': extract_keywords,
+        'enable_subject': enable_subject,
+    }
+    template = prompt_env.get_template(template_name)
+    prompt_text = template.render(**render_context)
+
+    # Write the rendered prompt to the prompt log.
+    log_prompt_rendering('emotion extraction', prompt_text)
+    # The statement text is not passed to this log call; the placeholder 'str' is recorded instead.
+    logged_context = {**render_context, 'statement': 'str'}
+    log_template_rendering(template_name, logged_context)
+
+    return prompt_text
+
+async def render_emotion_suggestions_prompt(
+    health_data: dict,
+    patterns: dict,
+    user_profile: dict
+) -> str:
+    """
+    Build the emotion-suggestions prompt from the generate_emotion_suggestions.jinja2 template.
+
+    Args:
+        health_data: Emotion health data; its 'emotion_distribution' entry is pre-serialized to JSON
+        patterns: Emotion pattern analysis results
+        user_profile: User profile data
+
+    Returns:
+        The rendered prompt text
+    """
+    import json
+
+    template_name = "generate_emotion_suggestions.jinja2"
+    # Serialize emotion_distribution up front so the template can embed it as a JSON string.
+    distribution = health_data.get('emotion_distribution', {})
+    distribution_json = json.dumps(distribution, ensure_ascii=False, indent=2)
+
+    template = prompt_env.get_template(template_name)
+    render_context = {
+        'health_data': health_data,
+        'patterns': patterns,
+        'user_profile': user_profile,
+        'emotion_distribution_json': distribution_json,
+    }
+    prompt_text = template.render(**render_context)
+
+    # Write the rendered prompt to the prompt log.
+    log_prompt_rendering('emotion suggestions', prompt_text)
+    # Log a short summary of the inputs alongside the template name.
+    logged_summary = {
+        'health_score': health_data.get('health_score'),
+        'health_level': health_data.get('level'),
+        'user_interests': user_profile.get('interests', []),
+    }
+    log_template_rendering(template_name, logged_summary)
+
+    return prompt_text
--- a/api/app/core/memory/utils/prompt/prompts/extract_emotion.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/extract_emotion.jinja2
@@ -0,0 +1,57 @@
+你是一个专业的情绪分析专家。请分析以下陈述句的情绪信息。
+
+陈述句：{{ statement }}
+
+请提取以下信息：
+
+1. emotion_type（情绪类型）：
+   - joy: 喜悦、开心、高兴、满意、愉快
+   - sadness: 悲伤、难过、失落、沮丧、遗憾
+   - anger: 愤怒、生气、不满、恼火、烦躁
+   - fear: 恐惧、害怕、担心、焦虑、紧张
+   - surprise: 惊讶、意外、震惊、吃惊
+   - neutral: 中性、客观陈述、无明显情绪
+
+2. emotion_intensity（情绪强度）：
+   - 0.0-0.3: 弱情绪
+   - 0.3-0.7: 中等情绪
+   - 0.7-1.0: 强情绪
+
+{% if extract_keywords %}
+3. emotion_keywords（情绪关键词）：
+   - 原句中直接表达情绪的词语
+   - 最多提取3个关键词
+   - 如果没有明显的情绪词，返回空列表
+{% else %}
+3. emotion_keywords（情绪关键词）：
+   - 返回空列表
+{% endif %}
+
+{% if enable_subject %}
+4. emotion_subject（情绪主体）：
+   - self: 用户本人的情绪（包含"我"、"我们"、"咱们"等第一人称）
+   - other: 他人的情绪（包含人名、"他/她"等第三人称）
+   - object: 对事物的评价（针对产品、地点、事件等）
+   
+   注意：
+   - 如果同时包含多个主体，优先识别用户本人（self）
+   - 如果无法明确判断主体，默认为 self
+
+5. emotion_target（情绪对象）：
+   - 如果有明确的情绪对象，提取其名称
+   - 如果没有明确对象，返回 null
+{% else %}
+4. emotion_subject（情绪主体）：
+   - 默认为 self
+
+5. emotion_target（情绪对象）：
+   - 返回 null
+{% endif %}
+
+注意事项：
+- 如果陈述句是客观事实陈述，无明显情绪，标记为 neutral
+- 情绪强度要符合语境，不要过度解读
+- 情绪关键词要准确，不要添加原句中没有的词
+- 主体分类要准确，优先识别用户本人（self）
+
+请以 JSON 格式返回结果。
--- a/api/app/core/memory/utils/prompt/prompts/generate_emotion_suggestions.jinja2
+++ b/api/app/core/memory/utils/prompt/prompts/generate_emotion_suggestions.jinja2
@@ -0,0 +1,63 @@
+你是一位专业的心理健康顾问。请根据以下用户的情绪健康数据和个人信息，生成3-5条个性化的情绪改善建议。
+
+## 用户情绪健康数据
+
+健康分数：{{ health_data.health_score }}/100
+健康等级：{{ health_data.level }}
+
+维度分析：
+- 积极率：{{ health_data.dimensions.positivity_rate.score }}/100
+  - 正面情绪：{{ health_data.dimensions.positivity_rate.positive_count }}次
+  - 负面情绪：{{ health_data.dimensions.positivity_rate.negative_count }}次
+  - 中性情绪：{{ health_data.dimensions.positivity_rate.neutral_count }}次
+
+- 稳定性：{{ health_data.dimensions.stability.score }}/100
+  - 标准差：{{ health_data.dimensions.stability.std_deviation }}
+
+- 恢复力：{{ health_data.dimensions.resilience.score }}/100
+  - 恢复率：{{ health_data.dimensions.resilience.recovery_rate }}
+
+情绪分布：
+{{ emotion_distribution_json }}
+
+## 情绪模式分析
+
+主要负面情绪：{{ patterns.dominant_negative_emotion|default('无') }}
+情绪波动性：{{ patterns.emotion_volatility|default('未知') }}
+高强度情绪次数：{{ patterns.high_intensity_emotions|default([])|length }}
+
+## 用户兴趣
+
+{{ user_profile.interests|default(['未知'])|join(', ') }}
+
+## 任务要求
+
+请生成3-5条个性化建议，每条建议包含：
+1. type: 建议类型（emotion_balance/activity_recommendation/social_connection/stress_management）
+2. title: 建议标题（简短有力）
+3. content: 建议内容（详细说明，50-100字）
+4. priority: 优先级（high/medium/low）
+5. actionable_steps: 3个可执行的具体步骤
+
+同时提供一个health_summary（不超过50字），概括用户的整体情绪状态。
+
+请以JSON格式返回，格式如下：
+{
+  "health_summary": "您的情绪健康状况...",
+  "suggestions": [
+    {
+      "type": "emotion_balance",
+      "title": "建议标题",
+      "content": "建议内容...",
+      "priority": "high",
+      "actionable_steps": ["步骤1", "步骤2", "步骤3"]
+    }
+  ]
+}
+
+注意事项：
+- 建议要具体、可执行，避免空泛
+- 结合用户的兴趣爱好提供个性化建议
+- 针对主要问题（如主要负面情绪）提供针对性建议
+- 优先级要合理分配（至少1个high，1-2个medium，其余low）
+- 每个建议的3个步骤要循序渐进、易于实施