Merge #21 into develop from feature/emotion-engine

feature/情绪引擎

* feature/emotion-engine: (7 commits squashed)

  - [feature]Emotion Engine Development

  - [feature]Emotion Engine Development

  - Merge branch 'feature/emotion-engine' of codeup.aliyun.com:redbearai/python/redbear-mem-open into feature/emotion-engine

  - [fix]1.Fix the front-end files;2.Cache Management Deletion;3.Delete "check_code.py"

  - [fix]1.Fix the front-end files;2.Cache Management Deletion;3.Delete "check_code.py"

  - Merge branch 'feature/emotion-engine' of codeup.aliyun.com:redbearai/python/redbear-mem-open into feature/emotion-engine

  - [fix]fix vite.config.ts

Signed-off-by: 乐力齐 <accounts_690c7b0af9007d7e338af636@mail.teambition.com>
Commented-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com>
Commented-by: 乐力齐 <accounts_690c7b0af9007d7e338af636@mail.teambition.com>
Reviewed-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com>
Merged-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com>

CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/21
This commit is contained in:
乐力齐
2025-12-20 07:02:46 +00:00
committed by 孙科
parent 1f0bb1f8af
commit 1f4524c28c
23 changed files with 2453 additions and 67 deletions

View File

@@ -38,14 +38,53 @@ async def make_write_graph(user_id, tools, apply_id, group_id, config_id=None):
messages = state["messages"]
last_message = messages[-1]
result = await data_type_tool.ainvoke({
"context": last_message[1] if isinstance(last_message, tuple) else last_message.content
})
result=json.loads( result)
# 调用 Data_type_differentiation 工具
try:
raw_result = await data_type_tool.ainvoke({
"context": last_message[1] if isinstance(last_message, tuple) else last_message.content
})
# MCP工具返回的是列表格式需要提取内容
logger.debug(f"Data_type_differentiation raw result type: {type(raw_result)}, value: {raw_result}")
# 处理不同的返回格式
if isinstance(raw_result, list) and len(raw_result) > 0:
# MCP工具返回格式: [{"type": "text", "text": "..."}]
result_text = raw_result[0].get("text", "{}") if isinstance(raw_result[0], dict) else str(raw_result[0])
elif isinstance(raw_result, str):
result_text = raw_result
else:
result_text = str(raw_result)
# 解析JSON字符串
try:
result = json.loads(result_text)
except json.JSONDecodeError as je:
logger.error(f"Failed to parse result as JSON: {result_text}, error: {je}")
return {"messages": [AIMessage(content=json.dumps({
"status": "error",
"message": f"Invalid JSON response from Data_type_differentiation: {str(je)}"
}))]}
# 检查是否有错误
if isinstance(result, dict) and result.get("type") == "error":
error_msg = result.get("message", "Unknown error in Data_type_differentiation")
logger.error(f"Data_type_differentiation 返回错误: {error_msg}")
return {"messages": [AIMessage(content=json.dumps({
"status": "error",
"message": error_msg
}))]}
except Exception as e:
logger.error(f"调用 Data_type_differentiation 失败: {e}", exc_info=True)
return {"messages": [AIMessage(content=json.dumps({
"status": "error",
"message": f"Data type differentiation failed: {str(e)}"
}))]}
# 调用 Data_write传递 config_id
write_params = {
"content": result["context"],
"content": result.get("context", last_message.content if hasattr(last_message, 'content') else str(last_message)),
"apply_id": apply_id,
"group_id": group_id,
"user_id": user_id
@@ -56,14 +95,22 @@ async def make_write_graph(user_id, tools, apply_id, group_id, config_id=None):
write_params["config_id"] = config_id
logger.debug(f"传递 config_id 到 Data_write: {config_id}")
write_result = await data_write_tool.ainvoke(write_params)
try:
write_result = await data_write_tool.ainvoke(write_params)
if isinstance(write_result, dict):
content = write_result.get("data", str(write_result))
else:
content = str(write_result)
logger.info("写入内容: %s", content)
return {"messages": [AIMessage(content=content)]}
if isinstance(write_result, dict):
content = write_result.get("data", str(write_result))
else:
content = str(write_result)
logger.info("写入内容: %s", content)
return {"messages": [AIMessage(content=content)]}
except Exception as e:
logger.error(f"调用 Data_write 失败: {e}", exc_info=True)
return {"messages": [AIMessage(content=json.dumps({
"status": "error",
"message": f"Data write failed: {str(e)}"
}))]}
workflow = StateGraph(WriteState)
workflow.add_node("content_input", call_model)

View File

@@ -39,6 +39,17 @@ async def write(content: str, user_id: str, apply_id: str, group_id: str, ref_id
ref_id: 参考ID默认为 "wyl20251027"
config_id: 配置ID用于标记数据处理配置
"""
# 如果提供了config_id重新加载配置
if config_id:
from app.core.memory.utils.config.definitions import reload_configuration_from_database
logger.info(f"Reloading configuration for config_id: {config_id}")
config_loaded = reload_configuration_from_database(config_id)
if not config_loaded:
error_msg = f"Failed to load configuration for config_id: {config_id}"
logger.error(error_msg)
raise ValueError(error_msg)
logger.info(f"Configuration reloaded successfully for config_id: {config_id}")
logger.info("=== MemSci Knowledge Extraction Pipeline ===")
logger.info(f"Using model: {config_defs.SELECTED_LLM_NAME}")
logger.info(f"Using LLM ID: {config_defs.SELECTED_LLM_ID}")

View File

@@ -0,0 +1,85 @@
"""Emotion extraction models for LLM structured output.
This module contains Pydantic models for emotion extraction from statements,
designed to be used with LLM structured output capabilities.
Classes:
EmotionExtraction: Model for emotion extraction results from statements
"""
from pydantic import BaseModel, Field, field_validator
from typing import List, Optional
class EmotionExtraction(BaseModel):
    """Structured emotion-extraction result produced by an LLM for one statement.

    Attributes:
        emotion_type: One of joy/sadness/anger/fear/surprise/neutral.
        emotion_intensity: Strength of the emotion, constrained to 0.0-1.0.
        emotion_keywords: Emotion keywords taken from the statement; at most
            three are kept, extra items are silently dropped.
        emotion_subject: One of self/other/object.
        emotion_target: Optional name of the person or object the emotion
            is directed at.
    """

    emotion_type: str = Field(
        ...,
        description="Emotion type: joy/sadness/anger/fear/surprise/neutral"
    )
    emotion_intensity: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Emotion intensity from 0.0 to 1.0"
    )
    emotion_keywords: List[str] = Field(
        default_factory=list,
        description="Emotion keywords extracted from the statement (max 3)"
    )
    emotion_subject: str = Field(
        ...,
        description="Emotion subject: self/other/object"
    )
    emotion_target: Optional[str] = Field(
        None,
        description="Emotion target: person or object name"
    )

    @field_validator('emotion_type')
    @classmethod
    def validate_emotion_type(cls, v):
        """Reject any emotion type outside the fixed set of six labels."""
        valid_types = ['joy', 'sadness', 'anger', 'fear', 'surprise', 'neutral']
        if v not in valid_types:
            raise ValueError(f"emotion_type must be one of {valid_types}, got {v}")
        return v

    @field_validator('emotion_subject')
    @classmethod
    def validate_emotion_subject(cls, v):
        """Reject any emotion subject other than self/other/object."""
        valid_subjects = ['self', 'other', 'object']
        if v not in valid_subjects:
            raise ValueError(f"emotion_subject must be one of {valid_subjects}, got {v}")
        return v

    @field_validator('emotion_keywords', mode='before')
    @classmethod
    def coerce_null_emotion_keywords(cls, v):
        """Map an explicit null to an empty keyword list.

        This runs before type validation, so a payload carrying
        ``"emotion_keywords": null`` no longer invalidates the whole
        extraction. Every other value is passed through untouched and is
        still checked against ``List[str]`` afterwards.
        """
        return [] if v is None else v

    @field_validator('emotion_keywords')
    @classmethod
    def validate_emotion_keywords(cls, v):
        """Keep at most the first three keywords.

        This runs after type validation, so ``v`` is already a list of
        strings here and no type fallback is needed.
        """
        return v[:3]

    @field_validator('emotion_intensity')
    @classmethod
    def validate_emotion_intensity(cls, v):
        """Re-check the 0.0-1.0 range with an explicit error message.

        The ``ge``/``le`` constraints on the field enforce the same bounds;
        this validator is kept so the model's validator set is unchanged.
        """
        if not (0.0 <= v <= 1.0):
            raise ValueError(f"emotion_intensity must be between 0.0 and 1.0, got {v}")
        return v

View File

@@ -215,24 +215,58 @@ class StatementNode(Node):
Attributes:
chunk_id: ID of the parent chunk this statement belongs to
stmt_type: Type of the statement (from ontology)
temporal_info: Temporal information extracted from the statement
statement: The actual statement text content
connect_strength: Classification of connection strength ('Strong' or 'Weak')
emotion_intensity: Optional emotion intensity (0.0-1.0) - displayed on node
emotion_target: Optional emotion target (person or object name)
emotion_subject: Optional emotion subject (self/other/object)
emotion_type: Optional emotion type (joy/sadness/anger/fear/surprise/neutral)
emotion_keywords: Optional list of emotion keywords (max 3)
temporal_info: Temporal information extracted from the statement
valid_at: Optional start date of temporal validity
invalid_at: Optional end date of temporal validity
statement_embedding: Optional embedding vector for the statement
chunk_embedding: Optional embedding vector for the parent chunk
connect_strength: Classification of connection strength ('Strong' or 'Weak')
config_id: Configuration ID used to process this statement
"""
# Core fields (ordered as requested)
chunk_id: str = Field(..., description="ID of the parent chunk")
stmt_type: str = Field(..., description="Type of the statement")
temporal_info: TemporalInfo = Field(..., description="Temporal information")
statement: str = Field(..., description="The statement text content")
connect_strength: str = Field(..., description="Strong VS Weak classification of this statement")
# Emotion fields (ordered as requested, emotion_intensity first for display)
emotion_intensity: Optional[float] = Field(
None,
ge=0.0,
le=1.0,
description="Emotion intensity: 0.0-1.0 (displayed on node)"
)
emotion_target: Optional[str] = Field(
None,
description="Emotion target: person or object name"
)
emotion_subject: Optional[str] = Field(
None,
description="Emotion subject: self/other/object"
)
emotion_type: Optional[str] = Field(
None,
description="Emotion type: joy/sadness/anger/fear/surprise/neutral"
)
emotion_keywords: Optional[List[str]] = Field(
default_factory=list,
description="Emotion keywords list, max 3 items"
)
# Temporal fields
temporal_info: TemporalInfo = Field(..., description="Temporal information")
valid_at: Optional[datetime] = Field(None, description="Temporal validity start")
invalid_at: Optional[datetime] = Field(None, description="Temporal validity end")
# Embedding and other fields
statement_embedding: Optional[List[float]] = Field(None, description="Statement embedding vector")
chunk_embedding: Optional[List[float]] = Field(None, description="Chunk embedding vector")
connect_strength: str = Field(..., description="Strong VS Weak classification of this statement")
config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this statement (integer or string)")
@field_validator('valid_at', 'invalid_at', mode='before')
@@ -240,6 +274,39 @@ class StatementNode(Node):
def validate_datetime(cls, v):
"""使用通用的历史日期解析函数"""
return parse_historical_datetime(v)
@field_validator('emotion_type', mode='before')
@classmethod
def validate_emotion_type(cls, v):
    """Accept None or one of the six known emotion labels; reject anything else."""
    valid_types = ['joy', 'sadness', 'anger', 'fear', 'surprise', 'neutral']
    # None means "no emotion recorded" and is passed through unchanged.
    if v is None or v in valid_types:
        return v
    raise ValueError(f"emotion_type must be one of {valid_types}, got {v}")
@field_validator('emotion_subject', mode='before')
@classmethod
def validate_emotion_subject(cls, v):
    """Accept None or one of self/other/object; reject anything else."""
    valid_subjects = ['self', 'other', 'object']
    # None means "no subject recorded" and is passed through unchanged.
    if v is None or v in valid_subjects:
        return v
    raise ValueError(f"emotion_subject must be one of {valid_subjects}, got {v}")
@field_validator('emotion_keywords', mode='before')
@classmethod
def validate_emotion_keywords(cls, v):
    """Normalise the keyword list: non-list input (including None) becomes [],
    and a list is cut down to its first three items."""
    return v[:3] if isinstance(v, list) else []
class ChunkNode(Node):

View File

@@ -64,6 +64,11 @@ class Statement(BaseModel):
connect_strength: Optional connection strength ('Strong' or 'Weak')
temporal_validity: Optional temporal validity range
triplet_extraction_info: Optional triplet extraction results
emotion_type: Optional emotion type (joy/sadness/anger/fear/surprise/neutral)
emotion_intensity: Optional emotion intensity (0.0-1.0)
emotion_keywords: Optional list of emotion keywords
emotion_subject: Optional emotion subject (self/other/object)
emotion_target: Optional emotion target (person or object name)
"""
id: str = Field(default_factory=lambda: uuid4().hex, description="A unique identifier for the statement.")
chunk_id: str = Field(..., description="ID of the parent chunk this statement belongs to.")
@@ -80,6 +85,12 @@ class Statement(BaseModel):
triplet_extraction_info: Optional[TripletExtractionResponse] = Field(
None, description="The triplet extraction information of the statement."
)
# Emotion fields
emotion_type: Optional[str] = Field(None, description="Emotion type: joy/sadness/anger/fear/surprise/neutral")
emotion_intensity: Optional[float] = Field(None, ge=0.0, le=1.0, description="Emotion intensity: 0.0-1.0")
emotion_keywords: Optional[List[str]] = Field(default_factory=list, description="Emotion keywords, max 3")
emotion_subject: Optional[str] = Field(None, description="Emotion subject: self/other/object")
emotion_target: Optional[str] = Field(None, description="Emotion target: person or object name")
class ConversationContext(BaseModel):

View File

@@ -480,7 +480,6 @@ async def llm_dedup_entities_iterative_blocks( # 迭代分块并发 LLM 去重
- global_redirect: dict losing_id -> canonical_id accumulated across rounds
- records: textual logs including per-round/per-block summaries and per-pair decisions
"""
import asyncio
import random
# 初始化全局日志和全局ID映射存储所有轮次的结果
records: List[str] = []

View File

@@ -35,7 +35,6 @@ from app.core.memory.models.graph_models import (
from app.core.memory.utils.data.ontology import TemporalInfo
from app.core.memory.models.variate_config import (
ExtractionPipelineConfig,
StatementExtractionConfig,
)
from app.core.memory.llm_tools.openai_client import LLMClient
from app.core.memory.llm_tools.openai_embedder import OpenAIEmbedderClient
@@ -53,7 +52,6 @@ from app.core.memory.storage_services.extraction_engine.knowledge_extraction.tem
)
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.embedding_generation import (
embedding_generation,
embedding_generation_all,
generate_entity_embeddings_from_triplets,
)
from app.core.memory.storage_services.extraction_engine.deduplication.two_stage_dedup import (
@@ -179,24 +177,12 @@ class ExtractionOrchestrator:
all_statements_list.extend(chunk.statements)
total_statements = len(all_statements_list)
# 🔥 陈述句提取完成后,立即发送知识抽取完成消息
if self.progress_callback:
extraction_stats = {
"statements_count": total_statements,
"entities_count": 0, # 暂时为0后续会更新
"triplets_count": 0, # 暂时为0后续会更新
"temporal_ranges_count": 0, # 暂时为0后续会更新
}
await self.progress_callback("knowledge_extraction_complete", "知识抽取完成", extraction_stats)
# 🔥 立即发送下一阶段的开始消息,让前端知道进入了创建节点和边阶段
await self.progress_callback("creating_nodes_edges", "正在创建节点和边...")
# 步骤 2: 并行执行三元组提取、时间信息提取和基础嵌入生成(后台静默执行)
logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取和嵌入生成(后台静默执行)")
# 步骤 2: 并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成
logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取、情绪提取和嵌入生成")
(
triplet_maps,
temporal_maps,
emotion_maps,
statement_embedding_maps,
chunk_embedding_maps,
dialog_embeddings,
@@ -225,6 +211,7 @@ class ExtractionOrchestrator:
dialog_data_list,
temporal_maps,
triplet_maps,
emotion_maps,
statement_embedding_maps,
chunk_embedding_maps,
dialog_embeddings,
@@ -552,9 +539,108 @@ class ExtractionOrchestrator:
return temporal_maps
async def _extract_emotions(
    self, dialog_data_list: List[DialogData]
) -> List[Dict[str, Any]]:
    """
    Extract emotion information for every statement of every dialog, running
    all per-statement extractions concurrently.

    The DataConfig is looked up once, using the ``config_id`` of the first
    dialog. Extraction is skipped (one empty dict per dialog is returned)
    when there is no config id, the config cannot be loaded, no config is
    found, or the config has ``emotion_enabled`` switched off.

    Args:
        dialog_data_list: Dialogs whose chunks carry the statements to analyse.

    Returns:
        One dict per dialog mapping statement id to the extraction result,
        or to None when extraction for that statement failed.
    """
    logger.info("开始情绪信息提取(全局陈述句级并行)")

    def _empty_maps() -> List[Dict[str, Any]]:
        # One fresh dict per dialog so callers can index by dialog position.
        return [{} for _ in dialog_data_list]

    # The first dialog's config_id decides the configuration for the whole batch.
    config_id = (
        getattr(dialog_data_list[0], 'config_id', None)
        if dialog_data_list
        else None
    )
    if not config_id:
        logger.info("未找到config_id跳过情绪提取")
        return _empty_maps()

    # Load the DataConfig; any failure here disables emotion extraction.
    try:
        from app.db import SessionLocal
        from app.repositories.data_config_repository import DataConfigRepository

        session = SessionLocal()
        try:
            data_config = DataConfigRepository.get_by_id(session, config_id)
        finally:
            session.close()

        if data_config and not data_config.emotion_enabled:
            logger.info("情绪提取已在配置中禁用,跳过情绪提取")
            return _empty_maps()
    except Exception as e:
        logger.warning(f"加载DataConfig失败: {e},将跳过情绪提取")
        return _empty_maps()

    # Covers the "config not found" case (and re-checks the enabled flag).
    if not data_config or not data_config.emotion_enabled:
        logger.info("情绪提取未启用,跳过")
        return _empty_maps()

    # Flatten all statements, remembering dialog index and statement id up front.
    pending = [
        (dialog_index, stmt.id, stmt)
        for dialog_index, dialog in enumerate(dialog_data_list)
        for chunk in dialog.chunks
        for stmt in chunk.statements
    ]
    logger.info(f"收集到 {len(pending)} 个陈述句,开始全局并行提取情绪")

    from app.services.emotion_extraction_service import EmotionExtractionService

    emotion_service = EmotionExtractionService(
        llm_id=data_config.emotion_model_id or None
    )

    async def _extract_one(stmt):
        # A failure on a single statement is logged and reported as None.
        try:
            return await emotion_service.extract_emotion(stmt.statement, data_config)
        except Exception as e:
            logger.error(f"陈述句 {stmt.id} 情绪提取失败: {e}")
            return None

    outcomes = await asyncio.gather(
        *(_extract_one(stmt) for _, _, stmt in pending),
        return_exceptions=True,
    )

    # Regroup the flat results into one map per dialog.
    emotion_maps = _empty_maps()
    succeeded = 0
    for (dialog_index, stmt_id, _), outcome in zip(pending, outcomes):
        if isinstance(outcome, Exception):
            logger.error(f"陈述句处理异常: {outcome}")
            emotion_maps[dialog_index][stmt_id] = None
            continue
        emotion_maps[dialog_index][stmt_id] = outcome
        if outcome is not None:
            succeeded += 1

    logger.info(f"情绪信息提取完成,共成功提取 {succeeded}/{len(pending)} 个情绪")
    return emotion_maps
async def _parallel_extract_and_embed(
self, dialog_data_list: List[DialogData]
) -> Tuple[
List[Dict[str, Any]],
List[Dict[str, Any]],
List[Dict[str, Any]],
List[Dict[str, List[float]]],
@@ -562,35 +648,39 @@ class ExtractionOrchestrator:
List[List[float]],
]:
"""
并行执行三元组提取、时间信息提取和基础嵌入生成
并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成
个任务都依赖陈述句提取的结果,但彼此独立,可以并行执行:
个任务都依赖陈述句提取的结果,但彼此独立,可以并行执行:
- 三元组提取:从陈述句中提取实体和关系
- 时间信息提取:从陈述句中提取时间范围
- 情绪提取:从陈述句中提取情绪信息
- 嵌入生成:为陈述句、分块和对话生成向量(不依赖三元组)
Args:
dialog_data_list: 对话数据列表
Returns:
个列表的元组:
个列表的元组:
- 三元组映射列表
- 时间信息映射列表
- 情绪映射列表
- 陈述句嵌入映射列表
- 分块嵌入映射列表
- 对话嵌入列表
"""
logger.info("并行执行:三元组提取 + 时间信息提取 + 基础嵌入生成")
logger.info("并行执行:三元组提取 + 时间信息提取 + 情绪提取 + 基础嵌入生成")
# 创建个并行任务
# 创建个并行任务
triplet_task = self._extract_triplets(dialog_data_list)
temporal_task = self._extract_temporal(dialog_data_list)
emotion_task = self._extract_emotions(dialog_data_list)
embedding_task = self._generate_basic_embeddings(dialog_data_list)
# 并行执行
results = await asyncio.gather(
triplet_task,
temporal_task,
emotion_task,
embedding_task,
return_exceptions=True
)
@@ -598,19 +688,21 @@ class ExtractionOrchestrator:
# 解包结果
triplet_maps = results[0] if not isinstance(results[0], Exception) else [{} for _ in dialog_data_list]
temporal_maps = results[1] if not isinstance(results[1], Exception) else [{} for _ in dialog_data_list]
emotion_maps = results[2] if not isinstance(results[2], Exception) else [{} for _ in dialog_data_list]
if isinstance(results[2], Exception):
logger.error(f"基础嵌入生成失败: {results[2]}")
if isinstance(results[3], Exception):
logger.error(f"基础嵌入生成失败: {results[3]}")
statement_embedding_maps = [{} for _ in dialog_data_list]
chunk_embedding_maps = [{} for _ in dialog_data_list]
dialog_embeddings = [[] for _ in dialog_data_list]
else:
statement_embedding_maps, chunk_embedding_maps, dialog_embeddings = results[2]
statement_embedding_maps, chunk_embedding_maps, dialog_embeddings = results[3]
logger.info("并行任务执行完成")
return (
triplet_maps,
temporal_maps,
emotion_maps,
statement_embedding_maps,
chunk_embedding_maps,
dialog_embeddings,
@@ -727,6 +819,7 @@ class ExtractionOrchestrator:
dialog_data_list: List[DialogData],
temporal_maps: List[Dict[str, Any]],
triplet_maps: List[Dict[str, Any]],
emotion_maps: List[Dict[str, Any]],
statement_embedding_maps: List[Dict[str, List[float]]],
chunk_embedding_maps: List[Dict[str, List[float]]],
dialog_embeddings: List[List[float]],
@@ -738,6 +831,7 @@ class ExtractionOrchestrator:
dialog_data_list: 对话数据列表
temporal_maps: 时间信息映射列表
triplet_maps: 三元组映射列表
emotion_maps: 情绪信息映射列表
statement_embedding_maps: 陈述句嵌入映射列表
chunk_embedding_maps: 分块嵌入映射列表
dialog_embeddings: 对话嵌入列表
@@ -752,6 +846,7 @@ class ExtractionOrchestrator:
if (
len(temporal_maps) != expected_length
or len(triplet_maps) != expected_length
or len(emotion_maps) != expected_length
or len(statement_embedding_maps) != expected_length
or len(chunk_embedding_maps) != expected_length
or len(dialog_embeddings) != expected_length
@@ -759,6 +854,7 @@ class ExtractionOrchestrator:
logger.warning(
f"数据大小不匹配 - 对话: {len(dialog_data_list)}, "
f"时间映射: {len(temporal_maps)}, 三元组映射: {len(triplet_maps)}, "
f"情绪映射: {len(emotion_maps)}, "
f"陈述句嵌入: {len(statement_embedding_maps)}, "
f"分块嵌入: {len(chunk_embedding_maps)}, "
f"对话嵌入: {len(dialog_embeddings)}"
@@ -767,6 +863,7 @@ class ExtractionOrchestrator:
total_statements = 0
assigned_temporal = 0
assigned_triplets = 0
assigned_emotions = 0
assigned_statement_embeddings = 0
assigned_chunk_embeddings = 0
assigned_dialog_embeddings = 0
@@ -774,12 +871,13 @@ class ExtractionOrchestrator:
# 处理每个对话
for i, dialog_data in enumerate(dialog_data_list):
# 检查是否有缺失的数据
if i >= len(temporal_maps) or i >= len(triplet_maps):
if i >= len(temporal_maps) or i >= len(triplet_maps) or i >= len(emotion_maps):
logger.warning(f"对话 {dialog_data.id} 缺少提取数据,跳过赋值")
continue
temporal_map = temporal_maps[i]
triplet_map = triplet_maps[i]
emotion_map = emotion_maps[i]
statement_embedding_map = statement_embedding_maps[i] if i < len(statement_embedding_maps) else {}
chunk_embedding_map = chunk_embedding_maps[i] if i < len(chunk_embedding_maps) else {}
dialog_embedding = dialog_embeddings[i] if i < len(dialog_embeddings) else []
@@ -810,6 +908,18 @@ class ExtractionOrchestrator:
statement.triplet_extraction_info = triplet_map[statement.id]
assigned_triplets += 1
# 赋值情绪信息
if statement.id in emotion_map:
emotion_data = emotion_map[statement.id]
if emotion_data is not None:
# 将EmotionExtraction对象的字段赋值到Statement
statement.emotion_type = emotion_data.emotion_type
statement.emotion_intensity = emotion_data.emotion_intensity
statement.emotion_keywords = emotion_data.emotion_keywords
statement.emotion_subject = emotion_data.emotion_subject
statement.emotion_target = emotion_data.emotion_target
assigned_emotions += 1
# 赋值陈述句嵌入
if statement.id in statement_embedding_map:
statement.statement_embedding = statement_embedding_map[statement.id]
@@ -818,6 +928,7 @@ class ExtractionOrchestrator:
logger.info(
f"数据赋值完成 - 总陈述句: {total_statements}, "
f"时间信息: {assigned_temporal}, 三元组: {assigned_triplets}, "
f"情绪信息: {assigned_emotions}, "
f"陈述句嵌入: {assigned_statement_embeddings}, "
f"分块嵌入: {assigned_chunk_embeddings}, "
f"对话嵌入: {assigned_dialog_embeddings}"
@@ -927,6 +1038,12 @@ class ExtractionOrchestrator:
created_at=dialog_data.created_at,
expired_at=dialog_data.expired_at,
config_id=dialog_data.config_id if hasattr(dialog_data, 'config_id') else None,
# Emotion fields
emotion_type=getattr(statement, 'emotion_type', None),
emotion_intensity=getattr(statement, 'emotion_intensity', None),
emotion_keywords=getattr(statement, 'emotion_keywords', None),
emotion_subject=getattr(statement, 'emotion_subject', None),
emotion_target=getattr(statement, 'emotion_target', None),
)
statement_nodes.append(statement_node)
@@ -1333,7 +1450,7 @@ class ExtractionOrchestrator:
if match:
entity1_name = match.group(1).strip()
entity1_type = match.group(2)
entity2_name = match.group(3).strip()
match.group(3).strip()
entity2_type = match.group(4)
# 提取置信度和原因
@@ -1646,7 +1763,6 @@ async def get_chunked_dialogs(
"""
import json
import re
import os
# 加载测试数据
testdata_path = os.path.join(os.path.dirname(__file__), "../../data", "testdata.json")
@@ -1822,7 +1938,6 @@ async def get_chunked_dialogs_with_preprocessing(
Returns:
带 chunks 的 DialogData 列表
"""
import os
print("\n=== 完整数据处理流程(包含预处理)===")
if input_data_path is None:

View File

@@ -28,7 +28,6 @@
"""
import os
import json
import socket
from typing import Optional, Dict, Any, Literal
NetworkMode = Literal['internal', 'external']
@@ -105,7 +104,6 @@ def _make_pgsql_conn() -> Optional[object]:
try:
import psycopg2 # type: ignore
from psycopg2.extras import RealDictCursor # type: ignore
port = int(port_str) if port_str else 5432
conn = psycopg2.connect(
@@ -193,7 +191,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
# config_id 在数据库中是 Integer 类型,需要转换
try:
config_id_int = int(config_id)
except (ValueError, TypeError) as e:
except (ValueError, TypeError):
try:
pass
except Exception:
@@ -207,7 +205,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
" statement_granularity, include_dialogue_context, max_context, "
" \"offset\" AS offset, lambda_time, lambda_mem, "
" pruning_enabled, pruning_scene, pruning_threshold, "
" llm_id, embedding_id "
" llm_id, embedding_id, rerank_id "
"FROM data_config WHERE config_id = %s LIMIT 1"
)
cur.execute(sql, (config_id_int,))
@@ -222,7 +220,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
pass
return row if row else None
except Exception as e:
except Exception:
pass
return None
finally:
@@ -325,7 +323,7 @@ def _apply_overrides_from_db_row(
_set_if_present(selections, tk, db_row, tk, str)
# 特殊处理 UUID 字段,确保转换为字符串格式
for uuid_field in ("llm_id", "embedding_id"):
for uuid_field in ("llm_id", "embedding_id", "rerank_id"):
if uuid_field in db_row and db_row.get(uuid_field) is not None:
try:
value = db_row.get(uuid_field)
@@ -370,7 +368,7 @@ def _apply_overrides_from_db_row(
pass
return runtime_cfg
except Exception as e:
except Exception:
pass
return runtime_cfg
@@ -460,7 +458,7 @@ def apply_runtime_overrides_with_config_id(
updated_cfg = _apply_overrides_from_db_row(runtime_cfg, db_row, selected_cid, "config_id")
return updated_cfg, True
except Exception as e:
except Exception:
pass
return runtime_cfg, False
@@ -570,7 +568,7 @@ def load_unified_config(
try:
with open(runtime_config_path, "r", encoding="utf-8") as f:
runtime_cfg = json.load(f)
except (FileNotFoundError, json.JSONDecodeError) as e:
except (FileNotFoundError, json.JSONDecodeError):
runtime_cfg = {"selections": {}}
# 步骤 2: 尝试从 dbrun.json 读取 config_id 并应用数据库配置(最高优先级)
@@ -603,7 +601,7 @@ def load_unified_config(
pass
return runtime_cfg
except Exception as e:
except Exception:
return {"selections": {}}

View File

@@ -238,3 +238,81 @@ async def render_memory_summary_prompt(
'json_schema': 'MemorySummaryResponse.schema'
})
return rendered_prompt
async def render_emotion_extraction_prompt(
    statement: str,
    extract_keywords: bool,
    enable_subject: bool
) -> str:
    """
    Render the ``extract_emotion.jinja2`` template into an emotion-extraction prompt.

    Args:
        statement: The statement to analyze
        extract_keywords: Whether the prompt should ask for emotion keywords
        enable_subject: Whether the prompt should ask for subject classification

    Returns:
        The rendered prompt text
    """
    prompt_text = prompt_env.get_template("extract_emotion.jinja2").render(
        statement=statement,
        extract_keywords=extract_keywords,
        enable_subject=enable_subject,
    )

    # Record the rendered prompt in the prompt log.
    log_prompt_rendering('emotion extraction', prompt_text)

    # Record template metadata; the statement is logged as a type tag only,
    # not as its actual text.
    template_vars = {
        'statement': 'str',
        'extract_keywords': extract_keywords,
        'enable_subject': enable_subject,
    }
    log_template_rendering('extract_emotion.jinja2', template_vars)

    return prompt_text
async def render_emotion_suggestions_prompt(
    health_data: dict,
    patterns: dict,
    user_profile: dict
) -> str:
    """
    Render the ``generate_emotion_suggestions.jinja2`` template into a prompt
    asking for emotion-improvement suggestions.

    Args:
        health_data: Emotion health data
        patterns: Emotion pattern analysis result
        user_profile: User profile data

    Returns:
        The rendered prompt text
    """
    import json

    # Pre-serialise the emotion distribution so the template can embed it
    # as readable JSON (non-ASCII characters are kept as-is).
    distribution = health_data.get('emotion_distribution', {})
    emotion_distribution_json = json.dumps(distribution, ensure_ascii=False, indent=2)

    context = {
        'health_data': health_data,
        'patterns': patterns,
        'user_profile': user_profile,
        'emotion_distribution_json': emotion_distribution_json,
    }
    prompt_text = prompt_env.get_template(
        "generate_emotion_suggestions.jinja2"
    ).render(**context)

    # Record the rendered prompt in the prompt log.
    log_prompt_rendering('emotion suggestions', prompt_text)

    # Record a small summary of the inputs alongside the template name.
    summary = {
        'health_score': health_data.get('health_score'),
        'health_level': health_data.get('level'),
        'user_interests': user_profile.get('interests', []),
    }
    log_template_rendering('generate_emotion_suggestions.jinja2', summary)

    return prompt_text

View File

@@ -0,0 +1,57 @@
你是一个专业的情绪分析专家。请分析以下陈述句的情绪信息。
陈述句:{{ statement }}
请提取以下信息:
1. emotion_type(情绪类型):
- joy: 喜悦、开心、高兴、满意、愉快
- sadness: 悲伤、难过、失落、沮丧、遗憾
- anger: 愤怒、生气、不满、恼火、烦躁
- fear: 恐惧、害怕、担心、焦虑、紧张
- surprise: 惊讶、意外、震惊、吃惊
- neutral: 中性、客观陈述、无明显情绪
2. emotion_intensity(情绪强度):
- 0.0-0.3: 弱情绪
- 0.3-0.7: 中等情绪
- 0.7-1.0: 强情绪
{% if extract_keywords %}
3. emotion_keywords(情绪关键词):
- 原句中直接表达情绪的词语
- 最多提取3个关键词
- 如果没有明显的情绪词,返回空列表
{% else %}
3. emotion_keywords(情绪关键词):
- 返回空列表
{% endif %}
{% if enable_subject %}
4. emotion_subject(情绪主体):
- self: 用户本人的情绪(包含"我"、"我们"、"咱们"等第一人称)
- other: 他人的情绪(包含人名、"他/她"等第三人称)
- object: 对事物的评价(针对产品、地点、事件等)
注意:
- 如果同时包含多个主体,优先识别用户本人(self)
- 如果无法明确判断主体,默认为 self
5. emotion_target(情绪对象):
- 如果有明确的情绪对象,提取其名称
- 如果没有明确对象,返回 null
{% else %}
4. emotion_subject(情绪主体):
- 默认为 self
5. emotion_target(情绪对象):
- 返回 null
{% endif %}
注意事项:
- 如果陈述句是客观事实陈述,无明显情绪,标记为 neutral
- 情绪强度要符合语境,不要过度解读
- 情绪关键词要准确,不要添加原句中没有的词
- 主体分类要准确,优先识别用户本人(self)
请以 JSON 格式返回结果。

View File

@@ -0,0 +1,63 @@
你是一位专业的心理健康顾问。请根据以下用户的情绪健康数据和个人信息,生成3-5条个性化的情绪改善建议。
## 用户情绪健康数据
健康分数:{{ health_data.health_score }}/100
健康等级:{{ health_data.level }}
维度分析:
- 积极率:{{ health_data.dimensions.positivity_rate.score }}/100
- 正面情绪:{{ health_data.dimensions.positivity_rate.positive_count }}次
- 负面情绪:{{ health_data.dimensions.positivity_rate.negative_count }}次
- 中性情绪:{{ health_data.dimensions.positivity_rate.neutral_count }}次
- 稳定性:{{ health_data.dimensions.stability.score }}/100
- 标准差:{{ health_data.dimensions.stability.std_deviation }}
- 恢复力:{{ health_data.dimensions.resilience.score }}/100
- 恢复率:{{ health_data.dimensions.resilience.recovery_rate }}
情绪分布:
{{ emotion_distribution_json }}
## 情绪模式分析
主要负面情绪:{{ patterns.dominant_negative_emotion|default('无') }}
情绪波动性:{{ patterns.emotion_volatility|default('未知') }}
高强度情绪次数:{{ patterns.high_intensity_emotions|default([])|length }}
## 用户兴趣
{{ user_profile.interests|default(['未知'])|join(', ') }}
## 任务要求
请生成3-5条个性化建议,每条建议包含:
1. type: 建议类型(emotion_balance/activity_recommendation/social_connection/stress_management)
2. title: 建议标题(简短有力)
3. content: 建议内容(详细说明,50-100字)
4. priority: 优先级(high/medium/low)
5. actionable_steps: 3个可执行的具体步骤
同时提供一个health_summary(不超过50字),概括用户的整体情绪状态。
请以JSON格式返回,格式如下:
{
"health_summary": "您的情绪健康状况...",
"suggestions": [
{
"type": "emotion_balance",
"title": "建议标题",
"content": "建议内容...",
"priority": "high",
"actionable_steps": ["步骤1", "步骤2", "步骤3"]
}
]
}
注意事项:
- 建议要具体、可执行,避免空泛
- 结合用户的兴趣爱好提供个性化建议
- 针对主要问题(如主要负面情绪)提供针对性建议
- 优先级要合理分配:至少1个high,1-2个medium,其余low
- 每个建议的3个步骤要循序渐进、易于实施