Merge #21 into develop from feature/emotion-engine
feature/情绪引擎 * feature/emotion-engine: (7 commits squashed) - [feature]Emotion Engine Development - [feature]Emotion Engine Development - Merge branch 'feature/emotion-engine' of codeup.aliyun.com:redbearai/python/redbear-mem-open into feature/emotion-engine - [fix]1.Fix the front-end files;2.Cache Management Deletion;3.Delete "check_code.py" - [fix]1.Fix the front-end files;2.Cache Management Deletion;3.Delete "check_code.py" - Merge branch 'feature/emotion-engine' of codeup.aliyun.com:redbearai/python/redbear-mem-open into feature/emotion-engine - [fix]fix vite.config.ts Signed-off-by: 乐力齐 <accounts_690c7b0af9007d7e338af636@mail.teambition.com> Commented-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com> Commented-by: 乐力齐 <accounts_690c7b0af9007d7e338af636@mail.teambition.com> Reviewed-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com> Merged-by: aliyun6762716068 <accounts_68cb7c6b61f5dcc4200d6251@mail.teambition.com> CR-link: https://codeup.aliyun.com/redbearai/python/redbear-mem-open/change/21
This commit is contained in:
@@ -38,14 +38,53 @@ async def make_write_graph(user_id, tools, apply_id, group_id, config_id=None):
|
||||
messages = state["messages"]
|
||||
last_message = messages[-1]
|
||||
|
||||
result = await data_type_tool.ainvoke({
|
||||
"context": last_message[1] if isinstance(last_message, tuple) else last_message.content
|
||||
})
|
||||
result=json.loads( result)
|
||||
# 调用 Data_type_differentiation 工具
|
||||
try:
|
||||
raw_result = await data_type_tool.ainvoke({
|
||||
"context": last_message[1] if isinstance(last_message, tuple) else last_message.content
|
||||
})
|
||||
|
||||
# MCP工具返回的是列表格式,需要提取内容
|
||||
logger.debug(f"Data_type_differentiation raw result type: {type(raw_result)}, value: {raw_result}")
|
||||
|
||||
# 处理不同的返回格式
|
||||
if isinstance(raw_result, list) and len(raw_result) > 0:
|
||||
# MCP工具返回格式: [{"type": "text", "text": "..."}]
|
||||
result_text = raw_result[0].get("text", "{}") if isinstance(raw_result[0], dict) else str(raw_result[0])
|
||||
elif isinstance(raw_result, str):
|
||||
result_text = raw_result
|
||||
else:
|
||||
result_text = str(raw_result)
|
||||
|
||||
# 解析JSON字符串
|
||||
try:
|
||||
result = json.loads(result_text)
|
||||
except json.JSONDecodeError as je:
|
||||
logger.error(f"Failed to parse result as JSON: {result_text}, error: {je}")
|
||||
return {"messages": [AIMessage(content=json.dumps({
|
||||
"status": "error",
|
||||
"message": f"Invalid JSON response from Data_type_differentiation: {str(je)}"
|
||||
}))]}
|
||||
|
||||
# 检查是否有错误
|
||||
if isinstance(result, dict) and result.get("type") == "error":
|
||||
error_msg = result.get("message", "Unknown error in Data_type_differentiation")
|
||||
logger.error(f"Data_type_differentiation 返回错误: {error_msg}")
|
||||
return {"messages": [AIMessage(content=json.dumps({
|
||||
"status": "error",
|
||||
"message": error_msg
|
||||
}))]}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"调用 Data_type_differentiation 失败: {e}", exc_info=True)
|
||||
return {"messages": [AIMessage(content=json.dumps({
|
||||
"status": "error",
|
||||
"message": f"Data type differentiation failed: {str(e)}"
|
||||
}))]}
|
||||
|
||||
# 调用 Data_write,传递 config_id
|
||||
write_params = {
|
||||
"content": result["context"],
|
||||
"content": result.get("context", last_message.content if hasattr(last_message, 'content') else str(last_message)),
|
||||
"apply_id": apply_id,
|
||||
"group_id": group_id,
|
||||
"user_id": user_id
|
||||
@@ -56,14 +95,22 @@ async def make_write_graph(user_id, tools, apply_id, group_id, config_id=None):
|
||||
write_params["config_id"] = config_id
|
||||
logger.debug(f"传递 config_id 到 Data_write: {config_id}")
|
||||
|
||||
write_result = await data_write_tool.ainvoke(write_params)
|
||||
try:
|
||||
write_result = await data_write_tool.ainvoke(write_params)
|
||||
|
||||
if isinstance(write_result, dict):
|
||||
content = write_result.get("data", str(write_result))
|
||||
else:
|
||||
content = str(write_result)
|
||||
logger.info("写入内容: %s", content)
|
||||
return {"messages": [AIMessage(content=content)]}
|
||||
if isinstance(write_result, dict):
|
||||
content = write_result.get("data", str(write_result))
|
||||
else:
|
||||
content = str(write_result)
|
||||
logger.info("写入内容: %s", content)
|
||||
return {"messages": [AIMessage(content=content)]}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"调用 Data_write 失败: {e}", exc_info=True)
|
||||
return {"messages": [AIMessage(content=json.dumps({
|
||||
"status": "error",
|
||||
"message": f"Data write failed: {str(e)}"
|
||||
}))]}
|
||||
|
||||
workflow = StateGraph(WriteState)
|
||||
workflow.add_node("content_input", call_model)
|
||||
|
||||
@@ -39,6 +39,17 @@ async def write(content: str, user_id: str, apply_id: str, group_id: str, ref_id
|
||||
ref_id: 参考ID,默认为 "wyl20251027"
|
||||
config_id: 配置ID,用于标记数据处理配置
|
||||
"""
|
||||
# 如果提供了config_id,重新加载配置
|
||||
if config_id:
|
||||
from app.core.memory.utils.config.definitions import reload_configuration_from_database
|
||||
logger.info(f"Reloading configuration for config_id: {config_id}")
|
||||
config_loaded = reload_configuration_from_database(config_id)
|
||||
if not config_loaded:
|
||||
error_msg = f"Failed to load configuration for config_id: {config_id}"
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
logger.info(f"Configuration reloaded successfully for config_id: {config_id}")
|
||||
|
||||
logger.info("=== MemSci Knowledge Extraction Pipeline ===")
|
||||
logger.info(f"Using model: {config_defs.SELECTED_LLM_NAME}")
|
||||
logger.info(f"Using LLM ID: {config_defs.SELECTED_LLM_ID}")
|
||||
|
||||
85
api/app/core/memory/models/emotion_models.py
Normal file
85
api/app/core/memory/models/emotion_models.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Emotion extraction models for LLM structured output.
|
||||
|
||||
This module contains Pydantic models for emotion extraction from statements,
|
||||
designed to be used with LLM structured output capabilities.
|
||||
|
||||
Classes:
|
||||
EmotionExtraction: Model for emotion extraction results from statements
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class EmotionExtraction(BaseModel):
    """Emotion extraction result model for LLM structured output.

    Holds the structured emotion information extracted from a single
    statement: emotion type, intensity, keywords, subject classification
    and an optional target.

    Attributes:
        emotion_type: Type of emotion (joy/sadness/anger/fear/surprise/neutral)
        emotion_intensity: Intensity of emotion (0.0-1.0)
        emotion_keywords: Emotion keywords from the statement (at most 3 kept)
        emotion_subject: Subject of emotion (self/other/object)
        emotion_target: Optional target of emotion (person or object name)
    """

    emotion_type: str = Field(
        ...,
        description="Emotion type: joy/sadness/anger/fear/surprise/neutral"
    )
    emotion_intensity: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Emotion intensity from 0.0 to 1.0"
    )
    emotion_keywords: List[str] = Field(
        default_factory=list,
        description="Emotion keywords extracted from the statement (max 3)"
    )
    emotion_subject: str = Field(
        ...,
        description="Emotion subject: self/other/object"
    )
    emotion_target: Optional[str] = Field(
        None,
        description="Emotion target: person or object name"
    )

    @field_validator('emotion_type')
    @classmethod
    def validate_emotion_type(cls, v):
        """Validate emotion type is one of the valid values.

        Raises:
            ValueError: If ``v`` is not one of the supported emotion types.
        """
        valid_types = ['joy', 'sadness', 'anger', 'fear', 'surprise', 'neutral']
        if v not in valid_types:
            raise ValueError(f"emotion_type must be one of {valid_types}, got {v}")
        return v

    @field_validator('emotion_subject')
    @classmethod
    def validate_emotion_subject(cls, v):
        """Validate emotion subject is one of the valid values.

        Raises:
            ValueError: If ``v`` is not one of the supported subjects.
        """
        valid_subjects = ['self', 'other', 'object']
        if v not in valid_subjects:
            raise ValueError(f"emotion_subject must be one of {valid_subjects}, got {v}")
        return v

    # mode='before' is required here: in the default "after" mode pydantic has
    # already rejected None / non-list input, so the lenient fallback below
    # could never run.
    @field_validator('emotion_keywords', mode='before')
    @classmethod
    def validate_emotion_keywords(cls, v):
        """Normalise emotion keywords to a list of at most 3 items.

        Anything that is not a list or tuple (including ``None``) is replaced
        by an empty list instead of failing the whole extraction. The
        truncated list is still type-checked by pydantic afterwards.
        """
        if not isinstance(v, (list, tuple)):
            return []
        # Limit to max 3 keywords
        return list(v)[:3]

    @field_validator('emotion_intensity')
    @classmethod
    def validate_emotion_intensity(cls, v):
        """Validate emotion intensity is within the 0.0-1.0 range.

        The ``ge``/``le`` constraints on the field already enforce this
        range; this check is kept as an explicit secondary guard.

        Raises:
            ValueError: If ``v`` lies outside 0.0-1.0.
        """
        if not (0.0 <= v <= 1.0):
            raise ValueError(f"emotion_intensity must be between 0.0 and 1.0, got {v}")
        return v
|
||||
@@ -215,24 +215,58 @@ class StatementNode(Node):
|
||||
Attributes:
|
||||
chunk_id: ID of the parent chunk this statement belongs to
|
||||
stmt_type: Type of the statement (from ontology)
|
||||
temporal_info: Temporal information extracted from the statement
|
||||
statement: The actual statement text content
|
||||
connect_strength: Classification of connection strength ('Strong' or 'Weak')
|
||||
emotion_intensity: Optional emotion intensity (0.0-1.0) - displayed on node
|
||||
emotion_target: Optional emotion target (person or object name)
|
||||
emotion_subject: Optional emotion subject (self/other/object)
|
||||
emotion_type: Optional emotion type (joy/sadness/anger/fear/surprise/neutral)
|
||||
emotion_keywords: Optional list of emotion keywords (max 3)
|
||||
temporal_info: Temporal information extracted from the statement
|
||||
valid_at: Optional start date of temporal validity
|
||||
invalid_at: Optional end date of temporal validity
|
||||
statement_embedding: Optional embedding vector for the statement
|
||||
chunk_embedding: Optional embedding vector for the parent chunk
|
||||
connect_strength: Classification of connection strength ('Strong' or 'Weak')
|
||||
config_id: Configuration ID used to process this statement
|
||||
"""
|
||||
# Core fields (ordered as requested)
|
||||
chunk_id: str = Field(..., description="ID of the parent chunk")
|
||||
stmt_type: str = Field(..., description="Type of the statement")
|
||||
temporal_info: TemporalInfo = Field(..., description="Temporal information")
|
||||
statement: str = Field(..., description="The statement text content")
|
||||
connect_strength: str = Field(..., description="Strong VS Weak classification of this statement")
|
||||
|
||||
# Emotion fields (ordered as requested, emotion_intensity first for display)
|
||||
emotion_intensity: Optional[float] = Field(
|
||||
None,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Emotion intensity: 0.0-1.0 (displayed on node)"
|
||||
)
|
||||
emotion_target: Optional[str] = Field(
|
||||
None,
|
||||
description="Emotion target: person or object name"
|
||||
)
|
||||
emotion_subject: Optional[str] = Field(
|
||||
None,
|
||||
description="Emotion subject: self/other/object"
|
||||
)
|
||||
emotion_type: Optional[str] = Field(
|
||||
None,
|
||||
description="Emotion type: joy/sadness/anger/fear/surprise/neutral"
|
||||
)
|
||||
emotion_keywords: Optional[List[str]] = Field(
|
||||
default_factory=list,
|
||||
description="Emotion keywords list, max 3 items"
|
||||
)
|
||||
|
||||
# Temporal fields
|
||||
temporal_info: TemporalInfo = Field(..., description="Temporal information")
|
||||
valid_at: Optional[datetime] = Field(None, description="Temporal validity start")
|
||||
invalid_at: Optional[datetime] = Field(None, description="Temporal validity end")
|
||||
|
||||
# Embedding and other fields
|
||||
statement_embedding: Optional[List[float]] = Field(None, description="Statement embedding vector")
|
||||
chunk_embedding: Optional[List[float]] = Field(None, description="Chunk embedding vector")
|
||||
connect_strength: str = Field(..., description="Strong VS Weak classification of this statement")
|
||||
config_id: Optional[int | str] = Field(None, description="Configuration ID used to process this statement (integer or string)")
|
||||
|
||||
@field_validator('valid_at', 'invalid_at', mode='before')
|
||||
@@ -240,6 +274,39 @@ class StatementNode(Node):
|
||||
def validate_datetime(cls, v):
|
||||
"""使用通用的历史日期解析函数"""
|
||||
return parse_historical_datetime(v)
|
||||
|
||||
@field_validator('emotion_type', mode='before')
@classmethod
def validate_emotion_type(cls, v):
    """Accept ``None`` or one of the supported emotion types.

    Raises:
        ValueError: If ``v`` is set but is not a supported emotion type.
    """
    allowed = ['joy', 'sadness', 'anger', 'fear', 'surprise', 'neutral']
    # None means "no emotion recorded" and passes through untouched.
    if v is not None and v not in allowed:
        raise ValueError(f"emotion_type must be one of {allowed}, got {v}")
    return v
|
||||
|
||||
@field_validator('emotion_subject', mode='before')
@classmethod
def validate_emotion_subject(cls, v):
    """Accept ``None`` or one of the supported emotion subjects.

    Raises:
        ValueError: If ``v`` is set but is not a supported subject.
    """
    allowed = ['self', 'other', 'object']
    # None means "no subject recorded" and passes through untouched.
    if v is not None and v not in allowed:
        raise ValueError(f"emotion_subject must be one of {allowed}, got {v}")
    return v
|
||||
|
||||
@field_validator('emotion_keywords', mode='before')
@classmethod
def validate_emotion_keywords(cls, v):
    """Normalise emotion keywords to a list of at most 3 items.

    ``None`` and any non-list value are replaced by an empty list.
    """
    # Keep only the first three keywords of a real list; drop everything else.
    return v[:3] if isinstance(v, list) else []
|
||||
|
||||
|
||||
class ChunkNode(Node):
|
||||
|
||||
@@ -64,6 +64,11 @@ class Statement(BaseModel):
|
||||
connect_strength: Optional connection strength ('Strong' or 'Weak')
|
||||
temporal_validity: Optional temporal validity range
|
||||
triplet_extraction_info: Optional triplet extraction results
|
||||
emotion_type: Optional emotion type (joy/sadness/anger/fear/surprise/neutral)
|
||||
emotion_intensity: Optional emotion intensity (0.0-1.0)
|
||||
emotion_keywords: Optional list of emotion keywords
|
||||
emotion_subject: Optional emotion subject (self/other/object)
|
||||
emotion_target: Optional emotion target (person or object name)
|
||||
"""
|
||||
id: str = Field(default_factory=lambda: uuid4().hex, description="A unique identifier for the statement.")
|
||||
chunk_id: str = Field(..., description="ID of the parent chunk this statement belongs to.")
|
||||
@@ -80,6 +85,12 @@ class Statement(BaseModel):
|
||||
triplet_extraction_info: Optional[TripletExtractionResponse] = Field(
|
||||
None, description="The triplet extraction information of the statement."
|
||||
)
|
||||
# Emotion fields
|
||||
emotion_type: Optional[str] = Field(None, description="Emotion type: joy/sadness/anger/fear/surprise/neutral")
|
||||
emotion_intensity: Optional[float] = Field(None, ge=0.0, le=1.0, description="Emotion intensity: 0.0-1.0")
|
||||
emotion_keywords: Optional[List[str]] = Field(default_factory=list, description="Emotion keywords, max 3")
|
||||
emotion_subject: Optional[str] = Field(None, description="Emotion subject: self/other/object")
|
||||
emotion_target: Optional[str] = Field(None, description="Emotion target: person or object name")
|
||||
|
||||
|
||||
class ConversationContext(BaseModel):
|
||||
|
||||
@@ -480,7 +480,6 @@ async def llm_dedup_entities_iterative_blocks( # 迭代分块并发 LLM 去重
|
||||
- global_redirect: dict losing_id -> canonical_id accumulated across rounds
|
||||
- records: textual logs including per-round/per-block summaries and per-pair decisions
|
||||
"""
|
||||
import asyncio
|
||||
import random
|
||||
# 初始化全局日志和全局ID映射(存储所有轮次的结果)
|
||||
records: List[str] = []
|
||||
|
||||
@@ -35,7 +35,6 @@ from app.core.memory.models.graph_models import (
|
||||
from app.core.memory.utils.data.ontology import TemporalInfo
|
||||
from app.core.memory.models.variate_config import (
|
||||
ExtractionPipelineConfig,
|
||||
StatementExtractionConfig,
|
||||
)
|
||||
from app.core.memory.llm_tools.openai_client import LLMClient
|
||||
from app.core.memory.llm_tools.openai_embedder import OpenAIEmbedderClient
|
||||
@@ -53,7 +52,6 @@ from app.core.memory.storage_services.extraction_engine.knowledge_extraction.tem
|
||||
)
|
||||
from app.core.memory.storage_services.extraction_engine.knowledge_extraction.embedding_generation import (
|
||||
embedding_generation,
|
||||
embedding_generation_all,
|
||||
generate_entity_embeddings_from_triplets,
|
||||
)
|
||||
from app.core.memory.storage_services.extraction_engine.deduplication.two_stage_dedup import (
|
||||
@@ -179,24 +177,12 @@ class ExtractionOrchestrator:
|
||||
all_statements_list.extend(chunk.statements)
|
||||
total_statements = len(all_statements_list)
|
||||
|
||||
# 🔥 陈述句提取完成后,立即发送知识抽取完成消息
|
||||
if self.progress_callback:
|
||||
extraction_stats = {
|
||||
"statements_count": total_statements,
|
||||
"entities_count": 0, # 暂时为0,后续会更新
|
||||
"triplets_count": 0, # 暂时为0,后续会更新
|
||||
"temporal_ranges_count": 0, # 暂时为0,后续会更新
|
||||
}
|
||||
await self.progress_callback("knowledge_extraction_complete", "知识抽取完成", extraction_stats)
|
||||
|
||||
# 🔥 立即发送下一阶段的开始消息,让前端知道进入了创建节点和边阶段
|
||||
await self.progress_callback("creating_nodes_edges", "正在创建节点和边...")
|
||||
|
||||
# 步骤 2: 并行执行三元组提取、时间信息提取和基础嵌入生成(后台静默执行)
|
||||
logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取和嵌入生成(后台静默执行)")
|
||||
# 步骤 2: 并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成
|
||||
logger.info("步骤 2/6: 并行执行三元组提取、时间信息提取、情绪提取和嵌入生成")
|
||||
(
|
||||
triplet_maps,
|
||||
temporal_maps,
|
||||
emotion_maps,
|
||||
statement_embedding_maps,
|
||||
chunk_embedding_maps,
|
||||
dialog_embeddings,
|
||||
@@ -225,6 +211,7 @@ class ExtractionOrchestrator:
|
||||
dialog_data_list,
|
||||
temporal_maps,
|
||||
triplet_maps,
|
||||
emotion_maps,
|
||||
statement_embedding_maps,
|
||||
chunk_embedding_maps,
|
||||
dialog_embeddings,
|
||||
@@ -552,9 +539,108 @@ class ExtractionOrchestrator:
|
||||
|
||||
return temporal_maps
|
||||
|
||||
async def _extract_emotions(
    self, dialog_data_list: List[DialogData]
) -> List[Dict[str, Any]]:
    """
    Extract emotion information for every statement in the given dialogs.

    Statements from all dialogs are gathered into one flat list and
    processed concurrently with a single ``asyncio.gather`` call.
    Extraction is skipped, returning one empty dict per dialog, when the
    first dialog has no ``config_id``, when loading the DataConfig fails or
    yields nothing, or when the config has emotion extraction disabled.

    Args:
        dialog_data_list: Dialogs whose chunks hold the statements to analyse.

    Returns:
        One dict per dialog, keyed by statement id. Each value is whatever
        ``EmotionExtractionService.extract_emotion`` returned, or ``None``
        when extraction for that statement failed.
    """
    logger.info("开始情绪信息提取(全局陈述句级并行)")

    def skipped() -> List[Dict[str, Any]]:
        # One empty mapping per dialog keeps the result aligned with the input.
        return [{} for _ in dialog_data_list]

    # The first dialog's config_id selects the configuration for the batch.
    first_dialog = dialog_data_list[0] if dialog_data_list else None
    config_id = getattr(first_dialog, 'config_id', None)
    if not config_id:
        logger.info("未找到config_id,跳过情绪提取")
        return skipped()

    # Load the DataConfig; any failure here disables emotion extraction.
    data_config = None
    try:
        from app.db import SessionLocal
        from app.repositories.data_config_repository import DataConfigRepository

        session = SessionLocal()
        try:
            data_config = DataConfigRepository.get_by_id(session, config_id)
        finally:
            session.close()

        if data_config and not data_config.emotion_enabled:
            logger.info("情绪提取已在配置中禁用,跳过情绪提取")
            return skipped()
    except Exception as load_error:
        logger.warning(f"加载DataConfig失败: {load_error},将跳过情绪提取")
        return skipped()

    # Covers the case where the lookup succeeded but returned no config.
    if not data_config or not data_config.emotion_enabled:
        logger.info("情绪提取未启用,跳过")
        return skipped()

    # Flatten to (dialog index, statement id, statement) triples.
    pending = [
        (dialog_idx, stmt.id, stmt)
        for dialog_idx, dialog in enumerate(dialog_data_list)
        for chunk in dialog.chunks
        for stmt in chunk.statements
    ]
    total = len(pending)
    logger.info(f"收集到 {total} 个陈述句,开始全局并行提取情绪")

    from app.services.emotion_extraction_service import EmotionExtractionService
    emotion_service = EmotionExtractionService(
        llm_id=data_config.emotion_model_id or None
    )

    async def extract_one(stmt):
        # A failure on one statement must not abort the rest of the batch.
        try:
            return await emotion_service.extract_emotion(stmt.statement, data_config)
        except Exception as exc:
            logger.error(f"陈述句 {stmt.id} 情绪提取失败: {exc}")
            return None

    outcomes = await asyncio.gather(
        *(extract_one(stmt) for _, _, stmt in pending),
        return_exceptions=True,
    )

    # Regroup the flat results into one mapping per dialog.
    emotion_maps: List[Dict[str, Any]] = [{} for _ in dialog_data_list]
    succeeded = 0
    for (dialog_idx, stmt_id, _), outcome in zip(pending, outcomes):
        if isinstance(outcome, Exception):
            logger.error(f"陈述句处理异常: {outcome}")
            outcome = None
        elif outcome is not None:
            succeeded += 1
        emotion_maps[dialog_idx][stmt_id] = outcome

    logger.info(f"情绪信息提取完成,共成功提取 {succeeded}/{total} 个情绪")

    return emotion_maps
|
||||
|
||||
async def _parallel_extract_and_embed(
|
||||
self, dialog_data_list: List[DialogData]
|
||||
) -> Tuple[
|
||||
List[Dict[str, Any]],
|
||||
List[Dict[str, Any]],
|
||||
List[Dict[str, Any]],
|
||||
List[Dict[str, List[float]]],
|
||||
@@ -562,35 +648,39 @@ class ExtractionOrchestrator:
|
||||
List[List[float]],
|
||||
]:
|
||||
"""
|
||||
并行执行三元组提取、时间信息提取和基础嵌入生成
|
||||
并行执行三元组提取、时间信息提取、情绪提取和基础嵌入生成
|
||||
|
||||
这三个任务都依赖陈述句提取的结果,但彼此独立,可以并行执行:
|
||||
这四个任务都依赖陈述句提取的结果,但彼此独立,可以并行执行:
|
||||
- 三元组提取:从陈述句中提取实体和关系
|
||||
- 时间信息提取:从陈述句中提取时间范围
|
||||
- 情绪提取:从陈述句中提取情绪信息
|
||||
- 嵌入生成:为陈述句、分块和对话生成向量(不依赖三元组)
|
||||
|
||||
Args:
|
||||
dialog_data_list: 对话数据列表
|
||||
|
||||
Returns:
|
||||
五个列表的元组:
|
||||
六个列表的元组:
|
||||
- 三元组映射列表
|
||||
- 时间信息映射列表
|
||||
- 情绪映射列表
|
||||
- 陈述句嵌入映射列表
|
||||
- 分块嵌入映射列表
|
||||
- 对话嵌入列表
|
||||
"""
|
||||
logger.info("并行执行:三元组提取 + 时间信息提取 + 基础嵌入生成")
|
||||
logger.info("并行执行:三元组提取 + 时间信息提取 + 情绪提取 + 基础嵌入生成")
|
||||
|
||||
# 创建三个并行任务
|
||||
# 创建四个并行任务
|
||||
triplet_task = self._extract_triplets(dialog_data_list)
|
||||
temporal_task = self._extract_temporal(dialog_data_list)
|
||||
emotion_task = self._extract_emotions(dialog_data_list)
|
||||
embedding_task = self._generate_basic_embeddings(dialog_data_list)
|
||||
|
||||
# 并行执行
|
||||
results = await asyncio.gather(
|
||||
triplet_task,
|
||||
temporal_task,
|
||||
emotion_task,
|
||||
embedding_task,
|
||||
return_exceptions=True
|
||||
)
|
||||
@@ -598,19 +688,21 @@ class ExtractionOrchestrator:
|
||||
# 解包结果
|
||||
triplet_maps = results[0] if not isinstance(results[0], Exception) else [{} for _ in dialog_data_list]
|
||||
temporal_maps = results[1] if not isinstance(results[1], Exception) else [{} for _ in dialog_data_list]
|
||||
emotion_maps = results[2] if not isinstance(results[2], Exception) else [{} for _ in dialog_data_list]
|
||||
|
||||
if isinstance(results[2], Exception):
|
||||
logger.error(f"基础嵌入生成失败: {results[2]}")
|
||||
if isinstance(results[3], Exception):
|
||||
logger.error(f"基础嵌入生成失败: {results[3]}")
|
||||
statement_embedding_maps = [{} for _ in dialog_data_list]
|
||||
chunk_embedding_maps = [{} for _ in dialog_data_list]
|
||||
dialog_embeddings = [[] for _ in dialog_data_list]
|
||||
else:
|
||||
statement_embedding_maps, chunk_embedding_maps, dialog_embeddings = results[2]
|
||||
statement_embedding_maps, chunk_embedding_maps, dialog_embeddings = results[3]
|
||||
|
||||
logger.info("并行任务执行完成")
|
||||
return (
|
||||
triplet_maps,
|
||||
temporal_maps,
|
||||
emotion_maps,
|
||||
statement_embedding_maps,
|
||||
chunk_embedding_maps,
|
||||
dialog_embeddings,
|
||||
@@ -727,6 +819,7 @@ class ExtractionOrchestrator:
|
||||
dialog_data_list: List[DialogData],
|
||||
temporal_maps: List[Dict[str, Any]],
|
||||
triplet_maps: List[Dict[str, Any]],
|
||||
emotion_maps: List[Dict[str, Any]],
|
||||
statement_embedding_maps: List[Dict[str, List[float]]],
|
||||
chunk_embedding_maps: List[Dict[str, List[float]]],
|
||||
dialog_embeddings: List[List[float]],
|
||||
@@ -738,6 +831,7 @@ class ExtractionOrchestrator:
|
||||
dialog_data_list: 对话数据列表
|
||||
temporal_maps: 时间信息映射列表
|
||||
triplet_maps: 三元组映射列表
|
||||
emotion_maps: 情绪信息映射列表
|
||||
statement_embedding_maps: 陈述句嵌入映射列表
|
||||
chunk_embedding_maps: 分块嵌入映射列表
|
||||
dialog_embeddings: 对话嵌入列表
|
||||
@@ -752,6 +846,7 @@ class ExtractionOrchestrator:
|
||||
if (
|
||||
len(temporal_maps) != expected_length
|
||||
or len(triplet_maps) != expected_length
|
||||
or len(emotion_maps) != expected_length
|
||||
or len(statement_embedding_maps) != expected_length
|
||||
or len(chunk_embedding_maps) != expected_length
|
||||
or len(dialog_embeddings) != expected_length
|
||||
@@ -759,6 +854,7 @@ class ExtractionOrchestrator:
|
||||
logger.warning(
|
||||
f"数据大小不匹配 - 对话: {len(dialog_data_list)}, "
|
||||
f"时间映射: {len(temporal_maps)}, 三元组映射: {len(triplet_maps)}, "
|
||||
f"情绪映射: {len(emotion_maps)}, "
|
||||
f"陈述句嵌入: {len(statement_embedding_maps)}, "
|
||||
f"分块嵌入: {len(chunk_embedding_maps)}, "
|
||||
f"对话嵌入: {len(dialog_embeddings)}"
|
||||
@@ -767,6 +863,7 @@ class ExtractionOrchestrator:
|
||||
total_statements = 0
|
||||
assigned_temporal = 0
|
||||
assigned_triplets = 0
|
||||
assigned_emotions = 0
|
||||
assigned_statement_embeddings = 0
|
||||
assigned_chunk_embeddings = 0
|
||||
assigned_dialog_embeddings = 0
|
||||
@@ -774,12 +871,13 @@ class ExtractionOrchestrator:
|
||||
# 处理每个对话
|
||||
for i, dialog_data in enumerate(dialog_data_list):
|
||||
# 检查是否有缺失的数据
|
||||
if i >= len(temporal_maps) or i >= len(triplet_maps):
|
||||
if i >= len(temporal_maps) or i >= len(triplet_maps) or i >= len(emotion_maps):
|
||||
logger.warning(f"对话 {dialog_data.id} 缺少提取数据,跳过赋值")
|
||||
continue
|
||||
|
||||
temporal_map = temporal_maps[i]
|
||||
triplet_map = triplet_maps[i]
|
||||
emotion_map = emotion_maps[i]
|
||||
statement_embedding_map = statement_embedding_maps[i] if i < len(statement_embedding_maps) else {}
|
||||
chunk_embedding_map = chunk_embedding_maps[i] if i < len(chunk_embedding_maps) else {}
|
||||
dialog_embedding = dialog_embeddings[i] if i < len(dialog_embeddings) else []
|
||||
@@ -810,6 +908,18 @@ class ExtractionOrchestrator:
|
||||
statement.triplet_extraction_info = triplet_map[statement.id]
|
||||
assigned_triplets += 1
|
||||
|
||||
# 赋值情绪信息
|
||||
if statement.id in emotion_map:
|
||||
emotion_data = emotion_map[statement.id]
|
||||
if emotion_data is not None:
|
||||
# 将EmotionExtraction对象的字段赋值到Statement
|
||||
statement.emotion_type = emotion_data.emotion_type
|
||||
statement.emotion_intensity = emotion_data.emotion_intensity
|
||||
statement.emotion_keywords = emotion_data.emotion_keywords
|
||||
statement.emotion_subject = emotion_data.emotion_subject
|
||||
statement.emotion_target = emotion_data.emotion_target
|
||||
assigned_emotions += 1
|
||||
|
||||
# 赋值陈述句嵌入
|
||||
if statement.id in statement_embedding_map:
|
||||
statement.statement_embedding = statement_embedding_map[statement.id]
|
||||
@@ -818,6 +928,7 @@ class ExtractionOrchestrator:
|
||||
logger.info(
|
||||
f"数据赋值完成 - 总陈述句: {total_statements}, "
|
||||
f"时间信息: {assigned_temporal}, 三元组: {assigned_triplets}, "
|
||||
f"情绪信息: {assigned_emotions}, "
|
||||
f"陈述句嵌入: {assigned_statement_embeddings}, "
|
||||
f"分块嵌入: {assigned_chunk_embeddings}, "
|
||||
f"对话嵌入: {assigned_dialog_embeddings}"
|
||||
@@ -927,6 +1038,12 @@ class ExtractionOrchestrator:
|
||||
created_at=dialog_data.created_at,
|
||||
expired_at=dialog_data.expired_at,
|
||||
config_id=dialog_data.config_id if hasattr(dialog_data, 'config_id') else None,
|
||||
# Emotion fields
|
||||
emotion_type=getattr(statement, 'emotion_type', None),
|
||||
emotion_intensity=getattr(statement, 'emotion_intensity', None),
|
||||
emotion_keywords=getattr(statement, 'emotion_keywords', None),
|
||||
emotion_subject=getattr(statement, 'emotion_subject', None),
|
||||
emotion_target=getattr(statement, 'emotion_target', None),
|
||||
)
|
||||
statement_nodes.append(statement_node)
|
||||
|
||||
@@ -1333,7 +1450,7 @@ class ExtractionOrchestrator:
|
||||
if match:
|
||||
entity1_name = match.group(1).strip()
|
||||
entity1_type = match.group(2)
|
||||
entity2_name = match.group(3).strip()
|
||||
match.group(3).strip()
|
||||
entity2_type = match.group(4)
|
||||
|
||||
# 提取置信度和原因
|
||||
@@ -1646,7 +1763,6 @@ async def get_chunked_dialogs(
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
|
||||
# 加载测试数据
|
||||
testdata_path = os.path.join(os.path.dirname(__file__), "../../data", "testdata.json")
|
||||
@@ -1822,7 +1938,6 @@ async def get_chunked_dialogs_with_preprocessing(
|
||||
Returns:
|
||||
带 chunks 的 DialogData 列表
|
||||
"""
|
||||
import os
|
||||
print("\n=== 完整数据处理流程(包含预处理)===")
|
||||
|
||||
if input_data_path is None:
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import socket
|
||||
from typing import Optional, Dict, Any, Literal
|
||||
|
||||
NetworkMode = Literal['internal', 'external']
|
||||
@@ -105,7 +104,6 @@ def _make_pgsql_conn() -> Optional[object]:
|
||||
|
||||
try:
|
||||
import psycopg2 # type: ignore
|
||||
from psycopg2.extras import RealDictCursor # type: ignore
|
||||
|
||||
port = int(port_str) if port_str else 5432
|
||||
conn = psycopg2.connect(
|
||||
@@ -193,7 +191,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
|
||||
# config_id 在数据库中是 Integer 类型,需要转换
|
||||
try:
|
||||
config_id_int = int(config_id)
|
||||
except (ValueError, TypeError) as e:
|
||||
except (ValueError, TypeError):
|
||||
try:
|
||||
pass
|
||||
except Exception:
|
||||
@@ -207,7 +205,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
|
||||
" statement_granularity, include_dialogue_context, max_context, "
|
||||
" \"offset\" AS offset, lambda_time, lambda_mem, "
|
||||
" pruning_enabled, pruning_scene, pruning_threshold, "
|
||||
" llm_id, embedding_id "
|
||||
" llm_id, embedding_id, rerank_id "
|
||||
"FROM data_config WHERE config_id = %s LIMIT 1"
|
||||
)
|
||||
cur.execute(sql, (config_id_int,))
|
||||
@@ -222,7 +220,7 @@ def _fetch_db_config_by_config_id(config_id: int | str) -> Optional[Dict[str, An
|
||||
pass
|
||||
|
||||
return row if row else None
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
finally:
|
||||
@@ -325,7 +323,7 @@ def _apply_overrides_from_db_row(
|
||||
_set_if_present(selections, tk, db_row, tk, str)
|
||||
|
||||
# 特殊处理 UUID 字段,确保转换为字符串格式
|
||||
for uuid_field in ("llm_id", "embedding_id"):
|
||||
for uuid_field in ("llm_id", "embedding_id", "rerank_id"):
|
||||
if uuid_field in db_row and db_row.get(uuid_field) is not None:
|
||||
try:
|
||||
value = db_row.get(uuid_field)
|
||||
@@ -370,7 +368,7 @@ def _apply_overrides_from_db_row(
|
||||
pass
|
||||
|
||||
return runtime_cfg
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
pass
|
||||
return runtime_cfg
|
||||
|
||||
@@ -460,7 +458,7 @@ def apply_runtime_overrides_with_config_id(
|
||||
|
||||
updated_cfg = _apply_overrides_from_db_row(runtime_cfg, db_row, selected_cid, "config_id")
|
||||
return updated_cfg, True
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
pass
|
||||
return runtime_cfg, False
|
||||
|
||||
@@ -570,7 +568,7 @@ def load_unified_config(
|
||||
try:
|
||||
with open(runtime_config_path, "r", encoding="utf-8") as f:
|
||||
runtime_cfg = json.load(f)
|
||||
except (FileNotFoundError, json.JSONDecodeError) as e:
|
||||
except (FileNotFoundError, json.JSONDecodeError):
|
||||
runtime_cfg = {"selections": {}}
|
||||
|
||||
# 步骤 2: 尝试从 dbrun.json 读取 config_id 并应用数据库配置(最高优先级)
|
||||
@@ -603,7 +601,7 @@ def load_unified_config(
|
||||
pass
|
||||
return runtime_cfg
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
return {"selections": {}}
|
||||
|
||||
|
||||
|
||||
@@ -238,3 +238,81 @@ async def render_memory_summary_prompt(
|
||||
'json_schema': 'MemorySummaryResponse.schema'
|
||||
})
|
||||
return rendered_prompt
|
||||
|
||||
async def render_emotion_extraction_prompt(
    statement: str,
    extract_keywords: bool,
    enable_subject: bool
) -> str:
    """
    Build the emotion extraction prompt from the extract_emotion.jinja2 template.

    Args:
        statement: The statement to analyze
        extract_keywords: Whether to extract emotion keywords
        enable_subject: Whether to enable subject classification

    Returns:
        Rendered prompt content as string
    """
    template_name = "extract_emotion.jinja2"
    render_args = {
        "statement": statement,
        "extract_keywords": extract_keywords,
        "enable_subject": enable_subject,
    }
    prompt_text = prompt_env.get_template(template_name).render(**render_args)

    # Record the rendered result in the prompt log.
    log_prompt_rendering('emotion extraction', prompt_text)

    # Optional: record template rendering info. The 'statement' entry is the
    # literal 'str' placeholder, not the statement text itself.
    logged_context = {
        'statement': 'str',
        'extract_keywords': extract_keywords,
        'enable_subject': enable_subject
    }
    log_template_rendering(template_name, logged_context)

    return prompt_text
|
||||
|
||||
async def render_emotion_suggestions_prompt(
    health_data: dict,
    patterns: dict,
    user_profile: dict
) -> str:
    """
    Build the emotion suggestions prompt from the
    generate_emotion_suggestions.jinja2 template.

    Args:
        health_data: Emotional health data
        patterns: Emotion pattern analysis result
        user_profile: User profile data

    Returns:
        Rendered prompt content as string
    """
    import json

    template_name = "generate_emotion_suggestions.jinja2"

    # Pre-serialize emotion_distribution to a JSON string for the template
    # (an empty dict is used when the key is absent).
    distribution = health_data.get('emotion_distribution', {})
    emotion_distribution_json = json.dumps(distribution, ensure_ascii=False, indent=2)

    render_args = {
        "health_data": health_data,
        "patterns": patterns,
        "user_profile": user_profile,
        "emotion_distribution_json": emotion_distribution_json,
    }
    prompt_text = prompt_env.get_template(template_name).render(**render_args)

    # Record the rendered result in the prompt log.
    log_prompt_rendering('emotion suggestions', prompt_text)

    # Optional: record template rendering info (built after the prompt log call).
    logged_context = {
        'health_score': health_data.get('health_score'),
        'health_level': health_data.get('level'),
        'user_interests': user_profile.get('interests', [])
    }
    log_template_rendering(template_name, logged_context)

    return prompt_text
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
你是一个专业的情绪分析专家。请分析以下陈述句的情绪信息。
|
||||
|
||||
陈述句:{{ statement }}
|
||||
|
||||
请提取以下信息:
|
||||
|
||||
1. emotion_type(情绪类型):
|
||||
- joy: 喜悦、开心、高兴、满意、愉快
|
||||
- sadness: 悲伤、难过、失落、沮丧、遗憾
|
||||
- anger: 愤怒、生气、不满、恼火、烦躁
|
||||
- fear: 恐惧、害怕、担心、焦虑、紧张
|
||||
- surprise: 惊讶、意外、震惊、吃惊
|
||||
- neutral: 中性、客观陈述、无明显情绪
|
||||
|
||||
2. emotion_intensity(情绪强度):
|
||||
- 0.0-0.3: 弱情绪
|
||||
- 0.3-0.7: 中等情绪
|
||||
- 0.7-1.0: 强情绪
|
||||
|
||||
{% if extract_keywords %}
|
||||
3. emotion_keywords(情绪关键词):
|
||||
- 原句中直接表达情绪的词语
|
||||
- 最多提取3个关键词
|
||||
- 如果没有明显的情绪词,返回空列表
|
||||
{% else %}
|
||||
3. emotion_keywords(情绪关键词):
|
||||
- 返回空列表
|
||||
{% endif %}
|
||||
|
||||
{% if enable_subject %}
|
||||
4. emotion_subject(情绪主体):
|
||||
- self: 用户本人的情绪(包含"我"、"我们"、"咱们"等第一人称)
|
||||
- other: 他人的情绪(包含人名、"他/她"等第三人称)
|
||||
- object: 对事物的评价(针对产品、地点、事件等)
|
||||
|
||||
注意:
|
||||
- 如果同时包含多个主体,优先识别用户本人(self)
|
||||
- 如果无法明确判断主体,默认为 self
|
||||
|
||||
5. emotion_target(情绪对象):
|
||||
- 如果有明确的情绪对象,提取其名称
|
||||
- 如果没有明确对象,返回 null
|
||||
{% else %}
|
||||
4. emotion_subject(情绪主体):
|
||||
- 默认为 self
|
||||
|
||||
5. emotion_target(情绪对象):
|
||||
- 返回 null
|
||||
{% endif %}
|
||||
|
||||
注意事项:
|
||||
- 如果陈述句是客观事实陈述,无明显情绪,标记为 neutral
|
||||
- 情绪强度要符合语境,不要过度解读
|
||||
- 情绪关键词要准确,不要添加原句中没有的词
|
||||
- 主体分类要准确,优先识别用户本人(self)
|
||||
|
||||
请以 JSON 格式返回结果。
|
||||
@@ -0,0 +1,63 @@
|
||||
你是一位专业的心理健康顾问。请根据以下用户的情绪健康数据和个人信息,生成3-5条个性化的情绪改善建议。
|
||||
|
||||
## 用户情绪健康数据
|
||||
|
||||
健康分数:{{ health_data.health_score }}/100
|
||||
健康等级:{{ health_data.level }}
|
||||
|
||||
维度分析:
|
||||
- 积极率:{{ health_data.dimensions.positivity_rate.score }}/100
|
||||
- 正面情绪:{{ health_data.dimensions.positivity_rate.positive_count }}次
|
||||
- 负面情绪:{{ health_data.dimensions.positivity_rate.negative_count }}次
|
||||
- 中性情绪:{{ health_data.dimensions.positivity_rate.neutral_count }}次
|
||||
|
||||
- 稳定性:{{ health_data.dimensions.stability.score }}/100
|
||||
- 标准差:{{ health_data.dimensions.stability.std_deviation }}
|
||||
|
||||
- 恢复力:{{ health_data.dimensions.resilience.score }}/100
|
||||
- 恢复率:{{ health_data.dimensions.resilience.recovery_rate }}
|
||||
|
||||
情绪分布:
|
||||
{{ emotion_distribution_json }}
|
||||
|
||||
## 情绪模式分析
|
||||
|
||||
主要负面情绪:{{ patterns.dominant_negative_emotion|default('无') }}
|
||||
情绪波动性:{{ patterns.emotion_volatility|default('未知') }}
|
||||
高强度情绪次数:{{ patterns.high_intensity_emotions|default([])|length }}
|
||||
|
||||
## 用户兴趣
|
||||
|
||||
{{ user_profile.interests|default(['未知'])|join(', ') }}
|
||||
|
||||
## 任务要求
|
||||
|
||||
请生成3-5条个性化建议,每条建议包含:
|
||||
1. type: 建议类型(emotion_balance/activity_recommendation/social_connection/stress_management)
|
||||
2. title: 建议标题(简短有力)
|
||||
3. content: 建议内容(详细说明,50-100字)
|
||||
4. priority: 优先级(high/medium/low)
|
||||
5. actionable_steps: 3个可执行的具体步骤
|
||||
|
||||
同时提供一个health_summary(不超过50字),概括用户的整体情绪状态。
|
||||
|
||||
请以JSON格式返回,格式如下:
|
||||
{
|
||||
"health_summary": "您的情绪健康状况...",
|
||||
"suggestions": [
|
||||
{
|
||||
"type": "emotion_balance",
|
||||
"title": "建议标题",
|
||||
"content": "建议内容...",
|
||||
"priority": "high",
|
||||
"actionable_steps": ["步骤1", "步骤2", "步骤3"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
注意事项:
|
||||
- 建议要具体、可执行,避免空泛
|
||||
- 结合用户的兴趣爱好提供个性化建议
|
||||
- 针对主要问题(如主要负面情绪)提供针对性建议
|
||||
- 优先级要合理分配(至少1个high,1-2个medium,其余low)
|
||||
- 每个建议的3个步骤要循序渐进、易于实施
|
||||
Reference in New Issue
Block a user