Merge branch 'release/v0.3.0' into develop

* release/v0.3.0: (44 commits)
  Revert "fix(web): prompt editor"
  fix(web): prompt editor
  fix(prompt-optimizer): handle escaped quotes in JSON parsing
  fix(custom-tools): remove parameter coercion in custom tool base class
  fix(core): conditionally apply thinking parameters based on model support
  refactor(custom-tools): coerce query and request body parameters to schema types
  fix(prompt-optimizer): support list content type in prompt optimizer
  refactor(memory): unify user placeholder names and harden alias sync logic
  fix(rag): replace semicolon separators with newlines in Excel parser output
  fix(web): Compatible with Windows whitespace
  fix(memory): make PgSQL the single source of truth for user entity aliases
  refactor(rag): simplify Excel parsing logic and remove redundant chunk_token_num assignment
  fix(web): Hide error message when workflow node error message equals empty string
  ci(wechat-notify): add Sourcery summary extraction with Qwen fallback
  fix(http-request,embedding,naive): tighten form-data validation, reduce truncation length to 8000, and disable chunking for Excel
  fix(web): adjust the value of End User Name
  fix(http-request): support array and file variables in form-data files upload
  fix(web): change http body key name
  fix(web): header user name
  fix(web): calculate using the filtered breadcrumbs length
  ...

# Conflicts:
#	web/src/views/UserMemoryDetail/Neo4j.tsx
#	web/src/views/UserMemoryDetail/components/EndUserProfile.tsx
#	web/src/views/UserMemoryDetail/types.ts
This commit is contained in:
Mark
2026-04-15 19:31:38 +08:00
48 changed files with 702 additions and 452 deletions

View File

@@ -73,15 +73,14 @@ class AppDslService:
AppType.MULTI_AGENT: "multi_agent_config",
AppType.WORKFLOW: "workflow"
}.get(app.type, "config")
config_data = self._enrich_release_config(app.type, release.config or {})
config_data = self._enrich_release_config(app.type, release.config or {}, release.default_model_config_id)
dsl = {**meta, "app": app_meta, config_key: config_data}
return yaml.dump(dsl, default_flow_style=False, allow_unicode=True), f"{release.name}_v{release.version_name}.yaml"
def _enrich_release_config(self, app_type: str, cfg: dict) -> dict:
def _enrich_release_config(self, app_type: str, cfg: dict, default_model_config_id=None) -> dict:
if app_type == AppType.AGENT:
enriched = {**cfg}
if "default_model_config_id" in cfg:
enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"])
enriched["default_model_config_ref"] = self._model_ref(default_model_config_id)
if "knowledge_retrieval" in cfg:
enriched["knowledge_retrieval"] = self._enrich_knowledge_retrieval(cfg["knowledge_retrieval"])
if "tools" in cfg:
@@ -91,8 +90,7 @@ class AppDslService:
return enriched
if app_type == AppType.MULTI_AGENT:
enriched = {**cfg}
if "default_model_config_id" in cfg:
enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"])
enriched["default_model_config_ref"] = self._model_ref(default_model_config_id)
if "master_agent_id" in cfg:
enriched["master_agent_ref"] = self._release_ref(cfg["master_agent_id"])
if "sub_agents" in cfg:

View File

@@ -679,9 +679,9 @@ class EmotionAnalyticsService:
# 查询用户的实体和标签
query = """
MATCH (e:Entity)
MATCH (e:ExtractedEntity)
WHERE e.end_user_id = $end_user_id
RETURN e.name as name, e.type as type
RETURN e.name as name, e.entity_type as type
ORDER BY e.created_at DESC
LIMIT 20
"""

View File

@@ -34,6 +34,7 @@ from app.schemas.implicit_memory_schema import (
UserMemorySummary,
)
from app.schemas.memory_config_schema import MemoryConfig
from app.services.memory_base_service import MIN_MEMORY_SUMMARY_COUNT
from sqlalchemy.orm import Session
logger = logging.getLogger(__name__)
@@ -379,12 +380,59 @@ class ImplicitMemoryService:
raise
def _build_empty_profile(self) -> dict:
"""构建 MemorySummary 不足时返回的固定空白画像数据"""
now_ms = int(datetime.utcnow().timestamp() * 1000)
insufficient = "Insufficient data for analysis"
def _empty_dimension(name: str) -> dict:
return {
"evidence": [insufficient],
"reasoning": f"No clear evidence found for {name} dimension",
"percentage": 0.0,
"dimension_name": name,
"confidence_level": 20,
}
def _empty_category(name: str) -> dict:
return {
"evidence": [insufficient],
"percentage": 25.0,
"category_name": name,
"trending_direction": None,
}
return {
"habits": [],
"portrait": {
"aesthetic": _empty_dimension("aesthetic"),
"creativity": _empty_dimension("creativity"),
"literature": _empty_dimension("literature"),
"technology": _empty_dimension("technology"),
"historical_trends": None,
"analysis_timestamp": now_ms,
"total_summaries_analyzed": 0,
},
"preferences": [],
"interest_areas": {
"art": _empty_category("art"),
"tech": _empty_category("tech"),
"music": _empty_category("music"),
"lifestyle": _empty_category("lifestyle"),
"analysis_timestamp": now_ms,
"total_summaries_analyzed": 0,
},
}
async def generate_complete_profile(
self,
user_id: str
) -> dict:
"""生成完整的用户画像包含所有4个模块
需要该用户的 MemorySummary 节点数量 >= 5 才会真正调用 LLM 生成画像,
否则返回固定的空白画像数据。
Args:
user_id: 用户ID
@@ -394,6 +442,16 @@ class ImplicitMemoryService:
logger.info(f"生成完整用户画像: user={user_id}")
try:
# 前置检查:查询该用户有效的 MemorySummary 节点数量(排除孤立节点)
from app.services.memory_base_service import MemoryBaseService
base_service = MemoryBaseService()
memory_summary_count = await base_service.get_valid_memory_summary_count(user_id)
logger.info(f"用户 MemorySummary 节点数量: {memory_summary_count} (user={user_id})")
if memory_summary_count < MIN_MEMORY_SUMMARY_COUNT:
logger.info(f"MemorySummary 数量不足 {MIN_MEMORY_SUMMARY_COUNT}(当前 {memory_summary_count}),返回空白画像: user={user_id}")
return self._build_empty_profile()
# 并行调用4个分析方法
preferences, portrait, interest_areas, habits = await asyncio.gather(
self.get_preference_tags(user_id=user_id),

View File

@@ -265,12 +265,50 @@ async def Translation_English(modid, text, fields=None):
# 其他类型数字、布尔值、None等原样返回
else:
return text
# 隐性记忆画像生成所需的最低 MemorySummary 节点数量
MIN_MEMORY_SUMMARY_COUNT = 5
class MemoryBaseService:
"""记忆服务基类,提供共享的辅助方法"""
def __init__(self):
self.neo4j_connector = Neo4jConnector()
async def get_valid_memory_summary_count(
self,
end_user_id: str
) -> int:
"""获取用户有效的 MemorySummary 节点数量(排除孤立节点)。
只统计存在 DERIVED_FROM_STATEMENT 关系的 MemorySummary 节点。
Args:
end_user_id: 终端用户ID
Returns:
有效 MemorySummary 节点数量
"""
try:
query = """
MATCH (n:MemorySummary)-[:DERIVED_FROM_STATEMENT]->(:Statement)
WHERE n.end_user_id = $end_user_id
RETURN count(DISTINCT n) as count
"""
result = await self.neo4j_connector.execute_query(
query, end_user_id=end_user_id
)
count = result[0]["count"] if result and len(result) > 0 else 0
logger.debug(
f"有效 MemorySummary 节点数量: {count} (end_user_id={end_user_id})"
)
return count
except Exception as e:
logger.error(
f"获取有效 MemorySummary 数量失败: {str(e)}", exc_info=True
)
return 0
@staticmethod
def parse_timestamp(timestamp_value) -> Optional[int]:
"""

View File

@@ -227,10 +227,20 @@ class PromptOptimizerService:
content = getattr(chunk, "content", chunk)
if not content:
continue
buffer += content
if isinstance(content, str):
buffer += content
elif isinstance(content, list):
for _ in content:
buffer += _["text"]
else:
logger.error(f"Unsupported content type - {content}")
raise Exception("Unsupported content type")
cache = buffer[:-20]
last_idx = 19
while cache and cache[-1] == '\\' and last_idx > 0:
cache = buffer[:-last_idx]
last_idx -= 1
# 尝试找到 "prompt": " 开始位置
if prompt_finished:
continue
@@ -272,7 +282,7 @@ class PromptOptimizerService:
def parser_prompt_variables(prompt: str):
try:
pattern = r'\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}'
matches = re.findall(pattern, prompt)
matches = re.findall(pattern, str(prompt))
variables = list(set(matches))
return variables
except Exception as e:

View File

@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from app.core.logging_config import get_logger
from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
from app.db import get_db_context
from app.repositories.conversation_repository import ConversationRepository
@@ -21,7 +22,7 @@ from app.repositories.end_user_repository import EndUserRepository
from app.repositories.neo4j.cypher_queries import Graph_Node_query
from app.repositories.neo4j.neo4j_connector import Neo4jConnector
from app.schemas.memory_episodic_schema import EmotionSubject, EmotionType, type_mapping
from app.services.memory_base_service import MemoryBaseService
from app.services.memory_base_service import MemoryBaseService, MIN_MEMORY_SUMMARY_COUNT
from app.services.memory_config_service import MemoryConfigService
from app.services.memory_perceptual_service import MemoryPerceptualService
from app.services.memory_short_service import ShortService
@@ -477,7 +478,7 @@ class UserMemoryService:
allowed_fields = {'other_name', 'aliases', 'meta_data'}
# 用户占位名称黑名单,不允许作为 other_name 或出现在 aliases 中
_user_placeholder_names = {'用户', '', 'User', 'I'}
_user_placeholder_names = _USER_PLACEHOLDER_NAMES
# 过滤 other_name不允许设置为占位名称
if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names:
@@ -1504,7 +1505,7 @@ async def analytics_memory_types(
2. 工作记忆 (WORKING_MEMORY) = 会话数量(通过 ConversationRepository.get_conversation_by_user_id 获取)
3. 短期记忆 (SHORT_TERM_MEMORY) = /short_term 接口返回的问答对数量
4. 显性记忆 (EXPLICIT_MEMORY) = 情景记忆 + 语义记忆(通过 MemoryBaseService.get_explicit_memory_count 获取)
5. 隐性记忆 (IMPLICIT_MEMORY) = Statement 节点数量的三分之一
5. 隐性记忆 (IMPLICIT_MEMORY) = MemorySummary 节点数量(需 >= MIN_MEMORY_SUMMARY_COUNT 才显示,否则为 0
6. 情绪记忆 (EMOTIONAL_MEMORY) = 情绪标签统计总数(通过 MemoryBaseService.get_emotional_memory_count 获取)
7. 情景记忆 (EPISODIC_MEMORY) = memory_summary通过 MemoryBaseService.get_episodic_memory_count 获取)
8. 遗忘记忆 (FORGET_MEMORY) = 激活值低于阈值的节点数(通过 MemoryBaseService.get_forget_memory_count 获取)
@@ -1561,23 +1562,15 @@ async def analytics_memory_types(
logger.warning(f"获取会话数量失败工作记忆数量设为0: {str(e)}")
work_count = 0
# 获取隐性记忆数量(基于 Statement 节点数量的三分之一
# 获取隐性记忆数量(基于有关联关系的 MemorySummary 节点数量,需 >= MIN_MEMORY_SUMMARY_COUNT 才计入
implicit_count = 0
if end_user_id:
try:
# 查询 Statement 节点数量
query = """
MATCH (n:Statement)
WHERE n.end_user_id = $end_user_id
RETURN count(n) as count
"""
result = await _neo4j_connector.execute_query(query, end_user_id=end_user_id)
statement_count = result[0]["count"] if result and len(result) > 0 else 0
# 取三分之一作为隐性记忆数量
implicit_count = round(statement_count / 3)
logger.debug(f"隐性记忆数量Statement数量的1/3: {implicit_count} (Statement总数={statement_count}, end_user_id={end_user_id})")
memory_summary_count = await base_service.get_valid_memory_summary_count(end_user_id)
implicit_count = memory_summary_count if memory_summary_count >= MIN_MEMORY_SUMMARY_COUNT else 0
logger.debug(f"隐性记忆数量有效MemorySummary节点数: {implicit_count} (有效MemorySummary总数={memory_summary_count}, end_user_id={end_user_id})")
except Exception as e:
logger.warning(f"获取Statement数量失败隐性记忆数量设为0: {str(e)}")
logger.warning(f"获取MemorySummary数量失败隐性记忆数量设为0: {str(e)}")
implicit_count = 0
# 原有的基于行为习惯的统计方式(已注释)
@@ -1643,7 +1636,7 @@ async def analytics_memory_types(
"WORKING_MEMORY": work_count, # 工作记忆(基于会话数量)
"SHORT_TERM_MEMORY": short_term_count, # 短期记忆(基于问答对数量)
"EXPLICIT_MEMORY": explicit_count, # 显性记忆(情景记忆 + 语义记忆)
"IMPLICIT_MEMORY": implicit_count, # 隐性记忆(Statement数量的1/3
"IMPLICIT_MEMORY": implicit_count, # 隐性记忆(MemorySummary节点数需>=MIN_MEMORY_SUMMARY_COUNT
"EMOTIONAL_MEMORY": emotion_count, # 情绪记忆(使用情绪标签统计)
"EPISODIC_MEMORY": episodic_count, # 情景记忆
"FORGET_MEMORY": forget_count # 遗忘记忆(激活值低于阈值)

View File

@@ -285,7 +285,7 @@ def activate_user(db: Session, user_id_to_activate: uuid.UUID, current_user: Use
try:
# 查找用户
business_logger.debug(f"查找待激活用户: {user_id_to_activate}")
db_user = user_repository.get_user_by_id(db, user_id=user_id_to_activate)
db_user = user_repository.get_user_by_id_regardless_active(db, user_id=user_id_to_activate)
if not db_user:
business_logger.warning(f"用户不存在: {user_id_to_activate}")
raise BusinessException("用户不存在", code=BizCode.USER_NOT_FOUND)