Merge branch 'release/v0.3.0' into develop

* release/v0.3.0: (44 commits) Revert "fix(web): prompt editor" fix(web): prompt editor fix(prompt-optimizer): handle escaped quotes in JSON parsing fix(custom-tools): remove parameter coercion in custom tool base class fix(core): conditionally apply thinking parameters based on model support refactor(custom-tools): coerce query and request body parameters to schema types fix(prompt-optimizer): support list content type in prompt optimizer refactor(memory): unify user placeholder names and harden alias sync logic fix(rag): replace semicolon separators with newlines in Excel parser output fix(web): Compatible with Windows whitespace fix(memory): make PgSQL the single source of truth for user entity aliases refactor(rag): simplify Excel parsing logic and remove redundant chunk_token_num assignment fix(web): Hide error message when workflow node error message equals empty string ci(wechat-notify): add Sourcery summary extraction with Qwen fallback fix(http-request,embedding,naive): tighten form-data validation, reduce truncation length to 8000, and disable chunking for Excel fix(web): adjust the value of End User Name fix(http-request): support array and file variables in form-data files upload fix(web): change http body key name fix(web): header user name fix(web): calculate using the filtered breadcrumbs length ... # Conflicts: # web/src/views/UserMemoryDetail/Neo4j.tsx # web/src/views/UserMemoryDetail/components/EndUserProfile.tsx # web/src/views/UserMemoryDetail/types.ts
2026-04-15 19:31:38 +08:00
parent bef6a50deb e0d7a5a91f
commit c7230659e3
48 changed files with 702 additions and 452 deletions
--- a/api/app/services/app_dsl_service.py
+++ b/api/app/services/app_dsl_service.py
@@ -73,15 +73,14 @@ class AppDslService:
            AppType.MULTI_AGENT: "multi_agent_config",
            AppType.WORKFLOW: "workflow"
        }.get(app.type, "config")
-        config_data = self._enrich_release_config(app.type, release.config or {})
+        config_data = self._enrich_release_config(app.type, release.config or {}, release.default_model_config_id)
        dsl = {**meta, "app": app_meta, config_key: config_data}
        return yaml.dump(dsl, default_flow_style=False, allow_unicode=True), f"{release.name}_v{release.version_name}.yaml"

-    def _enrich_release_config(self, app_type: str, cfg: dict) -> dict:
+    def _enrich_release_config(self, app_type: str, cfg: dict, default_model_config_id=None) -> dict:
        if app_type == AppType.AGENT:
            enriched = {**cfg}
-            if "default_model_config_id" in cfg:
-                enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"])
+            enriched["default_model_config_ref"] = self._model_ref(default_model_config_id)
            if "knowledge_retrieval" in cfg:
                enriched["knowledge_retrieval"] = self._enrich_knowledge_retrieval(cfg["knowledge_retrieval"])
            if "tools" in cfg:
@@ -91,8 +90,7 @@ class AppDslService:
            return enriched
        if app_type == AppType.MULTI_AGENT:
            enriched = {**cfg}
-            if "default_model_config_id" in cfg:
-                enriched["default_model_config_ref"] = self._model_ref(cfg["default_model_config_id"])
+            enriched["default_model_config_ref"] = self._model_ref(default_model_config_id)
            if "master_agent_id" in cfg:
                enriched["master_agent_ref"] = self._release_ref(cfg["master_agent_id"])
            if "sub_agents" in cfg:
--- a/api/app/services/emotion_analytics_service.py
+++ b/api/app/services/emotion_analytics_service.py
@@ -679,9 +679,9 @@ class EmotionAnalyticsService:

            # 查询用户的实体和标签
            query = """
-            MATCH (e:Entity)
+            MATCH (e:ExtractedEntity)
            WHERE e.end_user_id = $end_user_id
-            RETURN e.name as name, e.type as type
+            RETURN e.name as name, e.entity_type as type
            ORDER BY e.created_at DESC
            LIMIT 20
            """
--- a/api/app/services/implicit_memory_service.py
+++ b/api/app/services/implicit_memory_service.py
@@ -34,6 +34,7 @@ from app.schemas.implicit_memory_schema import (
    UserMemorySummary,
 )
 from app.schemas.memory_config_schema import MemoryConfig
+from app.services.memory_base_service import MIN_MEMORY_SUMMARY_COUNT
 from sqlalchemy.orm import Session

 logger = logging.getLogger(__name__)
@@ -379,12 +380,59 @@ class ImplicitMemoryService:
            raise
    

+    def _build_empty_profile(self) -> dict:
+        """构建 MemorySummary 不足时返回的固定空白画像数据"""
+        now_ms = int(datetime.utcnow().timestamp() * 1000)
+        insufficient = "Insufficient data for analysis"
+
+        def _empty_dimension(name: str) -> dict:
+            return {
+                "evidence": [insufficient],
+                "reasoning": f"No clear evidence found for {name} dimension",
+                "percentage": 0.0,
+                "dimension_name": name,
+                "confidence_level": 20,
+            }
+
+        def _empty_category(name: str) -> dict:
+            return {
+                "evidence": [insufficient],
+                "percentage": 25.0,
+                "category_name": name,
+                "trending_direction": None,
+            }
+
+        return {
+            "habits": [],
+            "portrait": {
+                "aesthetic": _empty_dimension("aesthetic"),
+                "creativity": _empty_dimension("creativity"),
+                "literature": _empty_dimension("literature"),
+                "technology": _empty_dimension("technology"),
+                "historical_trends": None,
+                "analysis_timestamp": now_ms,
+                "total_summaries_analyzed": 0,
+            },
+            "preferences": [],
+            "interest_areas": {
+                "art": _empty_category("art"),
+                "tech": _empty_category("tech"),
+                "music": _empty_category("music"),
+                "lifestyle": _empty_category("lifestyle"),
+                "analysis_timestamp": now_ms,
+                "total_summaries_analyzed": 0,
+            },
+        }
+
    async def generate_complete_profile(
        self,
        user_id: str
    ) -> dict:
        """生成完整的用户画像（包含所有4个模块）
        
+        需要该用户的 MemorySummary 节点数量 >= 5 才会真正调用 LLM 生成画像，
+        否则返回固定的空白画像数据。
+        
        Args:
            user_id: 用户ID
            
@@ -394,6 +442,16 @@ class ImplicitMemoryService:
        logger.info(f"生成完整用户画像: user={user_id}")
        
        try:
+            # 前置检查：查询该用户有效的 MemorySummary 节点数量（排除孤立节点）
+            from app.services.memory_base_service import MemoryBaseService
+            base_service = MemoryBaseService()
+            memory_summary_count = await base_service.get_valid_memory_summary_count(user_id)
+            logger.info(f"用户 MemorySummary 节点数量: {memory_summary_count} (user={user_id})")
+
+            if memory_summary_count < MIN_MEMORY_SUMMARY_COUNT:
+                logger.info(f"MemorySummary 数量不足 {MIN_MEMORY_SUMMARY_COUNT}（当前 {memory_summary_count}），返回空白画像: user={user_id}")
+                return self._build_empty_profile()
+
            # 并行调用4个分析方法
            preferences, portrait, interest_areas, habits = await asyncio.gather(
                self.get_preference_tags(user_id=user_id),
--- a/api/app/services/memory_base_service.py
+++ b/api/app/services/memory_base_service.py
@@ -265,12 +265,50 @@ async def Translation_English(modid, text, fields=None):
    # 其他类型（数字、布尔值、None等）：原样返回
    else:
        return text
+# 隐性记忆画像生成所需的最低 MemorySummary 节点数量
+MIN_MEMORY_SUMMARY_COUNT = 5
+
+
 class MemoryBaseService:
    """记忆服务基类，提供共享的辅助方法"""
    
    def __init__(self):
        self.neo4j_connector = Neo4jConnector()
    
+    async def get_valid_memory_summary_count(
+        self,
+        end_user_id: str
+    ) -> int:
+        """获取用户有效的 MemorySummary 节点数量（排除孤立节点）。
+
+        只统计存在 DERIVED_FROM_STATEMENT 关系的 MemorySummary 节点。
+
+        Args:
+            end_user_id: 终端用户ID
+
+        Returns:
+            有效 MemorySummary 节点数量
+        """
+        try:
+            query = """
+            MATCH (n:MemorySummary)-[:DERIVED_FROM_STATEMENT]->(:Statement)
+            WHERE n.end_user_id = $end_user_id
+            RETURN count(DISTINCT n) as count
+            """
+            result = await self.neo4j_connector.execute_query(
+                query, end_user_id=end_user_id
+            )
+            count = result[0]["count"] if result and len(result) > 0 else 0
+            logger.debug(
+                f"有效 MemorySummary 节点数量: {count} (end_user_id={end_user_id})"
+            )
+            return count
+        except Exception as e:
+            logger.error(
+                f"获取有效 MemorySummary 数量失败: {str(e)}", exc_info=True
+            )
+            return 0
+    
    @staticmethod
    def parse_timestamp(timestamp_value) -> Optional[int]:
        """
--- a/api/app/services/prompt_optimizer_service.py
+++ b/api/app/services/prompt_optimizer_service.py
@@ -227,10 +227,20 @@ class PromptOptimizerService:
            content = getattr(chunk, "content", chunk)
            if not content:
                continue
-            buffer += content
+            if isinstance(content, str):
+                buffer += content
+            elif isinstance(content, list):
+                for _ in content:
+                    buffer += _["text"]
+            else:
+                logger.error(f"Unsupported content type - {content}")
+                raise Exception("Unsupported content type")
            cache = buffer[:-20]
+            last_idx = 19
+            while cache and cache[-1] == '\\' and last_idx > 0:
+                cache = buffer[:-last_idx]
+                last_idx -= 1

-            # 尝试找到 "prompt": " 开始位置
            if prompt_finished:
                continue

@@ -272,7 +282,7 @@ class PromptOptimizerService:
    def parser_prompt_variables(prompt: str):
        try:
            pattern = r'\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}'
-            matches = re.findall(pattern, prompt)
+            matches = re.findall(pattern, str(prompt))
            variables = list(set(matches))
            return variables
        except Exception as e:
--- a/api/app/services/user_memory_service.py
+++ b/api/app/services/user_memory_service.py
@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session

 from app.core.logging_config import get_logger
+from app.core.memory.storage_services.extraction_engine.deduplication.deduped_and_disamb import _USER_PLACEHOLDER_NAMES
 from app.core.memory.utils.llm.llm_utils import MemoryClientFactory
 from app.db import get_db_context
 from app.repositories.conversation_repository import ConversationRepository
@@ -21,7 +22,7 @@ from app.repositories.end_user_repository import EndUserRepository
 from app.repositories.neo4j.cypher_queries import Graph_Node_query
 from app.repositories.neo4j.neo4j_connector import Neo4jConnector
 from app.schemas.memory_episodic_schema import EmotionSubject, EmotionType, type_mapping
-from app.services.memory_base_service import MemoryBaseService
+from app.services.memory_base_service import MemoryBaseService, MIN_MEMORY_SUMMARY_COUNT
 from app.services.memory_config_service import MemoryConfigService
 from app.services.memory_perceptual_service import MemoryPerceptualService
 from app.services.memory_short_service import ShortService
@@ -477,7 +478,7 @@ class UserMemoryService:
            allowed_fields = {'other_name', 'aliases', 'meta_data'}
            
            # 用户占位名称黑名单，不允许作为 other_name 或出现在 aliases 中
-            _user_placeholder_names = {'用户', '我', 'User', 'I'}
+            _user_placeholder_names = _USER_PLACEHOLDER_NAMES
            
            # 过滤 other_name：不允许设置为占位名称
            if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names:
@@ -1504,7 +1505,7 @@ async def analytics_memory_types(
    2. 工作记忆 (WORKING_MEMORY) = 会话数量（通过 ConversationRepository.get_conversation_by_user_id 获取）
    3. 短期记忆 (SHORT_TERM_MEMORY) = /short_term 接口返回的问答对数量
    4. 显性记忆 (EXPLICIT_MEMORY) = 情景记忆 + 语义记忆（通过 MemoryBaseService.get_explicit_memory_count 获取）
-    5. 隐性记忆 (IMPLICIT_MEMORY) = Statement 节点数量的三分之一
+    5. 隐性记忆 (IMPLICIT_MEMORY) = MemorySummary 节点数量（需 >= MIN_MEMORY_SUMMARY_COUNT 才显示，否则为 0）
    6. 情绪记忆 (EMOTIONAL_MEMORY) = 情绪标签统计总数（通过 MemoryBaseService.get_emotional_memory_count 获取）
    7. 情景记忆 (EPISODIC_MEMORY) = memory_summary（通过 MemoryBaseService.get_episodic_memory_count 获取）
    8. 遗忘记忆 (FORGET_MEMORY) = 激活值低于阈值的节点数（通过 MemoryBaseService.get_forget_memory_count 获取）
@@ -1561,23 +1562,15 @@ async def analytics_memory_types(
            logger.warning(f"获取会话数量失败，工作记忆数量设为0: {str(e)}")
            work_count = 0
    
-    # 获取隐性记忆数量（基于 Statement 节点数量的三分之一）
+    # 获取隐性记忆数量（基于有关联关系的 MemorySummary 节点数量，需 >= MIN_MEMORY_SUMMARY_COUNT 才计入）
    implicit_count = 0
    if end_user_id:
        try:
-            # 查询 Statement 节点数量
-            query = """
-            MATCH (n:Statement)
-            WHERE n.end_user_id = $end_user_id
-            RETURN count(n) as count
-            """
-            result = await _neo4j_connector.execute_query(query, end_user_id=end_user_id)
-            statement_count = result[0]["count"] if result and len(result) > 0 else 0
-            # 取三分之一作为隐性记忆数量
-            implicit_count = round(statement_count / 3)
-            logger.debug(f"隐性记忆数量（Statement数量的1/3）: {implicit_count} (Statement总数={statement_count}, end_user_id={end_user_id})")
+            memory_summary_count = await base_service.get_valid_memory_summary_count(end_user_id)
+            implicit_count = memory_summary_count if memory_summary_count >= MIN_MEMORY_SUMMARY_COUNT else 0
+            logger.debug(f"隐性记忆数量（有效MemorySummary节点数）: {implicit_count} (有效MemorySummary总数={memory_summary_count}, end_user_id={end_user_id})")
        except Exception as e:
-            logger.warning(f"获取Statement数量失败，隐性记忆数量设为0: {str(e)}")
+            logger.warning(f"获取MemorySummary数量失败，隐性记忆数量设为0: {str(e)}")
            implicit_count = 0
    
    # 原有的基于行为习惯的统计方式（已注释）
@@ -1643,7 +1636,7 @@ async def analytics_memory_types(
        "WORKING_MEMORY": work_count,                             # 工作记忆（基于会话数量）
        "SHORT_TERM_MEMORY": short_term_count,                    # 短期记忆（基于问答对数量）
        "EXPLICIT_MEMORY": explicit_count,                        # 显性记忆（情景记忆 + 语义记忆）
-        "IMPLICIT_MEMORY": implicit_count,                        # 隐性记忆（Statement数量的1/3）
+        "IMPLICIT_MEMORY": implicit_count,                        # 隐性记忆（MemorySummary节点数，需>=MIN_MEMORY_SUMMARY_COUNT）
        "EMOTIONAL_MEMORY": emotion_count,                        # 情绪记忆（使用情绪标签统计）
        "EPISODIC_MEMORY": episodic_count,                        # 情景记忆
        "FORGET_MEMORY": forget_count                             # 遗忘记忆（激活值低于阈值）
--- a/api/app/services/user_service.py
+++ b/api/app/services/user_service.py
@@ -285,7 +285,7 @@ def activate_user(db: Session, user_id_to_activate: uuid.UUID, current_user: Use
    try:
        # 查找用户
        business_logger.debug(f"查找待激活用户: {user_id_to_activate}")
-        db_user = user_repository.get_user_by_id(db, user_id=user_id_to_activate)
+        db_user = user_repository.get_user_by_id_regardless_active(db, user_id=user_id_to_activate)
        if not db_user:
            business_logger.warning(f"用户不存在: {user_id_to_activate}")
            raise BusinessException("用户不存在", code=BizCode.USER_NOT_FOUND)