Merge remote-tracking branch 'origin/release/v0.2.9' into develop

2026-03-31 19:16:13 +08:00
parent 87c2419186 fe29141437
commit 3ea42ac27f
55 changed files with 1482 additions and 570 deletions
--- a/api/app/services/app_chat_service.py
+++ b/api/app/services/app_chat_service.py
@@ -141,13 +141,13 @@ class AppChatService:
        # 如果是新会话且有开场白，作为第一条 assistant 消息写入数据库
        is_new_conversation = len(history) == 0
        if is_new_conversation:
-            opening = self.agent_service._get_opening_statement(features_config, True, variables)
+            opening, suggested_questions = self.agent_service._get_opening_statement(features_config, True, variables)
            if opening:
                self.conversation_service.add_message(
                    conversation_id=conversation_id,
                    role="assistant",
                    content=opening,
-                    meta_data={}
+                    meta_data={"suggested_questions": suggested_questions}
                )
                # 重新加载历史（包含刚写入的开场白）
                history = await self.conversation_service.get_conversation_history(
@@ -378,13 +378,13 @@ class AppChatService:
            # 如果是新会话且有开场白，作为第一条 assistant 消息写入数据库
            is_new_conversation = len(history) == 0
            if is_new_conversation:
-                opening = self.agent_service._get_opening_statement(features_config, True, variables)
+                opening, suggested_questions = self.agent_service._get_opening_statement(features_config, True, variables)
                if opening:
                    self.conversation_service.add_message(
                        conversation_id=conversation_id,
                        role="assistant",
                        content=opening,
-                        meta_data={}
+                        meta_data={"suggested_questions": suggested_questions}
                    )
                    # 重新加载历史（包含刚写入的开场白）
                    history = await self.conversation_service.get_conversation_history(
--- a/api/app/services/app_log_service.py
+++ b/api/app/services/app_log_service.py
@@ -0,0 +1,128 @@
+"""应用日志服务层"""
+import uuid
+from typing import Optional, Tuple
+from datetime import datetime
+
+from sqlalchemy.orm import Session
+
+from app.core.logging_config import get_business_logger
+from app.models.conversation_model import Conversation, Message
+from app.repositories.conversation_repository import ConversationRepository, MessageRepository
+
+logger = get_business_logger()
+
+
+class AppLogService:
+    """Service layer for browsing an application's conversation logs via the conversation/message repositories."""
+
+    def __init__(self, db: Session):
+        self.db = db
+        self.conversation_repository = ConversationRepository(db)
+        self.message_repository = MessageRepository(db)
+
+    def list_conversations(
+        self,
+        app_id: uuid.UUID,
+        workspace_id: uuid.UUID,
+        page: int = 1,
+        pagesize: int = 20,
+        is_draft: Optional[bool] = None,
+    ) -> Tuple[list[Conversation], int]:
+        """
+        List an application's log conversations, one page at a time.
+
+        Args:
+            app_id: application ID
+            workspace_id: workspace ID
+            page: page number (1-based)
+            pagesize: number of items per page
+            is_draft: draft-conversation filter (None means no filtering)
+
+        Returns:
+            Tuple[list[Conversation], int]: (conversations, total count) exactly as returned by the repository
+        """
+        logger.info(
+            "查询应用日志会话列表",
+            extra={
+                "app_id": str(app_id),
+                "workspace_id": str(workspace_id),
+                "page": page,
+                "pagesize": pagesize,
+                "is_draft": is_draft
+            }
+        )
+
+        # Delegate the filtered, paginated query to the repository
+        conversations, total = self.conversation_repository.list_app_conversations(
+            app_id=app_id,
+            workspace_id=workspace_id,
+            is_draft=is_draft,
+            page=page,
+            pagesize=pagesize
+        )
+
+        logger.info(
+            "查询应用日志会话列表成功",
+            extra={
+                "app_id": str(app_id),
+                "total": total,
+                "returned": len(conversations)
+            }
+        )
+
+        return conversations, total
+
+    def get_conversation_detail(
+        self,
+        app_id: uuid.UUID,
+        conversation_id: uuid.UUID,
+        workspace_id: uuid.UUID
+    ) -> Conversation:
+        """
+        Fetch one conversation together with its messages.
+
+        Args:
+            app_id: application ID
+            conversation_id: conversation ID
+            workspace_id: workspace ID
+
+        Returns:
+            Conversation: the conversation object with its messages attached
+
+        Raises:
+            ResourceNotFoundException: when the conversation does not exist (presumably raised by the repository lookup; no check is made here - confirm)
+        """
+        logger.info(
+            "查询应用日志会话详情",
+            extra={
+                "app_id": str(app_id),
+                "conversation_id": str(conversation_id),
+                "workspace_id": str(workspace_id)
+            }
+        )
+
+        # Look up the conversation, scoped to this app and workspace
+        conversation = self.conversation_repository.get_conversation_for_app_log(
+            conversation_id=conversation_id,
+            app_id=app_id,
+            workspace_id=workspace_id
+        )
+
+        # Load the messages (chronological per the original note; the ordering is decided by the repository - confirm)
+        messages = self.message_repository.get_messages_by_conversation(
+            conversation_id=conversation_id
+        )
+
+        # Attach the messages to the conversation object before returning it
+        conversation.messages = messages
+
+        logger.info(
+            "查询应用日志会话详情成功",
+            extra={
+                "app_id": str(app_id),
+                "conversation_id": str(conversation_id),
+                "message_count": len(messages)
+            }
+        )
+
+        return conversation
--- a/api/app/services/app_service.py
+++ b/api/app/services/app_service.py
@@ -1084,7 +1084,6 @@ class AppService:
                if not exists:
                    cleaned["memory_config_id"] = None
                    cleaned.pop("memory_content", None)
-                    cleaned["enabled"] = False
                return cleaned

            exists = self.db.query(
@@ -1096,7 +1095,6 @@ class AppService:
            if not exists:
                cleaned["memory_config_id"] = None
                cleaned.pop("memory_content", None)
-                cleaned["enabled"] = False

        return cleaned

@@ -1684,15 +1682,15 @@ class AppService:

        return config.config_id

-    def _update_endusers_memory_config_by_workspace(
+    def _update_endusers_memory_config_by_app(
            self,
-            workspace_id: uuid.UUID,
+            app_id: uuid.UUID,
            memory_config_id: uuid.UUID
    ) -> int:
        """批量更新应用下所有终端用户的 memory_config_id
        
        Args:
-            workspace_id: 工作空间ID
+            app_id: 应用ID
            memory_config_id: 新的记忆配置ID
            
        Returns:
@@ -1701,8 +1699,8 @@ class AppService:
        from app.repositories.end_user_repository import EndUserRepository

        repo = EndUserRepository(self.db)
-        updated_count = repo.batch_update_memory_config_id_by_workspace(
-            workspace_id=workspace_id,
+        updated_count = repo.batch_update_memory_config_id_by_app(
+            app_id=app_id,
            memory_config_id=memory_config_id
        )

@@ -1753,12 +1751,16 @@ class AppService:

            miss_params = []
            if agent_cfg.default_model_config_id is None:
-                miss_params.append("model config")
+                miss_params.append("模型配置")

            if agent_cfg.memory.get("enabled") and not agent_cfg.memory.get("memory_config_id"):
-                miss_params.append("memory config")
+                miss_params.append("记忆配置")
            if miss_params:
-                raise BusinessException(f"{', '.join(miss_params)} is required")
+                raise BusinessException(
+                    f"应用发布失败：检测到以下必要配置尚未完成：{', '.join(miss_params)}。请返回应用编辑页面完成相关配置后再尝试发布。",
+                    BizCode.CONFIG_MISSING,
+                    context={"missing_params": miss_params},
+                )

            config = {
                "system_prompt": agent_cfg.system_prompt,
@@ -1877,8 +1879,8 @@ class AppService:
        if memory_config_id:
            app = self.db.query(App).filter(App.id == app_id).first()
            if app:
-                updated_count = self._update_endusers_memory_config_by_workspace(
-                    app.workspace_id, memory_config_id
+                updated_count = self._update_endusers_memory_config_by_app(
+                    app_id, memory_config_id
                )
                logger.info(
                    f"发布时更新终端用户记忆配置: app_id={app_id}, workspace_id={app.workspace_id}, "
@@ -2014,7 +2016,7 @@ class AppService:

        if memory_config_id:

-            updated_count = self._update_endusers_memory_config_by_workspace(app.workspace_id, memory_config_id)
+            updated_count = self._update_endusers_memory_config_by_app(app_id, memory_config_id)
            logger.info(
                f"回滚时更新终端用户记忆配置: app_id={app_id}, version={version}, "
                f"memory_config_id={memory_config_id}, updated_count={updated_count}"
--- a/api/app/services/conversation_service.py
+++ b/api/app/services/conversation_service.py
@@ -214,7 +214,7 @@ class ConversationService:

            conversation.message_count += 1

-            if conversation.message_count == 1 and role == "user":
+            if conversation.message_count <= 2 and role == "user":
                conversation.title = (
                        content[:50] + ("..." if len(content) > 50 else "")
                )
--- a/api/app/services/draft_run_service.py
+++ b/api/app/services/draft_run_service.py
@@ -448,15 +448,16 @@ class AgentRunService:
            features_config: Dict[str, Any],
            is_new_conversation: bool,
            variables: Optional[Dict[str, Any]] = None
-    ) -> Optional[str]:
+    ) -> tuple[Any, Any]:
        """首轮对话时返回开场白文本（支持变量替换），否则返回 None"""
        if not is_new_conversation:
-            return None
+            return None, None
        opening = features_config.get("opening_statement", {})
        if not (isinstance(opening, dict) and opening.get("enabled") and opening.get("statement")):
-            return None
+            return None, None
        
        statement = opening["statement"]
+        suggested_questions = opening["suggested_questions"]
        
        # 如果有变量，进行替换（仅支持 {{var_name}} 格式）
        if variables:
@@ -464,7 +465,7 @@ class AgentRunService:
                placeholder = f"{{{{{var_name}}}}}"
                statement = statement.replace(placeholder, str(var_value))
        
-        return statement
+        return statement, suggested_questions

    @staticmethod
    def _filter_citations(
@@ -598,13 +599,16 @@ class AgentRunService:

            # 5. 处理会话ID（创建或验证），新会话时写入开场白
            is_new_conversation = not conversation_id
-            opening = self._get_opening_statement(features_config, is_new_conversation, variables)
+            opening, suggested_questions = None, None
+            if not sub_agent:
+                opening, suggested_questions = self._get_opening_statement(features_config, is_new_conversation, variables)
            conversation_id = await self._ensure_conversation(
                conversation_id=conversation_id,
                app_id=agent_config.app_id,
                workspace_id=workspace_id,
                user_id=user_id,
-                opening_statement=opening
+                opening_statement=opening,
+                suggested_questions=suggested_questions
            )

            model_info = ModelInfo(
@@ -839,14 +843,17 @@ class AgentRunService:

            # 5. 处理会话ID（创建或验证），新会话时写入开场白
            is_new_conversation = not conversation_id
-            opening = self._get_opening_statement(features_config, is_new_conversation, variables)
+            opening, suggested_questions = None, None
+            if not sub_agent:
+                opening, suggested_questions = self._get_opening_statement(features_config, is_new_conversation, variables)
            conversation_id = await self._ensure_conversation(
                conversation_id=conversation_id,
                app_id=agent_config.app_id,
                workspace_id=workspace_id,
                user_id=user_id,
                sub_agent=sub_agent,
-                opening_statement=opening
+                opening_statement=opening,
+                suggested_questions=suggested_questions
            )

            model_info = ModelInfo(
@@ -1050,7 +1057,8 @@ class AgentRunService:
            workspace_id: uuid.UUID,
            user_id: Optional[str],
            sub_agent: bool = False,
-            opening_statement: Optional[str] = None
+            opening_statement: Optional[str] = None,
+            suggested_questions: Optional[List[str]] = None
    ) -> str:
        """确保会话存在（创建或验证）

@@ -1061,6 +1069,7 @@ class AgentRunService:
            user_id: 用户ID
            sub_agent: 是否为子代理
            opening_statement: 开场白（新会话时作为第一条消息写入）
+            suggested_questions: 预设问题列表

        Returns:
            str: 会话ID
@@ -1104,7 +1113,7 @@ class AgentRunService:
                    conversation_id=uuid.UUID(new_conv_id),
                    role="assistant",
                    content=opening_statement,
-                    meta_data={}
+                    meta_data={"suggested_questions": suggested_questions}
                )
                logger.debug(f"已保存开场白到会话 {new_conv_id}")

--- a/api/app/services/memory_forget_service.py
+++ b/api/app/services/memory_forget_service.py
@@ -204,30 +204,35 @@ class MemoryForgetService:
        end_user_id: str,
        forgetting_threshold: float,
        min_days_since_access: int,
-        limit: int = 20
-    ) -> list[Dict[str, Any]]:
+        page: Optional[int] = None,
+        pagesize: Optional[int] = None
+    ) -> Dict[str, Any]:
        """
        获取待遗忘节点列表
-        
-        查询满足遗忘条件的节点（激活值低于阈值且最后访问时间超过最小天数）
-        
+
+        查询满足遗忘条件的节点（激活值低于阈值且最后访问时间超过最小天数）。支持分页查询。
+
        Args:
            connector: Neo4j 连接器
            end_user_id: 组ID
            forgetting_threshold: 遗忘阈值
            min_days_since_access: 最小未访问天数
-            limit: 返回节点数量限制
-        
+            page: 页码（可选，从1开始）
+            pagesize: 每页数量（可选）
+
        Returns:
-            list: 待遗忘节点列表
+            dict: 包含待遗忘节点列表和分页信息的字典
+                - items: 待遗忘节点列表
+                - page: 分页信息（分页时）
        """
        from datetime import timedelta
-        
+
        # 计算最小访问时间（ISO 8601 格式字符串，使用 UTC 时区）
        min_access_time = datetime.now(timezone.utc) - timedelta(days=min_days_since_access)
        min_access_time_str = min_access_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
-        
-        query = """
+
+        # 基础查询（用于获取总数）
+        count_query = """
        MATCH (n)
        WHERE (n:Statement OR n:ExtractedEntity OR n:MemorySummary)
          AND n.end_user_id = $end_user_id
@@ -235,10 +240,22 @@ class MemoryForgetService:
          AND n.activation_value < $threshold
          AND n.last_access_time IS NOT NULL
          AND datetime(n.last_access_time) < datetime($min_access_time_str)
-        RETURN 
+        RETURN count(n) as total
+        """
+
+        # 数据查询
+        data_query = """
+        MATCH (n)
+        WHERE (n:Statement OR n:ExtractedEntity OR n:MemorySummary)
+          AND n.end_user_id = $end_user_id
+          AND n.activation_value IS NOT NULL
+          AND n.activation_value < $threshold
+          AND n.last_access_time IS NOT NULL
+          AND datetime(n.last_access_time) < datetime($min_access_time_str)
+        RETURN
          elementId(n) as node_id,
          labels(n)[0] as node_type,
-          CASE 
+          CASE
            WHEN n:Statement THEN n.statement
            WHEN n:ExtractedEntity THEN n.name
            WHEN n:MemorySummary THEN n.content
@@ -247,18 +264,32 @@ class MemoryForgetService:
          n.activation_value as activation_value,
          n.last_access_time as last_access_time
        ORDER BY n.activation_value ASC
-        LIMIT $limit
        """
-        
+
+        # 如果启用分页，添加 SKIP 和 LIMIT
+        if page is not None and pagesize is not None and page > 0 and pagesize > 0:
+            data_query += " SKIP $skip LIMIT $limit"
+
        params = {
            'end_user_id': end_user_id,
            'threshold': forgetting_threshold,
-            'min_access_time_str': min_access_time_str,
-            'limit': limit
+            'min_access_time_str': min_access_time_str
        }
-        
-        results = await connector.execute_query(query, **params)
-        
+
+        # 获取总数（分页时需要）
+        total = 0
+        if page is not None and pagesize is not None and page > 0 and pagesize > 0:
+            count_results = await connector.execute_query(count_query, **params)
+            if count_results:
+                total = count_results[0]['total']
+
+        # 添加分页参数
+        if page is not None and pagesize is not None and page > 0 and pagesize > 0:
+            params['skip'] = (page - 1) * pagesize
+            params['limit'] = pagesize
+
+        results = await connector.execute_query(data_query, **params)
+
        pending_nodes = []
        for result in results:
            # 将节点类型标签转换为小写
@@ -267,7 +298,7 @@ class MemoryForgetService:
                node_type_label = 'entity'
            elif node_type_label == 'memorysummary':
                node_type_label = 'summary'
-            
+
            # 将 Neo4j DateTime 对象转换为时间戳（毫秒）
            last_access_time = result['last_access_time']
            last_access_dt = convert_neo4j_datetime_to_python(last_access_time)
@@ -278,7 +309,7 @@ class MemoryForgetService:
                last_access_timestamp = int(last_access_dt.timestamp() * 1000)
            else:
                last_access_timestamp = 0
-            
+
            pending_nodes.append({
                'node_id': str(result['node_id']),
                'node_type': node_type_label,
@@ -286,8 +317,20 @@ class MemoryForgetService:
                'activation_value': result['activation_value'],
                'last_access_time': last_access_timestamp
            })
-        
-        return pending_nodes
+
+        # 构建返回结果
+        result: Dict[str, Any] = {'items': pending_nodes}
+
+        # 如果启用分页，添加分页信息
+        if page is not None and pagesize is not None and page > 0 and pagesize > 0:
+            result['page'] = {
+                'page': page,
+                'pagesize': pagesize,
+                'total': total,
+                'hasnext': (page * pagesize) < total
+            }
+
+        return result
    
    async def trigger_forgetting_cycle(
        self,
@@ -636,7 +679,7 @@ class MemoryForgetService:
            api_logger.error(f"获取历史趋势数据失败: {str(e)}")
            # 失败时返回空列表，不影响主流程
        
-        # 获取待遗忘节点列表（前20个满足遗忘条件的节点）
+        # 获取待遗忘节点列表
        pending_nodes = []
        try:
            if end_user_id:
@@ -652,8 +695,7 @@ class MemoryForgetService:
                    connector=connector,
                    end_user_id=end_user_id,
                    forgetting_threshold=forgetting_threshold,
-                    min_days_since_access=int(min_days),
-                    limit=20
+                    min_days_since_access=int(min_days)
                )
                
                api_logger.info(f"成功获取 {len(pending_nodes)} 个待遗忘节点")
@@ -661,24 +703,79 @@ class MemoryForgetService:
        except Exception as e:
            api_logger.error(f"获取待遗忘节点失败: {str(e)}")
            # 失败时返回空列表，不影响主流程
-        
-        # 构建统计信息
+
+        # 构建统计信息（不包含 pending_nodes，已分离到独立接口）
        stats = {
            'activation_metrics': activation_metrics,
            'node_distribution': node_distribution,
            'recent_trends': recent_trends,
-            'pending_nodes': pending_nodes,
            'timestamp': int(datetime.now().timestamp() * 1000)
        }
-        
+
        api_logger.info(
            f"成功获取遗忘引擎统计: total_nodes={stats['activation_metrics']['total_nodes']}, "
            f"low_activation_nodes={stats['activation_metrics']['low_activation_nodes']}, "
-            f"trend_days={len(recent_trends)}, pending_nodes={len(pending_nodes)}"
+            f"trend_days={len(recent_trends)}"
        )
-        
+
        return stats
-    
+
+    async def get_pending_nodes(
+        self,
+        db: Session,
+        end_user_id: str,
+        config_id: Optional[UUID] = None,
+        page: int = 1,
+        pagesize: int = 10
+    ) -> Dict[str, Any]:
+        """
+        Return the list of nodes pending forgetting (standalone paginated endpoint).
+
+        Queries nodes that meet the forgetting criteria (activation below the threshold and last access older than the minimum number of days).
+
+        Args:
+            db: database session
+            end_user_id: end-user ID (required; the original docstring calls it a group ID)
+            config_id: config ID (optional; passed to _get_forgetting_components to load the forgetting settings)
+            page: page number (1-based, default 1)
+            pagesize: items per page (default 10)
+
+        Returns:
+            dict: whatever _get_pending_forgetting_nodes returns
+                - items: list of nodes pending forgetting
+                - page: pagination info (NOTE(review): apparently omitted, and the query unpaginated, when page/pagesize <= 0 - confirm)
+        """
+        # Resolve the forgetting-engine components; only the scheduler (3rd) and the config (4th) are used here
+        _, _, forgetting_scheduler, config = await self._get_forgetting_components(db, config_id)
+
+        connector = forgetting_scheduler.connector
+        forgetting_threshold = config['forgetting_threshold']
+
+        # Validate min_days_since_access: a missing, non-numeric or negative value falls back to 7
+        min_days = config.get('min_days_since_access')
+        if min_days is None or not isinstance(min_days, (int, float)) or min_days < 0:
+            api_logger.warning(
+                f"min_days_since_access 配置无效: {min_days}, 使用默认值 7"
+            )
+            min_days = 7
+
+        # Delegate to the internal helper to fetch the paginated data (min_days is truncated to int)
+        pending_nodes_result = await self._get_pending_forgetting_nodes(
+            connector=connector,
+            end_user_id=end_user_id,
+            forgetting_threshold=forgetting_threshold,
+            min_days_since_access=int(min_days),
+            page=page,
+            pagesize=pagesize
+        )
+
+        api_logger.info(
+            f"成功获取待遗忘节点列表: end_user_id={end_user_id}, "
+            f"page={page}, pagesize={pagesize}, total={pending_nodes_result.get('page', {}).get('total', 0)}"
+        )
+
+        return pending_nodes_result
+
    async def get_forgetting_curve(
        self,
        db: Session,
--- a/api/app/services/multimodal_service.py
+++ b/api/app/services/multimodal_service.py
@@ -12,6 +12,9 @@ import base64
 import csv
 import io
 import json
+import re
+import olefile
+import struct
 import zipfile
 from abc import ABC, abstractmethod
 from typing import List, Dict, Any, Optional
@@ -602,31 +605,75 @@ class MultimodalService:
            try:
                word_file = io.BytesIO(file_content)
                doc = Document(word_file)
-                return '\n'.join(p.text for p in doc.paragraphs)
+                text_lines = []
+                for p in doc.paragraphs:
+                    text = p.text.strip()
+                    if text:
+                        text_lines.append(text)
+
+                for table in doc.tables:
+                    for row in table.rows:
+                        for cell in row.cells:
+                            text = cell.text.strip()
+                            if text:
+                                text_lines.append(text)
+
+                full_text = "\n".join(text_lines)
+                return full_text.strip() or "[docx 文件无文本内容]"
            except Exception as e:
-                logger.error(f"提取 docx 文本失败: {e}")
+                logger.error(f"提取 docx 文本失败: {str(e)}", exc_info=True)
                return f"[docx 提取失败: {str(e)}]"

-        # 旧版 .doc（OLE2 格式）
+        # 旧版 .doc（OLE2/CFB 格式），按 Word Binary Format 规范解析 piece table
        try:
-            import olefile
            ole = olefile.OleFileIO(io.BytesIO(file_content))
-            if not ole.exists('WordDocument'):
-                return "[doc 提取失败: 未找到 WordDocument 流]"
-            # 读取 WordDocument 流，提取可见 ASCII/Unicode 文本
-            stream = ole.openstream('WordDocument').read()
-            # Word Binary Format: 文本在流中以 UTF-16-LE 编码存储
-            # 简单提取：过滤出可打印字符段
-            try:
-                text = stream.decode('utf-16-le', errors='ignore')
-            except Exception:
-                text = stream.decode('latin-1', errors='ignore')
-            # 过滤控制字符，保留可打印内容
-            import re
-            text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
-            text = re.sub(r' +', ' ', text).strip()
+            word_stream = ole.openstream('WordDocument').read()
+
+            # FIB offset 0xA bit9 决定使用 0Table 还是 1Table
+            fib_flags = struct.unpack_from('<H', word_stream, 0xA)[0]
+            table_name = '1Table' if (fib_flags & 0x0200) else '0Table'
+            table_stream = ole.openstream(table_name).read()
+
+            # 从 FIB 读取 fcClx/lcbClx 定位 piece table
+            fc_clx, lcb_clx = struct.unpack_from("<II", word_stream, 0x1A2)
+            clx = table_stream[fc_clx: fc_clx + lcb_clx]
+
+            # 解析 CLX，找到 PlcPcd（piece table）
+            i, plc_pcd = 0, None
+            while i < len(clx):
+                clxt = clx[i]
+                if clxt == 0x01:
+                    i += 3 + struct.unpack_from('<H', clx, i + 1)[0]
+                elif clxt == 0x02:
+                    cb = struct.unpack_from('<I', clx, i + 1)[0]
+                    plc_pcd = clx[i + 5: i + 5 + cb]
+                    break
+                else:
+                    break
+
+            if plc_pcd is None:
+                raise ValueError("PlcPcd not found")
+
+            # PlcPcd: (n+1) 个 CP（4字节）+ n 个 PCD（8字节）
+            n_pieces = (len(plc_pcd) - 4) // 12
+            cp_array = [struct.unpack_from('<I', plc_pcd, k * 4)[0] for k in range(n_pieces + 1)]
+
+            parts = []
+            for k in range(n_pieces):
+                fc_value = struct.unpack_from('<I', plc_pcd, (n_pieces + 1) * 4 + k * 8 + 2)[0]
+                is_ansi = bool(fc_value & 0x40000000)
+                fc = fc_value & 0x3FFFFFFF
+                char_count = cp_array[k + 1] - cp_array[k]
+
+                if is_ansi:
+                    parts.append(word_stream[fc: fc + char_count].decode('cp1252', errors='replace'))
+                else:
+                    parts.append(word_stream[fc: fc + char_count * 2].decode('utf-16-le', errors='replace'))
+
            ole.close()
-            return text
+            result = re.sub(r'[\x00-\x1f\x7f]', '', ''.join(parts))
+            return result.strip()
+
        except Exception as e:
            logger.error(f"提取 doc 文本失败: {e}")
            return f"[doc 提取失败: {str(e)}]"
--- a/api/app/services/tenant_service.py
+++ b/api/app/services/tenant_service.py
@@ -138,7 +138,7 @@ class TenantService:
            
        except Exception as e:
            business_logger.error(f"删除租户失败: {str(e)}")
-            raise BusinessException(f"删除租户失败: {str(e)}", code=BizCode.DB_ERROR)
+            raise BusinessException(f"删除租户失败：{str(e)}", code=BizCode.DB_ERROR)

    # 租户用户管理
    def get_tenant_users(
@@ -147,6 +147,7 @@ class TenantService:
        skip: int = 0, 
        limit: int = 100,
        is_active: Optional[bool] = None,
+        is_superuser: Optional[bool] = None,
        search: Optional[str] = None
    ) -> List[UserModel]:
        """获取租户下的用户列表"""
@@ -155,6 +156,7 @@ class TenantService:
            skip=skip,
            limit=limit,
            is_active=is_active,
+            is_superuser=is_superuser,
            search=search
        )

@@ -162,12 +164,14 @@ class TenantService:
        self, 
        tenant_id: uuid.UUID,
        is_active: Optional[bool] = None,
+        is_superuser: Optional[bool] = None,
        search: Optional[str] = None
    ) -> int:
        """统计租户下的用户数量"""
        return self.user_repo.count_users_by_tenant(
            tenant_id=tenant_id,
            is_active=is_active,
+            is_superuser=is_superuser,
            search=search
        )

--- a/api/app/services/user_memory_service.py
+++ b/api/app/services/user_memory_service.py
@@ -472,6 +472,21 @@ class UserMemoryService:
            # 定义允许更新的字段白名单
            allowed_fields = {'other_name', 'aliases', 'meta_data'}
            
+            # 用户占位名称黑名单，不允许作为 other_name 或出现在 aliases 中
+            _user_placeholder_names = {'用户', '我', 'User', 'I'}
+            
+            # 过滤 other_name：不允许设置为占位名称
+            if 'other_name' in update_data and update_data['other_name'] and update_data['other_name'].strip() in _user_placeholder_names:
+                logger.warning(f"拒绝将占位名称 '{update_data['other_name']}' 设置为 other_name")
+                del update_data['other_name']
+            
+            # 过滤 aliases：移除占位名称和非字符串值
+            if 'aliases' in update_data and update_data['aliases']:
+                update_data['aliases'] = [
+                    a for a in update_data['aliases']
+                    if isinstance(a, str) and a.strip() and a.strip() not in _user_placeholder_names
+                ]
+            
            # 检查是否更新了 aliases 字段
            aliases_updated = 'aliases' in update_data and update_data['aliases'] != end_user_info_record.aliases