From eaf24376338fc68cd237ef4a82c3013621e7cd9e Mon Sep 17 00:00:00 2001 From: mengyonghao <1533512157@qq.com> Date: Mon, 5 Jan 2026 10:53:53 +0800 Subject: [PATCH] feat(prompt_opt): support streaming output for prompt optimization API --- .../prompt_optimizer_controller.py | 56 ++++++++---------- api/app/core/workflow/template_loader.py | 4 +- api/app/services/prompt_optimizer_service.py | 57 ++++++++++++++++--- .../prompt/prompt_optimizer_system.jinja2 | 2 +- 4 files changed, 75 insertions(+), 44 deletions(-) diff --git a/api/app/controllers/prompt_optimizer_controller.py b/api/app/controllers/prompt_optimizer_controller.py index d73ea0df..2069dd66 100644 --- a/api/app/controllers/prompt_optimizer_controller.py +++ b/api/app/controllers/prompt_optimizer_controller.py @@ -1,7 +1,9 @@ import uuid +import json from fastapi import APIRouter, Depends, Path from sqlalchemy.orm import Session +from starlette.responses import StreamingResponse from app.core.logging_config import get_api_logger from app.core.response_utils import success @@ -70,12 +72,12 @@ def get_prompt_session( SessionMessage(role=role, content=content) for role, content in history ] - + result = SessionHistoryResponse( session_id=session_id, messages=messages ) - + return success(data=result) @@ -104,35 +106,25 @@ async def get_prompt_opt( ApiResponse: Contains the optimized prompt, description, and a list of variables. """ service = PromptOptimizerService(db) - service.create_message( - tenant_id=current_user.tenant_id, - session_id=session_id, - user_id=current_user.id, - role=RoleType.USER, - content=data.message - ) - opt_result = await service.optimize_prompt( - tenant_id=current_user.tenant_id, - model_id=data.model_id, - session_id=session_id, - user_id=current_user.id, - current_prompt=data.current_prompt, - user_require=data.message - ) - service.create_message( - tenant_id=current_user.tenant_id, - session_id=session_id, - user_id=current_user.id, - role=RoleType.ASSISTANT, - content=opt_result.desc - ) - variables = service.parser_prompt_variables(opt_result.prompt) - result = { - "prompt": opt_result.prompt, - "desc": opt_result.desc, - "variables": variables - } - result_schema = OptimizePromptResponse.model_validate(result) - return success(data=result_schema) + async def event_generator(): + async for chunk in service.optimize_prompt( + tenant_id=current_user.tenant_id, + model_id=data.model_id, + session_id=session_id, + user_id=current_user.id, + current_prompt=data.current_prompt, + user_require=data.message + ): + # chunk 是 prompt 的增量内容 + yield f"event:'message'\ndata: {json.dumps(chunk)}\n\n" + return StreamingResponse( + event_generator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no" + } + ) diff --git a/api/app/core/workflow/template_loader.py b/api/app/core/workflow/template_loader.py index ab5bd9fa..4ef49ba5 100644 --- a/api/app/core/workflow/template_loader.py +++ b/api/app/core/workflow/template_loader.py @@ -4,11 +4,11 @@ 从文件系统加载预定义的工作流模板 """ -import os -import yaml from pathlib import Path from typing import Optional +import yaml + class TemplateLoader: """工作流模板加载器""" diff --git a/api/app/services/prompt_optimizer_service.py b/api/app/services/prompt_optimizer_service.py index 5f325a1b..482e8213 100644 --- a/api/app/services/prompt_optimizer_service.py +++ b/api/app/services/prompt_optimizer_service.py @@ -1,5 +1,6 @@ import re import uuid +from typing import Any, AsyncGenerator import json_repair from langchain_core.prompts import ChatPromptTemplate @@ -123,7 +124,7 @@ class PromptOptimizerService: user_id: uuid.UUID, current_prompt: str, user_require: str - ) -> OptimizePromptResult: + ) -> AsyncGenerator[dict[str, str | Any], Any]: """ Optimize a user-provided prompt using a configured prompt optimizer LLM. @@ -161,6 +162,7 @@ class PromptOptimizerService: BusinessException: If the LLM response cannot be parsed as valid JSON or does not conform to the expected output format. """ + self.create_message(tenant_id, session_id, user_id, role=RoleType.USER, content=user_require) model_config = self.get_model_config(tenant_id, model_id) session_history = self.get_session_message_history(session_id=session_id, user_id=user_id) @@ -202,17 +204,54 @@ class PromptOptimizerService: messages.extend(session_history[:-1]) # last message is current message messages.extend([(RoleType.USER.value, rendered_user_message)]) logger.info(f"Prompt optimization message: {messages}") - optim_resp = await llm.ainvoke(messages) - logger.info(optim_resp.content) - optim_result = json_repair.repair_json(optim_resp.content, return_objects=True) - prompt = optim_result.get("prompt") - desc = optim_result.get("desc") + buffer = "" + prompt_started = False + prompt_finished = False + idx = 0 - return OptimizePromptResult( - prompt=prompt, - desc=desc + async for chunk in llm.astream(messages): + content = getattr(chunk, "content", chunk) + if not content: + continue + buffer += content + cache = buffer[:-20] + + # 尝试找到 "prompt": " 开始位置 + if prompt_finished: + continue + + if not prompt_started: + m = re.search(r'"prompt"\s*:\s*"', cache) + if m: + prompt_started = True + prompt_index = m.end() + idx = prompt_index + else: + m = re.search(r'"\s*,\s*\\?n?\s*"desc"\s*:\s*"', buffer) + if m: + prompt_index = m.start() + prompt_finished = True + yield {"type": "delta", "content": buffer[idx:prompt_index]} + else: + yield {"type": "delta", "content": cache[idx:]} + if len(cache) != 0: + idx = len(cache) + + # optim_resp = await llm.astream(messages) + logger.info(buffer) + optim_result = json_repair.repair_json(buffer, return_objects=True) + # prompt = optim_result.get("prompt") + desc = optim_result.get("desc") + self.create_message( + tenant_id=tenant_id, + session_id=session_id, + user_id=user_id, + role=RoleType.ASSISTANT, + content=desc ) + yield {"type": "done", "desc": optim_result.get("desc")} + @staticmethod def parser_prompt_variables(prompt: str): try: diff --git a/api/app/templates/prompt/prompt_optimizer_system.jinja2 b/api/app/templates/prompt/prompt_optimizer_system.jinja2 index ae19a6ab..b9060f68 100644 --- a/api/app/templates/prompt/prompt_optimizer_system.jinja2 +++ b/api/app/templates/prompt/prompt_optimizer_system.jinja2 @@ -25,7 +25,7 @@ Rules Basic Principles Priority Rule: When historical requirements conflict with current requirements, unconditionally prioritize current requirements. Completeness Rule: If the original prompt is empty, generate a complete prompt based on the current requirements. -Structure Rule: Use a clear block structure including [Role], [Task], [Requirements], [Input], [Output], [Constraints] labels. +Structure Rule: Use a clear block structure, and the contents of each block are roles, tasks, requirements, inputs, outputs, and constraints Language Rule: All label languages must fully match the user input language. Behavior Guidelines