[add] batch chunk. qa_prompt set

2026-04-28 15:33:44 +08:00
parent 140311048a
commit 64e640d882
5 changed files with 103 additions and 10 deletions
--- a/api/app/core/config.py
+++ b/api/app/core/config.py
@@ -98,6 +98,7 @@ class Settings:
    # File Upload
    MAX_FILE_SIZE: int = int(os.getenv("MAX_FILE_SIZE", "52428800"))
    MAX_FILE_COUNT: int = int(os.getenv("MAX_FILE_COUNT", "20"))
+    MAX_CHUNK_BATCH_SIZE: int = int(os.getenv("MAX_CHUNK_BATCH_SIZE", "8"))
    FILE_PATH: str = os.getenv("FILE_PATH", "/files")
    FILE_URL_EXPIRES: int = int(os.getenv("FILE_URL_EXPIRES", "3600"))

--- a/api/app/core/rag/prompts/generator.py
+++ b/api/app/core/rag/prompts/generator.py
@@ -138,9 +138,19 @@ def question_proposal(chat_mdl, content, topn=3):
    return "\n".join([p["question"] for p in pairs])


-def qa_proposal(chat_mdl, content, topn=3):
-    """生成 QA 对，返回 [{"question": ..., "answer": ...}, ...]"""
-    template = PROMPT_JINJA_ENV.from_string(QUESTION_PROMPT_TEMPLATE)
+def qa_proposal(chat_mdl, content, topn=3, custom_prompt=None):
+    """生成 QA 对，返回 [{"question": ..., "answer": ...}, ...]
+    
+    Args:
+        chat_mdl: LLM 模型
+        content: 文本内容
+        topn: 生成 QA 对数量
+        custom_prompt: 自定义 prompt 模板（支持 Jinja2，可用变量: content, topn）
+    """
+    if custom_prompt:
+        template = PROMPT_JINJA_ENV.from_string(custom_prompt)
+    else:
+        template = PROMPT_JINJA_ENV.from_string(QUESTION_PROMPT_TEMPLATE)
    rendered_prompt = template.render(content=content, topn=topn)

    msg = [{"role": "system", "content": rendered_prompt}, {"role": "user", "content": "Output: "}]