[ADD]Add functions related to knowledge base graph:

Add functions related to knowledge base graph: 1. Entity type generation, 2. Knowledge base graph acquisition, 3. Hard deletion of knowledge base graph, 4. Knowledge base graph reconstruction (asynchronous)
2025-12-27 13:53:10 +08:00
parent 06f64809c3
commit a0c362244e
35 changed files with 6267 additions and 143 deletions
--- a/api/app/core/rag/prompts/generator.py
+++ b/api/app/core/rag/prompts/generator.py
@@ -91,6 +91,7 @@ QUESTION_PROMPT_TEMPLATE = load_prompt("question_prompt")
 VISION_LLM_DESCRIBE_PROMPT = load_prompt("vision_llm_describe_prompt")
 VISION_LLM_FIGURE_DESCRIBE_PROMPT = load_prompt("vision_llm_figure_describe_prompt")
 STRUCTURED_OUTPUT_PROMPT = load_prompt("structured_output_prompt")
+GRAPH_ENTITY_TYPES_PROMPT_TEMPLATE = load_prompt("graph_entity_types")

 ANALYZE_TASK_SYSTEM = load_prompt("analyze_task_system")
 ANALYZE_TASK_USER = load_prompt("analyze_task_user")
@@ -144,6 +145,21 @@ def question_proposal(chat_mdl, content, topn=3):
    return kwd


+def graph_entity_types(chat_mdl, scenario):
+    """Ask the chat model which entity types suit a knowledge-graph scenario.
+
+    The graph-entity-types prompt template is rendered with ``scenario`` and
+    sent as the system prompt, followed by a fixed ``"Output: "`` user turn.
+
+    Args:
+        chat_mdl: Chat model object exposing ``chat(system, history, gen_conf)``;
+            its optional ``max_length`` attribute is forwarded to
+            ``message_fit_in`` (8096 is used when the attribute is absent).
+        scenario: Value substituted for ``scenario`` in the prompt template.
+
+    Returns:
+        The model's reply with everything up to and including the last
+        ``</think>`` removed, or ``""`` when the reply contains ``**ERROR**``.
+    """
+    system_prompt = PROMPT_JINJA_ENV.from_string(GRAPH_ENTITY_TYPES_PROMPT_TEMPLATE).render(scenario=scenario)
+
+    history = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": "Output: "},
+    ]
+    _, history = message_fit_in(history, getattr(chat_mdl, 'max_length', 8096))
+    # Only the non-system turns go in as history; the system prompt is passed separately.
+    answer = chat_mdl.chat(system_prompt, history[1:], {"temperature": 0.2})
+    # chat() may hand back a tuple; only its first element is used as the reply.
+    if isinstance(answer, tuple):
+        answer = answer[0]
+    # Greedy match with DOTALL: strips through the last "</think>" in the reply.
+    answer = re.sub(r"^.*</think>", "", answer, flags=re.DOTALL)
+    return "" if "**ERROR**" in answer else answer
+
+
 def full_question(messages=[], language=None, chat_mdl=None):
    conv = []
    for m in messages:
--- a/api/app/core/rag/prompts/graph_entity_types.md
+++ b/api/app/core/rag/prompts/graph_entity_types.md
@@ -0,0 +1,49 @@
+## Role
+You are a knowledge graph entity type identifier.
+
+## Task
+Identify and extract all relevant entity types for constructing a knowledge graph based on a given scenario.
+
+## Requirements
+- Analyze the scenario and determine key entity categories (e.g., person, organization, location, event, concept).
+- Return all applicable entity types as an English comma-delimited list (no duplicates).
+- Entity types must be in lowercase and use underscores for multi-word terms (e.g., "movie_genre").
+- Output only the entity types, no explanations or additional text.
+
+---
+
+## Examples
+
+### Example 1
+**Scenario:**
+A knowledge base about historical battles, including commanders, armies, locations, and outcomes.
+
+**Output:** 
+person, military_commander, army, location, battle_event, outcome, date
+
+---
+
+### Example 2
+**Scenario:**
+A system tracking scientific research papers, including authors, institutions, fields of study, and citations.
+
+**Output:** 
+person, author, research_institution, academic_field, research_paper, citation, publication_date
+
+---
+
+### Example 3
+**Scenario:**
+A travel guide for cities, covering landmarks, restaurants, hotels, and local events.
+
+**Output:** 
+city, landmark, restaurant, hotel, local_event, cuisine_type, tourist_attraction
+
+---
+
+## Real Data
+
+**Scenario:**
+
+{{ scenario }}
+