Merge branch 'feature/20251219_lxc' into develop
This commit is contained in:
@@ -186,7 +186,7 @@ async def run_graphrag_for_kb(
|
|||||||
|
|
||||||
chunks = all_document_chunks.get(document_id, [])
|
chunks = all_document_chunks.get(document_id, [])
|
||||||
if not chunks:
|
if not chunks:
|
||||||
callback(msg=f"[GraphRAG] doc:{document_id} has no available chunks, skip generation.")
|
callback(msg=f"[GraphRAG] document:{document_id} has no available chunks, skip generation.")
|
||||||
return
|
return
|
||||||
|
|
||||||
kg_extractor = LightKGExt if ("method" not in parser_config.get("graphrag", {}) or parser_config["graphrag"]["method"] != "general") else GeneralKGExt
|
kg_extractor = LightKGExt if ("method" not in parser_config.get("graphrag", {}) or parser_config["graphrag"]["method"] != "general") else GeneralKGExt
|
||||||
|
|||||||
@@ -198,9 +198,10 @@ def parse_document(file_path: str, document_id: uuid.UUID):
|
|||||||
with_resolution = graphrag_conf.get("resolution", False)
|
with_resolution = graphrag_conf.get("resolution", False)
|
||||||
with_community = graphrag_conf.get("community", False)
|
with_community = graphrag_conf.get("community", False)
|
||||||
|
|
||||||
def callback(msg=None):
|
def callback(*args, msg=None, **kwargs):
|
||||||
nonlocal progress_msg
|
nonlocal progress_msg
|
||||||
progress_msg += f"{datetime.now().strftime('%H:%M:%S')} run graphrag msg: {msg}.\n"
|
message = msg or (args[0] if args else "No message")
|
||||||
|
progress_msg += f"{datetime.now().strftime('%H:%M:%S')} run graphrag msg: {message}.\n"
|
||||||
|
|
||||||
progress_msg += f"{datetime.now().strftime('%H:%M:%S')} Start to run graphrag.\n"
|
progress_msg += f"{datetime.now().strftime('%H:%M:%S')} Start to run graphrag.\n"
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
@@ -306,7 +307,7 @@ def build_graphrag_for_kb(kb_id: uuid.UUID):
|
|||||||
# 2. get all document_ids from knowledge base
|
# 2. get all document_ids from knowledge base
|
||||||
vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge)
|
vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge)
|
||||||
total, items = vector_service.search_by_segment(document_id=None, query=None, pagesize=9999, page=1, asc=True)
|
total, items = vector_service.search_by_segment(document_id=None, query=None, pagesize=9999, page=1, asc=True)
|
||||||
document_ids = [item.id for item in db_documents]
|
document_ids = [str(item.id) for item in db_documents]
|
||||||
|
|
||||||
# 2. using graphrag
|
# 2. using graphrag
|
||||||
if db_knowledge.parser_config.get("graphrag", {}).get("use_graphrag", False):
|
if db_knowledge.parser_config.get("graphrag", {}).get("use_graphrag", False):
|
||||||
@@ -314,8 +315,9 @@ def build_graphrag_for_kb(kb_id: uuid.UUID):
|
|||||||
with_resolution = graphrag_conf.get("resolution", False)
|
with_resolution = graphrag_conf.get("resolution", False)
|
||||||
with_community = graphrag_conf.get("community", False)
|
with_community = graphrag_conf.get("community", False)
|
||||||
|
|
||||||
def callback(msg=None):
|
def callback(*args, msg=None, **kwargs):
|
||||||
print(f"{datetime.now().strftime('%H:%M:%S')} run graphrag msg: {msg}.\n")
|
message = msg or (args[0] if args else "No message")
|
||||||
|
print(f"{datetime.now().strftime('%H:%M:%S')} run graphrag msg: {message}.\n")
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
task = {
|
task = {
|
||||||
|
|||||||
Reference in New Issue
Block a user