Merge remote-tracking branch 'origin/feature/20251219_lxc' into develop

This commit is contained in:
lixiangcheng1
2025-12-27 17:12:35 +08:00
2 changed files with 215 additions and 1 deletions

View File

@@ -0,0 +1,212 @@
{
"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 0,
"refresh_interval": "1000ms"
},
"similarity": {
"scripted_sim": {
"type": "scripted",
"script": {
"source": "double idf = Math.log(1+(field.docCount-term.docFreq+0.5)/(term.docFreq + 0.5))/Math.log(1+((field.docCount-0.5)/1.5)); return query.boost * idf * Math.min(doc.freq, 1);"
}
}
}
},
"mappings": {
"properties": {
"lat_lon": {
"type": "geo_point",
"store": "true"
}
},
"date_detection": "true",
"dynamic_templates": [
{
"int": {
"match": "*_int",
"mapping": {
"type": "integer",
"store": "true"
}
}
},
{
"ulong": {
"match": "*_ulong",
"mapping": {
"type": "unsigned_long",
"store": "true"
}
}
},
{
"long": {
"match": "*_long",
"mapping": {
"type": "long",
"store": "true"
}
}
},
{
"short": {
"match": "*_short",
"mapping": {
"type": "short",
"store": "true"
}
}
},
{
"numeric": {
"match": "*_flt",
"mapping": {
"type": "float",
"store": true
}
}
},
{
"tks": {
"match": "*_tks",
"mapping": {
"type": "text",
"similarity": "scripted_sim",
"analyzer": "whitespace",
"store": true
}
}
},
{
"ltks": {
"match": "*_ltks",
"mapping": {
"type": "text",
"analyzer": "whitespace",
"store": true
}
}
},
{
"kwd": {
"match_pattern": "regex",
"match": "^(.*_(kwd|id|ids|uid|uids)|uid)$",
"mapping": {
"type": "keyword",
"similarity": "boolean",
"store": true
}
}
},
{
"dt": {
"match_pattern": "regex",
"match": "^.*(_dt|_time|_at)$",
"mapping": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss",
"store": true
}
}
},
{
"nested": {
"match": "*_nst",
"mapping": {
"type": "nested"
}
}
},
{
"object": {
"match": "*_obj",
"mapping": {
"type": "object",
"dynamic": "true"
}
}
},
{
"string": {
"match_pattern": "regex",
"match": "^.*_(with_weight|list)$",
"mapping": {
"type": "text",
"index": "false",
"store": true
}
}
},
{
"rank_feature": {
"match": "*_fea",
"mapping": {
"type": "rank_feature"
}
}
},
{
"rank_features": {
"match": "*_feas",
"mapping": {
"type": "rank_features"
}
}
},
{
"dense_vector": {
"match": "*_512_vec",
"mapping": {
"type": "dense_vector",
"index": true,
"similarity": "cosine",
"dims": 512
}
}
},
{
"dense_vector": {
"match": "*_768_vec",
"mapping": {
"type": "dense_vector",
"index": true,
"similarity": "cosine",
"dims": 768
}
}
},
{
"dense_vector": {
"match": "*_1024_vec",
"mapping": {
"type": "dense_vector",
"index": true,
"similarity": "cosine",
"dims": 1024
}
}
},
{
"dense_vector": {
"match": "*_1536_vec",
"mapping": {
"type": "dense_vector",
"index": true,
"similarity": "cosine",
"dims": 1536
}
}
},
{
"binary": {
"match": "*_bin",
"mapping": {
"type": "binary"
}
}
}
]
}
}

View File

@@ -280,8 +280,10 @@ def build_graphrag_for_kb(kb_id: uuid.UUID):
build knowledge graph
"""
db = next(get_db()) # Manually call the generator
db_document = None
db_knowledge = None
try:
db_document = db.query(Document).filter(Document.kb_id == kb_id).all()
db_knowledge = db.query(Knowledge).filter(Knowledge.id == kb_id).first()
# 1. Prepare to configure chat_mdl、embedding_model、vision_model information
chat_model = Base(
@@ -304,7 +306,7 @@ def build_graphrag_for_kb(kb_id: uuid.UUID):
# 2. get all document_ids from knowledge base
vector_service = ElasticSearchVectorFactory().init_vector(knowledge=db_knowledge)
total, items = vector_service.search_by_segment(document_id=None, query=None, pagesize=9999, page=1, asc=True)
document_ids = [item.metadata["document_id"] for item in items]
document_ids = [item.id for item in db_document]
# 2. using graphrag
if db_knowledge.parser_config.get("graphrag", {}).get("use_graphrag", False):