[changes] Add guard words to prevent "implicit" and "emotional" content from being mistakenly pruned.
This commit is contained in:
@@ -252,6 +252,16 @@ class SemanticPruner:
|
||||
if re.fullmatch(r"[。!?,.!?…·\s]+", t):
|
||||
return True
|
||||
|
||||
# 安全防线:包含情绪词或兴趣词的消息,无论多短都不视为填充
|
||||
# 避免"我好开心呀"、"好喜欢打羽毛球呀"等被误删
|
||||
_emotion_interest_guard = re.compile(
|
||||
r"开心|高兴|快乐|幸福|感动|难过|悲伤|伤心|委屈|失落|沮丧|郁闷|"
|
||||
r"生气|愤怒|烦躁|焦虑|害怕|担心|压力|兴奋|期待|"
|
||||
r"喜欢|热爱|爱好|兴趣|擅长|享受|沉迷|着迷|讨厌|厌恶"
|
||||
)
|
||||
if _emotion_interest_guard.search(t):
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
async def _batch_evaluate_importance_with_llm(
|
||||
|
||||
@@ -51,6 +51,22 @@ class SceneConfigRegistry:
|
||||
(r"今天|明天|后天|昨天|前天", 3), # 相对时间(提高权重)
|
||||
(r"下周|下月|下年|上周|上月|上年|本周|本月|本年", 3),
|
||||
(r"今年|去年|明年", 3),
|
||||
# ---- 情绪内容(所有场景通用,用于情绪提取) ----
|
||||
(r"开心|高兴|快乐|兴奋|愉快|幸福|满足|喜悦|欣喜", 4),
|
||||
(r"难过|悲伤|伤心|痛苦|委屈|失落|沮丧|郁闷|忧郁|绝望", 4),
|
||||
(r"生气|愤怒|烦躁|焦虑|紧张|害怕|恐惧|担心|担忧|压力", 4),
|
||||
(r"感动|温暖|感激|感谢|惊喜|期待|憧憬|向往", 3),
|
||||
(r"无聊|无奈|尴尬|后悔|遗憾|羞愧|惭愧", 3),
|
||||
(r"好[开高快]心|很[开高快]心|超[开高快]心|非常[开高快]心", 4),
|
||||
(r"好难过|好伤心|好悲伤|好委屈|好痛苦", 4),
|
||||
(r"好开心|好高兴|好快乐|好幸福|好感动", 4),
|
||||
# ---- 兴趣/爱好内容(所有场景通用,用于兴趣提取) ----
|
||||
(r"喜欢|热爱|爱好|兴趣|擅长|享受|沉迷|着迷|痴迷", 4),
|
||||
(r"不喜欢|讨厌|厌恶|反感|排斥", 3),
|
||||
(r"羽毛球|篮球|足球|排球|乒乓球|网球|棒球|高尔夫", 4),
|
||||
(r"游泳|跑步|健身|瑜伽|舞蹈|武术|骑行|登山|徒步", 4),
|
||||
(r"音乐|唱歌|吉他|钢琴|绘画|摄影|书法|手工|烹饪", 4),
|
||||
(r"游戏|电影|动漫|小说|阅读|旅游|美食|宠物", 3),
|
||||
]
|
||||
|
||||
BASE_LOW_PRIORITY = [
|
||||
@@ -58,6 +74,8 @@ class SceneConfigRegistry:
|
||||
(r"\d{1,2}点\d{0,2}分?", 2), # 时间点 X点Y分 或 X点
|
||||
(r"上午|下午|中午|晚上|早上|傍晚|凌晨", 2), # 时段(提高权重并扩充)
|
||||
(r"AM|PM|am|pm", 1),
|
||||
# ---- 情绪程度副词(辅助情绪识别) ----
|
||||
(r"特别|非常|超级|极其|十分|很|好[开高快]|太.*了", 1),
|
||||
]
|
||||
|
||||
BASE_FILLERS = {
|
||||
|
||||
Reference in New Issue
Block a user