说明
这是一个简单的汉语唤醒词模糊判断器。由于汉语发音中存在前后舌音以及声母、韵母上的差别,如果按文字进行精确匹配,很容易误判,因此需要一套模糊判断的逻辑。
安装库
pip install pypinyin
代码Demo
# -*- coding: utf-8 -*-
# @Author : Dony YUAN
# @Time : 2024/12/06 20:02
from pypinyin import pinyin, Style
def compare_hanz(hanz_x: str, hanz_y: str, thresh: float = 0.2) -> bool:
    """
    Fuzzily compare two Chinese phrases by the letters of their pinyin.

    Both phrases are converted to tone-less pinyin; characters that yield no
    pinyin (e.g. punctuation) are dropped. Syllables are paired by position.
    For each differing pair, every letter of the shorter syllable that does
    not occur in the longer one counts as one error, plus the difference in
    length. Syllables that have no partner because one phrase is longer
    count in full. The error count is divided by the letter count of the
    longer pinyin string and compared with ``thresh``.

    :param hanz_x: first Chinese phrase to compare
    :param hanz_y: second Chinese phrase to compare
    :param thresh: similarity threshold on the error rate, default 0.2
    :return: True when the error rate is strictly below ``thresh``;
        False otherwise, including when neither phrase yields any pinyin
    """
    py1 = [
        syl[0]
        for syl in pinyin(hanz_x, style=Style.NORMAL, errors='ignore')
    ]
    py2 = [
        syl[0]
        for syl in pinyin(hanz_y, style=Style.NORMAL, errors='ignore')
    ]
    print(py1, py2)

    total_letters = max(len("".join(py1)), len("".join(py2)))
    if total_letters == 0:
        # Neither phrase produced pinyin (empty or non-Chinese input).
        # There is nothing to compare, so report "not similar" instead of
        # dividing by zero; a wake word must never match on empty input.
        return False

    diff = 0
    for m, n in zip(py1, py2):
        if m == n:
            continue
        # sorted() is stable, so on equal lengths `m` stays the "shorter"
        # one; the check is by letter membership, not by position.
        shorter, longer = sorted((m, n), key=len)
        diff += sum(1 for ch in shorter if ch not in longer)
        diff += len(longer) - len(shorter)

    # zip() stops at the shorter phrase; without this, extra syllables
    # would be ignored and "你好" would match "你好小布" with zero errors.
    paired = min(len(py1), len(py2))
    diff += sum(len(syl) for syl in py1[paired:] + py2[paired:])

    error_rate = diff / total_letters
    print(f"error_rate: {error_rate}")
    return error_rate < thresh
if __name__ == '__main__':
    # Demo: compare two greetings whose first and last characters differ
    # and report whether they are judged similar.
    phrase_a, phrase_b = "您好小杜?", "你好小布!"
    print(f"是否相似:{compare_hanz(phrase_a, phrase_b)}")
['nin', 'hao', 'xiao', 'du'] ['ni', 'hao', 'xiao', 'bu']
error_rate: 0.16666666666666666
是否相似:True
其他
如大佬有更简便的方法,还请不吝赐教~