版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wxf2012301351/article/details/86132209
1. 读取词频文件并计算词权重
def getWordWeight(weightfile, a=1e-3):
    """Load per-word counts from a file and turn them into smoothed weights.

    Each non-blank line of ``weightfile`` is expected to hold exactly two
    whitespace-separated tokens: a word and a numeric count. Lines with any
    other number of tokens are printed (as the list of tokens) and skipped.
    The weight of a word is ``a / (a + count / N)``, where ``N`` is the sum
    of all accepted counts.

    Args:
        weightfile: Path of the text file to read.
        a: Smoothing parameter. Values <= 0 are replaced by 1.0.

    Returns:
        A dict mapping each word to its float weight. If a word appears on
        several lines, its last count is used for the weight, but every
        occurrence still contributes to ``N``.

    Raises:
        ValueError: If the second token of a two-token line is not a number.
        ZeroDivisionError: If entries were read but their counts sum to zero.
    """
    if a <= 0:  # when the parameter makes no sense, fall back to a = 1.0
        a = 1.0

    word2count = {}
    total = 0.0
    with open(weightfile) as f:
        # Iterate the file lazily instead of materializing every line.
        for line in f:
            fields = line.split()
            if not fields:  # blank or whitespace-only line
                continue
            if len(fields) == 2:
                count = float(fields[1])
                word2count[fields[0]] = count
                total += count
            else:
                # Report malformed lines but keep going.
                print(fields)

    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    return {
        word: a / (a + count / total)
        for word, count in word2count.items()
    }