本文接续上一篇:https://blog.csdn.net/m0_49621298/article/details/109896619
1、crf++的python接口安装
在CRF++-0.58\python路径下进入命令行,执行以下命令
python setup.py build
python setup.py install
2、接口调用
报错:ImportError: DLL load failed: %1 不是有效的 Win32 应用程序。
import CRFPP
File "D:\Anaconda3\lib\site-packages\CRFPP.py", line 26, in <module>
_CRFPP = swig_import_helper()
File "D:\Anaconda3\lib\site-packages\CRFPP.py", line 22, in swig_import_helper
_mod = imp.load_module('_CRFPP', fp, pathname, description)
File "D:\Anaconda3\lib\imp.py", line 243, in load_module
return load_dynamic(name, filename, file)
File "D:\Anaconda3\lib\imp.py", line 343, in load_dynamic
return _load(spec)
ImportError: DLL load failed: %1 不是有效的 Win32 应用程序。
网上搜索到的方法都没有解决这个问题。后来想到 libcrfpp.dll 这个文件,把它复制到 \Anaconda3\Lib\site-packages 目录下之后,就不再报错了。
3、模型调用及信息抽取
分词和词性标注这次采用 pyhanlp(对比详见上一篇:https://blog.csdn.net/m0_49621298/article/details/109896619),代码及结果如下:
def singlesentence(sentence, modelname):
    """Extract entities, numeric values and units from a single sentence.

    The sentence is segmented and POS-tagged with HanLP, every token is fed
    to a CRF++ tagger as a ``word<TAB>nature`` row, and the predicted tags
    are scanned together with the POS natures to collect the results. The
    sentence and the three result lists are also printed.

    Args:
        sentence: The text to analyse.
        modelname: Path of the CRF++ model file; it is appended directly to
            the ``-m`` option given to ``CRFPP.Tagger``.

    Returns:
        A tuple ``(starr, numarr, dwarr)``:
            starr: entity strings. A token tagged ``w`` is taken as is; a
                token tagged ``b`` is concatenated with the run of ``m`` /
                ``e`` tagged tokens that immediately follows it.
            numarr: one item per token whose nature is ``m``. The item is
                ``[previous_word, word]`` when the preceding token has
                nature ``v``, otherwise just ``word``.
            dwarr: words whose nature is ``q``.
    """
    print("句子:", sentence)

    # Give "%" the nature "q" so that it is collected into the unit list
    # below. The original code issued this identical call twice.
    CustomDictionary.add("%", "q 0")
    segmented = HanLP.segment(sentence)  # tokens carrying .word and .nature

    # NOTE(review): the model is loaded on every call; presumably fine for
    # one-off use, but worth hoisting if this is called in a loop.
    tagger = CRFPP.Tagger("-m" + modelname)
    tagger.clear()
    for term in segmented:
        tagger.add(str(term.word) + "\t" + str(term.nature))
    tagger.parse()

    # One row per token: the input columns followed by the predicted tag.
    column_count = tagger.xsize()
    rows = [
        [tagger.x(i, j) for j in range(column_count)] + [tagger.y2(i)]
        for i in range(tagger.size())
    ]

    starr = []   # entities
    numarr = []  # numeric values
    dwarr = []   # units
    for i, row in enumerate(rows):
        word = row[0]
        nature = row[1]
        # The tag is always the last column, whatever the feature count is.
        tag = row[-1]

        if tag == "w":
            starr.append(word)
        if tag == "b":
            parts = [word]
            for j in range(i + 1, len(rows)):
                if rows[j][-1] not in ("m", "e"):
                    break
                parts.append(rows[j][0])
            starr.append("".join(parts))

        if nature == "m":
            # Guard with i > 0: without it, index -1 would wrap around to
            # the last token when the sentence starts with a number.
            if i > 0 and rows[i - 1][1] == "v":
                numarr.append([rows[i - 1][0], word])
            else:
                numarr.append(word)
        if nature == "q":
            dwarr.append(word)

    print("实体:", starr)
    print("数值:", numarr)
    print("单位:", dwarr)
    return starr, numarr, dwarr