在进行文本分类时,仅仅依靠模型往往难以得到满意的效果,需要结合关键词来提高精度,一个例子如下:
# Filter the texts of an Excel sheet by keyword and save the matches, one per
# line, to ./data/data_2020.txt. For each row, the first string-valued cell is
# treated as the text; it is kept when its length is within [5, 150] and it
# contains at least one of the keywords.

# NOTE: `encoding` is not passed here; recent pandas versions reject that
# keyword for read_excel.
df = pd.read_excel(file_name)
text = df.values.tolist()

# Alternative keyword set: ['牛肉', '猪肉', '鸡肉']
key_words = ['野鸡', '野兔', '野生动物']

res = []
with open('./data/data_2020.txt', 'w', encoding='utf8') as fw:
    for row in text:
        # First string cell of the row; non-string cells (numbers, NaN) are
        # ignored. Rows without any string cell are skipped instead of
        # raising IndexError.
        to_write = next((cell for cell in row if isinstance(cell, str)), None)
        if to_write is None:
            continue

        # Discard texts that are too short or too long.
        if not 5 <= len(to_write) <= 150:
            continue

        # Keep the text only if it contains at least one keyword.
        if any(k in to_write for k in key_words):
            print(to_write)
            res.append(to_write.strip() + '\n')

    # Write all matches in one batch, while the file is still open.
    fw.writelines(res)