from collections import Counter

import jieba.posseg as jp
import pandas as pd

# Count how often each (word, part-of-speech flag) pair occurs in `text`,
# print the ranking, and export it to an Excel sheet.
# NOTE(review): `text` is not defined in this snippet; it must be bound to the
# input string before this code runs.
counter = Counter()
posseg = jp.cut(text)
for p in posseg:
    # Custom filter conditions go here (e.g. `continue` to skip a pair).
    counter[(p.word, p.flag)] += 1

# Every pair with its count, ordered from most to least frequent.
most = counter.most_common()
print(most)

# Write to Excel: one row per (word, flag) pair plus its frequency.
# `index=False` keeps the DataFrame's row index out of the sheet.
pd.DataFrame(
    [(word, flag, frequency) for (word, flag), frequency in most],
    columns=['word', 'flag', 'frequency'],
).to_excel('word_count.xlsx', index=False)
# When the data volume is large, save in batches.
from collections import Counter

import jieba.posseg as jp

# Count how often each (part-of-speech flag, word) pair occurs in `text` and
# print only the most frequent ones.
# NOTE(review): `text` is not defined in this snippet; it must be bound to the
# input string before this code runs.
counter = Counter()
posseg = jp.cut(text)
for p in posseg:
    # Custom filter conditions go here (e.g. `continue` to skip a pair).
    counter[(p.flag, p.word)] += 1

# Keep only the 99 most frequent (flag, word) pairs.
most = counter.most_common(99)
print(most)