【Python】统计csv某列中单词的词频
文章目录
0. 需求
CSV 文件中某一列的每个元素都是字符串,需要把该列所有字符串按空白字符切分成单词,并统计全部单词合计的词频。
1. 单个文件:统计并打印词频
import pandas as pd
from collections import Counter
import os
import pickle
def count_words(messages):
    """Count whitespace-separated words across a collection of messages.

    Args:
        messages: Iterable of cell values, e.g. the pandas Series for one
            CSV column. Non-string values are converted with ``str``;
            missing values (None / NaN) are skipped.

    Returns:
        A ``collections.Counter`` mapping each word to its occurrence count.
    """
    # Drop missing cells first: ``astype(str)`` would otherwise turn NaN into
    # the literal text "nan" and count it as if it were a real word.
    texts = pd.Series(messages).dropna().astype(str)
    counts = Counter()
    # Count message by message instead of joining everything into one large
    # string; both approaches split on whitespace, so the counts are the same.
    for text in texts:
        counts.update(text.split())
    return counts


if __name__ == "__main__":
    df = pd.read_csv('a.csv')
    word_counts = count_words(df['message'])
    # most_common() without an argument returns every word, most frequent first.
    tt = word_counts.most_common()
    for word, count in tt:
        print(word, count)
2. 多个文件:收集每个文件中出现次数最多的长单词(长度大于 3)
def most_common_long_word(messages, min_length=4):
    """Return the most frequent word that has at least ``min_length`` characters.

    Args:
        messages: Iterable of cell values, e.g. the pandas Series for one
            CSV column. Non-string values are converted with ``str``;
            missing values (None / NaN) are skipped.
        min_length: Minimum number of characters a word must have to be
            eligible. The default of 4 selects words longer than 3 characters.

    Returns:
        The qualifying word with the highest count, or ``None`` when no word
        is long enough (including when there are no words at all).
    """
    # Skip missing cells so NaN is not counted as the word "nan" (this would
    # matter as soon as ``min_length`` is 3 or less).
    texts = pd.Series(messages).dropna().astype(str)
    counts = Counter()
    for text in texts:
        counts.update(text.split())
    # most_common() is ordered by descending count, so the first word that is
    # long enough is the answer.
    for word, _ in counts.most_common():
        if len(word) >= min_length:
            return word
    return None


root = "./testing/trace/"
st = set()

if __name__ == "__main__":
    for i in os.listdir(root):
        path = os.path.join(root, i)
        # os.listdir also returns sub-directories, which read_csv cannot open.
        if not os.path.isfile(path):
            continue
        df = pd.read_csv(path)
        k = most_common_long_word(df['message'])
        # Files without any sufficiently long word contribute nothing; the
        # result set should only ever contain real words, never None.
        if k is not None:
            st.add(k)