# Characters (punctuation plus the newline) removed from every line before it
# is split into words.
punc_list = [',', '.', '\n', '?', '\'', '\"', '(', ')', ':']

# Read the whole text file as a list of lines.
with open('./tofel100.txt', encoding='utf-8') as f:
    p = f.readlines()

# Every word of the text, in reading order and with duplicates kept.
all_words = []
for s in p:
    for punc in punc_list:
        s = s.replace(punc, '')
    # split() without an argument splits on any run of whitespace and never
    # yields empty strings, so blank lines and repeated spaces no longer add
    # '' to the word list (split(' ') did).
    line_words = s.split()
    # extend() appends in place instead of copying the whole list per line.
    all_words.extend(line_words)

# The distinct words, as a set and as a list (list order is arbitrary).
set_words = set(all_words)
word_list = list(set_words)
# NOTE(review): this module-level value is not read anywhere in the script as
# shown; kept in case other code relies on it.
count_word = 'all'
print('The number of words in word_list is: ',len(word_list))
def get_word_counts(count_word, words=None):
    """Return how many times ``count_word`` occurs in a list of words.

    Args:
        count_word: The word to look for. Elements are compared with ``==``,
            so the match is exact and case-sensitive.
        words: The list of words to search. When omitted, the module-level
            ``all_words`` list is searched, which is what the original
            one-argument form always did.

    Returns:
        The number of elements of the list that equal ``count_word``
        (0 when there is none or the list is empty).
    """
    if words is None:
        # Looked up at call time so existing one-argument calls keep working.
        words = all_words
    return words.count(count_word)
# Map every distinct word to the number of times it occurs in all_words.
# Counter tallies the words in a single pass, instead of rescanning the whole
# word list once per distinct word. Keys end up in first-occurrence order.
from collections import Counter

word_counter_dict = dict(Counter(all_words))
def get_sorted_tuple_list_from_dict(count_dict):
    """Return the dict's ``(key, value)`` pairs sorted by value, largest first.

    Args:
        count_dict: Dict whose values can be compared with one another.

    Returns:
        A new list of ``(key, value)`` tuples in descending order of value.
        The sort is stable, so pairs with equal values keep the order in
        which the dict yields them.
    """
    return sorted(count_dict.items(), key=lambda pair: pair[1], reverse=True)
# Print each (word, count) tuple on its own line, highest count first.
for element in get_sorted_tuple_list_from_dict(word_counter_dict):
    print(element)
# 统计托福高频词汇
# 猜你喜欢
# 转载自blog.csdn.net/weixin_41855010/article/details/105235358
# 今日推荐
# 周排行