# Print the ten most frequent words found in test.txt.
import re
from collections import Counter
from pprint import pprint

# Read the whole file into memory as a single string.
with open('test.txt') as f:
    txt = f.read()

# Extract runs of word characters directly instead of splitting on \W+:
# splitting yields empty-string "words" whenever the text starts or ends
# with punctuation (e.g. a trailing "..."), and those would be counted too.
# The raw string keeps \w from being treated as a string escape sequence.
# Matching is case-sensitive, so "The" and "the" are counted separately.
c = Counter(re.findall(r'\w+', txt))

# (word, count) pairs for the ten most common words, highest count first.
res = c.most_common(10)
pprint(res)
Contents of test.txt:
The official home of the Python Programming Language... Python Events Archive User Group Events Archive Submit an EventBrowse the docs online or download a...
Result (the ten most common words and their counts):
[('the', 2), ('Python', 2), ('Events', 2), ('Archive', 2), ('The', 1), ('official', 1), ('home', 1), ('of', 1), ('Programming', 1), ('Language', 1)]