《三国演义》,《水浒传》,《西游记》的人物出场次数Python代码:
- 经过代码运行的结果可以看出三国作者对曹操和孔明比较喜爱;水浒作者对宋江和武松比较喜爱;西游作者对齐天大圣孙悟空比较喜爱
- 通过这类代码,我们可以看出一篇文章中作者想表达的主要的一些东西
# Romance of the Three Kingdoms: print the ten most frequent words (after
# alias merging and stop-word removal) as a proxy for character appearances.
print("三国演义人物出场次数:")
import jieba  # Chinese word segmentation
import time  # used to time the whole analysis

start = time.perf_counter()
# Read the whole novel; the context manager closes the file handle promptly.
with open("三国演义.txt", "r", encoding="utf-8") as novel:
    txt = novel.read()
# Frequent non-character words, collected by running the script repeatedly.
excludes = {"将军", "却说", "二人", "后主", "上马", "不知", "天子", "大叫", "众将", "不可",
            "主公", "蜀兵", "只见", "如何", "商议", "都督", "一人", "汉中", "不敢", "人马",
            "陛下", "魏兵", "天下", "今日", "左右", "东吴", "于是", "荆州", "不能", "如此",
            "大喜", "引兵", "次日", "军士", "军马"}
# Alternate names and titles that are counted under one canonical name.
aliases = {
    "诸葛亮": "孔明", "孔明曰": "孔明",
    "关公": "关羽", "云长": "关羽",
    "玄德": "刘备", "玄德曰": "刘备",
    "孟德": "曹操", "丞相": "曹操",
}
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Single-character tokens are skipped entirely.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default: an excluded word that never occurs in the text
    # must not abort the run with KeyError.
    counts.pop(word, None)
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing tolerates texts with fewer than ten distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}次".format(word, count))
dur = time.perf_counter() - start
print("运行时间为{:.2f}s".format(dur))
print("-----------------------------------")
# Water Margin: print the ten most frequent words (after alias merging and
# stop-word removal) as a proxy for character appearances.
print("水浒传人物出场次数:")
import jieba  # Chinese word segmentation
import time  # used to time the whole analysis

start = time.perf_counter()
# Read the whole novel; the context manager closes the file handle promptly.
with open("水浒传.txt", "r", encoding="utf-8") as novel:
    txt = novel.read()
# Frequent non-character words removed from the ranking.
excludes = {"二人", "一个", "来到", "人马", "你们", "我们", "好汉",
            "知府", "什么", "他们", "银子", "梁山", "两个"}
# Forms of address that the original author attributes to one character.
# NOTE(review): these are heuristics ("哥哥"/"头领" are generic terms) — confirm
# that they should be folded into these names.
aliases = {
    "哥哥": "宋江",
    "头领": "林冲",
}
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Single-character tokens are skipped entirely.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    # Tally under the merged name; counting the raw token here would make the
    # alias mapping above a no-op.
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default: an excluded word that never occurs in the text
    # must not abort the run with KeyError.
    counts.pop(word, None)
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing tolerates texts with fewer than ten distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}次".format(word, count))
dur = time.perf_counter() - start
print("运行时间为{:.2f}s".format(dur))
print("-----------------------------")
# Journey to the West: print the nine most frequent words (after alias
# merging and stop-word removal) as a proxy for character appearances.
print("西游记人物出场次数:")
import jieba  # Chinese word segmentation
import time  # used to time the whole analysis

start = time.perf_counter()
# Read the whole novel; the context manager closes the file handle promptly.
with open("西游记.TXT", "r", encoding="utf-8") as novel:
    txt = novel.read()
# Frequent non-character words removed from the ranking.
excludes = {"一个", "那里", "怎么", "我们", "不知", "两个", "甚么", "只见", "不是",
            "原来", "不敢", "闻言", "如何"}
# Alternate names and forms of address counted under one canonical name.
# NOTE(review): "呆子" is normally Zhu Bajie's nickname and "和尚" is a generic
# word for monk — confirm that folding both into "沙僧" is intended.
aliases = {
    "行者": "悟空", "大圣": "悟空", "老孙": "悟空",
    "师父": "唐僧", "三藏": "唐僧", "长老": "唐僧",
    "和尚": "沙僧", "呆子": "沙僧",
}
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Single-character tokens are skipped entirely.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default: an excluded word that never occurs in the text
    # must not abort the run with KeyError.
    counts.pop(word, None)
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# This section prints the top nine; slicing tolerates shorter rankings.
for word, count in items[:9]:
    print("{0:<10}{1:>5}次".format(word, count))
dur = time.perf_counter() - start
print("运行时间为{:.2f}s".format(dur))