文章目录
用 Python 实现小说的下载与打包
1、库函数的使用
from tkinter import *
import re
import requests
import urllib.parse
from bs4 import BeautifulSoup
import urllib
import os
import threading
import time
# Shared module-level state used by the GUI callbacks and the download thread.
# mulu1[i] is indexed as [0] (passed on as the novel's catalogue URL) and
# [1] (shown as the title); mulu2[i] and mulu3[i] are extra strings shown
# next to the title -- presumably author and latest chapter, TODO confirm.
# The three lists must be distinct objects, hence tuple unpacking.
mulu1, mulu2, mulu3 = [], [], []
# String placeholders; xsm holds the last search text and str1 the current
# status text once the callbacks have run.
url = mkpath = xsm = str1 = ''
2、界面的搭建
# Main window: a search box plus a button that starts the novel search.
gui = Tk()
gui.title('笔趣阁小说搜索下载软件')
# A single geometry string sets both the size (1200x900) and the screen
# offset (+500+200).
gui.geometry("1200x900+500+200")
labelx = Label(gui, text="小说名", fg="red", font=("宋体", 25))
labelx.grid(row=0, column=0, sticky='W')
sousuo = Entry(gui, font=("宋体", 25))
sousuo.grid(row=0, column=1)
# The search is handed to thread_it so the button callback returns at once.
# The option is spelled out as `command` rather than relying on Tk accepting
# the abbreviation `comman`.
buttonx = Button(gui, text='搜索', font=("宋体", 25),
                 command=lambda: thread_it(xiaoshuoss))
buttonx.grid(row=0, column=2)
# NOTE(review): mainloop() blocks until the window is closed, so when the
# sections are assembled into one script the function definitions shown
# after this point must be executed before this call -- confirm ordering.
gui.mainloop()
3、小说搜索
def xiaoshuoss():
    """Search for the title typed into the search box and list the results.

    Reads the text of the ``sousuo`` entry into the global ``xsm``, passes it
    to ``xsss`` (presumably this fills ``mulu1``/``mulu2``/``mulu3`` -- it is
    defined outside this section, TODO confirm), turns the "小说名" label
    green, and then adds one row per entry of ``mulu1`` to the main window:
    a label with the result text and a "下载" button that opens the download
    dialog for that entry.
    """
    global xsm
    xsm = sousuo.get()
    xsss(xsm)
    labelx['fg'] = 'green'
    for idx, entry in enumerate(mulu1):
        # Row 0 holds the search widgets, so results start at row 1.
        row = idx + 1
        caption = entry[1] + "(" + mulu2[idx] + ")" + mulu3[idx]
        labely = Label(gui, text=caption, fg="red", font=("宋体", 12))
        labely.grid(row=row, column=0, sticky='W')
        # Bind the index as a default argument; a plain closure would see
        # only the last value of the loop variable.
        buttony = Button(gui, text='下载', font=("宋体", 15),
                         command=lambda i=idx: xz(mulu1[i][0], i))
        buttony.grid(row=row, column=1, padx='100')
4、小说下载界面
def xz(dmi, i):
    """Open the dialog that asks where the chosen novel should be saved.

    Args:
        dmi: Value forwarded unchanged to ``jinduxc`` as its ``url`` argument
            (the caller passes ``mulu1[i][0]``).
        i: Index of the chosen novel in ``mulu1``.

    The window is stored in the global ``gui1`` so that ``jinduxc`` can close
    it. "确定" calls ``jinduxc`` with the text typed into the entry; "返回"
    just destroys the dialog.
    """
    global gui1
    # NOTE(review): this creates a second Tk root next to the main window
    # and starts a nested mainloop; a Toplevel is the usual choice for
    # secondary windows -- confirm before changing, jinduxc relies on gui1.
    gui1 = Tk()
    gui1.title('笔趣阁小说搜索下载软件')
    gui1.geometry("800x500+500+500")
    la = Label(gui1, text="下载路径", fg="red", font=("宋体", 30))
    la.grid(row=0, column=0)
    lujing = Entry(gui1, font=("宋体", 30))
    lujing.grid(row=0, column=1)
    # The entry is read when the button is pressed, not when it is built.
    buttonz = Button(gui1, text='确定', font=("宋体", 30),
                     command=lambda: jinduxc(dmi, lujing.get(), i))
    buttonz1 = Button(gui1, text='返回', font=("宋体", 30),
                      command=gui1.destroy)
    buttonz.grid(row=1, column=1, padx='50')
    buttonz1.grid(row=2, column=1, padx='50')
    gui1.mainloop()
5、进度显示界面
def jinduxc(url, mkpath, i):
    """Replace the download dialog with a progress window and start the download.

    Args:
        url: Forwarded unchanged to ``getnovel`` as the catalogue URL.
        mkpath: Text the user typed into the download dialog, forwarded
            unchanged to ``getnovel``.
        i: Index of the chosen novel in ``mulu1``; its title is used in the
            window caption.

    The progress window is stored in the global ``gui2`` because
    ``getnovel`` adds its status label to it and destroys it when finished.
    """
    global gui2
    gui1.destroy()
    gui2 = Tk()
    gui2.title(mulu1[i][1] + "下载进度")
    gui2.geometry("500x100")
    # The download runs on a background thread so that this window's
    # mainloop below can keep running.
    thread_it(getnovel, url, mkpath, i)
    gui2.mainloop()
6、小说下载
# Patterns applied to the stringified BeautifulSoup results of the catalogue.
_HREF_RE = re.compile(r'href="(.*?)"')
_CHAPTER_LINK_RE = re.compile(r'<a href="(.*?)">(.*?)</a>')


def _fetch_soup(page_url):
    """Download page_url and parse it with lxml using the detected encoding."""
    response = requests.get(page_url)
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, 'lxml')


def _chapter_text(page_url):
    """Return the text of the id="content" element(s) of a chapter page."""
    texts = _fetch_soup(page_url).find_all(id="content")
    # find_all returns a list, so str() wraps the markup in "[...]"; the
    # replace calls remove those brackets (and any others in the text).
    soup_text = BeautifulSoup(str(texts), 'lxml')
    return str(soup_text.text).replace("[", '').replace(']', '')


def _save_chapter(folder, name, fallback_name, text):
    """Write text to name.txt in folder, retrying as fallback_name.txt on OSError."""
    try:
        with open(folder + "\\{}.txt".format(name), 'w', encoding='utf-8') as f:
            f.write(text)
    except OSError:
        # Typically the chapter title is not usable as a file name.
        with open(folder + "\\{}.txt".format(fallback_name), 'w',
                  encoding='utf-8') as f:
            f.write(text)


def getnovel(url, mkpath, k):
    """Download every chapter of a novel, then merge them into one file.

    Args:
        url: Address of the novel's catalogue page.
        mkpath: Prefix typed by the user; the target folder is built as
            this prefix + ":" + a "xiaoshuo" folder + the novel title, so a
            drive letter is expected.
        k: Index of the novel in ``mulu1`` (``mulu1[k][1]`` is the title).

    Progress is written to a label inside the global ``gui2`` window and to
    the global ``str1``. Each chapter is saved as its own .txt file; when all
    are done ``hbtxt`` merges them and ``gui2`` is destroyed. A chapter whose
    download or write fails is retried every 5 seconds until it succeeds.

    NOTE(review): this function is started on a worker thread (see jinduxc)
    yet updates Tk widgets directly -- confirm that is safe on the target
    platform.
    """
    global str1
    str1 = '地址错误'
    lab = Label(gui2, text=str1)
    lab.pack()
    mkpath = mkpath + ':\\xiaoshuo\\' + mulu1[k][1]
    mkdir(mkpath)
    str1 = '开始'
    lab.config(text=str1, font=("", 20))

    soup_mulu = _fetch_soup(url)
    # The canonical link supplies the base URL that the chapter hrefs are
    # appended to.
    base_urls = _HREF_RE.findall(str(soup_mulu.find_all(rel='canonical')))
    muluf = _CHAPTER_LINK_RE.findall(str(soup_mulu.find_all(id='list')))
    total = len(muluf)
    if total and not base_urls:
        # Without a base URL no chapter address can be built; report it
        # instead of retrying forever.
        str1 = '地址错误'
        lab.config(text=str1, font=("", 20))
        return

    avg_seconds = 0.0  # mean seconds per finished chapter
    elapsed = 0.0      # seconds spent on finished chapters
    i = 0
    while i < total:
        # time.clock() no longer exists (removed in Python 3.8).
        start = time.perf_counter()
        # Remaining time = chapters left * mean seconds per chapter.
        str1 = str("已下载" + str(i + 1) + '章/' + str(total) + '章' + '\n'
                   + str(round((i + 1) / total * 100, 1)) + "%"
                   + " 剩余时间:%d秒" % ((total - i) * avg_seconds))
        lab.config(text=str1, font=("", 20))
        try:
            text = _chapter_text(base_urls[0] + muluf[i][0])
            _save_chapter(mkpath, muluf[i][1].replace("|", ''),
                          '这一章' + str(i), text)
        except (requests.RequestException, OSError):
            # Network or disk trouble: show the message, wait, retry the
            # same chapter.
            str1 = '断网尼玛'
            lab.config(text=str1, font=("", 20))
            time.sleep(5)
            continue
        elapsed += time.perf_counter() - start
        i += 1
        avg_seconds = elapsed / i
        print(i)
    hbtxt(mkpath, k)
    gui2.destroy()
7、其他功能函数
7.1 文件夹创建
def mkdir(path):
    """Create the directory ``path`` (with parents) unless it already exists.

    Surrounding whitespace and trailing backslashes are removed from
    ``path`` first. A short status line is printed either way.

    Args:
        path: Directory to create.

    Returns:
        True if the directory was created by this call, False if the path
        already existed.
    """
    path = path.strip()
    path = path.rstrip("\\")
    if os.path.exists(path):
        print(path + '目录已存在')
        return False
    try:
        os.makedirs(path)
    except FileExistsError:
        # Another thread created it between the check above and makedirs.
        print(path + '目录已存在')
        return False
    print(path + ' 创建成功')
    return True
7.2 txt文件合并
def hbtxt(name, k):
    """Merge every chapter .txt file in a folder into one file for the novel.

    Args:
        name: Folder that holds the per-chapter .txt files.
        k: Index of the novel in ``mulu1``; ``mulu1[k][1]`` + ".txt" is the
            name of the merged file, which is written into the same folder.

    Chapters are appended in order of ``os.path.getctime`` (ties broken by
    file name), each preceded by its file name without the extension.
    NOTE(review): ctime is the creation time on Windows but the
    metadata-change time on Unix -- confirm the target platform.
    """
    dirPath = name
    out_name = mulu1[k][1] + ".txt"
    # Skip the merged file itself so a re-run does not fold the previous
    # output back into the result.
    chapters = [
        fname for fname in os.listdir(dirPath)
        if fname.endswith(".txt") and fname != out_name
    ]
    # Sorting the names keeps files that share a timestamp; a dict keyed by
    # ctime would let one overwrite the other.
    chapters.sort(key=lambda fname: (os.path.getctime(dirPath + "/" + fname),
                                     fname))
    parts = []
    for fname in chapters:
        title = fname[:-4]
        with open(dirPath + "/" + fname, "r", encoding='utf-8') as file:
            content = file.read()
        parts.append("\n%s\n\n%s" % (title, content))
    with open(dirPath + '/' + out_name, "w", encoding='utf-8') as outFile:
        outFile.write("".join(parts))
7.3 多线程函数
def thread_it(func, *args):
    """Run ``func(*args)`` on a new daemon thread and return that thread.

    Args:
        func: Callable to run in the background.
        *args: Positional arguments passed to ``func``.

    Returns:
        The started ``threading.Thread``. Existing callers ignore it; it is
        returned so a caller can ``join`` the work if it wants to.
    """
    # daemon=True replaces the deprecated Thread.setDaemon(True); a daemon
    # thread does not keep the process alive on exit.
    t = threading.Thread(target=func, args=args, daemon=True)
    t.start()
    return t
8、功能实现
8.1优点
输入小说名即可自动搜索小说,并显示小说的作者和最新章节;点击“下载”后输入保存位置的盘符(如 d、e、f),小说会保存到该盘的 xiaoshuo 文件夹下。下载过程中会显示进度,下载完成后自动把各章节合并打包为一个 txt 文件。
8.2缺点
小说下载速度有限;目前还无法真正实现多线程同时下载多本小说——多个下载线程共用的全局变量没有加锁,同时下载多本小说时下载会中止。单本小说的下载功能良好。