import re
import requests
import threading
# Collect the URL and title of every novel listed on the ranking page.
def url():
    """Scrape the site's ranking page and return a list of
    [novel_url, novel_title] pairs.

    Returns:
        list[list[str]]: each entry is [absolute novel URL, novel title].
    """
    page = "https://www.ykanxiaoshuo.com/paihangbang/"
    resp = requests.get(page)
    # resp.status_code can be inspected here to verify the request succeeded
    resp.encoding = 'gbk'  # site serves GBK-encoded HTML
    html = resp.text
    # Capture (relative path, title) for every novel link on the page.
    URL_id = re.findall(r'<a href="https://www.ykanxiaoshuo.com(.*)">(.*)</a></li>', html)
    del URL_id[0]  # first match is not a novel entry; drop it
    URL = []
    for path, title in URL_id:
        URL.append([f'https://www.ykanxiaoshuo.com{path}', title])
    return URL
# Collect the chapter URLs and titles for a single novel.
def zhangjie_url(url):
    """Scrape a novel's index page and return its chapter list.

    Args:
        url: absolute URL of the novel's index page,
             e.g. 'https://www.ykanxiaoshuo.com/0/2/'.

    Returns:
        list[list[str]]: each entry is [absolute chapter URL, chapter title].
    """
    resp = requests.get(url)
    resp.encoding = 'gbk'  # site serves GBK-encoded HTML
    html = resp.text
    # Narrow to the chapter listing: between the "正文" volume header
    # and the page footer (re.S lets '.' span newlines).
    section = re.search(r'<div class="volume">正文.*<div id="footer" name="footer">', html, re.S)
    section = section.group()
    chapters = re.findall(r'<li class="chapter"><a href="(.*)">(.*)</a></li>', section)
    base = 'https://www.ykanxiaoshuo.com'
    URL = []
    for path, title in chapters:
        URL.append([f'{base}{path}', title])
    return URL
def task(j, z):
    """Download one chapter and append its text to the shared `text` list.

    Args:
        j: [chapter_url, chapter_title] pair as produced by zhangjie_url().
        z: the chapter's ordinal; stored first in the appended entry so the
           caller can sort the concurrently-filled results back into order.

    Side effects:
        Appends [z, chapter_title, chapter_text] to the module-level list
        `text`. list.append is atomic under the GIL, which is what the
        threaded caller relies on.
    """
    resp = requests.get(j[0])
    resp.encoding = 'gbk'  # site serves GBK-encoded HTML
    html = resp.text
    # Chapter body sits between the content div and the bottom nav bar.
    content = re.search(r'<div id="content">(.*)<div class="bottem2">', html, re.S)
    content = content.group(1)
    content = re.sub(' ', '', content)        # strip padding spaces (decoded &nbsp;)
    content = re.sub('<br />', '\n', content)  # HTML line breaks -> newlines
    text.append([z, j[1], content])
# ---- driver: for every novel, download all chapters concurrently ----
novels = url()  # bind to a new name so the function `url` is not clobbered
for novel in novels:
    chapters = zhangjie_url(novel[0])
    print(chapters)
    pool = []
    z = 1      # chapter ordinal; lets us restore reading order after threading
    text = []  # shared result list filled by the task() threads
    for chap in chapters:
        print(f'正在下载---{novel[1]}---{chap[1]}')
        t = threading.Thread(target=task, args=(chap, z))
        pool.append(t)
        t.start()
        z += 1
    # Wait for every chapter download to finish before writing the file.
    for t in pool:
        t.join()
    text.sort()  # entries are [z, title, body]; sorting by z restores order
    # Open the output file once per novel (not once per chapter) and write
    # explicitly as UTF-8 so Chinese text survives on any platform locale.
    with open(novel[1] + '.txt', 'a', encoding='utf-8') as f:
        for entry in text:
            f.write(entry[1] + '\n')
            f.write(entry[2] + '\n')
# Scraper for all novels on ykanxiaoshuo.com ("python爬易看网所有小说").
# Adapted from: blog.csdn.net/haha13l4/article/details/93399312