# —————— 来自某潮汕人的菜鸟教程 (a beginner tutorial from a Chaoshan author)
import base64
import re

import requests


def base64_decode1(s):
    """Decode a base64 string, tolerating missing '=' padding.

    Parameters:
        s: base64-encoded text (str).

    Returns:
        The decoded bytes.
    """
    raw = s.encode()
    missing_padding = len(raw) % 4
    if missing_padding != 0:
        raw += b'=' * (4 - missing_padding)
    # base64.decodestring was deprecated in 3.1 and removed in Python 3.9;
    # base64.b64decode is the supported equivalent.
    return base64.b64decode(raw)


def get_info(url):
    """Fetch one gallery page; return the next page's URL and image hashes.

    Returns a 3-tuple:
        next_pages_url: absolute URL ('http:' + href) of the next (older) page,
        img_hash: list of base64-encoded image URLs found on this page,
        flag: 0 when a next page exists, 1 when this is the last page.
    """
    next_pages_url = ''
    result = requests.get(url=url).text
    flag = 0
    try:
        # [0] raises IndexError when the "Older Comments" link is absent,
        # i.e. we have reached the site's last page.
        next_pages_url = re.findall(
            '<a title="Older Comments" href="(.*?)"', result, re.DOTALL)[0]
    except IndexError:
        print('超过源网站的下载页数')
        flag = 1
    img_hash = re.findall(
        '<span class="img-hash">(.*?)</span></p>', result, re.DOTALL)
    print(next_pages_url)
    print(img_hash)
    return 'http:' + next_pages_url, img_hash, flag


if __name__ == '__main__':
    image_list = []  # decoded image URLs, as bytes
    max_pages = 10
    start_url = 'http://jandan.net/ooxx'
    next_pages_url = start_url
    file_path = 'E:\\'
    for _ in range(max_pages):
        # Collect every image hash on the current page into image_list.
        next_pages_url, img_hash, flag = get_info(next_pages_url)
        for image_hash in img_hash:  # renamed: `hash` shadowed the builtin
            image_list.append(base64_decode1(image_hash))
        if flag == 1:
            break  # no next page
    sum_image = len(image_list)
    print('总共有' + str(sum_image) + '张图片')
    index = 0
    for image in image_list:
        # Download each image; skip entries whose bytes are not valid text.
        try:
            image_url = 'http:' + image.decode()
        except UnicodeDecodeError:
            print('错误链接')
            continue
        filename = image_url.split('/')[-1]
        r = requests.get(image_url)
        with open(file_path + filename, 'wb') as f:
            f.write(r.content)
        index += 1
        print('\r下载进度' + str(int(index * 100 / sum_image)) + '%', end='')