#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests  # send HTTP requests
from bs4 import BeautifulSoup  # parse HTML
import lxml  # parser backend (keeps Chinese text from being garbled)
import os  # create folders
# Base directory where downloaded images are stored.
path = 'C:/Users/Administrator/Desktop/tmp'
# Sub-folder (under `path`) that holds all galleries.
folder = '/mzitu/'
# Index page listing every gallery on the site.
request_url = 'https://www.mzitu.com/all/'
# NOTE(review): the original `headers = headers2 = {...}` bound BOTH names to
# one dict, so rewriting headers2['referer'] inside the download loop silently
# changed `headers` as well. Keep them as two independent dicts instead.
headers = {
    'referer': request_url,
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6788.400 QQBrowser/10.3.2864.400'
}
# Per-image headers; its 'referer' is rewritten for every page fetched.
headers2 = dict(headers)
# 下载图片
def down_img(img_url, path, headers):
    """Download a single image file into directory *path*.

    Args:
        img_url: direct URL of the image.
        path: directory the image is written into (must already exist).
        headers: HTTP headers; presumably the site requires a matching
            'referer' for anti-hotlinking — confirm against the server.
    """
    # Timeout keeps one stalled connection from hanging the whole crawl.
    ret_html = requests.get(url=img_url, headers=headers, timeout=30)
    # Fail loudly on HTTP errors instead of saving an error page as a .jpg.
    ret_html.raise_for_status()
    name = path + "/" + img_url.split('/')[-1]
    # "wb", not the original "ab": re-running the script must overwrite the
    # file — append mode tacked a second copy onto the bytes and corrupted
    # every previously downloaded image.
    with open(name, "wb") as f:
        f.write(ret_html.content)
# ---- crawl the gallery index, then download every gallery ----------------
# Fetch the index page that lists all galleries.
ret_html = requests.get(url=request_url, headers=headers, timeout=30)
soup = BeautifulSoup(ret_html.content, 'html.parser')

# Every gallery link on the /all/ page lives inside <p class="url">.
a_list = soup.find('p', attrs={"class": "url"}).find_all('a')
url_list = [a['href'] for a in a_list]
print(url_list)

for gallery_url in url_list:
    ret_html = requests.get(url=gallery_url, headers=headers, timeout=30)
    soup = BeautifulSoup(ret_html.content, 'html.parser')
    title = soup.find("h2", attrs={"class": "main-title"}).text
    # URL of the gallery's first image; the rest follow the same pattern
    # with a zero-padded 2-digit index before the extension.
    img_url = soup.find("div", attrs={"class": "main-image"}).find("img")['src']
    # The second-to-last pager link holds the total page (image) count.
    page = soup.find("div", attrs={"class": "pagenavi"}).find_all("a")[-2].find("span").text
    ext = '.' + img_url.split('.')[-1]
    # One sub-folder per gallery, named after the gallery title.
    save_dir = path + folder + title
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    # First image: the referer must be the gallery page itself.
    headers2['referer'] = gallery_url
    down_img(img_url, save_dir, headers2)
    # Remaining images: swap the trailing "NN.ext" of the first image's URL.
    # zfill(2) replaces the original duplicated `if j < 10` padding branch.
    for page_no in range(2, int(page) + 1):
        img_url2 = img_url[0:-6] + str(page_no).zfill(2) + ext
        headers2['referer'] = gallery_url + "/" + str(page_no)
        down_img(img_url2, save_dir, headers2)