import requests,os
from lxml import etree
# Listing-page URL template; the %s slot is filled with a page number (1..31 below).
burl = 'http://www.mzitu.com/mm/page/%s/'
# Browser-like request headers sent with every listing/detail request.
# NOTE(review): the Cookie value is a captured session token — presumably
# required by the site's anti-scraping checks; confirm it is still valid.
headers = {
'Cookie':'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1534513371; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1534515349',
'Referer':'http://www.mzitu.com/xinggan/',
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}
# Walk listing pages 1..31; for every photo set found, download all of its
# images into a folder named after the set.
for i in range(1, 32):
    url = burl % i
    response = requests.get(url, headers=headers)
    html_ele = etree.HTML(response.text)
    # Each <li> under ul#pins is one photo set on the listing page.
    pins = html_ele.xpath('.//ul[@id="pins"]/li')
    for abiaoqian in pins:
        # Title of the photo set — used as the download folder name.
        aname = abiaoqian.xpath('./span[1]/a')[0].text
        if not os.path.exists(aname):
            os.mkdir(aname)
        # Detail-page URL of this photo set.
        tuurl = abiaoqian.xpath('./a/@href')[0]
        res = requests.get(tuurl, headers=headers)
        htmls = etree.HTML(res.text)
        # The second-to-last pagination link's last path segment is the
        # highest page number (i.e. the number of images in the set).
        url_max = htmls.xpath('//div[@class="pagenavi"]/a/@href')[-2].split('/')[-1]
        # Per-set headers for image downloads; built once per set (not per
        # image) since only the Referer varies, and it is the set URL.
        headerss = {
            'Referer': tuurl,
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
        }
        for x in range(1, int(url_max) + 1):
            # Page x of the set shows exactly one full-size image.
            htmltu = tuurl + '/%s' % x
            h1_ele = requests.get(htmltu, headers=headers)
            h2 = etree.HTML(h1_ele.text)
            # Direct URL of the full-size image on this page.
            tupiandizhi = h2.xpath('//div[@class="main-image"]/p/a/img/@src')[0]
            # Save under <set name>/<original file name>.
            jpg_name = os.path.join(aname, tupiandizhi.split('/')[-1])
            img = requests.get(tupiandizhi, headers=headerss)
            with open(jpg_name, 'wb') as f:
                f.write(img.content)
# Scrapes photo sets from http://www.mzitu.com with XPath and saves each set
# into its own folder.
# Adapted from: blog.csdn.net/qq_41996633/article/details/81813245