# Crawl images from mm131
import urllib.request
import os
# Download the images of every album id in the range below from the mm131
# image host and store them as tupian/<album id>/<image index>.jpg.

# Headers sent with every image request. Built once, because they do not
# depend on the album or the image being fetched.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
    'Referer': 'https://www.sogou.com/link?url=DSOYnZeCC_o7btUgpK402wmc9YOcsOr4cOOT57O29F8'
}

for album_id in range(4200, 4461):
    album_dir = os.path.join('tupian', str(album_id))
    # makedirs also creates the missing 'tupian' parent and tolerates a
    # rerun; a bare mkdir would raise in both cases and abort the script.
    os.makedirs(album_dir, exist_ok=True)

    for index in range(60):
        url = 'http://img1.mm131.me/pic/%s/%s.jpg' % (album_id, index)
        print(url)
        try:
            request = urllib.request.Request(url=url, headers=headers)
            # The context manager closes the HTTP response even when
            # reading the body fails.
            with urllib.request.urlopen(request) as response:
                data = response.read()
            # Open the target only after the body was read completely, so
            # a failed download never leaves an empty or partial file.
            with open(os.path.join(album_dir, '%s.jpg' % index), 'wb') as fp:
                fp.write(data)
        except Exception as exc:
            # Any failure ends the current album: report it together with
            # the cause and continue with the next album id.
            print('下载失败,下载下一条', exc)
            break
# Crawl images from mzitu (Meizitu)
import urllib.request
import re
import time
import os
# Configuration for the mzitu crawler.

# Proxy mapping passed to urllib's ProxyHandler; only the 'http' scheme
# is mapped.
proxy = {
    'http': '120.92.74.189:3128'
}

# Headers attached to every request the crawler sends.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36',
    'Referer': 'http://mzitu.com'
}

# Root output directory; album folders are created beneath it.
dirname = 'mzt'
# exist_ok makes the creation idempotent without the race between an
# exists() check and the mkdir() call.
os.makedirs(dirname, exist_ok=True)
# Crawl the mzitu listing pages and download the images of every album
# linked from them into <dirname>/<album path>/<NN>.jpg.

# One opener routed through the configured proxy, shared by every request
# (listing pages, album pages and images) so they all take the same path.
opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies=proxy))

# The patterns never change, so they are compiled once up front.
# Album links on a listing page: group 1 is the full URL, group 2 the part
# that follows ".com/".
album_link_re = re.compile(r'<li><a href="(.*?.com/(.*?))"')
# Main image of an album page: group 1 is the image URL up to its trailing
# digits, group 2 the extension; the digits in between are the image number.
first_image_re = re.compile(r'<div class="main-image">.*?<img src="(.*?)\d+(.jpg)"')
# Number in the first <span> after the pagination ellipsis; the loop below
# uses it as the number of images in the album.
page_count_re = re.compile(r'…</span>.*?<span>(\d+)</span>')


def _fetch(target_url):
    """Return the raw response body of *target_url*.

    The request carries the module-level ``headers`` and goes through the
    shared proxy opener. The response is closed before returning. Network
    errors raised by urllib propagate to the caller.
    """
    request = urllib.request.Request(url=target_url, headers=headers)
    with opener.open(request) as response:
        return response.read()


for page in range(2, 3):
    listing_html = _fetch('http://www.mzitu.com/page/%s/' % page).decode('utf8')

    for album_url, album_path in album_link_re.findall(listing_html):
        album_html = _fetch(album_url).decode('utf8')

        image_match = first_image_re.search(album_html)
        count_match = page_count_re.search(album_html)
        if image_match is None or count_match is None:
            # The link pattern may match pages that have no main image or
            # pagination; skip them instead of dying with an IndexError.
            print('skip %s: no image or page count found' % album_url)
            continue

        url_prefix, extension = image_match.groups()
        image_count = int(count_match.group(1))
        print((url_prefix, extension))
        print(image_count)

        # Create the folder only once the page is known to be an album, so
        # skipped links leave no empty directories behind.
        album_dir = os.path.join(dirname, album_path)
        os.makedirs(album_dir, exist_ok=True)

        # Pause between albums to keep the request rate low.
        time.sleep(5)

        for number in range(1, image_count + 1):
            # Image numbers are zero-padded to two digits in both the
            # remote URL and the local file name.
            label = "%02d" % number
            print(label)
            data = _fetch(url_prefix + label + extension)
            with open(os.path.join(album_dir, '%s.jpg' % label), 'wb') as fb:
                fb.write(data)
            time.sleep(0.3)