Day04_网络爬虫图片收获

#所需模块:requests、BeautifulSoup(来自 bs4 包)、urllib

1. response = requests.get('https://www.baidu.com')  #获取网站响应(URL 必须带上 http:// 或 https:// 协议头,否则 requests 会报错)

2.html = response.text #获取网页内容

3.soup = BeautifulSoup(html,'html.parser')  #创建BeautifulSoup对象,对网页进行解析

4.girl = soup.find_all('img') #根据img标签进行查找,形成列表

5.imgsrc = i.get('src') #i为遍历girl列表时的每个img标签,根据src属性获取图片路径

6.urllib.request.urlretrieve(imgsrc,'./image/%s.jpg'%x) #下载图片,x为图片序号(保存目录需提前创建)

附代码:

#Author_Liukai 2018年8月13日07:43:35

import requests
import urllib.request
from bs4 import BeautifulSoup

# Running count of downloaded images; also used as the next output file name.
x = 1


def getgirlimg(page):
    """Download every image referenced on one listing page into ./image1/.

    Fetches ``https://www.dbmeinv.com/index.htm?cid=<page+1>``, parses the
    HTML, and saves each ``<img>`` it finds as ``./image1/<x>.jpg``, where
    ``x`` is the module-level counter shared across calls (so file names
    keep increasing from page to page).

    Args:
        page: Page number; the request is made with ``cid=page+1``.

    Side effects:
        Creates ``./image1`` if missing, writes image files, prints progress,
        and increments the global counter ``x`` once per saved image.
    """
    # Function-scope imports keep this block self-contained.
    import os
    from urllib.parse import urljoin

    global x

    page_url = 'https://www.dbmeinv.com/index.htm?cid={}'.format(page+1)
    response = requests.get(page_url)
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')
    girl = soup.find_all('img')

    # urlretrieve does not create parent directories, so make sure the
    # target folder exists before the first download.
    os.makedirs('./image1', exist_ok=True)

    for i in girl:
        imgsrc = i.get('src')
        if not imgsrc:
            # <img> tags without a src attribute cannot be downloaded;
            # passing None to urlretrieve would raise.
            continue
        # Resolve relative or protocol-relative paths against the page URL;
        # absolute URLs are returned unchanged.
        imgsrc = urljoin(page_url, imgsrc)
        print('下载第{}张图片'.format(x))
        urllib.request.urlretrieve(imgsrc, './image1/%s.jpg' % x)
        x += 1

# Download ten pages in sequence, printing a banner before each one.
# NOTE(review): getgirlimg() requests cid=page+1, so the page number printed
# here is one less than the cid actually fetched -- confirm this is intended.
for i in range(1, 11):
    print('+++++++++++++++++++++')
    print('正在准备下载第{}页图片'.format(i))
    print('+++++++++++++++++++++')
    getgirlimg(i)

猜你喜欢

转载自www.cnblogs.com/postgraduatek/p/9466108.html