# -*- coding: utf-8 -*-
import urllib.request
from bs4 import BeautifulSoup
import os
#找到网址
def getDatas():
    """Scrape the first page of Douban Movie Top 250 and download each poster.

    Fetches https://movie.douban.com/top250, parses every ``div.item`` entry,
    extracts the rank, title and poster URL, then saves each poster image
    into a local ``images/`` directory (created if missing). Prints each
    image filename as it is processed; download failures are reported but
    do not abort the loop.

    Returns:
        None. Side effects only (network requests, files written, prints).
    """
    # Spoof a browser User-Agent so the site does not reject the request.
    header = {
        'User-Agent': "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11"
    }
    url = "https://movie.douban.com/top250"
    ret = urllib.request.Request(url=url, headers=header)

    # Fetch and parse the page; close the response promptly via the
    # context manager instead of leaking the socket.
    with urllib.request.urlopen(ret) as res:
        response = BeautifulSoup(res, 'html.parser')

    # Each movie entry lives in a <div class="item"> container.
    datas = response.find_all('div', {'class': 'item'})

    # Ensure the output directory exists (exist_ok avoids a race between
    # the existence check and the mkdir call).
    folder_name = "images"
    os.makedirs(folder_name, exist_ok=True)

    for item in datas:
        # Rank and title are extracted but currently only the image name
        # is printed; they are kept for parity with the original data pull.
        rank = item.find('div', {'class': 'pic'}).find('em').get_text()
        title = item.find('div', {'class': 'info'}).find('div', {'class': 'hd'}).find('a').find('span', {'class': 'title'}).get_text()
        picUrl = item.find('div', {'class': 'pic'}).find('a').find('img').get('src')

        # Use the last URL path segment as the local filename.
        image_name = picUrl.split("/")[-1]
        print(image_name)
        try:
            urllib.request.urlretrieve(picUrl, folder_name + "/%s" % image_name)
        except IOError as err:
            # Best-effort: report the failure and continue with the next poster.
            # (IOError is an alias of OSError, so URLError/HTTPError are caught too.)
            print("error" + str(err))
# 保存数据为json格式
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    getDatas()
# Scraping data with Python and saving it as images.
# Adapted from: blog.csdn.net/zhanghl150426/article/details/82022668