# 通过访问接口的方式爬取网络图片
import json
import os
import urllib.request
from _md5 import md5

import requests
from requests import RequestException


# Fetch the listing by calling the endpoint with requests.post.
def get_one_page(timeout=10):
    """POST the fixed query to the listing endpoint and return the body text.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, timeout, or any other ``RequestException``).
    """
    payload = {
        'applyid': "c8cda453-d2ff-****-a496-09c0ef6cc9a4",
        'earmark': "",
        'farm': "",
    }
    url = 'http://hb.****.cn/Server/PutOnEarmarkListData'
    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except RequestException:
        # Timeouts and connection errors are reported to the caller as None.
        return None
    if response.status_code == 200:
        return response.text
    return None
# Parse the response data and download the pictures it lists.
def parse_page_detail(html, save_dir=r'D:\photos'):
    """Decode the JSON listing and download every picture it references.

    Each element of the decoded JSON is read with ``.get('PHOTO')``; a value
    is treated as a comma-separated string of picture URLs. Pictures are
    saved as ``0.jpg``, ``1.jpg``, ... inside ``save_dir``.

    Args:
        html: JSON text of the listing. ``None`` or an empty string (for
            example after a failed fetch) yields an empty result instead of
            an exception.
        save_dir: Directory that receives the downloaded files; it is created
            when missing. Defaults to the location the script always used.

    Returns:
        ``{'PHOTO': photos}`` where ``photos`` holds the raw ``PHOTO`` value
        of every item, in order (``None`` for items without that key).

    Raises:
        ValueError: ``html`` is not valid JSON, or a URL is malformed.
        OSError: A download or a file write fails (``urllib.error.URLError``
            is a subclass).
    """
    if not html:
        return {'PHOTO': []}

    data = json.loads(html)
    photos = [item.get('PHOTO') for item in data]

    os.makedirs(save_dir, exist_ok=True)
    count = 0  # running counter, also used as the file name
    for group in photos:
        if not group:
            # Item without pictures (missing key, None or empty string).
            continue
        for photo_url in group.split(','):
            photo_url = photo_url.strip()
            if not photo_url:
                # Stray comma, e.g. "a.jpg,,b.jpg" or a trailing one.
                continue
            target = os.path.join(save_dir, '%s.jpg' % count)
            urllib.request.urlretrieve(photo_url, target)
            count += 1
            print('第%s张图片下载完成' % count)
    return {'PHOTO': photos}
# Main entry point.
def main():
    """Fetch the listing, download its pictures and print the parsed result.

    When the fetch fails (``get_one_page`` returns ``None``) a message is
    printed and nothing is parsed, instead of passing ``None`` on to the
    JSON parser.
    """
    data = get_one_page()
    if data is None:
        print('接口请求失败，未获取到数据')
        return
    result = parse_page_detail(data)
    print(result)


# NOTE(review): this guard runs before the helper functions defined further
# down the file exist; that is safe only while main() does not reach them.
if __name__ == '__main__':
    main()
# Save a picture to the local disk.
def save_image(content):
    """Write ``content`` to ``<cwd>/<md5 of content>.jpg`` unless it exists.

    Naming the file after the MD5 digest of its bytes means identical
    pictures are stored only once.

    Args:
        content: Raw picture bytes.
    """
    file_name = '{0}.{1}'.format(md5(content).hexdigest(), 'jpg')
    file_path = os.path.join(os.getcwd(), file_name)
    if os.path.exists(file_path):
        return
    # The with-statement closes the file; no explicit close() is needed.
    with open(file_path, 'wb') as f:
        f.write(content)


# Download a picture.
def download_image(url, timeout=10):
    """Fetch ``url`` and store the body with ``save_image``.

    Failures are reported on stdout and never raised: both request errors
    and non-200 responses print the failure message.

    Args:
        url: Address of the picture.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Always ``None``.
    """
    print('正在下载', url)
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        print('图片下载失败', url)
        return None
    if response.status_code == 200:
        save_image(response.content)
    else:
        # Previously dropped silently; report it like any other failure.
        print('图片下载失败', url)
    return None