# Reference site: https://www.ivsky.com/
"""Download the nature-scenery picture thumbnails listed on www.ivsky.com."""
import os
import re
from typing import Iterator
from urllib.parse import urlsplit

from requests_html import HTMLSession

session = HTMLSession()

BASE_URL = 'https://www.ivsky.com'

# Characters that cannot safely appear in a file name (path separators,
# Windows-reserved punctuation, control whitespace).
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def get_page_url(first: int = 1, last: int = 20) -> Iterator[str]:
    """Yield the URLs of the category listing pages.

    Args:
        first: Number of the first listing page (inclusive).
        last: Number of the last listing page (inclusive).

    Yields:
        One listing-page URL per page number from ``first`` to ``last``.
        With the defaults this is pages 1 through 20.
    """
    for i in range(first, last + 1):
        yield 'https://www.ivsky.com/tupian/ziranfengguang/index_{}.html'.format(i)


def _rehost_on_base(src: str) -> str:
    """Return ``BASE_URL`` joined with the path (and query) of ``src``.

    For a protocol-relative ``//img.ivsky.com/<path>`` this gives the same
    result as dropping the first 15 characters and prefixing ``BASE_URL``,
    but it does not depend on the exact length of the host name.
    """
    parts = urlsplit(src)
    path = parts.path if parts.path.startswith('/') else '/' + parts.path
    if parts.query:
        path = '{}?{}'.format(path, parts.query)
    return BASE_URL + path


def get_url_page(url: str) -> None:
    """Visit every album linked from one listing page and save its images.

    Args:
        url: URL of a listing page, as produced by ``get_page_url``.

    Each ``.il_img a`` anchor on the listing page is followed; every
    ``.il_img img`` found on the linked page is passed to ``save``.
    """
    listing = session.get(url=url)
    for link in listing.html.find('.il_img a'):
        href = link.attrs.get('href')
        if not href:
            # Nothing to follow; concatenating None would raise TypeError.
            continue
        # Fall back to a name derived from the link when the anchor carries
        # no title attribute.
        title = link.attrs.get('title') or href.strip('/').replace('/', '_')

        # Open the album page that holds the individual pictures.
        album = session.get(url=BASE_URL + href)
        for index, image in enumerate(album.html.find('.il_img img'), start=1):
            src = image.attrs.get('src')
            if not src:
                continue
            # Number the files so that images belonging to the same album do
            # not overwrite each other on disk.
            save(_rehost_on_base(src), '{}_{}'.format(title, index))


def save(url: str, title: str) -> None:
    """Download ``url`` and write it to ``风景图片/<title>.png``.

    Args:
        url: Address of the image to download.
        title: Base name for the output file; characters that are unsafe in
            file names are replaced with ``_``.

    The target directory is created when missing. A response with a
    non-success status is reported and skipped instead of being written
    to disk as if it were an image.
    """
    base_url = '风景图片'
    os.makedirs(base_url, exist_ok=True)
    safe_title = _UNSAFE_FILENAME_CHARS.sub('_', title)
    file_path = os.path.join(base_url, safe_title + '.png')
    r = session.get(url=url)
    if not r.ok:
        print('{}图片下载失败,状态码:{}'.format(title, r.status_code))
        return
    with open(file_path, 'wb') as f:
        f.write(r.content)
    print('{}图片保存成功'.format(title))


if __name__ == '__main__':
    for page_url in get_page_url():
        get_url_page(page_url)
    # TODO: consider printing a single-line progress bar.
实现爬取图片
猜你喜欢
转载自www.cnblogs.com/changwenjun-666/p/11323262.html
今日推荐
周排行