# Python 2 script: download high-resolution photos of 2018 model-year cars from Autohome (autohome.com.cn).
import urllib2
import os
from bs4 import BeautifulSoup
import random
import urllib
import time
def _fetch_soup(url):
    """Download *url* and return its body parsed with 'html.parser'.

    Raises whatever urllib2.urlopen raises (e.g. urllib2.URLError) when the
    request fails.
    """
    return BeautifulSoup(urllib2.urlopen(url).read(), 'html.parser')


def _download_2018_photos(name, car_url):
    """Save photos listed under the 2018 entry of one series picture page.

    name    -- text of the series heading; used as the output folder name
               under '2018_year_cartype/'.
    car_url -- absolute URL of the series picture page, e.g.
               https://car.autohome.com.cn/pic/series/145.html#pvareaid=103448

    Returns None. Does nothing when no <dt> of the first
    'search-pic-cardl' list mentions '2018'. Network and parsing errors
    propagate to the caller.
    """
    car_url_content = _fetch_soup(car_url)
    classes = car_url_content.find_all('dl', attrs={'class': "search-pic-cardl"})[0]

    # Locate the first <dt> whose text mentions 2018; the <ul> at the same
    # position is taken to hold that year's links.
    indexs = None
    for position, year in enumerate(classes.find_all('dt')):
        if '2018' in year.text:
            indexs = position
            break
    if indexs is None:
        return

    path = '2018_year_cartype/' + name
    if not os.path.exists(path):
        os.makedirs(path)

    photourl_2018 = classes.find_all('ul')[indexs]
    for j in photourl_2018.find_all('a'):
        time.sleep(0.3)  # throttle requests
        photourl_type = _fetch_soup('https://car.autohome.com.cn' + j.get('href'))
        photo_links = photourl_type.find_all(
            'a', attrs={'href': True, 'title': True, 'target': "_blank"})
        # Only the first three links are followed: the original author only
        # wanted the front-view photos.
        for eve in photo_links[0:3]:
            eve_content = _fetch_soup('https://car.autohome.com.cn' + eve.get('href'))
            photo_url = eve_content.find_all(
                'img', attrs={"id": 'img', 'src': True})[0].get('src')
            # Add a scheme only when the src has none. A substring test for
            # 'https:' would turn 'http://...' into 'https:http://...'.
            if not photo_url.startswith(('http:', 'https:')):
                photo_url = 'https:' + photo_url
            carphoto_name = str(random.uniform(0, 30)) + '.jpg'
            time.sleep(0.3)  # throttle requests
            urllib.urlretrieve(photo_url, path + '/' + carphoto_name)


# Brand index pages are keyed by initial letter. 'J' is left out because,
# per the original author's note, it comes out garbled. To crawl it, set
# end = ['J'] and drop the 'html.parser' argument from the index-page
# BeautifulSoup call below.
end = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O',
       'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
print('ok')
for e in end:
    url = "https://www.autohome.com.cn/grade/carhtml/%s.html" % (e)
    url_html = urllib2.urlopen(url).read()
    url_content = BeautifulSoup(url_html, 'html.parser')
    names = url_content.find_all('h4')
    # Pairing is positional only: assumes the n-th <h4> names the series that
    # the n-th id-bearing <a> links to -- TODO confirm against the page markup.
    for n, i in enumerate(url_content.find_all('a', attrs={'id': True})):
        name = names[n].text
        try:
            # Building the URL and fetching the page happen inside the try so
            # that one broken series is reported and skipped instead of
            # aborting the whole crawl.
            _download_2018_photos(name, 'https:' + i.get('href'))
        except Exception as exc:
            # Best effort: report the series that failed and keep going.
            # Ctrl-C (KeyboardInterrupt) is no longer swallowed.
            print(name)
            print(repr(exc))
print('end')