使用 Python 2 下载汽车之家 2018 年车型图片

python2下载汽车之家的图片

python2下载汽车之家的图片


import urllib2
import os
from bs4 import BeautifulSoup
import random
import urllib
import time
# Index pages are keyed by a capital letter.  'J' is skipped on purpose: the
# original author notes that page comes out garbled with 'html.parser' and
# has to be parsed without the explicit parser argument instead.
INDEX_LETTERS = 'ABCDEFGHIKLMNOPQRSTUVWXYZ'
INDEX_HOST = 'https://www.autohome.com.cn'
INDEX_URL = INDEX_HOST + '/grade/carhtml/%s.html'
PHOTO_HOST = 'https://car.autohome.com.cn'
TARGET_YEAR = '2018'
# Placeholder from the original post (the literal means "path"): replace it
# with a real directory prefix.  It is concatenated directly with the series
# name, so it should end with a path separator.  It is a unicode literal
# because the names scraped by BeautifulSoup are unicode, and under Python 2
# joining a non-ASCII byte string with unicode raises UnicodeDecodeError.
# NOTE: a non-ASCII literal also needs a source-encoding declaration at the
# top of the file under Python 2.
SAVE_ROOT = u'路径'
# Only the first few matching images of each model page are kept; the
# original author's note says only the front-view photos are wanted.
PHOTOS_PER_MODEL = 3


def fetch(url):
    """Return the raw body of *url*, closing the connection afterwards."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def to_absolute_url(href, host):
    """Turn a scraped ``href``/``src`` value into an absolute URL.

    Protocol-relative links (``//host/path``) get an ``https:`` scheme,
    site-relative links (``/path``) are prefixed with *host*, and anything
    else (for example an already absolute URL) is returned unchanged.
    Raises AttributeError when *href* is None (attribute missing on the tag).
    """
    if href.startswith('//'):
        return 'https:' + href
    if href.startswith('/'):
        return host + href
    return href


def find_year_index(labels, year=TARGET_YEAR):
    """Return the position of the first label containing *year*, else None."""
    for position, label in enumerate(labels):
        if year in label:
            return position
    return None


def download_series(name, series_href):
    """Save the photos of every TARGET_YEAR model of one car series.

    *name* is the series name (used as the directory name under SAVE_ROOT)
    and *series_href* is the link to the series photo page as scraped from
    the index page.  Returns the number of images saved; 0 when the series
    lists no TARGET_YEAR models.  Raises LookupError when the page has no
    model-year list; network and parsing errors propagate to the caller.
    """
    series_url = to_absolute_url(series_href, INDEX_HOST)
    series_page = BeautifulSoup(fetch(series_url), 'html.parser')
    year_list = series_page.find('dl', attrs={'class': "search-pic-cardl"})
    if year_list is None:
        raise LookupError('no model-year list on the series page')

    year_labels = [dt.text for dt in year_list.find_all('dt')]
    year_index = find_year_index(year_labels)
    if year_index is None:
        return 0

    target_dir = SAVE_ROOT + name
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    # The n-th <ul> is taken to hold the models of the n-th <dt> year label.
    model_links = year_list.find_all('ul')[year_index].find_all('a')
    saved = 0
    for model_link in model_links:
        model_url = to_absolute_url(model_link.get('href'), PHOTO_HOST)
        model_page = BeautifulSoup(fetch(model_url), 'html.parser')
        images = model_page.find_all(
            'img', attrs={'src': True, 'alt': True, 'title': True})
        for image in images[:PHOTOS_PER_MODEL]:
            image_url = to_absolute_url(image.get('src'), PHOTO_HOST)
            # Sequential names are unique within the series directory and
            # make a re-run overwrite files instead of piling up copies.
            saved += 1
            file_name = '%d.jpg' % saved
            time.sleep(0.5)  # throttle requests to the image server
            urllib.urlretrieve(image_url, os.path.join(target_dir, file_name))
    return saved


def main():
    """Walk every index page and download the photos of each listed series.

    A failure on one index page or one series is reported and skipped so the
    rest of the crawl continues; KeyboardInterrupt is not intercepted.
    """
    for letter in INDEX_LETTERS:
        try:
            index_page = BeautifulSoup(fetch(INDEX_URL % letter),
                                       'html.parser')
        except Exception as exc:
            print(u'index page %s: %r' % (letter, exc))
            continue

        headings = index_page.find_all('h4')
        series_links = index_page.find_all('a', attrs={'id': True})
        # Headings and links are paired by position, as in the original
        # script -- presumably each <h4> names the series of the matching
        # <a id=...>; verify against the live page markup.
        for heading, series_link in zip(headings, series_links):
            name = heading.text
            try:
                download_series(name, series_link.get('href'))
            except Exception as exc:
                # Best effort: report the series that failed and move on.
                print(u'%s: %r' % (name, exc))


if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/qq_34496674/article/details/88183595