- GitHub:https://github.com/121812/python
- 转载请注明出处
详细信息请看:http://www.forever121.cn/?p=138
下面为源代码
import os
import re
import time
import request
import requests
import xlsxwriter
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
# Search-results page to scrape (page 3 of the "珠宝" query on dior.cn).
url = 'https://www.dior.cn/zh_cn/products/search?page=3&query=珠宝'
#url = 'http://www.baidu.com'

# Raw string so the backslashes of the Windows path are taken literally
# instead of being parsed as (invalid) escape sequences.
# NOTE(review): this path is not passed to webdriver.Chrome below — confirm
# whether it is still needed.
chrome = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"

options = webdriver.ChromeOptions()
options.add_argument('lang=zh_CN.UTF-8')
options.add_argument('user-agent="Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Mobile Safari/537.36"')

# `options=` is the supported keyword; `chrome_options=` is deprecated.
browser = webdriver.Chrome(options=options)
browser.maximize_window()

# NOTE(review): `wait` is not used by the code visible in this file.
wait = WebDriverWait(browser, 2)
def get_source(url, max_clicks=20):
    """Open *url* in the module-level ``browser`` and return the page source.

    Before the source is read, the button matched by the XPath below is
    clicked up to *max_clicks* times, pausing one second after each click.
    Clicking stops early as soon as Selenium reports an error (for example
    because the button can no longer be found or clicked).

    Args:
        url: Address of the page to load.
        max_clicks: Upper bound on the number of button clicks.

    Returns:
        The HTML of the page as reported by ``browser.page_source``.
    """
    # Imported locally so the function brings its own Selenium names into
    # scope without relying on changes to the file-level import block.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    # NOTE(review): presumably the "load more" control of the result list
    # (the original code called it `login`) — confirm against the page.
    button_xpath = '//*[@id="main"]/div/div[2]/div[3]/button/span/span'

    browser.get(url)
    try:
        for attempt in range(max_clicks):
            print(attempt)
            browser.find_element(By.XPATH, button_xpath).click()
            # Pause between clicks.
            time.sleep(1)
    except WebDriverException:
        # Best effort: a Selenium failure just ends the clicking phase.
        # Unlike a bare ``except``, Ctrl-C and SystemExit still propagate.
        pass
    return browser.page_source
def find_combination(html):
    """Extract ``(title, image_url)`` pairs from the product listing HTML.

    Args:
        html: Page source as a string.

    Returns:
        A list of 2-tuples, one per match: the text inside the product
        title ``<span>`` and the ``src`` of the first ``<img>`` after it.
        The list is empty when nothing matches.
    """
    pattern = (
        r'<div class="product-legend">'
        r'<span class="title-with-level product-title century-std size-s">'
        r'<span class="multiline-text multiline-text--is-china">(.*?)</span>'
        r'.*?<img src="(.*?)"'
    )
    # re.S lets ``.`` cross line breaks, so the title and its <img> tag are
    # still paired when the markup between them spans several lines.
    return re.findall(pattern, html, re.S)
def find_jpg(combination, image_dir='F:\\python测试\\data\\迪奥',
             workbook_path='迪奥.xls'):
    """Download every product image and record the products in a workbook.

    For each ``(title, image_url)`` entry the image is saved as
    ``<image_dir>/<n>.jpg`` (n counts from 1) and one worksheet row is
    written: the full title, the title text before "750", the literal
    "750/1000", the text after "750/1000", and the downloaded image.

    Args:
        combination: Iterable of sequences whose first item is the product
            title and whose second item is the image URL.
        image_dir: Directory for the downloaded images; created if missing.
        workbook_path: Output workbook file.
            NOTE(review): XlsxWriter produces the xlsx format and its
            documentation recommends an ``.xlsx`` extension; the original
            ``.xls`` name is kept as the default for compatibility.

    Raises:
        requests.RequestException: If an image cannot be downloaded.
        OSError: If an image file cannot be written.
    """
    # Column captions: overall, name, quality, material, image.
    header = ('总体', '名称', '质量', '用料', '图片')
    image_col = len(header) - 1

    os.makedirs(image_dir, exist_ok=True)

    # The context manager closes (and therefore saves) the workbook even if
    # a download fails part-way through the loop.
    with xlsxwriter.Workbook(workbook_path) as book:
        sheet = book.add_worksheet('迪奥')
        for col, caption in enumerate(header):
            sheet.write(0, col, caption)

        # Row 0 holds the header, so data rows and image numbers start at 1.
        for row, item in enumerate(combination, start=1):
            title, image_url = item[0], item[1]
            image_path = os.path.join(image_dir, '%s.jpg' % row)
            _download_image(image_url, image_path)
            _write_product_row(sheet, row, title)
            sheet.insert_image(row, image_col, image_path)


def _download_image(image_url, image_path):
    """Fetch *image_url* and store the response body at *image_path*."""
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(image_url, timeout=30)
    # Fail here rather than saving an HTTP error page under a .jpg name.
    response.raise_for_status()
    time.sleep(1)
    print('正在下载:%s' % image_url)
    with open(image_path, 'wb') as target:
        target.write(response.content)


def _write_product_row(sheet, row, title):
    """Write *title* and its parts around the "750/1000" marker into *row*."""
    name = re.findall('^(.*?)750', title)
    quality = re.findall('750/1000(.*?)$', title)

    sheet.write(row, 0, title)
    if not name:
        # No "750" in the title: only the full title is recorded.
        return
    sheet.write(row, 1, name[0])
    sheet.write(row, 2, '750/1000')
    if quality:
        sheet.write(row, 3, quality[0])
# Run the pipeline: load the page, extract (title, image) pairs, then
# download the images and build the workbook.
try:
    find_jpg(find_combination(get_source(url)))
    print('下载完成')
finally:
    # Always shut the browser down so no Chrome/chromedriver process is left
    # running, whether the scrape succeeded or raised.
    browser.quit()