"""Scrape Taobao search results for the keyword 'python'.

Drives Chrome through Selenium: opens the Taobao home page, submits the
search, reads the total page count from the pager, then visits every result
page in turn and prints one dict per listed product.
"""
import re
import time

import lxml  # noqa: F401 -- kept from the original script; BeautifulSoup is asked for the 'lxml' parser
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# CSS selectors of the "jump to page" form inside the result pager.
PAGE_INPUT_SELECTOR = '#mainsrp-pager > div > div > div > div.form > input'
PAGE_SUBMIT_SELECTOR = '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit'


def _field_text(item, selector, strip=False):
    """Return the text of the first node under *item* matching *selector*.

    Returns None when nothing matches, so that a single item lacking one
    field does not abort the whole run with an IndexError.
    """
    node = item.select_one(selector)
    if node is None:
        return None
    text = node.get_text()
    return text.strip() if strip else text


def _parse_products(html):
    """Yield one product dict per entry of the result list found in *html*."""
    soup = BeautifulSoup(html, 'lxml')
    for item in soup.select('#mainsrp-itemlist .items .item'):
        yield {
            # Only name and shop are stripped, matching the original output.
            'name': _field_text(item, '.title', strip=True),
            'price': _field_text(item, '.g_price-highlight > strong'),
            'deal-cnt': _field_text(item, '.deal-cnt'),
            'shop': _field_text(item, '.shop', strip=True),
            'location': _field_text(item, '.location'),
        }


def _read_total_pages(driver):
    """Return the page count taken from the first number in the pager's 'total' text.

    Raises:
        RuntimeError: if the text contains no digits.
    """
    total_text = driver.find_element(By.CLASS_NAME, 'total').text
    match = re.search(r'(\d+)', total_text)
    if match is None:
        raise RuntimeError(
            'could not find a page count in pager text: %r' % total_text
        )
    return int(match.group(1))


def _jump_to_page(driver, page):
    """Type *page* into the pager's jump box, submit it and wait for the reload."""
    blank = driver.find_element(By.CSS_SELECTOR, PAGE_INPUT_SELECTOR)
    blank.clear()
    blank.send_keys(str(page))
    driver.find_element(By.CSS_SELECTOR, PAGE_SUBMIT_SELECTOR).click()
    # Fixed pause (kept from the original) so the next page_source read sees
    # the newly loaded results rather than the previous page.
    time.sleep(3)


def main():
    """Run the search and print every product on every result page."""
    driver = webdriver.Chrome()
    try:
        # Let element lookups poll for a while: the lookup may otherwise run
        # before the page reached by the preceding click has rendered.
        driver.implicitly_wait(10)
        driver.get('https://www.taobao.com/')
        driver.find_element(By.ID, 'q').send_keys('python')
        driver.find_element(By.CLASS_NAME, 'search-button').click()

        total_pages = _read_total_pages(driver)
        print(total_pages)

        for page in range(1, total_pages + 1):
            # The search already lands on page 1; only later pages need a
            # jump. (The original jumped to page 1 again after scraping it,
            # so page 1 was printed twice and the last page never was.)
            if page > 1:
                _jump_to_page(driver, page)
            for product in _parse_products(driver.page_source):
                print(product)
    finally:
        # Always close the browser, even when a lookup or parse step fails.
        driver.quit()


# Called unconditionally: the original flat script ran its steps at module
# level, so running or importing this file behaves the same way as before.
main()
Python 爬取淘宝商品数据
猜你喜欢
转载自blog.csdn.net/qq_32862515/article/details/79048650
今日推荐
周排行