"""Scrape Taobao search results for a keyword using Selenium and lxml.

The script opens the Taobao home page, searches for ``python`` and prints
the price, deal count and shop name of every result card, following the
"next page" link until it can no longer be found.
"""
import time

from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# NOTE(review): passing the chromedriver path positionally is the Selenium 3
# calling convention; confirm it is still accepted by the installed version.
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
driver.maximize_window()

# From inspecting the page: the first result card follows a special rule
# (an extra "item-ad" class); the remaining cards share one common rule.
_ITEM_XPATHS = (
    '//*[@class="item J_MouserOnverReq item-ad "]',
    '//*[@class="item J_MouserOnverReq "]',
)

# Field selectors are relative (leading ".") so that they only match inside
# the card being processed instead of the whole document.
_PRICE_XPATH = './/*[@class="price g_price g_price-highlight"]/strong/text()'
_DEAL_COUNT_XPATH = './/*[@class="deal-cnt"]/text()'
_SHOP_NAME_XPATH = (
    './/*[@class="shopname J_MouseEneterLeave J_ShopInfo"]/span[2]/text()'
)
# Title extraction is disabled, as in the original script; its selector was
# '//*[@class="row row-2 title"]/a/span/text()'.


def _print_current_page():
    """Parse the page currently shown by the driver and print each result."""
    selector = etree.HTML(driver.page_source)
    # Keep the original ordering: ad cards first, then regular cards.
    items = [node for xpath in _ITEM_XPATHS for node in selector.xpath(xpath)]
    for item in items:
        moneys = item.xpath(_PRICE_XPATH)
        numbers = item.xpath(_DEAL_COUNT_XPATH)
        dian_names = item.xpath(_SHOP_NAME_XPATH)
        # zip() only yields complete rows; a card missing any of the three
        # fields is skipped rather than printed with misaligned values.
        for money, number, dian_name in zip(moneys, numbers, dian_names):
            print(money, number, dian_name)


def _click_next_page():
    """Click the "next page" link; return False when no such link is found."""
    try:
        driver.find_element(By.LINK_TEXT, '下一页').click()
    except NoSuchElementException:
        return False
    return True


def get_url(url):
    """Load ``url`` in the shared driver and scrape it and following pages."""
    driver.get(url)
    driver.implicitly_wait(10)
    get_info()


def get_info():
    """Print the results of the current page and of every following page.

    Pages are walked in a loop rather than by get_info() and next_url()
    calling each other, so the call stack stays flat regardless of how many
    pages there are. The loop ends when the "next page" link is not found.
    """
    while True:
        _print_current_page()
        # Pause between pages, as the original script did.
        time.sleep(3)
        if not _click_next_page():
            break


def next_url():
    """Click "next page" and continue scraping from the page it leads to.

    Returns without doing anything when there is no "next page" link.
    """
    if _click_next_page():
        get_info()


if __name__ == '__main__':
    try:
        url = 'https://www.taobao.com/'
        driver.get(url)
        driver.implicitly_wait(10)
        driver.find_element(By.NAME, 'q').send_keys('python')
        # Click the search button.
        driver.find_element(By.CLASS_NAME, 'search-button').click()
        # Hand the URL of the results page over to the scraper.
        get_url(driver.current_url)
    finally:
        # Always close the browser, even if scraping fails part-way.
        driver.quit()
python + selenium爬取淘宝
猜你喜欢
转载自blog.csdn.net/qq_18525247/article/details/80384824
今日推荐
周排行