"""Scrape product listings from a JD.com search and append them to jd.txt.

The script opens Chrome, searches JD.com for a fixed keyword, then walks the
result pages one by one, printing every product and appending it to
``jd.txt`` in the current working directory.
"""
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

option = ChromeOptions()
option.add_argument("disable-infobars")


def _scroll_page(driver):
    """Scroll the current page down in 20 steps of 500px, starting at 400px."""
    # NOTE(review): presumably this triggers lazy loading of the lower part
    # of the result list before the items are read -- confirm against the
    # live page.
    for offset in range(400, 400 + 20 * 500, 500):
        js = """ window.scrollTo(0,%s) """ % offset
        driver.execute_script(js)
        time.sleep(0.1)


def _format_good(good):
    """Return the text record for one product ``li`` element.

    Reads name, price, link, image URL and comment count from the element.
    Raises whatever Selenium raises if one of the sub-elements is missing.
    """
    good_name = good.find_element(By.CSS_SELECTOR, ".p-name em").text.replace("\n", "")
    good_price = good.find_element(By.CSS_SELECTOR, ".p-price").text.replace("\n", "")
    good_link = good.find_element(By.CSS_SELECTOR, ".p-img a").get_attribute("href")
    good_img = good.find_element(By.CSS_SELECTOR, ".p-img img").get_attribute("src")
    good_commit = good.find_element(By.CSS_SELECTOR, ".p-commit").text.replace("\n", "")
    # Record fields, in order: name, price, link, image, comment count.
    return ''' 商品信息: 名称 %s 价格 %s 链接 %s 图片 %s 评论人数 %s ''' % (
        good_name, good_price, good_link, good_img, good_commit)


def get_goods(driver):
    """Scrape every result page reachable from the driver's current page.

    For each page: scroll through it, print each product record and append
    it to ``jd.txt`` (UTF-8), then click the "next page" button.

    Pages are walked in a loop rather than by recursion, so a long result set
    cannot exhaust the interpreter's recursion limit. The function returns
    when there is no next-page button or the button is marked disabled.

    Args:
        driver: a Selenium WebDriver already showing a search result page.
    """
    while True:
        _scroll_page(driver)

        # 1. Container element that holds all products on the page.
        good_div = driver.find_element(By.ID, "J_goodsList")
        # 2. One element per product.
        good_list = good_div.find_elements(By.CLASS_NAME, "gl-item")
        print(good_list)

        # Open the output file once per page instead of once per product.
        with open("jd.txt", "a", encoding="utf-8") as f:
            for good in good_list:
                goods = _format_good(good)
                print(goods)
                f.write(goods + "\n")

        # find_elements returns an empty list instead of raising when the
        # button is absent, which gives a clean stop condition.
        next_tags = driver.find_elements(By.CLASS_NAME, "pn-next")
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): assumes the last page marks the button with a
        # "disabled" class -- confirm against the live page markup.
        if "disabled" in (next_tag.get_attribute("class") or "").split():
            break
        next_tag.click()
        time.sleep(2)


driver = webdriver.Chrome(options=option)
try:
    driver.get("https://www.jd.com/")
    driver.implicitly_wait(10)
    input_tag = driver.find_element(By.ID, "key")
    input_tag.send_keys("坦克")
    # Alternative: submit the search with Enter instead of the button,
    # i.e. input_tag.send_keys(Keys.ENTER).
    search_button = driver.find_element(By.CLASS_NAME, "button")
    search_button.click()
    get_goods(driver)
    # Kept from the original script; presumably holds the browser open for
    # inspection once scraping has finished.
    time.sleep(1000)
finally:
    # quit() ends the whole session and the chromedriver process; close()
    # would only close the current window.
    driver.quit()
爬取某东商品信息
猜你喜欢
转载自www.cnblogs.com/tangda/p/10945487.html
今日推荐
周排行