"""Walk Baidu search result pages in Chrome for each keyword in keywords.txt.

For every keyword the script opens the first result page, then follows the
last link of the pager up to four times, stopping early once the highlighted
page number reaches five. A blank line in the keyword file ends the run.
"""
import re
import time
from urllib.parse import quote

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

KEYWORDS_FILE = 'keywords.txt'
SEARCH_URL = 'https://www.baidu.com/s?wd='
NEXT_LINK_XPATH = '//*[@id="page"]/a[last()]'
PAGE_LOAD_DELAY = 5   # seconds to pause after every navigation
MAX_PAGE_TURNS = 4    # pager links followed per keyword
LAST_PAGE = 5         # stop paging once this page number is shown

# Extracts the number of the currently highlighted pager entry from the HTML.
PAGE_NUMBER_RE = re.compile(
    r'<strong>.*?class="fk fk_cur".*?<span class="pc">(\d+)</span>'
)

# NOTE(review): the driver path is passed positionally as in the original;
# newer Selenium 4 releases may expect a Service object instead - confirm
# against the installed Selenium version.
dr = webdriver.Chrome('C:/Users/chromedriver.exe')
try:
    dr.implicitly_wait(1)

    # Close the file deterministically and strip the trailing newline so the
    # blank-line check below works and no "\n" ends up in the search URL.
    # The file is read with the platform default encoding, as before.
    with open(KEYWORDS_FILE, 'r') as keyword_file:
        drug_name = [line.strip() for line in keyword_file]

    for keyword in drug_name:
        if not keyword:
            # A blank line marks the end of the keyword list.
            print('查询结束!')
            break

        print('正在查询:', keyword)
        print('当前页数为:1')
        # Percent-encode the keyword so '&', '#', spaces etc. stay inside wd=.
        dr.get(SEARCH_URL + quote(keyword, safe=''))
        time.sleep(PAGE_LOAD_DELAY)

        for _ in range(MAX_PAGE_TURNS):
            try:
                next_link = dr.find_element(By.XPATH, NEXT_LINK_XPATH)
            except NoSuchElementException:
                # No pager on this page; move on to the next keyword.
                print('未找到翻页链接,停止翻页')
                break

            url = next_link.get_attribute("href")
            if not url:
                # The last pager entry carries no link to follow.
                print('未找到翻页链接,停止翻页')
                break
            dr.get(url)

            match = PAGE_NUMBER_RE.search(dr.page_source)
            if match is None:
                # Pager markup not recognised; avoid crashing on a missing match.
                print('无法识别当前页数,停止翻页')
                break

            page_now = match.group(1)
            print('当前页数为:', page_now)
            time.sleep(PAGE_LOAD_DELAY)
            if int(page_now) >= LAST_PAGE:
                break

    print('查询结束')
finally:
    # Always shut the browser down, even when a lookup fails midway.
    dr.quit()
百度自动查询信息辅助工具
猜你喜欢
转载自blog.csdn.net/luzaofa/article/details/79712367
今日推荐
周排行