import time
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Start a Chrome browser session.
browser = webdriver.Chrome()
# Explicit-wait helper bound to that session, with a 3 second timeout.
wait = WebDriverWait(browser, 3)

# Locator of the <div> inside the first top-level list entry; its presence is
# used as the signal that the page has been loaded.
first_entry_locator = (By.XPATH, '/html/body/div[4]/div[1]/ul/li[1]/div')

try:
    browser.get("https://souky.eol.cn/api/newapi/assess_result")
    wait.until(EC.presence_of_element_located(first_entry_locator))
except TimeoutException:
    # A timeout is only reported; the script carries on regardless.
    print('Timeout')
def click_according_text(text):
    """Click the link whose visible text equals ``text``.

    A Selenium failure while locating or clicking the link is reported on
    stdout instead of being raised, so the caller can move on to the next
    item.

    Args:
        text: Visible text of the link to click.
    """
    try:
        # find_element(By.LINK_TEXT, ...) is used because the
        # find_element_by_link_text shortcut was removed in Selenium 4.3.
        browser.find_element(By.LINK_TEXT, text).click()
    except WebDriverException:
        # Only Selenium errors are swallowed; KeyboardInterrupt and
        # SystemExit still propagate, unlike with a bare ``except``.
        print(text+'不可点击')
#click_according_text("理学")
# Seven two-element lists, none of which is read anywhere in the visible
# script. NOTE(review): each second value equals the matching entry of ``k``
# in the main loop minus one, so these look like the [first, last] sub-entry
# indices of the seven top-level entries -- confirm before relying on them.
a, b, c, d, e, f, g = (
    [1, 17],
    [1, 14],
    [1, 36],
    [1, 9],
    [1, 9],
    [1, 5],
    [1, 5],
)
def click_locatin_element(element, text):
    """Click the element located by the XPath ``element``.

    A Selenium failure while locating or clicking the element is reported on
    stdout instead of being raised, so the caller can move on to the next
    item.

    Args:
        element: XPath expression of the element to click.
        text: Label printed in front of the failure message.
    """
    try:
        # find_element(By.XPATH, ...) is used because the
        # find_element_by_xpath shortcut was removed in Selenium 4.3.
        browser.find_element(By.XPATH, element).click()
    except WebDriverException:
        # Only Selenium errors are swallowed; KeyboardInterrupt and
        # SystemExit still propagate, unlike with a bare ``except``.
        print(text + "不可点击")
def get_secien(element, csv_path='C:/Users/Administrator/Desktop/学科2014.csv'):
    """Append one entry's label and the table shown after clicking it to a CSV.

    The text of the element at ``element`` is appended to ``csv_path`` as a
    single-cell row, the element is clicked, and the first HTML table of the
    page then displayed by the browser is appended below it.

    Args:
        element: XPath expression of the entry to record and click.
        csv_path: CSV file that rows are appended to. Defaults to the path
            the script has always written to.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If ``element``
            matches nothing when its label is read.
        ValueError: If pandas finds no table in the page.
    """
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    entry = browser.find_element(By.XPATH, element)
    label = pd.DataFrame([entry.text])
    label.to_csv(csv_path, sep=',', mode='a', header=False, index=False)

    # The XPath doubles as the label shown if the click fails.
    click_locatin_element(element, element)
    # Let the page update in response to the click before reading it.
    time.sleep(3)

    # Parse the HTML the browser is showing now. Handing the URL to pandas
    # would trigger a separate HTTP request that knows nothing about the
    # click, so every entry would be saved with the same default table.
    # StringIO is used because newer pandas deprecates literal HTML strings.
    data = pd.read_html(StringIO(browser.page_source))[0]
    data.to_csv(csv_path, sep=',', mode='a', header=False, index=False)
# Exclusive upper bound of the sub-entry index j for each of the seven
# top-level entries (so entry 1 is visited for j = 1..17, and so on).
# Defined once here instead of being rebuilt on every outer iteration.
k = [18, 15, 37, 10, 10, 6, 6]

try:
    for i, stop in enumerate(k, start=1):
        for j in range(1, stop):
            # XPath of sub-entry j under top-level entry i.
            element = "/html/body/div[4]/div[1]/ul/li[{}]/ul/li[{}]".format(i, j)
            get_secien(element)
finally:
    # Always shut down Chrome and its driver process, even when a lookup
    # fails part-way through the scrape.
    browser.quit()
# Python——selenium爬取学科
# 猜你喜欢
# 转载自blog.csdn.net/weixin_43213658/article/details/88673290
# 今日推荐
# 周排行