# selenium链接
# selenium基本使用-操作表单元素,行为链,键盘操作
from selenium import webdriver
import time
# Open the Baidu home page, print its HTML after a fixed pause, then shut the
# browser down.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
try:
    driver.get('https://www.baidu.com/')
    # Fixed pause before the page source is read.
    time.sleep(5)
    print(driver.page_source)
    # Close the current window.
    driver.close()
finally:
    # Always end the session, so a failed page load does not leave the
    # browser and chromedriver processes running.
    driver.quit()
from selenium import webdriver
import time
from lxml import etree
# Demonstrates several element lookup methods on the Baidu home page.
# Each assignment below overwrites inputTag, so only the result of the final
# XPath lookup is the element that receives the keystrokes.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
inputTag = driver.find_element_by_id('kw')
inputTag = driver.find_element_by_name('wd')
# The element's class attribute is "s_ipt nobg_s_fm_hover", but passing the
# whole value did not work, so only "s_ipt" is used here.
inputTag = driver.find_element_by_class_name('s_ipt')
inputTag = driver.find_element_by_xpath("//input[@id='kw']")
inputTag.send_keys('python')
'''
selenium 的查找方法(例如 inputTag = driver.find_element_by_xpath("//input[@id='kw']"))效率没有 lxml 的 xpath 高,因为 lxml 是用 C 语言写的。如果不需要向元素输入数据,尽量用 lxml 的 xpath。
'''
# 清除输入的内容
from selenium import webdriver
import time
from lxml import etree
# Type text into the element with id 'kw' on the Baidu home page, pause, then
# clear what was typed.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
inputTag = driver.find_element_by_id('kw')
inputTag.send_keys('python')
# Fixed 5-second pause between typing and clearing.
time.sleep(5)
inputTag.clear()
# 豆瓣网登录页面自动点击下次自动登录
from selenium import webdriver
import time
from lxml import etree
# Open Douban, step into the first <iframe> on the page, and click the
# element named 'remember' inside it.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.douban.com/')

# Lookups after this switch are resolved inside the frame.
login_frame = driver.find_element_by_tag_name("iframe")
driver.switch_to.frame(login_frame)

remember = driver.find_element_by_name('remember')
# Fixed 2-second pause before the click.
time.sleep(2)
remember.click()
# 亚马逊网站 操作表单元素
from selenium import webdriver
import time
from selenium.webdriver.support.ui import Select
# Wrap the first <select> element on the Amazon China home page in a Select
# helper and choose an option by its visible text.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.amazon.cn/')
# Alternative lookup by class name, left disabled by the original author:
# remember = Select(driver.find_element_by_class_name('nav-line-2'))
remember = Select(driver.find_element_by_tag_name("select"))
# Alternative selection by option index, left disabled by the original author:
# remember.select_by_index(4)
remember.select_by_visible_text("音像")
# 百度 搜索并点击
from selenium import webdriver
# Type a query into the element with id 'kw' on the Baidu home page, then
# click the element with id 'su'.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
input_Tag = driver.find_element_by_id('kw')
input_Tag.send_keys('python')
# NOTE: input_Tag is rebound here; from this point it refers to the 'su'
# element that gets clicked, not the field that was typed into.
input_Tag = driver.find_element_by_id('su')
input_Tag.click()
# 行为链
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import time
# Queue a sequence of mouse/keyboard actions on the Baidu home page and run
# them in one go with perform().
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')

inputTag = driver.find_element_by_id('kw')
submitBtn = driver.find_element_by_id('su')

actions = ActionChains(driver)
# Each ActionChains method returns the chain itself, so the steps can be
# written fluently; nothing is sent to the browser until perform().
(
    actions
    .move_to_element(inputTag)
    .send_keys_to_element(inputTag, 'python')
    .move_to_element(submitBtn)
    .click(submitBtn)
    .perform()
)
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import time
# Cookie handling: list every cookie, then read, delete and re-read one
# cookie by name, and finally clear them all.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')

all_cookies = driver.get_cookies()
for cookie in all_cookies:
    print(cookie)

cookie_name = "PSTM"
# Print the cookie before and after deleting it to show the effect.
print(driver.get_cookie(cookie_name))
driver.delete_cookie(cookie_name)
print(driver.get_cookie(cookie_name))

driver.delete_all_cookies()
from selenium import webdriver
import time
# Implicit-wait demonstration: set a 5-second implicit wait and then look up
# an element id that does not exist.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
# The id used below is made up on purpose; per the original author's note the
# lookup raises an error after the 5-second wait.
driver.implicitly_wait(5)
driver.find_element_by_id("ahhdggcsdhgc")
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Explicit-wait demonstration: inside Douban's first <iframe>, wait up to
# 4 seconds for elements named 'phone' to be present, then print the result.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.douban.com/')

login_frame = driver.find_element_by_tag_name("iframe")
driver.switch_to.frame(login_frame)

phone_locator = (By.NAME, 'phone')
wait = WebDriverWait(driver, 4)
# presence_of_all_elements_located is the plural condition, so `element`
# holds whatever that condition returns for all matches, not a single match.
element = wait.until(EC.presence_of_all_elements_located(phone_locator))
print(element)
#浏览器中切换页面
from selenium import webdriver
# Open three extra pages from the Baidu page via JavaScript window.open, then
# point the driver at one of the new windows and inspect it.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
driver.execute_script("window.open('https://www.douban.com/')")
driver.execute_script("window.open('https://www.csdn.net/')")
driver.execute_script("window.open('https://www.bilibili.com/')")
print(driver.window_handles)
# Original author's note: among the pages opened with execute_script, index 1
# is the most recently opened one.
# NOTE(review): the handle order is not established by this script; confirm.
driver.switch_to.window(driver.window_handles[1])
# Show which URL the driver currently points at.
print(driver.current_url)
# Show the source of the current page.
print(driver.page_source)
#使用代理ip
from selenium import webdriver
# Start Chrome with a --proxy-server argument and load httpbin's /ip page.
# NOTE(review): presumably the page is used to check which IP the request
# appears to come from; confirm.
# Prints the literal 1; looks like a leftover progress marker.
print(1)
driver_path = r"C:\Users\top\Downloads\chromedriver_win32/chromedriver.exe"
options = webdriver.ChromeOptions()
# The proxy address is hard-coded.
options.add_argument("--proxy-server=http://118.212.104.150:9999")
driver = webdriver.Chrome(executable_path=driver_path,options=options)
driver.get("http://httpbin.org/ip")
# 使用代理ip
from selenium import webdriver
# Launch Chrome with a hard-coded proxy argument and request httpbin's /ip
# page through it.
print(1)
driver_path = r"C:\Users\top\Downloads\chromedriver_win32/chromedriver.exe"

proxy_argument = "--proxy-server=http://118.212.104.150:9999"
options = webdriver.ChromeOptions()
options.add_argument(proxy_argument)

driver = webdriver.Chrome(options=options, executable_path=driver_path)
driver.get("http://httpbin.org/ip")
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement
# Inspect a located element (its Python type and its "value" attribute), save
# a screenshot, then open another page in a new window via JavaScript.
driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get("https://www.baidu.com/")

submitBtn = driver.find_element_by_id('su')
print(type(submitBtn))
print(submitBtn.get_attribute("value"))

# Save a screenshot of the current page to baidu.png.
driver.save_screenshot('baidu.png')

url = 'https://blog.csdn.net/weixin_45949073/article/details/106135692'
# Open the URL in a new page.
driver.execute_script("window.open('{}')".format(url))
from selenium import webdriver#还需要把红包去掉才行
from lxml import etree
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
class lagouSplider(object):
    """Selenium crawler for Lagou's Python job search results.

    ``run`` pages through the result list, collecting the detail-page links
    found on every page, and then visits each collected link and prints the
    position name and salary scraped from it.

    NOTE(review): a comment next to the file's imports says the "red envelope"
    popup still has to be dismissed; that is not handled here.
    """

    # Path to the chromedriver executable passed to ``webdriver.Chrome``.
    driver_path = r"C:\Users\top\Downloads\chromedriver_win32\chromedriver.exe"

    def __init__(self):
        """Start a Chrome session and initialise the crawl state."""
        self.driver = webdriver.Chrome(executable_path=lagouSplider.driver_path)
        self.url = 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
        # Flat list of detail-page URLs accumulated across all list pages.
        self.list_1 = list()

    def run(self):
        """Walk every result page, then fetch every collected detail page."""
        self.driver.get(self.url)
        # Last <span> of the pager; the code treats it as the "next" button.
        next_btn_xpath = "//div[@class='pager_container']/span[last()]"
        while True:
            # Wait for the pager BEFORE reading the page source, so the
            # snapshot that gets parsed is taken once the list has rendered.
            WebDriverWait(driver=self.driver, timeout=10).until(
                EC.presence_of_element_located((By.XPATH, next_btn_xpath))
            )
            self.parse_list_page(self.driver.page_source)
            next_btn = self.driver.find_element_by_xpath(next_btn_xpath)
            if "pager_container_disabled" in next_btn.get_attribute("class"):
                # The button is marked disabled: no further page to visit.
                break
            next_btn.click()
            # Short pause after the click before the next page is read.
            time.sleep(1)
        # Detail pages are visited only after paging has finished, because
        # request_detail_page navigates the same driver away from the list.
        self.a(self.list_1)

    def a(self, list_1, delay=1):
        """Visit every detail-page URL in *list_1*.

        Args:
            list_1: iterable of detail-page URLs.
            delay: seconds to sleep after each page (defaults to the
                original fixed value of 1).
        """
        for link in list_1:
            self.request_detail_page(link)
            time.sleep(delay)

    def parse_list_page(self, source):
        """Extract the position links from a list page's HTML *source*."""
        html = etree.HTML(source)
        links = html.xpath("//a[@class='position_link']/@href")
        # extend, not append: keep list_1 a flat list of URL strings so each
        # entry can be handed straight to driver.get().
        self.list_1.extend(links)

    def request_detail_page(self, url):
        """Load *url* in the browser and parse the resulting page."""
        self.driver.get(url)
        source = self.driver.page_source
        self.parse_detail_page(source)

    def parse_detail_page(self, source):
        """Print the position name and salary found in a detail page."""
        html = etree.HTML(source)
        position_name = html.xpath("//h1[@class='name']/text()")
        position_salary = html.xpath("//dd[@class='job_request']//span[@class='salary']/text()")
        print(position_name)
        print(position_salary)
# Script entry point: build the crawler and start it right away.
if __name__ == '__main__':
    lagouSplider().run()