# ===== 图片懒加载 (scraping lazy-loaded images) =====
# 图片懒加载
from lxml import etree
import requests
# Scrape picture titles and lazy-loaded image URLs from listing pages 1-4 of
# sc.chinaz.com and store them in fengjing.txt, one "title:url" line per image.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
}
# URL template for pages 2 and up; page 1 uses a separate URL with no suffix.
url = 'http://sc.chinaz.com/tupian/fengjingtupian_%d.html'

# The context manager closes the output file even when a request or an XPath
# lookup raises part-way through the crawl.
with open('fengjing.txt', 'w', encoding='utf-8') as fp:
    for page in range(1, 5):
        if page == 1:
            new_url = 'http://sc.chinaz.com/tupian/fengjingtupian.html'
        else:
            new_url = url % page

        response = requests.get(url=new_url, headers=headers)
        # Decode the body as UTF-8 once, here, instead of repairing each title
        # afterwards with an ISO-8859-1 round-trip (which raises
        # UnicodeEncodeError whenever requests picks a different charset).
        # NOTE(review): assumes the pages are served as UTF-8 -- confirm.
        response.encoding = 'utf-8'
        page_text = response.text

        tree = etree.HTML(page_text)
        div_list = tree.xpath('//*[@id="container"]/div')
        for div in div_list:
            titles = div.xpath('./p/a/text()')
            # Lazy loading: the image URL is read from the "src2" attribute
            # rather than "src".
            sources = div.xpath('./ div/a/img/@src2')
            if not titles or not sources:
                # Skip container children that lack a title or an image URL
                # instead of failing on [0] with IndexError.
                continue
            img_title = titles[0]
            img_src = sources[0]
            content = img_title + ':' + img_src + '\n'
            fp.write(content)
# ===== qq空间模拟登录 (simulated Qzone login) =====
from selenium import webdriver
from time import sleep
# Drive Chrome through the Qzone account/password login form and print the
# page source that is shown a few seconds after submitting.

# Location of the browser driver executable.
driver = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    url = 'https://qzone.qq.com/'
    driver.get(url)
    # The elements looked up below are searched for inside the frame named
    # 'login_frame', so switch into it first.
    driver.switch_to.frame('login_frame')
    # NOTE(review): 'switcher_plogin' is presumably the control that switches
    # the form to account/password login -- confirm against the live page.
    m = driver.find_element_by_id('switcher_plogin')
    print(m)
    m.click()
    driver.find_element_by_id('u').send_keys('用户名')
    driver.find_element_by_id('p').send_keys('密码')
    driver.find_element_by_id('login_button').click()
    # Fixed pause before reading the page source (presumably to let the
    # post-login page load).
    sleep(3)
    page_text = driver.page_source
    print(page_text)
finally:
    # Always end the browser session, even when a lookup or click above fails.
    driver.quit()
# 药监总局: 使用selenium将药监总局的首页的企业名称进行爬取(1-5页)
from selenium import webdriver
from time import sleep
# Collect the link texts listed under the element with id "gzlist" for pages
# 1-5 of http://125.35.6.84:81/xk/ and write them to 药监企业.txt, one per line.

# Location of the browser driver executable.
driver = webdriver.Chrome(executable_path=r'chromedriver.exe')
try:
    # Open the start page.
    driver.get('http://125.35.6.84:81/xk/')
    # The context manager closes the output file even if a click or lookup
    # below raises.
    with open('药监企业.txt', 'w', encoding='utf-8') as fp:
        for page in range(1, 6):
            # Id of the pager element to click: bare for page 1, suffixed with
            # the page number for the others.
            if page == 1:
                path = 'pageIto_first'
            else:
                path = f'pageIto_first{page}'
            # "What you see is what you get": click the pager element the same
            # way a user would.
            driver.find_element_by_id(path).click()
            # Fixed pause before reading the list (presumably to let the new
            # page of results render).
            sleep(3)
            a_list = driver.find_elements_by_xpath('//*[@id="gzlist"]/li/dl/a')
            fp.writelines(a.text + '\n' for a in a_list)
            print(f'完成第{page}页面数据获取')
finally:
    # quit() ends the whole WebDriver session and its driver process;
    # close() would only close the current window.
    driver.quit()