import time

from bs4 import BeautifulSoup
from selenium import webdriver


class douyuSelenium():
    """Scrape room titles and popularity figures from the Douyu directory.

    Typical use is ``setup()`` -> ``testDouyu()`` -> ``shutdown()``, with
    ``shutdown()`` placed in a ``finally`` clause so the browser process is
    always released.
    """

    def __init__(self):
        # Populated by setup(). Starting as None lets shutdown() be called
        # safely even when setup() never ran or failed part-way.
        self.driver = None

    def setup(self):
        """Start the PhantomJS browser used for scraping."""
        self.driver = webdriver.PhantomJS()

    def testDouyu(self):
        """Print the title and popularity of every room, page by page.

        Opens the "all rooms" directory, prints one line per room found on
        the current page, then clicks the "next page" control. The loop ends
        once the page source contains the marker class of a disabled "next"
        button.
        """
        self.driver.get('https://www.douyu.com/directory/all')

        while True:
            # Fixed pause before reading the page; presumably gives the
            # dynamically loaded listing time to render.
            time.sleep(2)

            # Take ONE snapshot per iteration so the parsed rooms and the
            # last-page check below are based on the same DOM state.
            page = self.driver.page_source
            soup = BeautifulSoup(page, 'lxml')

            titles = soup.find_all('h3', {'class': 'ellipsis'})
            nums = soup.find_all('span', {'class': 'dy-num fr'})

            # NOTE(review): titles and counts are collected independently and
            # paired by position. If any room lacks one of the two elements,
            # every later pair on the page is shifted (the original author
            # suspected such a mismatch). Confirm against the live DOM.
            for title, num in zip(titles, nums):
                info = '房间标题:' + title.text.strip() + '\t' + '人气:' + num.text
                print(info)

            # The pager marks its "next" button with this class on the last
            # page; stop instead of clicking a disabled control.
            if 'shark-pager-disable-next' in page:
                break

            self.driver.find_element_by_class_name('shark-pager-next').click()

    def shutdown(self):
        """Print the completion message and quit the browser, if one is running."""
        print('加载完成。。。。')
        if self.driver is not None:
            self.driver.quit()
            # Drop the reference so a second shutdown() call is harmless.
            self.driver = None


if __name__ == '__main__':
    douyu = douyuSelenium()
    douyu.setup()
    try:
        douyu.testDouyu()
    finally:
        # Always release the browser process, even if scraping fails.
        douyu.shutdown()
Python3:使用 selenium、PhantomJS 和 bs4 爬取斗鱼页面
猜你喜欢
转载自blog.csdn.net/zbrj12345/article/details/80373497
今日推荐
周排行