Python3：使用 selenium + PhantomJS + bs4 爬取斗鱼页面

from selenium import webdriver
import time
from bs4 import BeautifulSoup

class douyuSelenium():
    """Scrape room titles and popularity figures from the Douyu directory.

    Typical use is ``setup()`` -> ``testDouyu()`` -> ``shutdown()``.

    NOTE(review): ``webdriver.PhantomJS`` and ``find_element_by_class_name``
    look like Selenium 3 era APIs; presumably this script needs an older
    Selenium release -- confirm before upgrading.
    """

    # Page that testDouyu() loads first.
    DIRECTORY_URL = 'https://www.douyu.com/directory/all'
    # Seconds to pause before reading the page source on every iteration
    # (presumably to give the page time to render -- tune if rooms are missed).
    PAGE_LOAD_DELAY = 2

    def __init__(self):
        # No browser is started until setup() runs, so shutdown() and
        # testDouyu() can tell whether there is a driver to work with.
        self.driver = None

    def setup(self):
        """Start the PhantomJS browser and keep it in ``self.driver``."""
        self.driver = webdriver.PhantomJS()

    @staticmethod
    def _popularity_for(title):
        """Return the popularity ``<span>`` that sits in the same room as *title*.

        Walks up from the title tag and returns the first
        ``<span class="dy-num fr">`` found inside an ancestor that holds no
        other ``<h3 class="ellipsis">`` title. Returns ``None`` when the walk
        reaches an ancestor shared with another title without finding one.
        """
        for ancestor in title.parents:
            # Two titles under the same ancestor means we have climbed out of
            # this room's own markup; any span found here could belong to a
            # different room.
            if len(ancestor.find_all('h3', {'class': 'ellipsis'}, limit=2)) > 1:
                return None
            num = ancestor.find('span', {'class': 'dy-num fr'})
            if num is not None:
                return num
        return None

    def testDouyu(self):
        """Print the title and popularity of every listed room, page by page.

        Loads ``DIRECTORY_URL``, prints one line per room, then clicks the
        ``shark-pager-next`` element and repeats until the page source
        contains ``shark-pager-disable-next``. Starts the browser first if
        ``setup()`` has not been called yet.
        """
        if self.driver is None:
            self.setup()
        self.driver.get(self.DIRECTORY_URL)

        while True:
            time.sleep(self.PAGE_LOAD_DELAY)

            # Read the source once so that parsing and the last-page check
            # below look at the same snapshot of the page.
            page_source = self.driver.page_source
            soup = BeautifulSoup(page_source, 'lxml')

            # Pair each title with the popularity found in its own room
            # instead of zipping two independent lists, which misaligns every
            # following pair as soon as one element is missing.
            for title in soup.find_all('h3', {'class': 'ellipsis'}):
                num = self._popularity_for(title)
                if num is None:
                    # No popularity in this title's room: skip rather than guess.
                    continue
                info = '房间标题：' + title.text.strip() + '\t' + '人气：' + num.text
                print(info)

            # The disabled "next" marker in the source means this was the
            # last page.
            if 'shark-pager-disable-next' in page_source:
                break
            # Otherwise move on to the next page.
            self.driver.find_element_by_class_name('shark-pager-next').click()

    def shutdown(self):
        """Print the completion message and quit the browser if one is running.

        Safe to call when ``setup()`` never ran, and safe to call twice.
        """
        print('加载完成。。。。')
        if self.driver is not None:
            self.driver.quit()
            self.driver = None
# Script entry point: start the browser, scrape every page, then quit.
if __name__=='__main__':
    douyu=douyuSelenium()
    douyu.setup()
    try:
        douyu.testDouyu()
    finally:
        # Quit the browser even when scraping raises, so the browser started
        # by setup() is not left running.
        douyu.shutdown()
Python3：使用 selenium + PhantomJS + bs4 爬取斗鱼页面

猜你喜欢