Python 爬虫（四）

爬取斗破苍穹小说全本

import requests
from bs4 import BeautifulSoup
import re
import time
import lxml
class Spider():
    """Download a run of novel chapters from doupoxs.com into one text file.

    Each chapter page is fetched over HTTP, the inner text of every
    ``<p>...</p>`` element is extracted with a regular expression, echoed to
    stdout and appended (one paragraph per line) to ``output_path``.

    The crawl is configured through the class attributes below; override them
    on a subclass or an instance to change the target without editing the
    methods. Call :meth:`go` to run the crawl.
    """

    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/79.0.3945.88 Safari/537.36',
    }

    # File that receives the extracted text; opened in append mode.
    output_path = 'C:/Users/baishuai/Desktop/斗破苍穹.txt'
    # Chapter URL pattern; ``{}`` is replaced with the numeric page id.
    url_template = 'http://www.doupoxs.com/doupocangqiong/{}.html'
    # Page ids to download (the upper bound is exclusive).
    page_ids = range(1562, 1666)
    # Pause between consecutive requests, in seconds.
    delay_seconds = 1
    # Give up on a request after this many seconds instead of hanging forever.
    request_timeout = 10

    # Compiled once: non-greedy match of a paragraph body; re.S lets '.'
    # span newlines so multi-line paragraphs are captured whole.
    _paragraph_re = re.compile(r'<p>(.*?)</p>', re.S)

    @staticmethod
    def _extract_paragraphs(page_text):
        """Return the inner text of every ``<p>...</p>`` element in *page_text*.

        Args:
            page_text: Decoded HTML source of a page.

        Returns:
            A list of strings in document order; empty when nothing matches.
        """
        return Spider._paragraph_re.findall(page_text)

    def __analyse(self, url):
        """Fetch *url* and append its paragraphs to ``output_path``.

        A request failure or a non-200 status is reported on stdout and the
        page is skipped, so one bad chapter does not abort the whole crawl.

        Args:
            url: Absolute URL of the chapter page to download.
        """
        try:
            response = requests.get(
                url, headers=self.headers, timeout=self.request_timeout
            )
        except requests.RequestException as exc:
            print('skipped {}: {}'.format(url, exc))
            return

        if response.status_code != 200:
            print('skipped {}: HTTP {}'.format(url, response.status_code))
            return

        paragraphs = self._extract_paragraphs(response.content.decode('utf-8'))

        # Explicit UTF-8: the platform default codec (e.g. GBK on Windows)
        # cannot encode every character and would raise UnicodeEncodeError.
        # The context manager closes the file even if a write fails.
        with open(self.output_path, 'a', encoding='utf-8') as out_file:
            for paragraph in paragraphs:
                print(paragraph)
                out_file.write(paragraph + '\n')

    def __urll(self):
        """Download every page in ``page_ids`` in order, pausing between them."""
        for page_id in self.page_ids:
            self.__analyse(self.url_template.format(page_id))
            time.sleep(self.delay_seconds)

    def go(self):
        """Run the full crawl."""
        self.__urll()


# Start the crawl only when this file is executed as a script, so that
# importing the module does not trigger network requests and file writes.
if __name__ == '__main__':
    spider = Spider()
    spider.go()


猜你喜欢

转载自blog.csdn.net/weixin_45955630/article/details/103759456