scrapy对接selenium【极简】代码(待完成)

方法

  1. 修改middlewares.py中的DownloaderMiddleware
  2. 修改配置文件settings.py(默认模板约第55行):取消DOWNLOADER_MIDDLEWARES的注释,并在其中启用上面修改的中间件

简单版

# -*- coding: utf-8 -*-

from scrapy import signals
from scrapy.http import HtmlResponse
from selenium import webdriver


class ADownloaderMiddleware:
    """Downloader middleware that fetches every request through Selenium.

    Each request is loaded in a freshly started Firefox instance and the
    rendered page source is handed back as an ``HtmlResponse``. Because
    ``process_request`` returns a response object, the request is answered
    here instead of being downloaded by the regular downloader.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to the ``spider_opened`` signal.

        Args:
            crawler: The crawler whose signal manager is used for the hookup.

        Returns:
            The newly created middleware instance.
        """
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_request(self, request, spider):
        """Load ``request.url`` in Firefox and return the rendered HTML.

        Args:
            request: The request being processed; only its ``url`` is read.
            spider: The spider that issued the request (unused).

        Returns:
            An ``HtmlResponse`` with status 200 whose body is the page source
            reported by the browser, encoded as UTF-8.
        """
        driver = webdriver.Firefox()
        try:
            driver.get(request.url)
            page_source = driver.page_source
        finally:
            # quit() ends the whole WebDriver session (browser and driver
            # process); close() only closes the current window. Running it in
            # ``finally`` also releases the browser when the page load fails.
            driver.quit()
        return HtmlResponse(
            url=request.url,
            body=page_source,
            request=request,
            encoding='utf-8',
            status=200,
        )

    def process_response(self, request, response, spider):
        """Pass the response through unchanged."""
        return response

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info('Spider opened: %s', spider.name)

猜你喜欢

转载自blog.csdn.net/Yellow_python/article/details/83050047