方法
- 修改 `middlewares.py` 中的 DownloaderMiddleware
- 修改配置文件 `settings.py`（第 55 行）
简单版
from scrapy import signals
from scrapy.http import HtmlResponse
from selenium import webdriver
class ADownloaderMiddleware:
    """Downloader middleware that renders every request in Selenium Firefox.

    Each request is loaded in a freshly started Firefox WebDriver and the
    resulting page source is handed back to Scrapy as an ``HtmlResponse``.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and hook it up to the ``spider_opened`` signal.

        Args:
            crawler: Object exposing ``signals.connect``; the new instance's
                ``spider_opened`` method is registered on it.

        Returns:
            The newly created middleware instance.
        """
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        """Load ``request.url`` in Firefox and return the rendered page.

        A new browser is started for every request. Per the Scrapy
        downloader-middleware contract, returning a response object here
        means the built-in downloader is not used for this request.

        Args:
            request: The request being processed; only ``request.url`` is read.
            spider: The spider the request belongs to (unused).

        Returns:
            An ``HtmlResponse`` with status 200 whose body is the page source
            reported by the browser, encoded as UTF-8.
        """
        driver = webdriver.Firefox()
        try:
            driver.get(request.url)
            page_source = driver.page_source
        finally:
            # quit() (not close()) ends the whole WebDriver session, and the
            # finally clause guarantees it runs even when navigation fails,
            # so no browser is left behind by a failed request.
            driver.quit()
        return HtmlResponse(
            url=request.url,
            body=page_source,
            request=request,
            encoding='utf-8',
            status=200,
        )

    def process_response(self, request, response, spider):
        """Pass the response through unchanged.

        Args:
            request: The request that produced the response (unused).
            response: The response to forward.
            spider: The spider the response is for (unused).

        Returns:
            The same ``response`` object that was passed in.
        """
        return response

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened.

        Args:
            spider: Object providing ``logger`` and ``name``.
        """
        # Lazy %-style arguments: the logger formats the message itself.
        spider.logger.info('Spider opened: %s', spider.name)