参考链接:
scrapy自定义重试方法
Retrying a Scrapy Request even when receiving a 200 status code
def parse(self, response):
    """Parse a JSON response, retrying the request when the body is not valid JSON.

    Scrapy only retries on certain HTTP error codes by default; this callback
    implements a custom retry for 200-OK responses whose body fails to parse
    (e.g. an anti-bot page served with a success status).

    Retry bookkeeping lives in ``request.meta['retry_times']``; the request is
    re-yielded with ``dont_filter=True`` so the scheduler's duplicate filter
    does not drop it. Gives up after ``self.max_retry_times`` attempts.

    :param response: the Scrapy ``Response`` being parsed.
    :yields: a copied retry ``Request`` when the body is not JSON and the
        retry budget is not exhausted; nothing otherwise.
    """
    try:
        data = json.loads(response.text)
        # NOTE(review): success-path handling of `data` is elided in this
        # snippet — presumably items are extracted/yielded here.
    except json.decoder.JSONDecodeError:
        # Retry count is carried through request.meta across attempts.
        next_retry = response.meta.get('retry_times', 0) + 1
        if next_retry <= self.max_retry_times:
            # Copy the original request so headers/cookies/callback carry over.
            retry_request = response.request.copy()
            # BUGFIX: original assigned undefined name `retries`; the
            # incremented counter is `next_retry`.
            retry_request.meta['retry_times'] = next_retry
            # Bypass the duplicate filter so the same URL can be re-fetched.
            # Use with care: without the retry cap this could loop forever.
            retry_request.dont_filter = True
            yield retry_request
        else:
            self.logger.debug("Give up retrying {}, failed {} times".format(
                response.url, next_retry
            ))