1.qishu.py
# URLs of the files to download; must be a list.
# If you are not downloading and only want to store the address in the
# database, no list is needed.
qishu['download_url'] = [download_url]
2.在pipelines.py中自定义自己的pipeline
from scrapy.pipelines.files import FilesPipeline
from scrapy.http import Request
class QishuxiazaiPipeline(FilesPipeline):
    """Custom file-download pipeline for novel files.

    Reads the list of URLs from ``item['download_url']``, schedules a
    download for each one, and stores every file under its original
    file name (the last segment of the URL path).
    """

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['download_url']``.

        The field is documented as a list, so every URL is scheduled —
        not just the first element.
        """
        for file_url in item['download_url']:
            # Carry the item along in meta so later hooks can access it.
            yield Request(file_url, meta={'item': item})

    def file_path(self, request, response=None, info=None):
        """Return the relative storage path for a downloaded file.

        The name is taken from ``request.url`` (last path segment) so
        that each URL in the list gets its own distinct file name.
        """
        novel_name = request.url.split('/')[-1]
        return novel_name

    def item_completed(self, results, item, info):
        """Log the download results and pass the item on unchanged.

        ``results`` is a list of ``(success, info_or_failure)`` tuples,
        one entry per requested file.
        """
        print(results)
        return item
3.在settings.py中开启自己的pipeline
# Enable the custom download pipeline (lower number = runs earlier).
ITEM_PIPELINES = {
'Qishu.pipelines.QishuxiazaiPipeline': 3,
# When using the custom QishuxiazaiPipeline, the built-in FilesPipeline
# must be disabled by setting it to None.
'scrapy.pipelines.files.FilesPipeline':None
}
# Directory where downloaded files are stored.
FILES_STORE = 'files'
# Item field that holds the list of file URLs.
FILES_URLS_FIELD = 'download_url'