前言
最近偶然发现新浪动图的其中一个接口,就顺便写了一个脚本定时把动图拿下来。
接口地址:https://interface.sina.cn/tech/gif/album.d.json
Known Issues:
- 只能拿到最近的1000条数据
代码块
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
import time
import requests
class SinaGifCrawler:
    """Download GIFs listed by the Sina GIF album JSON endpoint.

    Each entry returned by the endpoint is saved as
    ``<folder>/<sanitised short_intro>.gif``. Files that already exist are
    skipped, so repeated runs only fetch new entries. Because the file name is
    derived from ``short_intro`` alone, two entries with the same intro map to
    the same file and only the first one is kept.

    Known issue: the endpoint only exposes the most recent ~1000 entries.
    """

    url = 'https://interface.sina.cn/tech/gif/album.d.json'
    num = 200          # entries requested per page
    folder = './gif'   # download directory
    timeout = 10       # seconds, applied to every HTTP request

    # One-pass translation table: each unsafe character becomes '-'.
    _unsafeChars = str.maketrans('"*/\\ ?', '------')

    def __init__(self):
        """Start before the first page and make sure the folder exists."""
        self.page = 0
        self._createFolder()

    def _createFolder(self):
        """Create the download folder (and any missing parents) if needed."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.folder, exist_ok=True)

    def getJsonByPage(self, page):
        """Fetch one page of the album listing.

        Args:
            page: 1-based page number sent as the ``page`` query parameter.

        Returns:
            The decoded JSON object, or ``{}`` when the body is empty, is not
            valid JSON, or is not a JSON object.

        Raises:
            requests.RequestException: on connection errors or timeouts.
        """
        params = {'page': page, 'num': self.num}
        r = requests.get(self.url, params=params, timeout=self.timeout)
        if not r.text:
            return {}
        try:
            payload = r.json()
        except ValueError:
            # Every JSON decode error raised by requests is a ValueError.
            return {}
        return payload if isinstance(payload, dict) else {}

    def run(self):
        """Walk the listing page by page and download every new GIF.

        Paging continues from ``self.page`` and stops at the first page that
        cannot be fetched, reports a non-'0' status code, or has no entries.
        A failure on a single image is reported and skipped so that it does
        not abort the rest of the pass.
        """
        while True:
            self.page += 1
            try:
                jsonData = self.getJsonByPage(self.page)
            except requests.RequestException as err:
                print(f'page {self.page} request failed: {err}')
                break

            status = jsonData.get('status')
            if not isinstance(status, dict) or status.get('code') != '0':
                break

            data = jsonData.get('data')
            if not data:
                # None or an empty list both mean there is nothing left;
                # treating only None as the end would loop forever on [].
                print(jsonData)
                break

            for imgData in data:
                if not isinstance(imgData, dict):
                    continue
                imgUrl = imgData.get('img_url')
                imgName = imgData.get('short_intro')
                if not imgUrl or not isinstance(imgName, str):
                    continue  # malformed entry, nothing to download
                try:
                    self.downloadImg(imgUrl, imgName)
                except (requests.RequestException, OSError) as err:
                    print(f'{imgUrl} download failed: {err}')
        print('finish')

    def filterImgName(self, imgName):
        """Return ``imgName`` with ``" * / \\ ?`` and spaces replaced by '-'."""
        return imgName.translate(self._unsafeChars)

    def downloadImg(self, imgUrl, imgName):
        """Download ``imgUrl`` to ``<folder>/<filtered imgName>.gif``.

        Does nothing when the target file already exists.

        Raises:
            requests.RequestException: on network errors, timeouts or a
                non-2xx HTTP status.
            OSError: when the file cannot be written.
        """
        imgName = self.filterImgName(imgName)
        fileName = f'{self.folder}/{imgName}.gif'
        if os.path.exists(fileName):
            return

        # Download before touching the disk: opening the target first would
        # leave an empty file behind on failure, and the exists() check above
        # would then skip that image forever.
        resp = requests.get(imgUrl, timeout=self.timeout)
        resp.raise_for_status()

        # Write to a side file and rename, so an interrupted write never
        # leaves a truncated .gif under the final name.
        partName = f'{fileName}.part'
        with open(partName, 'wb') as f:
            f.write(resp.content)
        os.replace(partName, fileName)
        print(f'{fileName} save success')
if __name__ == "__main__":
    # Script entry point: crawl the whole listing, wait an hour, repeat
    # forever. A fresh crawler is built for every pass so paging restarts
    # from the first page each time.
    pause_seconds = 60 * 60
    while True:
        crawler = SinaGifCrawler()
        crawler.run()
        time.sleep(pause_seconds)