Python 爬取多玩图库

拿多玩图库练手，爬取了其中的图片、GIF 和视频。
找到接口就好办多了，下面直接上代码：

import urllib.request
import os
import json
import requests
import time
import re

# Id of the gallery to fetch (per the original note: the id of today's
# funny-picture gallery, a GIF gallery, or a video gallery). It is appended
# to the API URL as the "gid" query parameter further down.
jinristr = "138622" 
# File names are built from scraped captions, so they must be sanitized first
def validateTitle(title):
    """Return *title* made safe for use as a Windows file name.

    Every slash, backslash, colon, asterisk, question mark, double quote,
    angle bracket and vertical bar is replaced by an underscore. ASCII
    control characters (code points 0-31, e.g. newline or tab) are replaced
    as well, because Windows rejects them in file names too.

    Args:
        title: The raw text to turn into a file name.

    Returns:
        A string of the same length with each illegal character replaced
        by "_".
    """
    # Inside a character class only the backslash itself needs escaping.
    rstr = r'[/\\:*?"<>|\x00-\x1f]'
    new_title = re.sub(rstr, "_", title)
    return new_title
# Gallery API endpoint; the gallery id is appended as the "gid" parameter
url = "http://tu.duowan.com/index.php?r=show/getByGallery&gid="
# Build the full request URL; "_" carries the current time in milliseconds
# (presumably a cache-busting parameter expected by the site)
url = url + jinristr + "&_=" + str(int(round(time.time() * 1000)))
# The timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops early instead of trying to parse an error page.
resp = requests.get(url, timeout=30)
resp.raise_for_status()
dataJson = json.loads(resp.content)
listdaa = dataJson['picInfo']
# Order the entries by their 'ding' field (compared as integers), ascending
listdaa.sort(key=lambda x: int(x['ding']))
# Show what was found
for i in listdaa:
    print(i['ding'])
    print(i['add_intro'])
# One caption per line. Built with join() and stored under a name that does
# not shadow the builtin str(), which is still needed above.
intro_text = ''.join(i['add_intro'] + "\n" for i in listdaa)
# Local directory for the downloads. The gallery title is sanitized because
# it becomes a folder name and may contain characters Windows rejects.
path1 = 'G:\\多玩爬取图片\\' + validateTitle(dataJson['gallery_title'])
os.makedirs(path1, exist_ok=True)
# Download every entry that is not on disk yet
for i in listdaa:
    source_url = i['source']
    # Use the extension found in the source URL (query string dropped) so
    # that images and videos are not all saved as ".gif"; fall back to
    # ".gif" when the URL carries no extension.
    ext = os.path.splitext(source_url.split('?', 1)[0])[1] or '.gif'
    picpath = os.path.join(path1, validateTitle(i['add_intro'] + ext))
    if not os.path.exists(picpath):
        urllib.request.urlretrieve(source_url, picpath)
# Also write the captions to a text file (informational only). UTF-8 is set
# explicitly so captions containing characters outside the locale code page
# do not raise UnicodeEncodeError.
txtpath = os.path.join(path1, '1.txt')
with open(txtpath, 'w', encoding='utf-8') as f:
    f.write(intro_text)

爬下来就这样子
在这里插入图片描述

赫凯

发布了163 篇原创文章 · 获赞 117 · 访问量 21万+

私信关注

Python 爬取多玩图库

猜你喜欢