直接上干货
一.页面分析
1.打开Chrome,输入 https://www.baomihua.com/
2.随便打开一个视频页
网址为:https://video.baomihua.com/v/37869538?reportto=REC_37869538
经测试,?reportto=REC_37869538 这个参数可以不加
所以入口地址为:https://video.baomihua.com/v/37869538
效果如下图:
打开开发者工具(按 F12),切换到 Network 面板
按 F5 刷新页面,重新抓包
分析出视频的真正播放地址
在以上图片上可以看出
第1个media 的Status为(canceled) 所以忽略掉
第2个media 的Status为302, Type为空,这个重定向拿到了真实的播放地址,如下图
第3个media就是我们要的数据源
经以上数据分析得出:
最后的真实视频地址url:
http://219.135.240.146/vm36003.baomihua.com/dad0a3a539b1a623b61cf793948cd6ed/5B286BA1/3787/37869538_7_eaf7b02fbe74965404c0e7e301b2b3f7.mp4?wsrid_tag=5b286b03_PSgdhzdx7zz143_9439-19129&wsiphost=local
初步的视频地址url:
http://vm36003.baomihua.com/dad0a3a539b1a623b61cf793948cd6ed/5B286BA1/3787/37869538_7_eaf7b02fbe74965404c0e7e301b2b3f7.mp4
获得视频地址的url:
https://play.baomihua.com/getvideourl.aspx?jsoncallback=jQuery112409860452230116918_1529375493874&flvid=37869538&devicetype=pc_noflash&dataType=json&_=1529375493875
因此抓取流程分三步:先请求“获得视频地址的url”拿到视频数据,再用返回的数据拼出初步视频地址url,最后请求初步视频地址url,通过302重定向得到真实视频地址url
获得视频地址的url参数分析:
jsoncallback=jQuery112409860452230116918_1529375493874 经测试,jQuery后面的数字是随机的
flvid=37869538 传入视频id
devicetype=pc_noflash 不变
dataType=json 不变
_=1529375493875 毫秒级时间戳(jQuery 追加的防缓存参数),可以保持不变,也可以省略(下面的代码就没有带这个参数)
请求头 referer(视频页地址): https://video.baomihua.com/v/37869538
请求发出后得到数据
初步的视频地址url分析:
http://vm36003.baomihua.com/dad0a3a539b1a623b61cf793948cd6ed/5B286BA1/3787/37869538_7_eaf7b02fbe74965404c0e7e301b2b3f7.mp4
对应的格式为(host、dir、stream_name、videofiletype 都是上一步返回数据中的字段):
http://host/dir/stream_name.videofiletype
以下是python2.7代码
# coding:utf-8 import requests import urllib import re import os import json import random import time from hashlib import md5 from requests.exceptions import RequestException class BaomihuaMovie(object): #基本链接 base_url = 'http://video.baomihua.com/v/' #请求头信息 headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36' } # 获取影片数据url # 参数1:{0}生成jQuery动态函数,{1}影片id playurl = 'http://play.baomihua.com/getvideourl.aspx?jsoncallback=jQuery{0}&flvid={1}&devicetype=pc_noflash&dataType=json' # 获取影片ids url # 参数1:{0}类型,{1}每页多少个,{2}当前在第几页 movie_ids = 'http://www.baomihua.com/interfaces/getindexinfo.ashx?datatype=VideoListRec&typeid={0}&pagesize={1}&curpage={2}&scenetype=pc_channel' # 获取影片movie url1 # 参数1:{0}jsondata['host'],{1}jsondata['dir'],{2}jsondata['stream_name'], {3}jsondata['videofiletype'] movie_url1 = 'http://{0}/{1}/{2}.{3}' ''' 获取播放地址 :param flvid: 影片id :return jsondata 影片数据 ''' def get_movie_data(self, flvid): url = self.playurl.format(random.randint(100000000000000, 999999999999999), flvid) print '第一个解释: ' + url self.headers['Host'] = 'vm36003.baomihua.com' self.headers['Referer'] = 'http://video.baomihua.com/v/' + str(flvid) response = requests.get(url, headers=self.headers) if response.status_code == 200: pattern = re.compile('.*?\((.*?)\).*?', re.S) result = re.search(pattern, response.content).group(1) jsondata = json.loads(result) return jsondata else: print 'get_play_data error' ''' 解释播放地址并保存到本地 :param flvid 视频id ''' def parse_video(self, flvid): jsondata = self.get_movie_data(flvid) if jsondata: filename = urllib.unquote(str(jsondata['title'])); print '影片名:' + filename movieurl1 = self.movie_url1.format(jsondata['host'], jsondata['dir'], jsondata['stream_name'],jsondata['videofiletype']) print '第二个解释: ' + movieurl1 self.headers['Host'] = 'vm36003.baomihua.com' self.headers['Referer'] = 'http://video.baomihua.com/v/' + str(id) movie = requests.get(movieurl1, headers=self.headers, 
allow_redirects=False) #allow_redirects=False 为拒绝默认的301/302重定向从而可以通过html.headers[‘Location’]拿到重定向的URL if movie.status_code == 200: with open(filename.decode('utf-8') + '.' + jsondata['videofiletype'], 'wb') as mp4file: mp4file.write(movie.content) print 'movie url1 parse_video ok' elif movie.status_code == 302: movieurl2 = movie.headers['Location'] print '第三个解释: ' + movieurl2 pattern = re.compile('.*?//(.*?)/.*?') movieip = re.match(pattern, movieurl2).group(1) # print movieip self.headers['Host'] = movieip movie2 = requests.get(movieurl2, headers=self.headers) if movie2.status_code == 200 or movie2.status_code == 206: with open(filename.decode('utf-8') + '.' + jsondata['videofiletype'], 'wb') as mp4file: mp4file.write(movie2.content) print 'movie url2 parse_video ok' else: print 'parse_video error' else: print 'no jsondata' ''' 获取视频id组 :param label 标签名 :param typeid 标签名对应的类型id :param pagesize 一页共显示多个条记录 :param curpage 当前第几页 ''' def get_movie_id(self, label, typeid, pagesize, curpage): url = self.movie_ids.format(typeid, pagesize, curpage) self.headers['Host'] = 'www.baomihua.com' self.headers['Referer'] = 'http://www.baomihua.com/' + label result = requests.get(url, headers = self.headers) if result.status_code == 200: resultjson = result.json() items = resultjson['Videolist'] if items: for item in items: yield item['videoId'] else: print '链接解释错误!!!' if __name__=='__main__': bmh = BaomihuaMovie() label = 'funny' #搞笑 typeid = 3 #搞笑类目对应的id号 pagesize = 5 curpage = 1 #label 与typeid 要对应 for flvid in bmh.get_movie_id(label, typeid, pagesize, curpage): print '开始解释: ' + str(flvid) bmh.parse_video(flvid) print '-' * 50 time.sleep(random.randint(1,8)) # bmh.parse_video(37873004) # url = 'http://play.baomihua.com/getvideourl.aspx?jsoncallback=jQuery1124032523956343&flvid=37797680&devicetype=pc_noflash&dataType=json&_=1529120814393'