# Changba (唱吧) music scraper
import requests
import re
# Changba page to scrape; passed to changba() at the bottom of the file.
# NOTE(review): the "/u/<id>" path looks like a user profile page -- confirm.
url = 'http://changba.com/u/461549830'
# Patterns for the three scraping steps, compiled once. Raw strings keep the
# regex escapes (\w, \d, \.) away from Python's own string-escape handling.
_SONG_LINK_RE = re.compile(
    r'<a href="(/s/.{22})" style="color:#999;display:block;" target="_blank">'
)
_SONG_NAME_RE = re.compile(r'(.*)<div class="userPage-work-detail">')
# Dots are escaped so they only match a literal "." in the host / extension.
_MP3_URL_RE = re.compile(r'http://\w{4,20}\.changba\.com/\d{10}\.mp3')
# Path separators, characters that are illegal in file names on common
# platforms, and control characters. Scraped titles are untrusted input and
# must not be able to point outside the current directory.
_UNSAFE_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
# Seconds to wait on each HTTP request; without a timeout requests can block
# forever on an unresponsive server.
_REQUEST_TIMEOUT = 15


def _safe_mp3_filename(title, page_url):
    """Build a local ``.mp3`` file name from a scraped song title.

    Unsafe characters are replaced with ``_`` and leading/trailing spaces and
    dots are dropped. If nothing usable is left, the last path segment of
    *page_url* is used instead so the track still gets a file.
    """
    slug = page_url.rsplit('/', 1)[-1]
    for candidate in (title, slug):
        cleaned = _UNSAFE_FILENAME_CHARS_RE.sub('_', candidate).strip(' .')
        if cleaned:
            return cleaned + '.mp3'
    return 'unnamed.mp3'


def changba(url):
    """Download the MP3 of every song linked from a Changba page.

    The page at *url* is fetched and scanned for song links (``/s/...``) and
    song titles. Each song page is then fetched, the first MP3 URL found in
    it is downloaded, and the audio is written to ``<title>.mp3`` in the
    current working directory. The scraped link list, title list and the MP3
    URLs found on each song page are printed as progress output.

    Links and titles are paired by position. A song without a matching (or
    with an empty) title is saved under a name derived from its page URL.
    Songs whose page contains no MP3 URL, or whose MP3 request does not
    return HTTP 200, are skipped.

    Args:
        url: Address of the Changba page listing the songs.

    Returns:
        None.

    Raises:
        requests.RequestException: Propagated from any failed or timed-out
            HTTP request.
        OSError: Propagated if an output file cannot be written.
    """
    res = requests.get(url, timeout=_REQUEST_TIMEOUT)
    if res.status_code != 200:
        print('Failed to fetch ' + url + ': HTTP ' + str(res.status_code))
        return
    res_html = res.text

    media_mid = ['http://changba.com' + path
                 for path in _SONG_LINK_RE.findall(res_html)]
    print(media_mid)

    song_name = [title.strip() for title in _SONG_NAME_RE.findall(res_html)]
    print(song_name)

    # Pad with empty titles so a page with fewer titles than links does not
    # raise IndexError; zip() stops at the last link if there are extra titles.
    titles = song_name + [''] * (len(media_mid) - len(song_name))

    for page_url, title in zip(media_mid, titles):
        page_html = requests.get(page_url, timeout=_REQUEST_TIMEOUT).text
        mp3_urls = _MP3_URL_RE.findall(page_html)
        print(mp3_urls)
        if not mp3_urls:
            continue
        mp3 = requests.get(mp3_urls[0], timeout=_REQUEST_TIMEOUT)
        if mp3.status_code != 200:
            continue
        with open(_safe_mp3_filename(title, page_url), 'wb') as f:
            f.write(mp3.content)
# Run the scraper on the configured page. This executes at module load, so it
# also runs when the file is imported.
changba(url)