# coding: utf-8
import time
import requests
from bs4 import BeautifulSoup
class spider_KG_top500(object):
    """Fetch Kugou rank pages and print one record per listed song.

    Each record is a dict with the keys ``rank``, ``name``, ``singer`` and
    ``song_time``; all four values are whitespace-stripped strings.
    """

    # get_song_info stops (and prints 'Spider end!') once the page number
    # exceeds this value.
    MAX_PAGE = 23
    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the crawl indefinitely.
    REQUEST_TIMEOUT = 10
    URL_TEMPLATE = r'http://www.kugou.com/yy/rank/home/%s-8888.html?from=rank'
    # Built once instead of on every loop iteration.
    HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4882.400 QQBrowser/9.7.13059.400'}

    def __init__(self):
        print('Welcome to spider_KG_top500')

    @staticmethod
    def _split_title(title):
        """Split ``title`` at its first hyphen into two stripped strings.

        Returns a ``(before, after)`` tuple. ``after`` is ``''`` when the
        title contains no hyphen, and keeps any further hyphens intact.
        """
        before, _, after = title.partition('-')
        return before.strip(), after.strip()

    def get_song_info(self, page_num):
        """Download pages 1..page_num and print every song found on them.

        Args:
            page_num: how many pages to fetch, counted from page 1. Pages
                beyond ``MAX_PAGE`` are not requested; reaching that limit
                prints ``'Spider end!'`` and stops.

        Returns:
            None. Results are written to stdout.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within ``REQUEST_TIMEOUT``.
        """
        for page in range(1, page_num + 1):
            if page > self.MAX_PAGE:
                print('Spider end!')
                break

            url = self.URL_TEMPLATE % page
            res = requests.get(url, headers=self.HEADERS,
                               timeout=self.REQUEST_TIMEOUT)
            res.encoding = 'utf-8'
            soup = BeautifulSoup(res.text, 'html.parser')

            ranks = soup.select('.pc_temp_num')
            # The child combinators in this selector must be separated by
            # spaces: '.pc_temp_songlist > ul > li > a'.
            links = soup.select('.pc_temp_songlist > ul > li > a')
            durations = soup.select('.pc_temp_time')

            for rank, link, duration in zip(ranks, links, durations):
                # NOTE(review): the part before the hyphen is stored as
                # 'name' and the part after it as 'singer', as the original
                # code did -- confirm this order against the live page.
                name, singer = self._split_title(link.get('title', ''))
                data = {
                    'rank': rank.text.strip(),
                    'name': name,
                    'singer': singer,
                    'song_time': duration.text.strip()
                }
                print('Page %s:\n%s'%(page,data))
def read_page_count(read=input):
    """Prompt until the user enters an integer and return it.

    Args:
        read: callable taking the prompt string and returning the user's
            answer; defaults to the built-in ``input``.

    Returns:
        The entered value converted with ``int``.

    Only ``ValueError`` (a non-numeric answer) triggers a re-prompt. Other
    errors, such as ``EOFError`` on a closed stdin, propagate instead of
    making the loop spin forever.
    """
    while True:
        try:
            return int(read('please input how many page to spider: '))
        except ValueError:
            print('please input 数字: ')


def main():
    """Ask for a page count, run the spider, and print the elapsed time."""
    page_num = read_page_count()
    start_time = time.time()
    spider = spider_KG_top500()
    spider.get_song_info(page_num)
    spend_time = time.time() - start_time
    print('spend: %ss'%spend_time)


if __name__ == '__main__':
    main()
# 【爬虫入门5】爬取酷狗TOP500
# 猜你喜欢
# 转载自blog.csdn.net/weixin_39723165/article/details/81407000
# 今日推荐
# 周排行