版权声明:自学笔记,如有引用请标明博客,感谢 https://blog.csdn.net/feng_jlin/article/details/81943894
目录
一、课程所用API爬取豆瓣电影评分程序注释(根据豆瓣电影编号)
二、课程作业API爬取豆瓣电影评分程序注释(根据豆瓣电影名称)
一、课程所用API爬取豆瓣电影评分程序注释(根据豆瓣电影编号)
# coding: utf-8
# In[ ]:
import urllib.request as urlrequest
import json
# Fetch the average rating of each Douban movie in id_list (looked up by
# numeric movie ID) and write one "<id> <rating>" line per movie to
# douban_movie_rank.txt.
id_list = [26387939, 25882296, 26752088]

# Mode "w" truncates the file on every run; use "a" to append instead.
with open("douban_movie_rank.txt", "w") as outputfile:
    # `movie_id` rather than `id`, so the builtin id() is not shadowed.
    for movie_id in id_list:
        # str.format() substitutes the movie ID into the {} placeholder.
        url_visit = 'https://api.douban.com/v2/movie/subject/{}'.format(movie_id)

        # Read the raw response bytes; the `with` block closes the HTTP
        # connection as soon as the body has been read.
        with urlrequest.urlopen(url_visit) as response:
            crawl_content = response.read()

        # Decode the bytes as UTF-8 and parse the JSON document.
        json_content = json.loads(crawl_content.decode('utf8'))

        # The rating is the 'average' field nested under 'rating'.
        rank = json_content['rating']['average']
        outputfile.write("{} {}\n".format(movie_id, rank))
二、课程作业API爬取豆瓣电影评分程序注释(根据豆瓣电影名称)
import urllib.request as urlrequest
import urllib.parse as urlparse
import json
# Look up each movie in name_list through the Douban search API (by title)
# and append one "<name> <id> <rating>" line per movie, taken from the first
# search result, to douban_movie_rankother.txt.
name_list = ['异形:契约', '摔跤吧!爸爸', '速度与激情8']

for name in name_list:
    # Percent-encode the Chinese title so it can be placed in a URL query.
    # Only the alias `urlparse` is imported in this file; the bare name
    # `urllib` is never bound, so `urllib.parse.quote` raised NameError.
    quoted_name = urlparse.quote(name)
    url_visit = 'https://api.douban.com/v2/movie/search?q={}'.format(quoted_name)

    # Read the raw response bytes; the `with` block closes the HTTP
    # connection as soon as the body has been read.
    with urlrequest.urlopen(url_visit) as response:
        crawl_content = response.read()

    json_content = json.loads(crawl_content.decode('utf-8'))

    # Guard against an empty result list, which would otherwise raise
    # IndexError on [0] and abort the remaining lookups.
    subjects = json_content['subjects']
    if not subjects:
        print('No search result for {}'.format(name))
        continue

    # Use the first search hit for both the rating and the movie ID.
    first_hit = subjects[0]
    rank = first_hit['rating']['average']
    id1 = first_hit['id']

    # Mode "a" appends, so earlier lines are kept. The encoding is explicit
    # because the movie names are Chinese and the platform default encoding
    # may not be able to represent them.
    with open('douban_movie_rankother.txt', 'a', encoding='utf-8') as outputfile:
        outputfile.write('{} {} {}\n'.format(name, id1, rank))