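# Scrape the Douban Top 250 pages and write each of the 250 movies' title, release year, rating, and number of ratings to CSV files, one file per page (第1页.csv … 第10页.csv).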
import requests
import re
import csv
url = "https://movie.douban.com/top250"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}

PAGE_COUNT = 10       # number of result pages requested
PAGE_SIZE = 25        # step of the "start" query parameter between pages
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# One match per movie entry. In (?P<name>...) the P must be uppercase, as must
# the S in re.S (re.S lets "." match newlines so one entry can span many lines).
# The year is delimited by the literal HTML entity "&nbsp;" in the raw page
# text; "\s*" skips the whitespace that follows <br>.
obj = re.compile(
    r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
    r'.*?<p class="">.*?<br>\s*(?P<year>.*?)&nbsp;'
    r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
    r'.*?<span>(?P<num>.*?)人评价</span>',
    re.S,
)


def parse_movies(page_content):
    """Extract one ``[name, year, score, num]`` row per movie found in the HTML.

    Args:
        page_content: Raw (un-decoded-entities) HTML text of one ranking page.

    Returns:
        A list of 4-item lists of strings, in page order; empty when nothing
        matches.
    """
    rows = []
    for match in obj.finditer(page_content):
        name, year, score, num = match.group("name", "year", "score", "num")
        rows.append([name, year.strip(), score, num])
    return rows


def fetch_page(start):
    """Download the ranking page whose first entry has offset ``start``.

    Raises:
        requests.RequestException: on timeout, connection failure, or a
            non-2xx response (so an error page is never parsed as data).
    """
    keyvalue = {"start": start, "filter": ""}
    r = requests.get(url=url, headers=headers, params=keyvalue, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    return r.text


def write_rows(filename, rows):
    """Write ``rows`` to ``filename`` as CSV, replacing any existing file."""
    # Explicit encoding: the platform default may be unable to encode some
    # titles. The BOM written by utf-8-sig lets spreadsheet tools detect UTF-8.
    with open(filename, mode="w", newline="", encoding="utf-8-sig") as f:
        csv.writer(f).writerows(rows)


def main():
    """Fetch every page and save each one to its own ``第N页.csv`` file."""
    for i in range(PAGE_COUNT):
        # Fetch and parse before opening the file so a failed request does
        # not leave an empty CSV behind.
        rows = parse_movies(fetch_page(i * PAGE_SIZE))
        write_rows("第{}页.csv".format(i + 1), rows)
    print("运行结束!")


if __name__ == "__main__":
    main()
# douban top250 scraper
# You may also like
# Reposted from blog.csdn.net/weixin_47401101/article/details/121059298
# Today's picks
# Weekly ranking