因为项目需要,昨天花了一天时间写了百度地图与携程网景区评论获取程序,景区id这里就放给大家了。
这里先给大家放爬百度地图的代码
import requests
import re,json,csv
from urllib.parse import quote,urlencode
import gevent
class Comment(object):
    """Scrape scenic-spot comments from Baidu Maps' UGC API and append them to a CSV file.

    Each row written is (user_name, date, content). The scenic-spot uid and the
    output path are configurable but default to the original hard-coded values.
    """

    # Class-level request headers shared by every request.
    # The copy-pasted originals were mangled ("User - Agent", spaces inside the
    # Cookie and Referer values) and would not form valid HTTP headers; they are
    # restored to well-formed equivalents here.
    header = {
        "Cookie": "BAIDUID=CDA0B9326BA8A900680441B071F170B1:FG=1; BIDUPSID=CDA0B9326BA8A900680441B071F170B1; PSTM=1551404779; delPer=0; H_PS_PSSID=1451_21109_28584_26350_28413; PSINO=6",
        "Host": "ugcapi.baidu.com",
        "Referer": "https://map.baidu.com/",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    }

    def __init__(self, uid="9ee392b821442941bbbd630e",
                 out_path="/Volumes/Tigo/Data/date.csv"):
        """Store crawl targets.

        Args:
            uid: Baidu Maps scenic-spot id (default: the original hard-coded id).
            out_path: CSV file the rows are appended to (default: original path).
        """
        self.uid = uid
        self.out_path = out_path

    def request(self, page):
        """Fetch one page of comments and append its rows to the CSV file.

        Args:
            page: 1-based comment page index.
        """
        params = {
            "uid": self.uid,         # scenic-spot id
            "pageIndex": page,       # comment page number
            "pageCount": 10,         # comments per page
            "pic_videos": 1,
            "tab": 1,
            "from": "map_zhongtai",
        }
        url = "https://ugcapi.baidu.com/richindex/2/comment?" + urlencode(params)
        # A throwaway Session per call gains nothing; a plain GET is equivalent
        # and does not leak an unclosed session.
        response = requests.get(url, headers=Comment.header)
        data = json.loads(response.text)
        # newline="" is required by the csv module to avoid blank rows on Windows.
        with open(self.out_path, "a+", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            for item in data["data"]["comment_list"]:
                writer.writerow([item["user_name"],  # reviewer name
                                 item["date"],       # comment date
                                 item["content"]])   # comment text

    def getInformation(self, first_page=1, last_page=20):
        """Crawl comment pages first_page..last_page (inclusive).

        Defaults reproduce the original behavior (pages 1-20).
        """
        # NOTE(review): gevent only makes blocking I/O cooperative after
        # gevent.monkey.patch_all(); without it these greenlets run the
        # requests serially — confirm whether monkey-patching is intended.
        gevent.joinall([gevent.spawn(self.request, i)
                        for i in range(first_page, last_page + 1)])
if __name__ == "__main__":
    # Script entry point: crawl the comment pages and append them to the CSV.
    crawler = Comment()
    crawler.getInformation()