Scraping Meituan data with Python

The script below pulls shop listings (name, category, poiid, ctPoi) for each district from Meituan's mobile food-channel API, paging through the results 15 at a time and appending every page to a local CSV file.

```python
# coding=utf-8
import csv
import json
import time

import requests


# Crawl shop name, cateName, poiid and ctPoi for one district; the parameter is the area id.
def crow_id(areaid):
    id_list = []
    url = 'https://meishi.meituan.com/i/api/channel/deal/list'
    head = {
        'Host': 'meishi.meituan.com',
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Referer': 'https://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
        'Cookie': 'XXXXXXXXXXXXXX',  # fill in your own cookie
    }
    p = {'https': 'https://27.157.76.75:4275'}  # proxy; replace with your own
    data = {
        "uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3",
        "platform": 3, "app": "", "partner": 126, "riskLevel": 1, "optimusCode": 10,
        "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
        "offset": 0, "limit": 15, "cateId": 1, "lineId": 0, "stationId": 0,
        "areaId": areaid, "sort": "default",
        "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "",
        "poi_attr_20043": "", "poi_attr_20033": "",
    }
    r = requests.post(url, headers=head, data=data, proxies=p)
    result = json.loads(r.text)
    # Total shop count for this district, used to work out how many pages to fetch.
    totalcount = result['data']['poiList']['totalCount']
    datas = result['data']['poiList']['poiInfos']
    print(len(datas), totalcount)
    for d in datas:
        id_list.append([d['name'], d['cateName'], d['poiid'], d['ctPoi']])
    print('Page:1')
    # Save the first page to a local CSV.
    with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
        write = csv.writer(f)
        for i in id_list:
            write.writerow(i)
    # Crawl page 2 through the last page.
    offset = 0
    if totalcount > 15:
        totalcount -= 15
        while offset < totalcount:
            id_list = []
            offset += 15
            m = offset // 15 + 1  # current page number
            print('Page:%d' % m)
            # Rebuild the POST parameters; paging is done by bumping offset.
            data2 = dict(data, offset=offset)
            try:
                r = requests.post(url, headers=head, data=data2, proxies=p)
                print(r.text)
                result = json.loads(r.text)
                datas = result['data']['poiList']['poiInfos']
                print(len(datas))
                for d in datas:
                    id_list.append([d['name'], d['cateName'], d['poiid'], d['ctPoi']])
                # Append this page to the same CSV.
                with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
                    write = csv.writer(f)
                    for i in id_list:
                        write.writerow(i)
            except Exception as e:
                print(e)


if __name__ == '__main__':
    # Area info copied straight out of the page HTML. Nan'ao New District needs
    # extra handling because it has no sub-districts under it.
    a = {"areaObj": {"28": [
        {"id": 28, "name": "全部", "regionName": "福田区", "count": 4022},
        {"id": 139, "name": "历下区", "regionName": "历下区", "count": 3307},
        {"id": 744, "name": "梅林", "regionName": "梅林", "count": 421},
        {"id": 7996, "name": "福田保税区", "regionName": "福田保税区", "count": 29},
    ]}}
    datas = a['areaObj']
    area_list = []
    for data in datas.values():
        # Skip the first entry ("全部" = "all") and collect each sub-district dict.
        for d in data[1:]:
            area_list.append(d)
    l = 0
    old = time.time()
    for i in area_list:
        l += 1
        print('Crawling area %d:' % l, i['regionName'], 'total shops:', i['count'])
        try:
            crow_id(i['id'])
            now = time.time() - old
            print(i['name'], 'done!', 'elapsed: %ds' % now)
        except Exception as e:
            print(e)
```
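The crawler writes headerless rows in gb18030 encoding. As a minimal post-processing sketch (not part of the original script; the output name `meituan_id_utf8.csv` and the column names are my own choices), here is one way to read the file back and re-save it as UTF-8 with a header row:

```python
import csv

# Column order matches what crow_id writes: name, cateName, poiid, ctPoi.
FIELDS = ['name', 'cateName', 'poiid', 'ctPoi']

with open('meituan_id.csv', newline='', encoding='gb18030') as src, \
        open('meituan_id_utf8.csv', 'w', newline='', encoding='utf-8') as dst:
    reader = csv.reader(src)
    writer = csv.DictWriter(dst, fieldnames=FIELDS)
    writer.writeheader()  # add the header the crawler never wrote
    for row in reader:
        writer.writerow(dict(zip(FIELDS, row)))
```

Using `csv.DictWriter` ties both the header and the data rows to the single `FIELDS` list, so the column names and values cannot drift apart.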
Reposted from blog.csdn.net/jidawanghao/article/details/108336975