#! -*- encoding:utf-8 -*-
import requests
import json,csv,time
from urllib import request
import datetime,zlib,base64
def str_replace(string):
    """Percent-encode the characters that must not appear literally in
    Meituan's URL query parameters ('/', '+', '=', ':').

    Uses a single-pass str.translate instead of four chained replace()
    calls (one string traversal, no intermediate copies); output is
    identical for every input.
    """
    return string.translate(str.maketrans({
        '/': '%2F',
        '+': '%2B',
        '=': '%3D',
        ':': '%3A',
    }))
# HTTP headers that mimic a desktop Chrome browser so the requests do not
# look like an obvious bot (Meituan rejects bare, header-less clients).
simulateBrowserHeader = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Host': 'sh.meituan.com',
'Referer': 'https://sh.meituan.com/meishi/',
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36"
}
# Local proxy-pool service; each GET is expected to return one random proxy.
PROXY_POOL_URL = 'http://localhost:5555/random'
def get_proxy():
    """Fetch one proxy address from the local proxy pool.

    Returns:
        The proxy string from the pool's response body on HTTP 200,
        or None when the pool is unreachable, times out, or answers
        with any other status code.
    """
    try:
        # Bound the wait so a dead pool cannot hang the crawler forever.
        response = requests.get(PROXY_POOL_URL, timeout=5)
        if response.status_code == 200:
            return response.text
        return None  # explicit: non-200 means "no proxy available"
    except requests.exceptions.RequestException:
        # BUG FIX: the original caught the *builtin* ConnectionError, which
        # requests never raises (requests.exceptions.ConnectionError is not
        # a subclass of it), so network failures propagated uncaught.
        # RequestException covers connection errors and timeouts.
        return None
def encode_token():
    """Build the ``_token`` request parameter.

    Assembles a browser-fingerprint dict (timestamps, viewport sizes,
    referer URL, empty mouse/key/touch event traces and a fixed ``sign``
    blob), then zlib-compresses its ``str()`` form and base64-encodes the
    result, returned as a utf-8 string.
    """
    now_ms = int(datetime.datetime.now().timestamp() * 1000)
    fingerprint = {
        'rId': 100900,
        'ver': '1.0.6',
        'ts': now_ms,
        'cts': now_ms + 100 * 1000,  # pretend ~100 s elapsed on the page
        'brVD': [1190, 1030],
        'brR': [[1011, 875], [1011, 875], 24, 24],
        'bI': ['https://sh.meituan.com/meishi/b4739/', ''],
        'mT': [],
        'kT': [],
        'aT': [],
        'tT': [],
        'aM': '',
        'sign': "eJwdzU1OxCAYBuC7zILlQIFOWxMWSCExMe48ALY4JZafwIeJV/A+Rg9kPIfN7J7F+3OyxdmHVTC0WHAHCFo8fDzZ4MTvz+ff1zdafYyuqNQiSIByZFDK4EOrKq1OdASl4q8+PpddbAC53mFct3NwHpqN5yUFfLhuHr8wnGNHMMr2eitmW+DYFh29oLxbeE0liA4VX98e3bvbD9dUQKBW3e24Nb+KyUhyIcNoKOkpM4qo+36eqdRSDnRSnFMz0lHrTk6zNoPqB85nMzBm9Cg5Jad/Pg9QMw=="
    }
    compressed = zlib.compress(str(fingerprint).encode())
    return base64.b64encode(compressed).decode('utf-8')
if __name__ == '__main__':
    # Crawl Meituan food listings for Shanghai area 4739, page by page,
    # appending (title, avgScore, address, allCommentNum) rows to a CSV.
    originUrl = str_replace('https://sh.meituan.com/meishi/b4739/')
    token = str_replace(encode_token())

    # FIX: open the output file once (append mode) instead of re-opening it
    # on every page without ever closing it, and create the csv.writer once
    # instead of once per row. `with` guarantees the handle is closed.
    with open('meishi_15.csv', 'a', newline='', encoding='utf-8') as outfile:
        csv_write = csv.writer(outfile, dialect='excel')
        for num in range(1, 70):
            print(num)
            time.sleep(2.5)  # throttle requests to avoid the anti-crawler ban
            url = 'https://sh.meituan.com/meishi/api/poi/getPoiList?' \
                  'cityName=上海' \
                  '&cateId=0' \
                  '&areaId=4739' \
                  '&sort=' \
                  '&dinnerCountAttrId=' \
                  '&page=%d' \
                  '&userId=' \
                  '&uuid=9FA06078F20523FC0CB5DD2AEAA729C442F828EE1A9DEF7C5744DF733FE8A420' \
                  '&platform=1' \
                  '&partner=126' \
                  '&originUrl=%s' \
                  '&riskLevel=1' \
                  '&optimusCode=1' \
                  '&_token=%s' % (num, originUrl, token)

            # Block until the local pool hands back a proxy.
            proxy = get_proxy()
            while proxy is None:
                proxy = get_proxy()
            # NOTE(review): the pooled proxy is fetched but a hard-coded paid
            # proxy is used for the actual request — presumably deliberate
            # (the pool-based variant was commented out); confirm intent.
            response = requests.get(url, headers=simulateBrowserHeader, proxies={"http":"http://HKGYJ5BCT545T08D:[email protected]:9020"})

            jsons = json.loads(response.text)
            for poi in jsons["data"]["poiInfos"]:
                csv_write.writerow([poi["title"], poi["avgScore"],
                                    poi["address"], poi["allCommentNum"]])
    print("END...")