#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Fetch one Meituan food-listing page and look for its ``"poiInfos"`` data.

Running the module performs the HTTP request immediately, then passes the
response text to :func:`get_element_from_html`.
"""
import sys

import requests

from lib.re_util import ReUtil

base_url = 'http://ns.meituan.com/meishi/b25710/'

# NOTE(review): this looks like a captured browser session (``token2``, ``lt``,
# ``userTicket``). If these are real credentials they should not live in source
# control -- consider loading them from the environment instead. Confirm.
cookies_str = '_lx_utm=utm_source%3Dgoogle%26utm_medium%3Dorganic; _lxsdk_cuid=169416ad3eec8-047ac50146444b-24414032-1fa400-169416ad3eec8; __mta=247371067.1551580718640.1551580718640.1551580718640.1; ci=547; rvct=547%2C20%2C406; _lxsdk=169416ad3eec8-047ac50146444b-24414032-1fa400-169416ad3eec8; mtcdn=K; client-id=0c7a0c6e-a2ae-4078-935c-09341ba89f12; lat=22.884164; lng=113.458377; userTicket=khcRUTMcyLDWOzCyHezncrrepAbXpqpxDHzQUdLl; u=274668244; n=BpY342261584; lsu=; token2=ge9zM0PLN0SCKhx2Pwe7MZzBwRoAAAAA_gcAALQDil5w1rLuttboIENK9gmMKE2ZJDAp8lyGAqOJ3Mu6opaoFQHf6u065s2tZaonxg; lt=ge9zM0PLN0SCKhx2Pwe7MZzBwRoAAAAA_gcAALQDil5w1rLuttboIENK9gmMKE2ZJDAp8lyGAqOJ3Mu6opaoFQHf6u065s2tZaonxg; uuid=e6b22bfb73b9426ba0c6.1551580712.2.0.0; unc=BpY342261584; _lxsdk_s=169483cec48-b6a-7bc-1de%7C%7C1'

# Turn the raw "name=value; name=value" header text into the mapping that
# ``requests`` expects for its ``cookies`` argument.
cookies_dict = {}
for cookie in cookies_str.split(";"):
    if "=" not in cookie:
        # Empty or malformed segment (e.g. after a trailing ';'): nothing to
        # unpack, so skip it instead of failing on the split below.
        continue
    # Split on the first '=' only; cookie values may themselves contain '='.
    k, v = cookie.split("=", 1)
    cookies_dict[k.strip()] = v.strip()

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36'
}

page = requests.get(
    url=base_url,
    cookies=cookies_dict,
    headers=headers,
    # Without a timeout ``requests`` waits forever on an unresponsive server.
    timeout=10,
)


def get_element_from_html(raw_html):
    """Search *raw_html* for the ``"poiInfos"`` section and print part of it.

    The pattern is obtained from ``ReUtil.get_regex`` using ``"poiInfos":`` as
    the begin marker and ``},"comHeader"`` as the end marker.
    NOTE(review): presumably the pattern captures the text between those two
    markers as several groups -- confirm against ``ReUtil``.

    On a match, the element at index 1 of the first match is printed to
    stdout. When nothing matches, a short notice goes to stderr and nothing
    is printed to stdout.

    Args:
        raw_html: HTML text to search.

    Returns:
        The value returned by ``regex.findall(raw_html)``, unchanged
        (including when it is empty).
    """
    regex = ReUtil.get_regex(begin_with=['"poiInfos":'], end_with=['},"comHeader"'])
    result = regex.findall(raw_html)
    if not result:
        # The markers are absent from the response text, so there is no first
        # match to index; report it and hand back the empty result.
        print('no "poiInfos" section found in the page', file=sys.stderr)
        return result
    print(result[0][1])
    return result


get_element_from_html(page.text)
爬取美团网数据
猜你喜欢
转载自www.cnblogs.com/liuweimingcprogram/p/10472391.html
今日推荐
周排行