# -*- coding: utf-8 -*-
# Copyright notice: these are personal study notes; if you quote them, please credit this blog. Thanks. https://blog.csdn.net/feng_jlin/article/details/81944679
# Original author: 铁血阿郎
# Basics: https://blog.csdn.net/sinat_41310868/article/details/78746094
# Code: https://blog.csdn.net/sinat_41310868/article/details/78746224
# Advanced: https://blog.csdn.net/sinat_41310868/article/details/78746251
def _baidu_poi_row(item):
    """Format one POI dict from the search response as a CSV line.

    Column order: province, city, area, name, lat, lng, address,
    overall_rating, detail_url. Raises KeyError/TypeError when *item* lacks
    any of those fields or is not shaped like a dict.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3

    def as_text(value):
        if isinstance(value, text_type):
            return value
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return text_type(value)

    location = item['location']
    detail = item['detail_info']
    fields = (
        item['province'],
        item['city'],
        item['area'],
        item['name'],
        location['lat'],
        location['lng'],
        item['address'],
        detail['overall_rating'],
        detail['detail_url'],
    )
    return u','.join(as_text(field) for field in fields) + u'\n'


def Baidu_PC(lat_1, lat_2, lon_1, lon_2, las, lx_type, page_size, page_num_range, ak, push,
             sleep_seconds=10):
    """Crawl POIs from the Baidu Place search API over a grid and append them to a file.

    The rectangle starting at (lat_1, lon_1) is cut into square cells of side
    ``las``. For every cell, pages ``0 .. page_num_range - 1`` are requested
    from ``http://api.map.baidu.com/place/v2/search`` and each returned POI is
    appended to ``push`` as one comma-separated line:
    province, city, area, name, lat, lng, address, overall_rating, detail_url.

    Args:
        lat_1, lat_2: Start and end latitude of the area.
        lon_1, lon_2: Start and end longitude of the area.
        las: Side length of one grid cell, in the same unit as the coordinates.
        lx_type: Search keyword sent as ``query`` (text, or UTF-8 bytes).
        page_size: Value sent as ``page_size``.
        page_num_range: Number of pages requested per cell.
        ak: Baidu API key sent as ``ak``.
        push: Path of the output file; opened in append mode, UTF-8 encoded.
        sleep_seconds: Pause before every request (default 10, the delay the
            function has always used) to avoid being throttled.

    Returns:
        None. ``OK`` is printed once the whole grid has been processed.

    Raises:
        Errors from ``urlopen`` (network/HTTP failures) and ``ValueError``
        from decoding a non-JSON body are not caught and abort the crawl.

    Notes:
        * A page without a ``results`` list (for example an error payload) is
          skipped. A POI missing one of the exported fields is skipped on its
          own; the remaining POIs of that page are still written.
        * The cell count is ``int(span / las + 1)``, so the last row/column of
          cells can extend beyond ``lat_2`` / ``lon_2``.
        * Fields are joined with plain commas and are not quoted, so a comma
          inside a field shifts the columns of that line.
    """
    # Imports stay function-local, as in the original, so the block is
    # self-contained. The try/except lets the same code run on Python 2 and 3.
    import io
    import itertools
    import json
    import time
    from contextlib import closing
    try:  # Python 2
        from urllib import quote
        from urllib2 import urlopen
    except ImportError:  # Python 3
        from urllib.parse import quote
        from urllib.request import urlopen

    text_type = type(u'')

    # Number of cells along each axis.
    lat_count = int((lat_2 - lat_1) / las + 1)
    lon_count = int((lon_2 - lon_1) / las + 1)

    # Percent-encode the keyword so non-ASCII text (e.g. Chinese) forms a valid
    # URL. Encoding to UTF-8 bytes first keeps quote() working on Python 2.
    if isinstance(lx_type, text_type):
        lx_type = lx_type.encode('utf-8')
    query = quote(lx_type)

    # Cartesian product of (lat cell, lon cell, page) replaces three nested loops.
    grid = itertools.product(range(lat_count), range(lon_count), range(page_num_range))
    for lat_c, lon_c, page_num in grid:
        lat_b1 = lat_1 + las * lat_c
        lon_b1 = lon_1 + las * lon_c
        bounds = ','.join(str(v) for v in (lat_b1, lon_b1, lat_b1 + las, lon_b1 + las))
        # scope=2 presumably asks for detailed results (the source of
        # ``detail_info``) - confirm against the Baidu Place API docs.
        url = ('http://api.map.baidu.com/place/v2/search?query=' + query
               + '&bounds=' + bounds
               + '&page_size=' + str(page_size)
               + '&page_num=' + str(page_num)
               + '&scope=2&output=json&ak=' + ak)
        print(url)
        time.sleep(sleep_seconds)  # rest between requests to avoid anti-crawling limits

        # closing() guarantees the HTTP response is released (Python 2
        # responses are not context managers themselves).
        with closing(urlopen(url)) as response:
            # Assumes the API answers in UTF-8, the JSON default.
            data = json.loads(response.read().decode('utf-8'))

        results = data.get('results') if isinstance(data, dict) else None

        rows = []
        for item in results or []:
            try:
                rows.append(_baidu_poi_row(item))
            except (KeyError, TypeError):
                # Incomplete POI: drop just this record, keep the rest of the page.
                continue

        # Append page by page so an interrupted crawl keeps what it already
        # fetched; the with-block closes the file even if writing fails.
        with io.open(push, 'a', encoding='utf-8') as output_file:
            output_file.writelines(rows)

    print('OK')
    return
# The crawler above is packaged as a function, so it only needs to be called.
# The guard keeps the crawl from starting when this file is imported as a module.
# NOTE(review): the Baidu API key (ak) and the output path are hard-coded below;
# consider loading them from configuration or the environment, and avoid
# committing a real key to source control.
if __name__ == '__main__':
    Baidu_PC(24.390894, 26.548645, 102.174112, 103.678942, 1, '中学', 20, 20,
             '8ZAITojOniBCWz89OXNKD3LVlBMTljai',
             r'C:\Users\feng_jlin\Desktop\kunmingschoolsm.txt')