2-2-2 零基础掌握百度地图兴趣点获取POI爬虫(python语言爬取)练习(版本:py2)

版权声明:自学笔记,如有引用请标明博客,感谢 https://blog.csdn.net/feng_jlin/article/details/81944679

原博主:铁血阿郎

基础篇:https://blog.csdn.net/sinat_41310868/article/details/78746094

代码篇:https://blog.csdn.net/sinat_41310868/article/details/78746224

进阶篇:https://blog.csdn.net/sinat_41310868/article/details/78746251

# -*- coding: utf-8 -*-

def Baidu_PC(lat_1,lat_2,lon_1,lon_2,las,lx_type,page_size,page_num_range,ak,push):
    """Crawl Baidu Place search results over a lat/lon grid and append them to a file.

    The rectangle [lat_1, lat_2] x [lon_1, lon_2] is cut into square cells of
    side ``las``. For every cell, pages ``0 .. page_num_range - 1`` of the
    ``place/v2/search`` endpoint are requested, and each returned record is
    appended to ``push`` as one comma-separated line:

        province,city,area,name,lat,lng,address,overall_rating,detail_url

    Args:
        lat_1, lat_2: lower and upper latitude of the area to cover.
        lon_1, lon_2: lower and upper longitude of the area to cover.
        las: side length of one grid cell, in the same unit as the coordinates.
        lx_type: search keyword sent as the ``query`` parameter.
        page_size: value sent as the ``page_size`` parameter.
        page_num_range: number of pages requested per grid cell.
        ak: API key sent as the ``ak`` parameter.
        push: path of the output file; it is opened in append mode.

    Returns:
        None.

    Raises:
        Whatever ``urlopen`` / ``json.load`` raise on a failed request or a
        non-JSON response; the output file is closed before the error
        propagates.

    Notes:
        * A field missing from a record is written as an empty column, so one
          incomplete record no longer discards the rest of its page.
        * Field values are joined with plain commas and are not quoted.
        * The function sleeps 10 seconds before every request.
    """
    import io
    import sys
    import json
    import time  # used to pause between requests
    import itertools  # used to flatten the nested grid/page loops
    from contextlib import closing

    try:  # Python 2
        from urllib import quote
        from urllib2 import urlopen
        text_type = unicode  # noqa: F821 - only evaluated on Python 2
    except ImportError:  # Python 3
        from urllib.parse import quote
        from urllib.request import urlopen
        text_type = str

    if sys.version_info[0] == 2:
        # Kept for backward compatibility: the original function switched the
        # process-wide default encoding to UTF-8 on Python 2.
        reload(sys)  # noqa: F821 - builtin on Python 2 only
        sys.setdefaultencoding('utf-8')

    def as_text(value):
        # Missing fields become empty columns instead of aborting the page.
        return u'' if value is None else text_type(value)

    # Percent-encode the keyword so that non-ASCII queries form a valid URL.
    raw_query = lx_type.encode('utf-8') if isinstance(lx_type, text_type) else lx_type
    query = quote(raw_query)

    # Number of grid cells along each axis.
    lat_count = int((lat_2 - lat_1) / las + 1)
    lon_count = int((lon_2 - lon_1) / las + 1)
    # Cartesian product == three nested loops (lat cell, lon cell, page).
    requests_to_make = itertools.product(
        range(0, lat_count), range(0, lon_count), range(0, page_num_range))

    # Open the output once; the context manager closes it even if a request
    # fails, and an empty grid no longer ends in a NameError on close().
    with io.open(push, 'a', encoding='utf-8') as output_file:
        for lat_c, lon_c, page_num in requests_to_make:
            lat_b1 = lat_1 + las * lat_c
            lon_b1 = lon_1 + las * lon_c
            url = ('http://api.map.baidu.com/place/v2/search?query=' + query
                   + '&bounds=' + str(lat_b1) + ',' + str(lon_b1)
                   + ',' + str(lat_b1 + las) + ',' + str(lon_b1 + las)
                   + '&page_size=' + str(page_size)
                   + '&page_num=' + str(page_num)
                   + '&scope=2&output=json&ak=' + ak)
            print(url)
            time.sleep(10)  # rest 10s before each request to avoid being blocked
            with closing(urlopen(url)) as response:
                data = json.load(response)

            # A payload without 'results' (e.g. an empty page) is simply skipped.
            results = data.get('results') if isinstance(data, dict) else None
            for item in results or []:
                location = item.get('location') or {}
                detail = item.get('detail_info') or {}
                columns = [
                    item.get('province'),
                    item.get('city'),
                    item.get('area'),
                    item.get('name'),
                    location.get('lat'),
                    location.get('lng'),
                    item.get('address'),
                    detail.get('overall_rating'),
                    detail.get('detail_url'),
                ]
                output_file.write(u','.join(as_text(c) for c in columns) + u'\n')
    print('OK')
    return


# The crawler above is packaged as a function, so it only needs to be called.
# Guarded so that importing this module does not start a crawl as a side effect.
# NOTE(review): the API key and the output path below are hard-coded; consider
# supplying them from configuration instead of committing them to source.
if __name__ == '__main__':
    Baidu_PC(24.390894,26.548645,102.174112,103.678942,1,'中学',20,20,'8ZAITojOniBCWz89OXNKD3LVlBMTljai',r'C:\Users\feng_jlin\Desktop\kunmingschoolsm.txt')

猜你喜欢

转载自blog.csdn.net/feng_jlin/article/details/81944679
2
>&2
α2
2-2