版权声明:本文为博主原创文章,若有错误之处望大家批评指正!转载需附上原文链接,谢谢! https://blog.csdn.net/summer_dew/article/details/84591719
爬取的数据仅用于科研
爬取所用的接口在此不再讲解,详见代码
代码写于2017年9月,今天查找当时爬取的相关数据时顺便找回了这份代码;由于时隔较久,代码时效性较差
# -*- coding:utf-8 -*-
# Author:PasserQi
# Time:2017/9/29
# Function:矢量化厦门市公园范围
import json
import time
import urllib
import arcgisscripting
import arcpy
import coordinate_conversion
import os
from bs4 import BeautifulSoup
# Destination shapefile that saveParkPolygon() creates and fills.
outPath = r"G:\workspace\python\arcpy\park_polygon.shp"

# AMap (Gaode Maps) Web Service API key. The value shown here is masked
# (presumably redacted for publication) -- supply a real key before running.
AMAP_API_KEY = "***"

# Maximum number of result pages to request from the place search.
MAX_PAGE = 100

# Base query parameters for the AMap place-text search; the page number is
# added per request by getParkPoiid().
urlParamJson = {
    'city': '厦门',
    'output': 'xml',
    'key': AMAP_API_KEY,
    'types': '公园',
    'citylimit': 'true',  # only return results inside the given city
    'offset': '20',  # number of records per page
}
def getParkPoiid():
    """Collect the POI ids of Xiamen parks from the AMap place-text search.

    Result pages are requested one after another, starting at page 1, until
    a page contains no ``poi`` elements or ``MAX_PAGE`` pages have been
    requested.

    return: list of UTF-8 encoded POI id strings
    """
    poiidList = []
    # MAX_PAGE is the last page that may be requested, so the upper bound of
    # the range has to be inclusive.
    for page in range(1, MAX_PAGE + 1):
        print("当前 %s 页..." % page)
        # Work on a copy so the shared module-level urlParamJson is not
        # permanently altered by this call.
        query = dict(urlParamJson)
        query["page"] = page
        url = "http://restapi.amap.com/v3/place/text?%s" % urllib.urlencode(query)
        http = urllib.urlopen(url)
        try:
            dom = BeautifulSoup(http)
        finally:
            # Always release the connection, even if parsing fails.
            http.close()
        poiList = dom.findAll("poi")
        if not poiList:
            # An empty page means there is no more data.
            break
        for poi in poiList:
            poiidList.append(poi.id.get_text().encode("utf8"))
    return poiidList
def getParkInfoList(poiidList):
    """Fetch the detail record of every POI id and keep those with a shape.

    For each id the AMap detail endpoint is queried and the JSON reply is
    parsed. When ``data.spec`` contains a ``mining_shape`` entry, a dict with
    the keys ``shape`` (the raw shape string), ``name`` and ``type`` (the
    park's name and ``business`` value, both UTF-8 encoded) is collected.

    Requests are throttled: one second after every request, plus an extra
    60 seconds whenever the running counter (requests made + 1) reaches a
    multiple of 51.

    param poiidList: iterable of POI id strings
    return: list of dicts, one per park that has shape information
    """
    parkInfoList = []
    for requestCount, poiid in enumerate(poiidList, 1):
        url = "http://ditu.amap.com/detail/get/detail?%s" % urllib.urlencode({'id': poiid})
        print("查询url %s" % url)
        http = urllib.urlopen(url)
        try:
            park = json.loads(http.read())
        finally:
            # Always release the connection, even if reading/parsing fails.
            http.close()

        data = park["data"]
        spec = data["spec"]  # per the original author, present on every record
        base = data["base"]
        name = base["name"].encode("utf8")

        # "mining_shape" carries the polygon / polyline geometry when present.
        hasShape = "mining_shape" in spec
        if hasShape:
            parkInfoList.append({
                "shape": spec["mining_shape"]["shape"],
                "name": name,
                "type": base["business"].encode("utf8"),
            })
            if len(parkInfoList) % 11 == 0:
                print("已获取 %s 个公园的矢量信息" % len(parkInfoList))
        print("%s :%s" % (name, "有" if hasShape else "没有"))

        time.sleep(1)
        # Longer pause on the same schedule as the original manual counter,
        # which started at 1 and was incremented after each request.
        if (requestCount + 1) % 51 == 0:
            time.sleep(60)
    return parkInfoList
def getXYArray(XYsStr):
    """Parse a coordinate string into an arcpy array of points.

    The input coordinates are GCJ-02; every pair is converted with
    ``coordinate_conversion.gcj02towgs84`` so the resulting points are WGS84.

    param XYsStr: coordinate string in the form "x,y;x,y;x,y..."
    dependence: arcpy, coordinate_conversion
    return: arcpy array holding one point per coordinate pair
    """
    vertices = arcpy.CreateObject("array")
    for pairText in XYsStr.split(';'):
        parts = pairText.split(',')
        gcjX = float(parts[0])
        gcjY = float(parts[1])
        vertex = arcpy.CreateObject("point")
        wgsX, wgsY = coordinate_conversion.gcj02towgs84(gcjX, gcjY)
        vertex.X = wgsX
        vertex.Y = wgsY
        vertices.add(vertex)
    return vertices
def _gpRingFromShape(gp, XYsStr):
    """Turn an "x,y;x,y;..." GCJ-02 string into a gp array of WGS84 points."""
    ring = gp.CreateObject("array")
    for pairText in XYsStr.split(';'):
        parts = pairText.split(',')
        gcjX, gcjY = float(parts[0]), float(parts[1])
        point = gp.CreateObject("point")
        point.X, point.Y = coordinate_conversion.gcj02towgs84(gcjX, gcjY)
        ring.add(point)
    return ring


def saveParkPolygon(parkInfoList):
    """Write the collected parks into a new polygon feature class at outPath.

    A POLYGON feature class is created (spatial reference "4326", i.e. the
    EPSG code of WGS84) with two 250-character text fields, ``name`` and
    ``type``. One row is inserted per entry of *parkInfoList*.

    param parkInfoList: list of dicts as produced by getParkInfoList(); the
        ``shape`` key becomes the row geometry, every other key is written
        to the field of the same name
    """
    gp = arcgisscripting.create()
    outWorkspace, outName = os.path.split(outPath)
    spat_ref = "4326"
    gp.CreateFeatureClass_management(outWorkspace, outName, "POLYGON", "", "", "", spat_ref)
    for fieldName in ("name", "type"):
        gp.AddField_management(outPath, fieldName, "TEXT", field_length=250)

    cur = gp.InsertCursor(outPath)
    newRow = None
    try:
        newRow = cur.newRow()
        for parkInfo in parkInfoList:
            for attr in parkInfo:
                if attr == "shape":
                    # Geometry objects are created through gp, not through
                    # getXYArray() (which uses arcpy). The original author left
                    # that call commented out -- presumably the two object
                    # families are not interchangeable; TODO confirm.
                    newRow.setValue("Shape", _gpRingFromShape(gp, parkInfo["shape"]))
                else:
                    newRow.setValue(attr, parkInfo[attr])
            cur.InsertRow(newRow)
    finally:
        # Drop the cursor/row references even when an insert fails, so they
        # do not stay alive (presumably keeping a lock on the output) after
        # an error.
        del cur, newRow
if __name__ == '__main__':
    # Step 1: gather the POI ids of all parks.
    poiidList = getParkPoiid()
    poiCount = len(poiidList)
    print("已得到 %s 个公园POI ID" % poiCount)

    # Step 2: look up each POI and keep the ones that have shape data.
    parkInfoList = getParkInfoList(poiidList)
    print(parkInfoList)

    # Step 3: write the shapes out as a polygon feature class.
    saveParkPolygon(parkInfoList)