# 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_34069180/article/details/84954174
# -*- coding: UTF-8 -*-
# Script metadata: author tag and creation timestamp (plain strings, not
# referenced by any other code visible in this file).
_author_ = 'zy'
_date_ = '2018/12/10 0010 19:56'
import requests,pymongo,json,time
# API key sent as the ``appKey`` query parameter; empty by default, so it
# must be filled in before the script can authenticate.
APPKEY = ''
# Identifier substituted for ``{appid}`` in the URL template below.
appid = '357415058'
# Request URL template with ``{appid}``, ``{pageid}`` and ``{appkey}``
# placeholders; ``rows=100`` is fixed (presumably the page size -- confirm
# against the API documentation).
url = 'http://opendata.sz.gov.cn/api/{appid}/1/service.xhtml?page={pageid}&rows=100&appKey={appkey}'
def get_data(appid, appkey, dbname, allpagenum, timeout=30):
    """Download pages of building records and store them in MongoDB.

    For every page index in ``range(allpagenum)`` the module-level ``url``
    template is filled in and requested. Each item of the ``data`` list in
    the JSON response is reduced to three fields (building number, name,
    address) and inserted, one document at a time, into the ``shenzhen``
    database of the MongoDB server at 127.0.0.1:27017.

    A page whose request fails with a connection error or a timeout is
    reported, followed by a two-second pause; the page is not retried.

    Args:
        appid: Value substituted for ``{appid}`` in the URL template.
        appkey: Value substituted for ``{appkey}`` in the URL template.
        dbname: Name of the target *collection* inside the ``shenzhen``
            database (despite the parameter name, not a database name).
        allpagenum: Number of pages to request; indices run from 0 to
            ``allpagenum - 1``.
        timeout: Seconds to wait on the HTTP request before giving up on
            the page. Defaults to 30.

    Raises:
        ValueError: If a response body is not valid JSON.
        KeyError: If a response has no ``data`` key or an item lacks one
            of ``BLDG_NO``, ``NOWNAME`` or ``BLDADDR``.
    """
    # requests wraps socket-level failures in its own exception classes, so
    # the builtin ConnectionRefusedError alone would never match here.
    network_errors = (
        ConnectionRefusedError,
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
    )

    # The context manager closes the client's connections on exit, including
    # when an unexpected exception propagates.
    with pymongo.MongoClient('127.0.0.1', 27017) as client:
        collection = client.shenzhen[dbname]

        # NOTE(review): numbering starts at 0 as in the original code;
        # confirm whether the API's ``page`` parameter is 1-based.
        for page in range(allpagenum):
            try:
                pjurl = url.format(appid=appid, pageid=page, appkey=appkey)
                print('正在抓取页面' + pjurl)
                result = requests.get(pjurl, timeout=timeout)
                print('当前状态' + str(result.status_code))
                content = json.loads(result.text)['data']
                # TODO: add a cleaning step that checks each attribute is
                # actually present before it is read.
                for i in content:
                    data = {
                        '建筑物编号': i['BLDG_NO'],
                        '名称': i['NOWNAME'],
                        '地址': i['BLDADDR'],
                    }
                    collection.insert_one(data)
                    print('插入一条')
            except network_errors:
                print('计算机访问频率过快,此时抓取到第' + str(page) + '页')
                # Back off briefly before moving on to the next page.
                time.sleep(2)
if __name__ == '__main__':
    # Script entry point: request two pages using the module-level appid and
    # APPKEY and store the records in the 'housetest' collection.
    # (The appid could instead be read interactively with input().)
    get_data(appid, APPKEY, 'housetest', 2)