海地地震危机数据分析 + 在中国地图上标记中山、成都、重庆、昆山的位置

我们需要分析海地地震求助的数据(GitHub),并画出求助分布点。
1.检查并清洗数据。
2.构建dummy_frame(各分类的0/1指标矩阵),为绘制点分布做准备。
3.在地图上显示求助信息。

1.检查并清洗数据。

import pandas as pd
import numpy as np
%matplotlib inline
%matplotlib notebook
import matplotlib.pyplot as plt
from pandas import DataFrame
# Load the Haiti crisis reports. Forward slashes are used because the
# backslash form contained the invalid escape sequences '\h' and '\H'
# and only resolved as a path on Windows.
data = pd.read_csv('data/haiti/Haiti.csv')
# Print column names, non-null counts and dtypes.
data.info()
#<class 'pandas.core.frame.DataFrame'>
#RangeIndex: 3593 entries, 0 to 3592
#Data columns (total 10 columns):
# #   Column          Non-Null Count  Dtype  
#---  ------          --------------  -----  
# 0   Serial          3593 non-null   int64  
# 1   INCIDENT TITLE  3593 non-null   object 
# 2   INCIDENT DATE   3593 non-null   object 
# 3   LOCATION        3592 non-null   object 
# 4   DESCRIPTION     3593 non-null   object 
# 5   CATEGORY        3587 non-null   object 
# 6   LATITUDE        3593 non-null   float64
# 7   LONGITUDE       3593 non-null   float64
# 8   APPROVED        3593 non-null   object 
# 9   VERIFIED        3593 non-null   object 
#dtypes: float64(2), int64(1), object(7)
#memory usage: 182.5+ KB
# Inspect the data
# Check 1: first ten rows of the timestamp and coordinate columns.
preview_columns = ['INCIDENT DATE', 'LATITUDE', 'LONGITUDE']
data[preview_columns].head(10)
#      INCIDENT DATE   LATITUDE   LONGITUDE
#0  05/07/2010 17:26  18.233333  -72.533333
#1  28/06/2010 23:06  50.226029    5.729886
#2  24/06/2010 16:21  22.278381  114.174287
#3  20/06/2010 21:59  44.407062    8.933989
#4  18/05/2010 16:26  18.571084  -72.334671
#5  26/04/2010 13:14  18.593707  -72.310079
#6  26/04/2010 14:19  18.482800  -73.638800
#7  26/04/2010 14:27  18.415000  -73.195000
#8  15/03/2010 10:58  18.517443  -72.236841
#9  15/03/2010 11:00  18.547790  -72.410010

# Check 2: first six entries of the comma-separated CATEGORY column.
data['CATEGORY'].head(6)
#Out[4]:
#0          1. Urgences | Emergency, 3. Public Health, 
#1    1. Urgences | Emergency, 2. Urgences logistiqu...
#2    2. Urgences logistiques | Vital Lines, 8. Autr...
#3                            1. Urgences | Emergency, 
#4                            1. Urgences | Emergency, 
#5                       5e. Communication lines down, 
#Name: CATEGORY, dtype: object

# Check 3: summary statistics of the numeric columns. The recorded output
# below shows LATITUDE up to ~50.2 and LONGITUDE up to ~114.2, i.e. some
# records lie far outside the bulk of the data (lat ~18.5, lon ~-72.3).
data.describe()
#            Serial     LATITUDE    LONGITUDE
#count  3593.000000  3593.000000  3593.000000
#mean   2080.277484    18.611495   -72.322680
#std    1171.100360     0.738572     3.650776
#min       4.000000    18.041313   -74.452757
#25%    1074.000000    18.524070   -72.417500
#50%    2163.000000    18.539269   -72.335000
#75%    3088.000000    18.561820   -72.293570
#max    4052.000000    50.226029   114.174287

# Drop records with out-of-range coordinates and records with no category.
# Kept rows satisfy 18 < LATITUDE < 20 and -75 < LONGITUDE < -70 (strict).
lat_ok = (data['LATITUDE'] > 18) & (data['LATITUDE'] < 20)
lon_ok = (data['LONGITUDE'] > -75) & (data['LONGITUDE'] < -70)
has_category = data['CATEGORY'].notnull()
data = data[lat_ok & lon_ok & has_category]
# Re-check the summary statistics after filtering.
data.describe()
#            Serial     LATITUDE    LONGITUDE
#count  3569.000000  3569.000000  3569.000000
#mean   2081.498459    18.592503   -72.424994
#std    1170.311824     0.273695     0.291018
#min       4.000000    18.041313   -74.452757
#25%    1074.000000    18.524200   -72.417498
#50%    2166.000000    18.539269   -72.335000
#75%    3089.000000    18.561800   -72.293939
#max    4052.000000    19.940630   -71.099489

2.构建dummy_frame(各分类的0/1指标矩阵),为绘制点分布做准备。

def to_cat_list(catstr):
    """Split a comma-separated string into a list of trimmed, non-empty items.

    Surrounding whitespace is stripped from every piece; pieces that are
    empty after stripping (e.g. from a trailing comma) are dropped.
    """
    items = []
    for piece in catstr.split(','):
        piece = piece.strip()
        if piece:
            items.append(piece)
    return items

def get_all_categories(cat_series):
    """Return the sorted list of distinct category labels in cat_series.

    Each element of cat_series is a comma-separated category string; it is
    split with to_cat_list and the labels of all elements are merged.

    Returns:
        A sorted list of unique labels; an empty list when cat_series has
        no elements.
    """
    cat_sets = (set(to_cat_list(x)) for x in cat_series)
    # Union onto an empty set so that an empty cat_series yields [] instead
    # of the TypeError raised by calling set.union with no arguments.
    return sorted(set().union(*cat_sets))

def get_english(cat):
    """Split a category label into (code, name), keeping the text after '|'.

    The code is the text before the first '.'. If the remainder contains a
    '|' (as in '1. Urgences | Emergency'), only the part after the first
    '|' is kept as the name. The name is returned with surrounding
    whitespace stripped.

    Returns:
        A (code, name) tuple of strings.

    Raises:
        ValueError: if cat contains no '.'.
    """
    # Split on the first '.' only, so a name that itself contains a period
    # still unpacks into exactly two parts (consistent with get_code).
    code, names = cat.split('.', 1)
    if '|' in names:
        names = names.split('|')[1]
    return code, names.strip()
# Collect the distinct category labels, then map each code to its name
# as returned by get_english.
all_cats = get_all_categories(data.CATEGORY)
english_mapping = {
    cat_code: cat_name for cat_code, cat_name in map(get_english, all_cats)
}
# 建立dummy_table表格

def get_code(seq):
    """Return the code (text before the first '.') of every truthy item in seq.

    Works on any iterable of label strings (used here with both sets and
    lists); falsy items such as empty strings are skipped.
    """
    codes = []
    for label in seq:
        if not label:
            continue
        prefix, _, _ = label.partition('.')
        codes.append(prefix)
    return codes

# One column per distinct category code, one row per record in data,
# initialised to 0.0 everywhere.
all_codes = get_code(all_cats)
code_index = pd.Index(np.unique(all_codes))
zeros = np.zeros((len(data), len(code_index)))
dummy_frame = pd.DataFrame(zeros, index=data.index, columns=code_index)

print(dummy_frame)
#        1   1a   1b   1c   1d    2   2a   2b   2c   2d  ...   7c   7d   7g  \
#0     0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
#4     0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
#5     0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
#6     0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
#7     0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
# Fill step: re-extract the codes of every record and set the matching
# indicator columns of that row to 1.
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    dummy_frame.loc[row, codes] = 1

# Attach the indicator columns to the records as 'category_<code>'.
data = data.join(dummy_frame.add_prefix('category_'))
# Show the joined frame. This must reference `data`; no `data2` is defined.
data.iloc[:, :]

(图:上一段代码的输出——合并 category_* 指标列之后的 data 表格)

3.在地图上显示求助信息。

from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt

def basic_haiti_map(ax=None, lllat=17.25, urlat=20.25, lllon=-75, urlon=-71):
    """Build a Basemap for a latitude/longitude box and draw its boundaries.

    Args:
        ax: Matplotlib axes handed to Basemap (None lets Basemap choose).
        lllat, urlat: Latitudes of the lower-left and upper-right corners.
        lllon, urlon: Longitudes of the lower-left and upper-right corners.

    Returns:
        The Basemap instance ('stere' projection centred on the middle of
        the box, resolution 'f') with coastlines, states and countries drawn.
    """
    # Projection centre: midpoint of the requested box.
    center_lon = (urlon + lllon) / 2
    center_lat = (urlat + lllat) / 2
    basemap = Basemap(
        ax=ax,
        projection='stere',
        lon_0=center_lon,
        lat_0=center_lat,
        llcrnrlat=lllat,
        urcrnrlat=urlat,
        llcrnrlon=lllon,
        urcrnrlon=urlon,
        resolution='f',
    )
    # Same drawing order as before: coastlines, states, countries.
    for draw_boundary in (basemap.drawcoastlines,
                          basemap.drawstates,
                          basemap.drawcountries):
        draw_boundary()
    return basemap
# 2x2 grid of panels, one per category code listed in to_plot.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)

to_plot = ['2a', '1', '3c', '7a']

# Map bounds shared by all four panels.
lllat, urlat, lllon, urlon = 17.25, 20.25, -75, -71

for code, ax in zip(to_plot, axes.flat):
    m = basic_haiti_map(ax, lllat=lllat, urlat=urlat, lllon=lllon, urlon=urlon)
    # Keep only the records flagged with this category code.
    flag_column = 'category_%s' % code
    cat_data = data[data[flag_column] == 1]
    # Project longitude/latitude to map coordinates before plotting.
    x, y = m(cat_data.LONGITUDE.values, cat_data.LATITUDE.values)
    m.plot(x, y, 'k.', alpha=0.5)
    panel_title = '%s:%s' % (code, english_mapping[code])
    ax.set_title(panel_title)

(图:上一段代码的输出——2a、1、3c、7a 四个分类的求助点在海地地图上的分布,2×2 子图)

PS:画出中国地图,并标记中山、成都、重庆、昆山的位置

# Single-panel map covering China, with four cities marked.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)
lllat = 2     # lower-left corner latitude
lllon = 72    # lower-left corner longitude
urlat = 55    # upper-right corner latitude
urlon = 135   # upper-right corner longitude
# Plain string rather than a one-element list: formatting a list into the
# title would render its brackets and quotes.
code = 'China : ZS,CD,CQ,KS'
# City coordinates, paired by position (presumably in the order of the
# title: Zhongshan, Chengdu, Chongqing, Kunshan).
LON = [113.38, 104.07, 106.33, 120.98]
LAT = [22.52, 30.67, 29.35, 31.38]

m = basic_haiti_map(ax=axes, lllat=lllat, urlat=urlat, lllon=lllon, urlon=urlon)
for lon, lat in zip(LON, LAT):
    # Project each city to map coordinates and draw one marker for it.
    x, y = m(lon, lat)
    m.scatter(x, y, s=100, marker='o', color='#FF5600')
axes.set_title('%s:' % code)

(图:上一段代码的输出——标记了中山、成都、重庆、昆山四个点的中国地图)

猜你喜欢

转载自blog.csdn.net/m0_46629123/article/details/108876138