知识点回顾
zip()
>>>a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b) # 打包为元组的列表
[(1, 4), (2, 5), (3, 6)]
>>> zip(a,c) # 元素个数与最短的列表一致
[(1, 4), (2, 5), (3, 6)]
>>> zip(*zipped) # 与 zip 相反,*zipped 可理解为解压,返回二维矩阵式
[(1, 2, 3), (4, 5, 6)]
add_prefix() 与join()合并使用添加别名
#set.union() 用于去重,sorted() 用于排序并返回列表;调用 set.union 时参数前不加 * 解包会报错:descriptor 'union' requires a 'set' object but received a 'generator'
#strip() 的功能等同于其他语言(如 Java)中的 trim():去掉字符串首尾的空白字符
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime
'''
图形化显示海地地震危机数据
'''
# 1. Load the raw data.
data = pd.read_csv('C:\\tools\\pydata-book-master\\ch08\\Haiti.csv')

# Column names and their dtypes.
column_dtypes = data.dtypes
print(column_dtypes)
# Serial int64
# INCIDENT TITLE object
# INCIDENT DATE object
# LOCATION object
# DESCRIPTION object
# CATEGORY object
# LATITUDE float64
# LONGITUDE float64
# APPROVED object
# VERIFIED object

# 2. Clean the data.
# Look at the first few CATEGORY values: each one is a comma-separated
# list of "code. name" labels.
first_categories = data['CATEGORY'][:6]
print(first_categories)
# 0 1. Urgences | Emergency, 3. Public Health,
# 1 1. Urgences | Emergency, 2. Urgences logistiqu...
# 2 2. Urgences logistiques | Vital Lines, 8. Autr...
# 3 1. Urgences | Emergency,
# 4 1. Urgences | Emergency,
# 5 5e. Communication lines down,

# Summary statistics of the numeric columns.
summary = data.describe()
print(summary)
# Serial LATITUDE LONGITUDE
# count 3593.000000 3593.000000 3593.000000
# mean 2080.277484 18.611495 -72.322680
# std 1171.100360 0.738572 3.650776
# min 4.000000 18.041313 -74.452757
# 25% 1074.000000 18.524070 -72.417500
# 50% 2163.000000 18.539269 -72.335000
# 75% 3088.000000 18.561820 -72.293570
# max 4052.000000 50.226029 114.174287
# Some records have out-of-range coordinates or no category. Keep only the
# rows inside the Haiti bounding box (latitude 18..20, longitude -75..-70)
# that also have a CATEGORY value. The upper longitude bound must be -70;
# a bound of +70 would let badly located points through.
in_haiti = (
    (data.LATITUDE > 18) & (data.LATITUDE < 20)
    & (data.LONGITUDE > -75) & (data.LONGITUDE < -70)
)
data = data[in_haiti & data.CATEGORY.notnull()]
#方法一
def to_cat_list(catstr):
    """Split a comma-separated category string into a list of labels.

    Each piece has its surrounding whitespace removed with ``str.strip()``,
    and pieces that end up empty (for example those produced by a trailing
    comma) are dropped.

    Args:
        catstr: String of labels separated by commas.

    Returns:
        List of the non-empty, stripped labels in their original order.
    """
    stripped = (piece.strip() for piece in catstr.split(','))
    # Filtering on truthiness removes the empty strings.
    return [piece for piece in stripped if piece]


print(to_cat_list('a,as,ff,,'))  # ['a', 'as', 'ff']
#方法二
def get_all_categories(cat_series):
    """Return the sorted list of distinct labels found in *cat_series*.

    Args:
        cat_series: Iterable of comma-separated category strings; each one
            is split with ``to_cat_list``.

    Returns:
        Sorted list of the unique labels; an empty list when *cat_series*
        yields nothing.
    """
    cat_sets = (set(to_cat_list(entry)) for entry in cat_series)
    # The generator must be unpacked with *: union() expects sets as
    # separate arguments. Calling union on an empty set instance (instead of
    # the unbound set.union) keeps this working when there are no sets at all.
    return sorted(set().union(*cat_sets))
# Quick check: labels from both strings are merged, de-duplicated and sorted.
demo_categories = get_all_categories(['a,as,ff,,', 'a,c,d,f,g'])
print(demo_categories)  # ['a', 'as', 'c', 'd', 'f', 'ff', 'g']
#方法三:将分类信息拆分为编码和英文名称
def get_english(cat):
    """Split a category label into its code and English name.

    The label has the form ``"<code>. <name>"``. When the name part contains
    a ``|``, the text after the first ``|`` is used as the name.

    Args:
        cat: Category label such as ``'1. Urgences | Emergency'``.

    Returns:
        Tuple ``(code, name)`` with the name stripped of surrounding
        whitespace.

    Raises:
        ValueError: If *cat* contains no ``.`` separator.
    """
    # Split on the first period only, so that names which themselves contain
    # a period do not break the two-value unpacking.
    code, names = cat.split('.', 1)
    if '|' in names:
        names = names.split('|')[1]
    return code, names.strip()


print(get_english('1. Urgences | Emergency'))  # ('1', 'Emergency')
#元组—>字典
# Sample tuple left over from a tuple -> dict experiment; not used below.
truple = ('a', 'b')

# Every distinct category label that occurs in the CATEGORY column.
all_cats = get_all_categories(data.CATEGORY)

# Build a {code: English name} dictionary from the (code, name) tuples.
english_mapping = {code: name for code, name in map(get_english, all_cats)}
print(english_mapping['2a'])
def get_code(seq):
    """Return the code part of every non-empty label in *seq*.

    The code is the text before the first ``.`` of a label; a label without
    a ``.`` is returned unchanged. Falsy entries (such as empty strings) are
    skipped.

    Args:
        seq: Iterable of category labels such as ``'2a. Food Shortage'``.

    Returns:
        List of codes, in the order of the labels they came from.
    """
    return [label.split('.')[0] for label in seq if label]
# Codes of all category labels.
all_codes = get_code(all_cats)
print(all_codes)

# Unique codes as a pandas Index; these become the indicator column labels.
code_index = pd.Index(np.unique(all_codes))
print(code_index)

# All-zero indicator frame: same row index as data, one column per code.
dummy_frame = pd.DataFrame(np.zeros((len(data), len(code_index))),
                           index=data.index, columns=code_index)
# print(dummy_frame) shows, for example:
# 1 1a 1b 1c 1d 2 2a ... 7h 8 8a 8c 8d 8e 8f
# 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0
# 4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0
# 5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0

# First six columns by position. DataFrame.ix no longer exists in current
# pandas; .iloc is the positional indexer.
print(dummy_frame.iloc[:, :6])
# 3. For every record, set the indicator columns of its category codes to 1.
# zip() pairs each row label with that row's CATEGORY string.
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    # Label-based assignment (.loc replaces the removed .ix indexer).
    dummy_frame.loc[row, codes] = 1

# Prefix the indicator columns with 'category_' and attach them to data.
data = data.join(dummy_frame.add_prefix('category_'))

# Columns 10..14 by position (.iloc replaces the removed .ix indexer).
print(data.iloc[:, 10:15])
# category_1 category_1a ... category_1c category_1d
# 0 1.0 0.0 ... 0.0 0.0
# 4 1.0 0.0 ... 0.0 0.0
# 5 0.0 0.0 ... 0.0 0.0
# 6 0.0 0.0 ... 0.0 0.0
#from mpl_toolkits.basemap import Basemap
import mpl_toolkits as mpl
def basic_haiti_map(ax=None, lllat=17.25, urlat=20.25, lllon=-75, urlon=-71):
    """Create a stereographic Basemap and draw its outlines.

    The projection is centred on the midpoint of the given corners, and
    coastlines, state borders and country borders are drawn on it.

    Args:
        ax: Matplotlib axes handed to Basemap as ``ax``.
        lllat: Latitude passed to Basemap as ``llcrnrlat``.
        urlat: Latitude passed to Basemap as ``urcrnrlat``.
        lllon: Longitude passed to Basemap as ``llcrnrlon``.
        urlon: Longitude passed to Basemap as ``urcrnrlon``.

    Returns:
        The configured Basemap instance.
    """
    # Importing the bare mpl_toolkits package does not load its basemap
    # submodule, so attribute access on the package fails; import the class
    # explicitly where it is needed.
    from mpl_toolkits.basemap import Basemap

    m = Basemap(ax=ax, projection='stere',
                lon_0=(urlon + lllon) / 2,
                lat_0=(urlat + lllat) / 2,
                llcrnrlat=lllat, urcrnrlat=urlat,
                llcrnrlon=lllon, urcrnrlon=urlon,
                resolution='f')
    m.drawcoastlines()
    m.drawstates()
    m.drawcountries()
    return m
# 2 x 2 grid of maps, one panel per selected category code.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)

to_plot = ['2a', '1', '3c', '7a']

# Corner coordinates handed to basic_haiti_map for every panel.
lllat = 17.25
urlat = 20.25
lllon = -75
urlon = -71

for code, ax in zip(to_plot, axes.flat):
    m = basic_haiti_map(ax, lllat=lllat, urlat=urlat, lllon=lllon, urlon=urlon)
    # Records flagged with this category code.
    cat_data = data[data['category_%s' % code] == 1]
    # Convert longitude/latitude to map coordinates; the underlying arrays
    # (.values) are passed rather than the Series objects.
    x, y = m(cat_data.LONGITUDE.values, cat_data.LATITUDE.values)
    m.plot(x, y, 'k.', alpha=0.5)
    ax.set_title('%s:%s' % (code, english_mapping[code]))