城市气候与海洋的关系研究
导入包
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
%matplotlib inline
城市信息
# City names used throughout the analysis.
citis = np.array([
    'asti', 'bologna', 'cesena', 'faenza', 'ferrara',
    'mantova', 'milano', 'piacenza', 'ravenna', 'torino',
])
数据集如下图:
选择某个城市进行查看
# Load the three CSV files available for Asti and display each one.
df1 = pd.read_csv('asti_150715.csv')
df1
df2 = pd.read_csv('asti_250715.csv')
df2
df3 = pd.read_csv('asti_270615.csv')
df3
# Question explored here: can row labels repeat when pandas concatenates?
# ignore_index=True discards the original labels and renumbers the rows.
df = pd.concat([df1, df2, df3], ignore_index=True)
df
# Display the table without the 'Unnamed: 0' column; the result is not
# stored, so df itself is left unchanged.
df.drop('Unnamed: 0', axis='columns')
使用程序快速导入数据集
# City names; each one is substituted into the CSV file names read below.
citis = np.array(
    [
        'asti',
        'bologna',
        'cesena',
        'faenza',
        'ferrara',
        'mantova',
        'milano',
        'piacenza',
        'ravenna',
        'torino',
    ]
)
# Number of cities in the array.
citis.size
Out: 10
# Build a dict keyed by city name whose values are that city's DataFrame.
data = {}
for city in citis:
    # Each city has three CSV files that share the same file-name suffixes.
    df1 = pd.read_csv('%s_150715.csv' % city)
    df2 = pd.read_csv('%s_250715.csv' % city)
    df3 = pd.read_csv('%s_270615.csv' % city)
    # Merge the three tables into one, renumbering the rows from 0.
    df = pd.concat([df1, df2, df3], ignore_index=True)
    # Remove the 'Unnamed: 0' column in place before storing the table.
    df.drop('Unnamed: 0', axis=1, inplace=True)
    data[city] = df
各城市与海洋距离,最高温度,最低温度,最高湿度,最低湿度
# Display the merged table for Milano as a sample.
data['milano']
# Collect one summary row per city:
# [city, temp_min, temp_max, humidity_min, humidity_max, dist].
info = []
for city in citis:
    df = data[city]
    temp_min = df['temp'].min()
    temp_max = df['temp'].max()
    humidity_min = df['humidity'].min()
    humidity_max = df['humidity'].max()
    # Take 'dist' from the first row by position rather than by label 0, so
    # the lookup does not depend on how the index is labelled.
    # NOTE(review): presumably 'dist' is constant within a city -- confirm.
    dist = df['dist'].iloc[0]
    info.append([city, temp_min, temp_max, humidity_min, humidity_max, dist])
使用选取的数据新建一个数据表
# Turn the per-city summary rows into a table with one named column per value.
df_cities = pd.DataFrame(
    info,
    columns=['city', 'temp_min', 'temp_max', 'humidity_min', 'humidity_max', 'dist'],
)
df_cities
显示最高温度与离海远近的关系
# Scatter plot with 'dist' on the x axis and 'temp_max' on the y axis.
df_cities.plot(x='dist', y='temp_max', kind='scatter')
线性回归:算法解决问题
# Let an algorithm solve the problem.
# Linear regression: study some data, then summarise the pattern ------> an equation
from sklearn.linear_model import LinearRegression
lrg = LinearRegression()
近海城市(与海洋距离 < 150)的距离与最高温度的关系
# Select the near-sea cities: distance to the sea below 150.
cond = df_cities['dist'] < 150
# Training data must be two-dimensional, i.e. one inner list per sample:
# [[sample 1], [sample 2], [sample 3], ...]
X = df_cities[['dist']][cond]
y = df_cities['temp_max'][cond]
# Fit on the near-sea rows: the feature is the distance to the sea and the
# target is the maximum temperature.
lrg.fit(X, y)
Out: LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
# The estimator also accepts ndarrays, so refit on the underlying arrays.
X1, y1 = X.values, y.values
lrg.fit(X1, y1)
Out: LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
计算y=w*x+b
# Slope (w) and intercept (b) of the fitted line y = w*x + b.
w_, b_ = lrg.coef_[0], lrg.intercept_
display(w_, b_)
Out:
0.013760226316996685
32.61657542625584
# f(x) = w*x + b
# Scatter every city's maximum temperature against its distance.
plt.scatter(df_cities['dist'], df_cities['temp_max'])
# Evaluate the learned line at 10 evenly spaced distances from 0 to 150.
x = np.linspace(0, 150, num=10)
# Trend given by the fitted slope and intercept.
y = b_ + w_ * x
plt.plot(x, y, color='green')
远海城市(与海洋距离 > 120)的距离与最高温度的关系
# Select the far-from-sea cities: distance to the sea above 120.
cond = df_cities['dist'] > 120
X = df_cities.loc[cond, ['dist']]
y = df_cities.loc[cond, 'temp_max']
# Fit a fresh estimator on the far-sea rows.
lrg = LinearRegression()
lrg.fit(X, y)
Out: LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
# Weight (slope) and bias (intercept) of f(x) = w*x + b for the far-sea fit.
w1_, b1_ = lrg.coef_[0], lrg.intercept_
display(w1_, b1_)
# Machine learning, artificial intelligence
# A mathematical problem
绘制近海与远海两条回归直线 y = w*x + b
# f(x) = w*x + b
# Scatter every city's maximum temperature against its distance.
plt.scatter(df_cities['dist'], df_cities['temp_max'])
# Near-sea fitted line, drawn in green over distances 0 to 150.
x = np.linspace(0, 150, num=10)
y = b_ + w_ * x
plt.plot(x, y, color='green')
# Far-sea fitted line, drawn in red over distances 120 to 360.
x = np.linspace(120, 360, num=100)
y = b1_ + w1_ * x
plt.plot(x, y, color='red')
用numpy创建一个直方图,将360度划分为8个面元,将数据分类到这8个面元中
# The 'wind_deg' column of the Milano table.
wind_deg = data['milano'].loc[:, 'wind_deg']
wind_deg
Out:
0 100
1 0
2 140
3 0
4 80
5 0
……
# Bin the readings into 8 equal-width bins over [0, 360]:
# wind_data holds the count per bin, wind_range the bin edges.
wind_data, wind_range = np.histogram(wind_deg, bins=8, range=(0, 360))
wind_data
Out:
array([21, 9, 9, 6, 14, 3, 2, 2], dtype=int64)
# Draw the eight wind-direction counts as a polar bar chart.
plt.figure(figsize=(8, 8))
plt.axes(polar=True)
# One random RGB colour per bar.
colors = np.random.rand(8, 3)
# The x positions are the left edges of the eight histogram bins, each
# pi/4 radians (45 degrees) wide. Draw edge-aligned bars exactly one bin
# wide; the default (centred, width 0.8) would shift every bar half a bin
# away from the range it counts and make neighbouring bars overlap.
plt.bar(np.arange(0, 2 * np.pi, np.pi / 4), wind_data,
        width=np.pi / 4, align='edge', color=colors)