数据的标准化主要通过调用 sklearn.preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True) 来完成。
下面通过代码来看一下它的使用方式:
from sklearn.preprocessing import StandardScaler
import numpy as np
##给出数据,若数据是一维数组(只有一个特征),要用reshape(-1, 1)转为二维的列向量,否则会报错
data = np.array(range(100)).reshape(-1, 1)
out:
array([[ 0],
[ 1],
[ 2],
[ 3],
[ 4],
[ 5],
[ 6],
[ 7],
[ 8],
[ 9],
[10],
[11],
[12],
[13],
[14],
[15],
[16],
[17],
[18],
[19],
[20],
[21],
[22],
[23],
[24],
[25],
[26],
[27],
[28],
[29],
[30],
[31],
[32],
[33],
[34],
[35],
[36],
[37],
[38],
[39],
[40],
[41],
[42],
[43],
[44],
[45],
[46],
[47],
[48],
[49],
[50],
[51],
[52],
[53],
[54],
[55],
[56],
[57],
[58],
[59],
[60],
[61],
[62],
[63],
[64],
[65],
[66],
[67],
[68],
[69],
[70],
[71],
[72],
[73],
[74],
[75],
[76],
[77],
[78],
[79],
[80],
[81],
[82],
[83],
[84],
[85],
[86],
[87],
[88],
[89],
[90],
[91],
[92],
[93],
[94],
[95],
[96],
[97],
[98],
[99]])
scaler = StandardScaler()
scaler.fit(data)
out:
StandardScaler(copy=True, with_mean=True, with_std=True)
##下面的这几行只是用来查看拟合后得到的参数,对标准化结果没有影响
scaler.scale_ #每个特征的缩放系数(即标准差)
scaler.mean_ #样本均值
scaler.var_ #每个特征的方差(总体方差,除以n而不是n-1)
scaler.n_samples_seen_ #已处理的样本数量(不是shape)
##对数据进行标准化:fit_transform会先重新拟合再转换,transform则直接使用已拟合的参数;这里拟合用的是同一份数据,所以两行的结果相同
scaler.fit_transform(data)
scaler.transform(data)
out:
array([[-1.71481604],
[-1.68017329],
[-1.64553055],
[-1.6108878 ],
[-1.57624505],
[-1.5416023 ],
[-1.50695955],
[-1.4723168 ],
[-1.43767406],
[-1.40303131],
[-1.36838856],
[-1.33374581],
[-1.29910306],
[-1.26446031],
[-1.22981757],
[-1.19517482],
[-1.16053207],
[-1.12588932],
[-1.09124657],
[-1.05660382],
[-1.02196108],
[-0.98731833],
[-0.95267558],
[-0.91803283],
[-0.88339008],
[-0.84874733],
[-0.81410459],
[-0.77946184],
[-0.74481909],
[-0.71017634],
[-0.67553359],
[-0.64089084],
[-0.6062481 ],
[-0.57160535],
[-0.5369626 ],
[-0.50231985],
[-0.4676771 ],
[-0.43303435],
[-0.39839161],
[-0.36374886],
[-0.32910611],
[-0.29446336],
[-0.25982061],
[-0.22517786],
[-0.19053512],
[-0.15589237],
[-0.12124962],
[-0.08660687],
[-0.05196412],
[-0.01732137],
[ 0.01732137],
[ 0.05196412],
[ 0.08660687],
[ 0.12124962],
[ 0.15589237],
[ 0.19053512],
[ 0.22517786],
[ 0.25982061],
[ 0.29446336],
[ 0.32910611],
[ 0.36374886],
[ 0.39839161],
[ 0.43303435],
[ 0.4676771 ],
[ 0.50231985],
[ 0.5369626 ],
[ 0.57160535],
[ 0.6062481 ],
[ 0.64089084],
[ 0.67553359],
[ 0.71017634],
[ 0.74481909],
[ 0.77946184],
[ 0.81410459],
[ 0.84874733],
[ 0.88339008],
[ 0.91803283],
[ 0.95267558],
[ 0.98731833],
[ 1.02196108],
[ 1.05660382],
[ 1.09124657],
[ 1.12588932],
[ 1.16053207],
[ 1.19517482],
[ 1.22981757],
[ 1.26446031],
[ 1.29910306],
[ 1.33374581],
[ 1.36838856],
[ 1.40303131],
[ 1.43767406],
[ 1.4723168 ],
[ 1.50695955],
[ 1.5416023 ],
[ 1.57624505],
[ 1.6108878 ],
[ 1.64553055],
[ 1.68017329],
[ 1.71481604]])
##下面是把标准化后的数据还原为原始数据
scaler.inverse_transform(scaler.fit_transform(data))