scikit-learn 之岭回归
基本语句
from sklearn import linear_model
# Fit a ridge regressor (L2-penalized least squares) on a tiny toy dataset.
X_train = [[0, 0], [1, 1], [2, 2]]
y_train = [0, 1, 2]
reg = linear_model.Ridge(alpha=0.5)
reg.fit(X_train, y_train)
reg.coef_       # fitted weight vector (coefficients)
reg.intercept_  # fitted bias term
Plot Ridge coefficients as a function of the regularization
from sklearn import linear_model
# Same basic ridge example; alpha controls the amount of L2 shrinkage.
reg = linear_model.Ridge(alpha=0.5).fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
reg.coef_       # inspect the learned coefficients
reg.intercept_  # inspect the learned intercept
#Plot Ridge coefficients as a function of the regularization
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# Build a 10x10 Hilbert matrix, x[i, j] = 1 / (i + j + 1), by broadcasting
# a row vector (1..10) against a column vector (0..9) and inverting elementwise.
row = np.arange(1, 11)
col = np.arange(10)[:, np.newaxis]
x = 1 / (row + col)
(np.arange(1,11)+np.arange(0,10)[:,np.newaxis])#看一下这个矩阵
Out[65]:
array([[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
[ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
[ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
[ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
[ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
[ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
[ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
[ 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
[ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])
一个 shape 为 (n,) 的行向量 ndarray 加上一个 shape 为 (m, 1) 的列向量 ndarray,经 NumPy 广播(broadcasting)后得到一个 shape 为 (m, n) 的新 ndarray
运算过程是range(1,11)的矩阵每一行,加上range(0,10)的每个数生成一个新的行,最后生成10行矩阵
希尔伯特矩阵:元素为 H[i][j] = 1/(i+j+1) 的方阵,是一个著名的病态(ill-conditioned)矩阵,正好用来展示正则化的作用。
# Plot the ridge coefficient paths: how each coefficient changes as the
# regularization strength alpha is swept over several orders of magnitude.
y = np.ones(10)                         # target vector: all ones
n_alpha = 200                           # number of alpha values to sweep for the plot
alphas = np.logspace(-10, -2, n_alpha)  # alphas evenly spaced on a log scale
# np.logspace(start, stop, num) generates num points from 10**start to 10**stop,
# e.g. np.logspace(2.0, 3.0, num=4) -> array([100., 215.443469, 464.15888336, 1000.])
coefs = []
for a in alphas:
    # fit_intercept=False: solve the Hilbert system without a bias term,
    # so every plotted curve is a genuine coefficient of x.
    ridge = linear_model.Ridge(alpha=a, fit_intercept=False)
    ridge.fit(x, y)
    coefs.append(ridge.coef_)

ax = plt.gca()                      # grab the current axes
ax.plot(alphas, coefs)              # one curve per coefficient
ax.set_xscale('log')                # alphas span 8 decades: use a log x-axis
ax.set_xlim(ax.get_xlim()[::-1])    # reverse x-axis: strong -> weak regularization
# (seq[::-1] reverses a sequence, e.g. [1, 3, 4][::-1] == [4, 3, 1])
plt.xlabel('alpha')
plt.ylabel('coef')
# Bugfix: was plt.axes('tight'), which (mis)creates a new Axes object;
# plt.axis('tight') is the call that fits the axis limits to the data.
plt.axis('tight')
#Classification of text documents using sparse features
#这个例子我没看,因为还没学这么多。。。。
交叉验证(cross-validation)确定 alpha。注意:下面的代码指定了 cv=3,因此做的是 3 折交叉验证;只有 cv 保持默认值 None 时,RidgeCV 才使用高效的留一交叉验证(leave-one-out / generalized CV)。
# Select the best alpha from the candidate grid [0.1, 1, 10] by cross-validation.
# NOTE(review): cv=3 means 3-fold cross-validation (GridSearchCV-style), NOT
# leave-one-out; RidgeCV only uses the efficient leave-one-out (generalized) CV
# when cv is left as None, its default.
reg = linear_model.RidgeCV(alphas=[0.1,1,10],cv=3)
reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
reg.alpha_#the best alpha found by cross-validation