# Ridge Regression
# Part 1: ridge regression solved with the normal equation (closed form)
import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
# Load the data set from the CSV file
data = genfromtxt(r"longley.csv", delimiter=',')
print(data)
# Split into features and target. Row 0 and column 0 are skipped (presumably
# the CSV header and row labels -- confirm against longley.csv); column 1 is
# the target and the remaining columns are the features.
x_data = data[1:, 2:]
y_data = data[1:, 1, np.newaxis]  # keep the target as a column vector
print(x_data)
print(y_data)
print(x_data.shape)
print(y_data.shape)
# Prepend a bias column of ones; its length follows the data instead of being
# hard-coded, so the script keeps working if the number of rows changes.
X_data = np.concatenate((np.ones((x_data.shape[0], 1)), x_data), axis=1)
print(X_data.shape)
print(X_data[:3])
# Ridge regression: solve for the coefficients with the normal equation
def weights(xArr, yArr, lam=0.2):
    """Return the closed-form ridge-regression coefficients.

    Solves ``(X^T X + lam * I) w = X^T y`` for ``w``.

    Args:
        xArr: 2-D array-like of samples, one row per sample. Every column is
            penalised equally, including a bias column of ones if present.
        yArr: Targets as a column of shape (n_samples, 1); a flat sequence
            of length n_samples is also accepted.
        lam: Ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        A ``numpy.matrix`` with one row per column of ``xArr``, or ``None``
        (after printing a message) when the penalised matrix is singular.
    """
    x = np.atleast_2d(np.asarray(xArr, dtype=float))
    y = np.asarray(yArr, dtype=float)
    if y.ndim < 2:
        # Line the targets up with the sample rows of x.
        y = y.reshape(x.shape[0], -1)

    gram = x.T @ x + lam * np.eye(x.shape[1])
    try:
        # Solving the system avoids forming an explicit inverse, and avoids
        # comparing a floating-point determinant with 0.0 (which underflows
        # for small-scale but perfectly invertible matrices).
        ws = np.linalg.solve(gram, x.T @ y)
    except np.linalg.LinAlgError:
        print("This matrix cannot do inverse")
        return None
    # np.asmatrix keeps the historical matrix return type; np.mat itself was
    # removed in NumPy 2.0.
    return np.asmatrix(ws)
# Fit the ridge coefficients with the default penalty
ws = weights(X_data, y_data)
print(ws)
# Predicted values for the training samples. np.asmatrix replaces np.mat,
# which was removed in NumPy 2.0; the result is printed because a bare
# expression is silently discarded when this runs as a script.
print(np.asmatrix(X_data) * np.asmatrix(ws))
# Part 2: ridge regression with scikit-learn
import numpy as np
from numpy import genfromtxt
from sklearn import linear_model
import matplotlib.pyplot as plt
# Load the data set from the CSV file
data = genfromtxt(r"longley.csv", delimiter=',')
print(data)
# Split into features and target. Row 0 and column 0 are skipped (presumably
# the CSV header and row labels -- confirm against longley.csv); column 1 is
# the target and the remaining columns are the features.
x_data = data[1:, 2:]
y_data = data[1:, 1]
print(x_data)
print(y_data)
# Candidate ridge penalties: 50 evenly spaced values from 0.001 to 1
alphas_to_test = np.linspace(0.001, 1, num=50)
# Build the model and keep the cross-validation errors.
# scikit-learn 1.5 renamed store_cv_values / cv_values_ to
# store_cv_results / cv_results_ and 1.7 removed the old names, so try the
# new spelling first and fall back to the old one on earlier releases.
try:
    model = linear_model.RidgeCV(alphas=alphas_to_test, store_cv_results=True)
except TypeError:
    model = linear_model.RidgeCV(alphas=alphas_to_test, store_cv_values=True)
model.fit(x_data, y_data)
if hasattr(model, "cv_results_"):
    cv_errors = model.cv_results_
else:
    cv_errors = model.cv_values_
# Selected ridge penalty
print(model.alpha_)
# Shape of the stored cross-validation errors
print(cv_errors.shape)
# Plot the mean cross-validation error against the candidate penalties
mean_cv_error = cv_errors.mean(axis=0)
plt.plot(alphas_to_test, mean_cv_error)
# Mark the selected penalty at the lowest mean error
plt.plot(model.alpha_, mean_cv_error.min(), 'ro')
plt.show()
# Predict for a single sample (row 2, kept 2-D as predict() expects); printed
# because a bare expression is silently discarded when this runs as a script.
print(model.predict(x_data[2, np.newaxis]))