"""
实战案例1:利用NumPy实现梯度下降算法预测疾病
任务:根据体重指数(BMI)和疾病发展的定量测量值(Y)使用梯度下降算法拟合出一条直线 y_hat=aX+b
数据集来源:http://sklearn.apachecn.org/cn/0.19.0/sklearn/datasets/descr/diabetes.html
"""
import numpy as np
import matplotlib.pylab as plt
# Location of the CSV data file that main() passes to load_data().
Data_Path = 'F:/diabetes.csv'
def load_data(data_file):
    """Read a comma-separated numeric file into a NumPy array.

    The first line of the input is skipped (it is treated as a header row);
    every remaining line is parsed by ``np.loadtxt``.

    Args:
        data_file: Path (or any other input ``np.loadtxt`` accepts) of the
            file to read.

    Returns:
        The array built by ``np.loadtxt`` from the remaining rows.
    """
    return np.loadtxt(data_file, skiprows=1, delimiter=',')
def get_gradient(theta, x, y):
    """Evaluate the squared-error cost and its gradient for a linear model.

    Args:
        theta: Parameter vector; ``x.dot(theta)`` gives the predictions.
        x: Design matrix whose first axis indexes the samples.
        y: Target values, one per sample.

    Returns:
        A ``(grad, cost)`` tuple where ``grad`` is ``(1/m) * residual.dot(x)``
        and ``cost`` is ``1/(2m) * sum(residual ** 2)``, with ``m`` the number
        of samples and ``residual = x.dot(theta) - y``.
    """
    n_samples = x.shape[0]
    residual = x.dot(theta) - y
    # Same operation order as scaling by the reciprocal first, so the
    # floating-point results are unchanged.
    cost = (1.0 / (2 * n_samples)) * np.square(residual).sum()
    grad = (1.0 / n_samples) * residual.dot(x)
    return grad, cost
def gradient_descent(x, y, max_iter=1500, alpha=0.01):
    """Fit linear-model parameters with batch gradient descent.

    Starting from a random parameter vector, repeatedly steps against the
    gradient returned by ``get_gradient`` until either the L1 change between
    successive parameter vectors drops below ``1e-3`` or ``max_iter``
    iterations have been performed. At least one iteration is always run.

    Progress (the cost every 10th iteration) and the reason for stopping
    are printed to stdout.

    Args:
        x: Design matrix; its last axis gives the number of parameters.
        y: Target values, one per row of ``x``.
        max_iter: Maximum number of update steps.
        alpha: Learning rate (step size).

    Returns:
        The parameter vector after the final update.
    """
    # One parameter per column of x rather than a hard-coded 2, so design
    # matrices with any number of features are supported.
    theta = np.random.randn(x.shape[-1])
    # Convergence threshold on the L1 change of the parameters.
    tolerance = 1e-3

    iterations = 1
    while True:
        grad, cost = get_gradient(theta, x, y)
        new_theta = theta - alpha * grad

        # Stopping condition: the update barely moved the parameters.
        converged = np.sum(np.abs(new_theta - theta)) < tolerance
        if converged:
            print("参数收敛")

        # Print the cost every 10 iterations.
        if iterations % 10 == 0:
            print("第{}次迭代,损失值{:.4f}".format(iterations, cost))

        iterations += 1
        theta = new_theta

        if converged:
            # Leave before the max-iteration check so a run that converges
            # on its last allowed step is not also reported as exhausted.
            break
        if iterations > max_iter:
            print("已经到达最大迭代次数{}".format(max_iter))
            break
    return theta
def main():
    """Load the data set, fit a line by gradient descent, and plot the fit.

    Column 0 of the loaded array is used as the single input feature and
    column 1 as the target. The fitted parameters are printed, then a
    scatter plot of the samples with the fitted line in red is shown.
    """
    dataset = load_data(Data_Path)
    feature = dataset[:, 0]
    y = dataset[:, 1]

    # Design matrix: a leading column of ones (intercept term) followed by
    # the feature column.
    feature_col = feature.reshape(-1, 1)
    x = np.hstack((np.ones_like(feature_col), feature_col))

    theta = gradient_descent(x, y, alpha=0.001, max_iter=200)
    print("线性模型参数:", theta)

    # Predictions of the fitted line at every sample.
    fitted = theta[0] + theta[1] * feature

    plt.figure()
    # Sample points.
    plt.scatter(feature, y)
    # Fitted line.
    plt.plot(feature, fitted, c='red')
    plt.show()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()