0.引入依赖
import numpy as np
import matplotlib.pyplot as plt
1.导入数据(data.csv)
points = np.genfromtxt('data.csv',delimiter=',')
# 提取points中的两列数据,分别作为x,y
x=points[:,0] #取所有的第一列
y=points[:,1] #取所有的第二列
# 用plt画出散点图
plt.scatter(x,y)
plt.show()
2. 定义损失函数
# 损失函数是系数的函数,还要传入数据的x,y
def computer_cost(w,b,points):
total_cost = 0
M = len(points)
# 逐点计算平方损失误差,然后求平均值
for i in range(M):
x=points[i,0]
y=points[i,1]
total_cost += (y - w * x - b) ** 2
return total_cost/M
3. 定义核心算法拟合函数
# 先定义一个球均值的函数
def average(data):
sum = 0
num = len(data)
for i in range(num):
sum += data[i]
return sum/num
# 定义核心拟合函数
def fit(points):
M = len(points)
x_bar= average(points[:,0])
sum_yx = 0
sum_x2 = 0
sum_delta = 0
for i in range(M):
x=points[i,0]
y=points[i,1]
sum_yx += y * (x - x_bar)
sum_x2 += x ** 2
# 根据公式计算w
w = sum_yx / (sum_x2 - M * (x_bar ** 2))
for i in range(M):
x=points[i,0]
y=points[i,1]
sum_delta += (y - w * x)
b = sum_delta / M
return w,b
4. 测试
w,b = fit(points)
print("w = " ,w)
print("b = " , b)
cost = computer_cost(w,b,points)
print("cost = ", cost)
5. 画出拟合曲线
plt.scatter(x,y)
# 针对每一个x,计算出预测的y值
pred_y = w * x + b
plt.plot(x,pred_y,c='r')
plt.show()