1. Gradient Descent
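Gradient descent minimizes a cost function J(theta) by repeatedly stepping against its gradient: theta := theta - alpha * grad J(theta), where alpha is the learning rate. For linear regression with the squared-error cost, the gradient over m samples is (1/m) * X^T (X theta - y); batch gradient descent (BGD) computes it from all m samples on every step, while stochastic gradient descent (SGD) estimates it from a single randomly chosen sample. Both updates appear in the code below.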
#!/usr/bin/python3
# coding: utf-8
# BGD: batch gradient descent implementation
# SGD: stochastic gradient descent implementation
import numpy as np
import random

def batchGradientDescent(x, y, theta, alpha, m, maxIteration):
    x_train = x.transpose()
    for i in range(maxIteration):
        hypothesis = np.dot(x, theta)
        # prediction error (residual)
        loss = hypothesis - y
        # gradient of the squared-error cost, averaged over all m samples
        gradient = np.dot(x_train, loss) / m
        # step theta along the negative gradient
        theta = theta - alpha * gradient
    return theta

def stochasticGradientDescent(x, y, theta, alpha, m, maxIteration):
    # indices of the training samples
    data = list(range(m))
    for i in range(maxIteration):
        hypothesis = np.dot(x, theta)
        # prediction error (residual)
        loss = hypothesis - y
        # pick one sample index at random
        index = random.sample(data, 1)
        index1 = index[0]
        # gradient estimated from that single sample
        gradient = loss[index1] * x[index1]
        # step theta along the negative gradient
        theta = theta - alpha * gradient
    return theta

def main():
    trainData = np.array([[1, 4, 2], [2, 5, 3], [5, 1, 6], [4, 2, 8]])
    trainLabel = np.array([19, 26, 19, 20])
    print(trainData)
    print(trainLabel)
    m, n = np.shape(trainData)
    theta = np.ones(n)
    print(theta.shape)
    maxIteration = 500
    alpha = 0.01
    theta1 = batchGradientDescent(trainData, trainLabel, theta, alpha, m, maxIteration)
    print(theta1)
    theta2 = stochasticGradientDescent(trainData, trainLabel, theta, alpha, m, maxIteration)
    print(theta2)

if __name__ == "__main__":
    main()
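Because SGD updates theta from one random sample per iteration, its result is noisier than BGD's for the same alpha and iteration count. As a quick sanity check (an addition, not part of the original post), the following lines could be appended to main(): the predictions should approach trainLabel as the iterations converge, since this trainData admits the exact solution theta = [3, 4, 0].

    # sanity check (added): predictions should be close to trainLabel
    print(np.dot(trainData, theta1))
    print(np.dot(trainData, theta2))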
2. KNN
Key point: compute the distance (Euclidean, here) between the query point and every training sample.
Pseudocode:
- Compute the distance from every point in the training set to the query point
- Select the k points with the smallest distances
- Return the class that occurs most frequently among those k points as the predicted class of the query point
Code:
import operator
import numpy as np

def knn(in_x, data_set, labels, k):
    '''
    in_x: the input vector, i.e. the vector to classify
    data_set: the training set; distances are computed between in_x and every sample in it
    labels: the class labels of the training samples
    k: the number of nearest neighbors that vote
    '''
    data_size = data_set.shape[0]
    # Tile the input row vector into a matrix the same shape as the
    # training set, then subtract to get the per-row differences
    diff_mat = np.tile(in_x, (data_size, 1)) - data_set
    sq_diff_mat = diff_mat ** 2
    # Euclidean distance: sum the squared differences per row, then take the square root
    distances = sq_diff_mat.sum(axis=1) ** 0.5
    # indices that sort the distances in ascending order
    sorted_dist_indices = distances.argsort()
    # tally the class labels of the k nearest samples
    class_count = {}
    for i in range(k):
        vote_label = labels[sorted_dist_indices[i]]
        # dict.get: look up the current count for this label, defaulting to 0
        class_count[vote_label] = class_count.get(vote_label, 0) + 1
    sorted_class_count = sorted(class_count.items(),
                                key=operator.itemgetter(1), reverse=True)
    return sorted_class_count[0][0]
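A minimal usage sketch (the sample data below are made up for illustration and are not from the original post):

group = np.array([[1.0, 1.1], [1.0, 1.0], [0.0, 0.0], [0.0, 0.1]])
group_labels = ['A', 'A', 'B', 'B']
# the query point [0.1, 0.2] lies closest to the 'B' cluster
print(knn(np.array([0.1, 0.2]), group, group_labels, 3))  # prints 'B'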