简单算法的代码

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/jiaoyangwm/article/details/82498286

1、梯度下降

#!usr/bin/python3
# coding:utf-8

# BGD 批梯度下降代码实现
# SGD 随机梯度下降代码实现
import numpy as np

import random


def batchGradientDescent(x, y, theta, alpha, m, maxInteration):
    """Fit linear-regression weights with batch gradient descent.

    Each iteration uses ALL m samples: the gradient is the average of
    per-sample residual gradients, so the update direction is exact.

    :param x: (m, n) feature matrix
    :param y: (m,) target vector
    :param theta: (n,) initial weight vector
    :param alpha: learning rate
    :param m: number of training samples
    :param maxInteration: number of descent iterations
    :return: (n,) updated weight vector
    """
    xT = x.transpose()
    for _ in range(maxInteration):
        # Residual of the current model over the whole training set.
        residual = np.dot(x, theta) - y
        # Average gradient over all m samples, then take one step.
        theta = theta - alpha * (np.dot(xT, residual) / m)
    return theta


def stochasticGradientDescent(x, y, theta, alpha, m, maxInteration):
    """Fit linear-regression weights with stochastic gradient descent.

    Each iteration picks ONE random sample and steps along its gradient.

    Fixes vs. the original:
    - the sample index was drawn from a hard-coded ``range(4)``, which is
      wrong for any dataset with m != 4; it now draws from ``range(m)``.
    - the full hypothesis vector (np.dot(x, theta)) was recomputed every
      iteration even though only one sample's residual is used; only that
      sample's prediction is computed now.

    :param x: (m, n) feature matrix
    :param y: (m,) target vector
    :param theta: (n,) initial weight vector
    :param alpha: learning rate
    :param m: number of training samples
    :param maxInteration: number of descent iterations
    :return: (n,) updated weight vector
    """
    for _ in range(maxInteration):
        # Pick one random training sample per iteration.
        index = random.randrange(m)
        # Residual for that single sample.
        loss = np.dot(x[index], theta) - y[index]
        # Per-sample gradient and descent step.
        theta = theta - alpha * (loss * x[index])
    return theta


def main():
    """Demo: fit a tiny linear model with both BGD and SGD and print results."""
    features = np.array([[1, 4, 2], [2, 5, 3], [5, 1, 6], [4, 2, 8]])
    targets = np.array([19, 26, 19, 20])
    print(features)
    print(targets)
    sample_count, feature_count = np.shape(features)
    # Start both optimizers from the same all-ones weight vector.
    weights = np.ones(feature_count)
    print(weights.shape)
    iterations = 500
    learning_rate = 0.01
    bgd_weights = batchGradientDescent(
        features, targets, weights, learning_rate, sample_count, iterations)
    print(bgd_weights)
    sgd_weights = stochasticGradientDescent(
        features, targets, weights, learning_rate, sample_count, iterations)
    print(sgd_weights)
    return


# Run the demo only when executed as a script (not on import).
if __name__ == "__main__":
    main()

2、KNN

关键点:计算待分类点与各训练样本间的欧氏距离: $d = \sqrt{\sum_i (x_i - x_{test})^2}$

伪代码:

  • 计算训练集到该点的距离
  • 选择距离最小的k个点
  • 返回k个点中出现频率最高的类别,作为当前点的预测类别

代码:

def knn(in_x, data_set, labels, k):
    """Classify ``in_x`` by majority vote among its k nearest neighbors.

    Euclidean distance from ``in_x`` to every row of ``data_set`` is
    computed, the k closest rows are selected, and the most frequent
    label among them is returned.

    Fixes vs. the original snippet: undefined ``data_size``; the
    ``sorted_dist_indicies``/``sorted_dist_indices`` spelling mismatch;
    ``class_count`` used without initialization; the
    ``class_count``/``classCount`` naming mismatch; Python-2-only
    ``dict.iteritems()``; misspelled ``operator.itemegetter``; and a
    broken ``\\`` line continuation.

    :param in_x: (n,) feature vector to classify
    :param data_set: (m, n) matrix of training samples
    :param labels: length-m sequence of class labels for data_set rows
    :param k: number of nearest neighbors to vote (1 <= k <= m)
    :return: predicted label for in_x
    """
    data_size = data_set.shape[0]
    # Broadcast in_x against every training row and take the difference.
    diff_mat = np.tile(in_x, (data_size, 1)) - data_set
    # Euclidean distance to each training sample.
    distances = (diff_mat ** 2).sum(axis=1) ** 0.5
    # Indices of samples sorted by ascending distance.
    sorted_dist_indices = distances.argsort()
    # Tally the labels of the k closest samples.
    class_count = {}
    for i in range(k):
        vote_label = labels[sorted_dist_indices[i]]
        class_count[vote_label] = class_count.get(vote_label, 0) + 1
    # Label with the highest vote count wins.
    return max(class_count.items(), key=lambda kv: kv[1])[0]

猜你喜欢

转载自blog.csdn.net/jiaoyangwm/article/details/82498286