01 Network Description
A BP network consists of a large number of simple processing units that are densely interconnected; it is a multilayer mapping network whose weights are trained to model nonlinear functions. It has excellent nonlinear mapping ability: in theory, it can approximate any nonlinear function to arbitrary accuracy. This article uses a BP neural network to solve the function-fitting problem below.
Function approximation: design a neural network to fit functions of the form y = a·sin(bx) + c·cos(dx) and y = a·x·sin(bx) + c·x·cos(dx) (implemented as f and f1 in the code in Section 05).
02 Building the BP Neural Network Model
(1) Generating the sample data
Building a BP neural network model for function fitting generally involves the steps below. The first is generating the sample data, for which two methods are used here (a sketch of both follows this list):
- The first method generates 500 uniformly spaced points with x in [-π, π], shuffles the indices of these 500 samples, and takes the first 30% as the test set and the remaining 70% as the training set, as shown in the figure.
- The second method generates 350 uniformly spaced points in [-π, π] as the training set and another 150 uniformly spaced points in [-π, π] as the test set.
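A minimal sketch of the two sampling schemes in plain numpy (the full versions are load_data, load_train_data and load_test_data in Section 05):

import numpy as np

# Method 1: one uniform grid, split 70% train / 30% test at random indices
x = np.linspace(-np.pi, np.pi, 500)
idx = np.random.permutation(len(x))          # shuffle the sample indices
test_idx, train_idx = idx[:150], idx[150:]   # first 30% test, rest train

# Method 2: two independent uniform grids
x_train = np.linspace(-np.pi, np.pi, 350)
x_test = np.linspace(-np.pi, np.pi, 150)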
(2) Data preprocessing
The sample data usually needs to be normalized; since the data here varies over a fairly small range, normalization is not applied for now.
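For reference, min-max normalization (the same scheme as the normalize helper in Section 05) would map the data into [0, 1]:

def normalize(data):
    data_min, data_max = data.min(), data.max()
    return (data - data_min) / (data_max - data_min)

# x_train = normalize(x_train)  # would be applied here if normalization were needed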
(3) The BP neural network
A typical three-layer BP neural network structure is shown in Figure 1, consisting of an input layer, a hidden layer, and an output layer. The hidden layer shapes the relationship between input and output; that is, by changing the hidden-layer coefficients, the behavior of the multilayer network can be changed.
The learning process of a BP neural network consists of forward propagation and back-propagation.
In the forward pass, the output value is computed through matrix multiplications, and it is compared with the true value to obtain the gap between the two.
In the backward pass, the gradient of the loss function with respect to every parameter in the model is computed, and each parameter is updated by gradient descent.
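A minimal sketch of one forward and one backward pass for a single hidden layer (the sample, initial weights, and learning rate eta are illustrative; the full implementation is the BP class in Section 05):

import numpy as np

x, y = np.array([[0.5]]), np.array([[1.0]])       # one training sample
W1, b1 = np.random.randn(5, 1), np.zeros((5, 1))  # hidden layer
W2, b2 = np.random.randn(1, 5), np.zeros((1, 1))  # linear output layer

# forward pass: matrix multiplications, linear output layer
z1 = W1 @ x + b1
a1 = np.tanh(z1)
y_hat = W2 @ a1 + b2

# backward pass: gradient of the squared error, then one gradient step
eta = 0.001
d2 = 2 * (y_hat - y)                # dL/dz2 for L = (y_hat - y)**2
d1 = (W2.T @ d2) * (1 - a1 ** 2)    # chain rule through tanh
W2 -= eta * d2 @ a1.T; b2 -= eta * d2
W1 -= eta * d1 @ x.T;  b1 -= eta * d1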
Gradient descent can be pictured as standing somewhere on a large mountain without knowing the way down, and deciding to proceed one step at a time: at each position, compute the gradient there and take a step along the negative gradient, i.e. in the locally steepest downhill direction, then repeat; we stop when we believe we have reached the foot of the mountain. Of course, we may not actually reach the foot, but instead end up at the low point of some local valley.
If the loss function is convex, however, the solution found by gradient descent is guaranteed to be the global optimum.
The essence of the BP algorithm is minimizing the error function: over repeated training passes on many samples, the coefficients are adjusted along the negative gradient of the error function, generally using gradient descent.
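As a worked one-dimensional example of the update rule (the function and step size are illustrative): minimizing f(θ) = θ² by repeatedly stepping along the negative gradient:

theta, eta = 5.0, 0.1    # start point and learning rate (illustrative)
for _ in range(100):
    grad = 2 * theta     # f'(theta) for f(theta) = theta ** 2
    theta -= eta * grad  # step along the negative gradient
print(theta)             # converges towards the global minimum at 0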
(4) Training and testing
Using the first sampling method to produce the training and test data, we set batch_size to 32, the number of training epochs to 3000, and the learning rate to 0.001, and feed the training data into the BP network for training.
The test data is then fed into the model with the trained parameters to obtain predictions for the two functions above; a usage sketch follows.
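With the BP class and helpers from Section 05, this training-and-testing step can be sketched as follows (using the hyperparameters stated above):

x_train, y_train, x_test, y_test = load_data(500, 2, 2, 2, 2)
data = [(np.array([xv]), np.array([yv])) for xv, yv in zip(x_train, y_train)]
model = BP([1, 5, 5, 1], tanh, tanh_derivative, loss_derivative)
model.fit(train_data=data, epochs=3000, batch_size=32, learning_rate=0.001,
          validation_data=(x_test, y_test))
predict = model.predict(x_test)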
03 Problems Encountered and Solutions
The model fits the boundary region of the data poorly. With the first sampling method, few samples fall near the boundary, so the model underfits the boundary data.
Remedy: use K-fold cross-validation: split the dataset into K folds, train on K-1 of them and test on the remaining one, and repeat this K times; this makes full use of the dataset for training.
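A minimal K-fold split can be sketched as follows (plain numpy; K = 5 and the assumption that x and y hold the full dataset are illustrative):

import numpy as np

def k_fold_indices(n_samples, k=5, seed=2019):
    idx = np.random.RandomState(seed).permutation(n_samples)
    folds = np.array_split(idx, k)
    for i in range(k):
        test_idx = folds[i]
        train_idx = np.concatenate(folds[:i] + folds[i + 1:])
        yield train_idx, test_idx

# each of the K rounds trains on K-1 folds and tests on the remaining one
for train_idx, test_idx in k_fold_indices(len(x), k=5):
    x_tr, y_tr = x[train_idx], y[train_idx]
    x_te, y_te = x[test_idx], y[test_idx]
    # ... train a fresh BP model on (x_tr, y_tr) and evaluate on (x_te, y_te)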
04 Algorithm Evaluation
The quality of the fit is mainly affected by the following factors (a small sweep sketch follows the list):
(1) Problem complexity
(2) Sampling method
(3) Learning rate
(4) Number of samples
(5) Batch size
(6) Activation function
(7) Number of hidden layers and nodes per layer
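To compare such factors empirically, one can vary them and record the validation loss; a sketch using the BP class and data from Section 05 (the candidate values are illustrative):

for lr in (0.01, 0.001):                     # learning-rate candidates
    for layer in ([1, 5, 1], [1, 5, 5, 1]):  # hidden-layer configurations
        model = BP(layer, tanh, tanh_derivative, loss_derivative)
        _, losses = model.fit(train_data=data, epochs=1000, batch_size=32,
                              learning_rate=lr, validation_data=(x_test, y_test))
        print(layer, lr, losses[-1])         # final validation loss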
05 Code
# Back Propagation (BP) neural network for function fitting
# coding=utf-8
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK characters in labels
plt.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly

# Split a dataset into a training set and a test set
def train_test_split(x, y, test_ratio=0.3, seed=None):
    if seed is not None:
        np.random.seed(seed)
    shuffled_indexes = np.random.permutation(len(x))
    test_size = int(len(x) * test_ratio)
    train_index = np.sort(shuffled_indexes[test_size:])
    test_index = np.sort(shuffled_indexes[:test_size])
    return x[train_index], x[test_index], y[train_index], y[test_index]
# Target functions
def f(a, b, c, d, x):
    return a * np.sin(b * x) + c * np.cos(d * x)

def f1(a, b, c, d, x):
    return a * x * np.sin(b * x) + c * x * np.cos(d * x)

# Sampling method 1: one uniform grid with a random train/test split
def load_data(step, a, b, c, d):
    x = np.linspace(-np.pi, np.pi, step).T
    x = np.expand_dims(x, -1)
    y = f(a, b, c, d, x) + f(3, 3, 3, 3, x)
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=2019)
    return x_train, y_train, x_test, y_test

# Sampling method 2: separate uniform grids for training and testing
def load_train_data(step, a, b, c, d):
    x_train = np.linspace(-np.pi, np.pi, step).T
    x_train = np.expand_dims(x_train, -1)
    y_train = f(a, b, c, d, x_train) + f(3, 3, 3, 3, x_train)
    return x_train, y_train

def load_test_data(step, a, b, c, d):
    x_test = np.linspace(-np.pi, np.pi, step).T
    x_test = np.expand_dims(x_test, -1)
    y_test = f(a, b, c, d, x_test) + f(3, 3, 3, 3, x_test)
    return x_test, y_test

# Min-max normalization of the data
def normalize(data):
    data_min, data_max = data.min(), data.max()
    return (data - data_min) / (data_max - data_min)
# Activation functions and their derivatives
def tanh(z):
    return np.tanh(z)

def tanh_derivative(z):
    return 1.0 - np.tanh(z) ** 2

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    return sigmoid(z) * (1 - sigmoid(z))

def relu(z):
    return np.maximum(0, z)

def relu_derivative(z):
    return np.where(z >= 0, 1.0, 0.0)  # elementwise, so it also works on arrays

# Derivative of the squared-error loss, and the MSE metric
def loss_derivative(output_activations, y):
    return 2 * (output_activations - y)

def mean_squared_error(predictY, realY):
    Y = np.array(realY)
    return np.sum((predictY - Y) ** 2) / realY.shape[0]
# BP neural network class
class BP:
    def __init__(self, sizes, activity, activity_derivative, loss_derivative):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.zeros((neuron, 1)) for neuron in sizes[1:]]
        self.weights = [np.random.randn(next_layer_neuron, neuron)
                        for neuron, next_layer_neuron in zip(sizes[:-1], sizes[1:])]
        self.activity = activity
        self.activity_derivative = activity_derivative
        self.loss_derivative = loss_derivative

    # Forward pass over a batch of row vectors; the output layer is linear
    def predict(self, a):
        re = a.T
        n = len(self.biases) - 1
        for i in range(n):
            b, w = self.biases[i], self.weights[i]
            re = self.activity(np.dot(w, re) + b)
        re = np.dot(self.weights[n], re) + self.biases[n]
        return re.T

    # Accumulate the gradients over one batch and take an averaged step
    def update_batch(self, batch, learning_rate):
        temp_b = [np.zeros(b.shape) for b in self.biases]
        temp_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in batch:
            delta_temp_b, delta_temp_w = self.update_parameter(x, y)
            temp_w = [w + dw for w, dw in zip(temp_w, delta_temp_w)]
            temp_b = [b + db for b, db in zip(temp_b, delta_temp_b)]
        self.weights = [sw - (learning_rate / len(batch)) * w for sw, w in zip(self.weights, temp_w)]
        self.biases = [sb - (learning_rate / len(batch)) * b for sb, b in zip(self.biases, temp_b)]
    # One forward pass followed by back-propagation for a single sample
    def update_parameter(self, x, y):
        temp_b = [np.zeros(b.shape) for b in self.biases]
        temp_w = [np.zeros(w.shape) for w in self.weights]
        activation = x
        activations = [x]  # activations, layer by layer
        zs = []            # weighted inputs, layer by layer
        n = len(self.biases)
        for i in range(n):
            b, w = self.biases[i], self.weights[i]
            z = np.dot(w, activation) + b
            zs.append(z)
            if i != n - 1:
                activation = self.activity(z)
            else:
                activation = z  # linear output layer
            activations.append(activation)
        # Output layer: the loss derivative alone, since the output is linear
        d = self.loss_derivative(activations[-1], y)
        temp_b[-1] = d
        temp_w[-1] = np.dot(d, activations[-2].T)
        # Propagate the error backwards through the hidden layers
        for i in range(2, self.num_layers):
            z = zs[-i]
            d = np.dot(self.weights[-i + 1].T, d) * self.activity_derivative(z)
            temp_b[-i] = d
            temp_w[-i] = np.dot(d, activations[-i - 1].T)
        return (temp_b, temp_w)

    # Mini-batch gradient descent over the shuffled training data
    def fit(self, train_data, epochs, batch_size, learning_rate, validation_data=None):
        epoches, losses = [], []
        n = len(train_data)
        for j in range(epochs):
            np.random.shuffle(train_data)
            batches = [train_data[k:k + batch_size] for k in range(0, n, batch_size)]
            for batch in batches:
                self.update_batch(batch, learning_rate)
            if validation_data is not None:
                val_pre = self.predict(validation_data[0])
                val_loss = mean_squared_error(val_pre, validation_data[1])
                print("Epoch", j + 1, '/', epochs, ' val loss:%12.12f' % val_loss)
                losses.append(val_loss)
                epoches.append(j + 1)
        return epoches, losses
if __name__ == "__main__":
    # Random seed for reproducibility
    np.random.seed(2019)
    # Coefficients of the target function
    a, b, c, d = 2, 2, 2, 2
    num_step = 500
    # Sampling method 1: random train/test split
    x_train, y_train, x_test, y_test = load_data(num_step, a, b, c, d)
    # Sampling method 2: separate uniform grids
    # rate = 0.3
    # num_train_step = int(num_step * (1 - rate))
    # num_test_step = int(num_step * rate)
    # x_train, y_train = load_train_data(num_train_step, a, b, c, d)
    # x_test, y_test = load_test_data(num_test_step, a, b, c, d)
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
    data = [(np.array([x_value]), np.array([y_value])) for x_value, y_value in zip(x_train, y_train)]
    # BP network hyperparameters
    beta = 1e-2
    layer = [1, 5, 5, 1]
    epochs = 1000
    model = BP(layer, tanh, tanh_derivative, loss_derivative)
    # Training
    epoches, losses = model.fit(train_data=data, epochs=epochs, batch_size=8, learning_rate=beta,
                                validation_data=(x_test, y_test))
    # Prediction on the test set
    predict = model.predict(x_test)
    # Mean absolute prediction error
    mae = np.sum(np.abs(predict - y_test)) / len(y_test)
    print("Mean absolute error: %12.12f" % mae)
    # Plot the fitted curve against the ground truth
    plt.figure()
    plt.title("BP network fit of y = 2sin(2x) + 2cos(2x) + 3sin(3x) + 3cos(3x)")
    plt.plot(x_test, y_test, "-r", linewidth=2, label='origin')
    plt.plot(x_test, predict, "-b", linewidth=1, label='predict')
    plt.legend()
    plt.grid(True)
    plt.show()
    # Plot the validation-loss curve
    plt.figure()
    plt.title("BP network validation loss")
    plt.plot(epoches, losses, "-r", linewidth=2, label="validation loss")
    plt.legend()
    plt.show()