说明
暂时贴上程序实现,现在的问题是,因为没有优化程序,所以速度感人,但是相对也容易读一些。
1.矩阵实现
继承vector< vector< double>>实现了矩阵类
C++实现的矩阵
2.神经网络类
文件名 liuke.hpp
#pragma once
#include "Matrix.hpp"
#include <iostream>
#include <chrono>
#include <list>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
class LIUKE
{
public:
/*
* note_num_of_every_layer[] --> the array contain the number of node of every layers
* layer_num --> the number of layers
* 构造函数
* layers 神经网络每层的节点数,包括输入和输出层,比传入 a = {10,10,2},即代表一个三层的神经网络,输入层10个节点,隐藏层10个结点,输出层2个节点
* lay_num 神经网络层数
*/
LIUKE(int layers[], int layer_num)
{
target_matrix = Matrix(layers[layer_num - 1], 1);
this->num_of_every_layer = layers;
this->layer_num = layer_num;
// 1. node 节点初始化
for (int i = 0; i < layer_num; i++)
node_vector.push_back(Matrix(layers[i], 1));
// 2. weights matrix 权重初始化
for (int i = 0; i < layer_num - 1; i++)
weight_vector.push_back(Matrix(layers[i + 1], layers[i], true));
//3 b matrix 截距顶初始化
for (int i = 0; i < layer_num - 1; i++)
b_vector.push_back(Matrix(layers[i + 1], 1, true));
}
// 传入激活函数
void ActivationFunction(double(*callback)(double))
{
callback_ACTIVATION = callback;
}
// 激活函数求导
void ActivationFunctionDerivation(double(*callback)(double))
{
callback_ACTIVATION_DERI = callback;
}
// 误差函数导数
void CFunctionDerivation(double(*callback)(double, double))
{
callback_C_DERI = callback;
}
// 训练
/*
* input_s 输入数据
* target_s 目标结果
* train_num 对应的组数
* 即输入节点和输出节点的值都已一维数组的形式传入
*/
void Train(double* input_s, double* target_s, int train_num)
{
double* p_inpud_data = target_s;
double* p_target_data = target_s;
for (int i = 0; i < train_num; i++)
{
// 1.
for (int i = 0; i < num_of_every_layer[0]; i++)
{
node_vector[0][i][0] = *p_inpud_data;
p_inpud_data++;
}
for (int i = 0; i < num_of_every_layer[layer_num - 1]; i++)
{
target_matrix[i][0] = *p_target_data;
p_target_data++;
}
// 2.
TRAIN_();
//if (i % 10 == 0)
printf("Train Process:%d/%d\t\t", i, train_num);
}
}
// 测试函数,
/*
*input_s 同Tran
*target_s 同Tran
* test_num 同Tran
* 返回值 返回正确的次数
*/
int Test(double* input_s, double* target_s, int test_num)
{
int sum = 0;
double* p_inpud_data = input_s;
double* p_target_data = target_s;
for (int i = 0; i < test_num; i++)
{
// 1.
for (int i = 0; i < num_of_every_layer[0]; i++)
{
node_vector[0][i][0] = *p_inpud_data;
p_inpud_data++;
}
for (int i = 0; i < num_of_every_layer[layer_num - 1]; i++)
{
target_matrix[i][0] = *p_target_data;
p_target_data++;
}
// 2.
sum += TEST_();
printf("Test Process:%d/%d\n", i, test_num);
}
return sum;
}
private:
/*
* 训练一次
*/
void TRAIN_()
{
e_vector.clear(); // 清空记录误差的vector
z_vector.clear(); // z 表示未经过激活函数的值 a = gx(z)
// 1.前向传播
for (int i = 1; i < layer_num; i++)
{
node_vector[i] = (weight_vector[i - 1] * node_vector[i - 1]) + b_vector[i - 1];
z_vector.push_back(node_vector[i]);
MatrixActivationFunction(node_vector[i]);
}
// 2.BP 反向传播
// 2.1输出层误差
e_vector.push_back(MatrixCFunctionDerivation(node_vector[layer_num - 1], target_matrix)
/ MatrixActivationFunctionDerivation(z_vector[layer_num - 2]));
//2.2隐藏层误差
for (int i = layer_num - 3; i >= 0; i--)
{
Matrix tmp = weight_vector[i + 1].transposition();
tmp = tmp * e_vector.front() / MatrixActivationFunctionDerivation(z_vector[i]);
e_vector.insert(e_vector.begin(), tmp);
}
// 3.更新权值
for (int i = 0; i < layer_num - 1; i++)
{
b_vector[i] = b_vector[i] - e_vector[i] * learn_rate;
Matrix tmp = node_vector[i].transposition();
weight_vector[i] = weight_vector[i] - e_vector[i] * tmp * learn_rate;
}
}
// 一次测试
bool TEST_()
{
e_vector.clear();
z_vector.clear();
// 1.
for (int i = 1; i < layer_num; i++)
{
node_vector[i] = weight_vector[i - 1] * node_vector[i - 1];
node_vector[i] = node_vector[i] + b_vector[i - 1];
z_vector.push_back(node_vector[i]);
MatrixActivationFunction(node_vector[i]);
}
// 2.
double tmp = 0;
int index = 0;
for (int i = 0; i < num_of_every_layer[layer_num - 1]; i++)
{
if (node_vector[layer_num - 1][i][0] > tmp)
{
tmp = node_vector[layer_num - 1][i][0];
index = i;
}
}
if (target_matrix[index][0] == 1)
{
return 1;
}
return 0;
}
// 矩阵 误差函数求导
Matrix MatrixCFunctionDerivation(Matrix& matrix, Matrix& target_matrix)
{
Matrix tmp(matrix.row, matrix.col);
for (int i = 0; i < matrix.row; i++)
{
for (int j = 0; j < matrix.col; j++)
{
tmp[i][j] = callback_C_DERI(matrix[i][j], target_matrix[i][j]);
}
}
return tmp;
}
// 矩阵 激活函数求导
Matrix MatrixActivationFunctionDerivation(Matrix& matrix)
{
Matrix tmp(matrix.row, matrix.col);
for (int i = 0; i < matrix.row; i++)
{
for (int j = 0; j < matrix.col; j++)
{
tmp[i][j] = callback_ACTIVATION_DERI(matrix[i][j]);
}
}
return tmp;
}
// 矩阵 激活函数
void MatrixActivationFunction(Matrix& matrix)
{
for (int i = 0; i < matrix.row; i++)
{
for (int j = 0; j < matrix.col; j++)
{
matrix[i][j] = callback_ACTIVATION(matrix[i][j]);
}
}
}
private:
// 三个回调函数 激活函数、激活函数求导、误差函数求导
double(*callback_ACTIVATION)(double);
double(*callback_ACTIVATION_DERI)(double);
double(*callback_C_DERI)(double, double);
// 节点
std::vector<Matrix> node_vector;
//权重
std::vector<Matrix> weight_vector;
// 截距顶
std::vector<Matrix> b_vector;
// z值,a = gx(z), gx表示激活函数,z表示权重和上一层节点的积
std::vector<Matrix> z_vector;
// 误差
std::vector<Matrix> e_vector;
//目标结果
Matrix target_matrix;
// 神经网络每层节点数
int *num_of_every_layer;
// 神经网络层数
int layer_num;
public:
// (未使用)计划用于测试
double test_e = 0.1;
// 学习效率
double learn_rate = 0.01;
};
3.使用MNIST数据集测试
其中MNIST数据读取的函数我分开了
C++读取MNIST
// 04liuke.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#define _CRT_SECURE_NO_WARNINGS
#include "Matrix.hpp"
#include "liuke.hpp"
#include <vector>
#include <iostream>
int main()
{
    printf("Starting...\n");
    std::vector<double> Train_Data;
    std::vector<double> Train_Label;
    std::vector<double> Test_Data;
    std::vector<double> Test_Label;
    printf("Encode image from MINIST\n");
    printf("Encode Train_Data\n");
    read_data_train_image(Train_Data);
    printf("Encode Train_Label\n");
    read_data_train_label(Train_Label);
    printf("Encode Test_Data\n");
    read_data_test_image(Test_Data);
    printf("Encode Test_Label\n");
    read_data_test_label(Test_Label);
    printf("End encode\n");
    // 28*28 input pixels, one hidden layer of 500 nodes, 10 output classes.
    int nn_array[] = { 28 * 28, 500, 10 };
    LIUKE liuke(nn_array, 3);
    liuke.ActivationFunction(gx);
    liuke.ActivationFunctionDerivation(gx_d);
    liuke.CFunctionDerivation(c_d);
    liuke.learn_rate = 0.001;
    // BUG FIX: vector::size() returns size_t; passing it straight to an int
    // parameter narrows silently, and passing it to printf's %d is undefined
    // behavior. Compute the sample counts once as int, explicitly.
    const int train_count = static_cast<int>(Train_Data.size() / (28 * 28));
    const int test_count = static_cast<int>(Test_Data.size() / (28 * 28));
    liuke.Train(Train_Data.data(), Train_Label.data(), train_count);
    int sum = liuke.Test(Test_Data.data(), Test_Label.data(), test_count);
    printf("Result:%d/%d \t\t %f\n", sum, test_count, (double)sum / (double)test_count);
    return 0;
}