此代码参照教学视频编写,调了将近一天才调通,写这篇文章,主要是为了保存调好的源码。日后使用,可以免去调试的过程。
一,network2.py
import json
import random #产生随机数
import sys
import numpy as np
class QuadraticCost(object):
    """Quadratic cost for a single training example.

    Both functions are static so they can be called on the class itself
    (as ``Network`` does through ``self.cost``) or on an instance.
    """

    @staticmethod
    def fn(a, y):
        """Return ``0.5 * ||a - y||^2`` for output ``a`` and target ``y``."""
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error ``(a - y) * sigmoid_prime(z)``.

        ``z`` is the weighted input of the output layer, ``a`` its
        activation and ``y`` the target. ``sigmoid_prime`` is looked up in
        module scope when the function is called.
        """
        return (a - y) * sigmoid_prime(z)
# Cost function
class CrossEntropyCost(object):
    """Cross-entropy cost for a single training example.

    Both functions are static so they can be called on the class itself
    (as ``Network`` does through ``self.cost``) or on an instance.
    """

    @staticmethod
    def fn(a, y):
        """Return ``sum(-y*log(a) - (1-y)*log(1-a))`` for output ``a`` and target ``y``.

        ``np.nan_to_num`` replaces NaN terms (e.g. ``0 * log(0)``) with 0
        so they do not poison the sum.
        """
        return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error ``a - y``.

        ``z`` is unused; it is accepted so the signature matches
        ``QuadraticCost.delta``.
        """
        return a - y
# Neural network class
class Network(object):
    """Feed-forward network of sigmoid layers trained with mini-batch SGD.

    Attributes:
        num_layers: number of layers, ``len(sizes)``.
        sizes: the per-layer neuron counts passed to the constructor.
        biases: one ``(y, 1)`` array per non-input layer.
        weights: one ``(y, x)`` array per pair of adjacent layers.
        cost: cost class providing ``fn(a, y)`` and ``delta(z, a, y)``.
    """

    def __init__(self, sizes, cost=CrossEntropyCost):
        """Build the network and initialise its parameters.

        Args:
            sizes: number of neurons in each layer, input layer first,
                e.g. ``Network([2, 3, 1])``.
            cost: cost class used by ``backprop`` and ``total_cost``.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.default_weight_initializer()
        self.cost = cost

    def default_weight_initializer(self):
        """Draw biases from N(0, 1) and weights from N(0, 1) / sqrt(fan-in)."""
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        # Dividing by sqrt(x), x being the number of inputs of the layer,
        # narrows the initial weight distribution.
        self.weights = [np.random.randn(y, x) / np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        """Draw both biases and weights from N(0, 1) without any scaling."""
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def feedforward(self, a):
        """Propagate input ``a`` through every layer and return the output."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)  # a = sigmoid(w . a + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lmbda=0.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: iterable of ``(x, y)`` pairs.
            epochs: number of passes over the training data.
            mini_batch_size: number of examples per mini-batch.
            eta: learning rate.
            lmbda: regularization parameter used for weight decay.
            evaluation_data: optional iterable of ``(x, y)`` pairs used
                for monitoring.
            monitor_evaluation_cost: record/print cost on evaluation data.
            monitor_evaluation_accuracy: record/print accuracy on
                evaluation data.
            monitor_training_cost: record/print cost on training data.
            monitor_training_accuracy: record/print accuracy on training
                data.

        Returns:
            Tuple ``(evaluation_cost, evaluation_accuracy, training_cost,
            training_accuracy)``; each is a list with one entry per epoch
            for which the matching monitor flag was set.

        Raises:
            ValueError: if evaluation monitoring is requested without any
                evaluation data.
        """
        # Work on private lists: len() and slicing then also work for
        # one-shot iterables (e.g. zip objects), and shuffling does not
        # reorder the caller's own list.
        training_data = list(training_data)
        if evaluation_data is not None:
            evaluation_data = list(evaluation_data)
        if (monitor_evaluation_cost or monitor_evaluation_accuracy) \
                and not evaluation_data:
            raise ValueError(
                "evaluation_data is required when evaluation monitoring "
                "is enabled")
        n = len(training_data)
        n_data = len(evaluation_data) if evaluation_data else 0
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []
        for j in range(epochs):
            random.shuffle(training_data)
            # Split the shuffled data into consecutive chunks, e.g. 1000
            # examples with mini_batch_size 100 give 10 mini-batches.
            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                # Key step: update the weights and biases.
                self.update_mini_batch(mini_batch, eta, lmbda, n)
            print("Epoch %s training complete" % j)
            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)
                print("Cost on training data:{}".format(cost))
            if monitor_training_accuracy:
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)
                print("Accuracy on training data:{}/{}".format(
                    accuracy, n))
            if monitor_evaluation_cost:
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)
                print("Cost on evaluation data:{}".format(cost))
            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)
                # Reuse the value just computed instead of running the
                # whole evaluation set through the network a second time.
                print("Accuracy on evaluation data:{}/{}".format(
                    accuracy, n_data))
            print()  # blank line between epochs
        return evaluation_cost, evaluation_accuracy, \
            training_cost, training_accuracy

    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: list of ``(x, y)`` pairs.
            eta: learning rate.
            lmbda: regularization parameter.
            n: total number of training examples (scales the weight decay).
        """
        # Gradient accumulators with the same shapes as the parameters
        # ("nabla" is the name of the gradient symbol).
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            # Per-example partial derivatives with respect to b and w.
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        step = eta / len(mini_batch)
        # Weights are shrunk by (1 - eta*lmbda/n) before the gradient
        # step; biases are not regularized.
        self.weights = [(1 - eta * (lmbda / n)) * w - step * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the cost gradient for one example.

        Steps:
            1. Use the input ``x`` as the activation of the input layer.
            2. Forward pass: for every layer compute ``z = w . a + b`` and
               ``a = sigmoid(z)``.
            3. Backward pass: compute the output-layer error, propagate it
               through the hidden layers and derive the gradients.

        ``nabla_b`` and ``nabla_w`` are lists of arrays shaped like
        ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass
        activation = x          # activation of the input layer
        activations = [x]       # activations of every layer
        zs = []                 # weighted inputs z of every layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Backward pass: output layer
        delta = self.cost.delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        # Transpose of the second-to-last layer's activations.
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Backward pass: hidden layers; layer=2 is the second-to-last layer.
        for layer in range(2, self.num_layers):
            z = zs[-layer]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(
                delta, activations[-layer - 1].transpose())
        return (nabla_b, nabla_w)

    def accuracy(self, data, convert=False):
        """Return how many examples in ``data`` are classified correctly.

        The prediction is the index of the largest output activation.
        With ``convert=True`` each ``y`` is a vector and its argmax is the
        label; with ``convert=False`` each ``y`` is compared to the
        prediction directly.
        """
        if convert:
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                       for (x, y) in data]
        else:
            results = [(np.argmax(self.feedforward(x)), y)
                       for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lmbda, convert=False):
        """Return the average cost over ``data`` plus the regularization term.

        With ``convert=True`` every ``y`` is first passed through
        ``vectorized_result``. The added term is
        ``0.5 * (lmbda / len(data)) * sum(||w||^2)`` over all weight
        matrices.
        """
        count = len(data)
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert:
                y = vectorized_result(y)
            cost += self.cost.fn(a, y) / count
        cost += 0.5 * (lmbda / count) * sum(
            np.linalg.norm(w) ** 2 for w in self.weights)
        return cost

    def save(self, filename):
        """Write sizes, weights, biases and the cost class name to ``filename`` as JSON."""
        data = {"sizes": self.sizes,
                # ndarray.tolist() yields nested lists that json can encode.
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        with open(filename, "w") as f:
            json.dump(data, f)
def load(filename):
    """Rebuild a ``Network`` from a JSON file and return it.

    The file must hold the keys ``"sizes"``, ``"weights"``, ``"biases"``
    and ``"cost"``; ``"cost"`` is the name of a cost class defined in this
    module.
    """
    # The context manager closes the file even if json.load raises.
    with open(filename, "r") as f:
        data = json.load(f)
    # Resolve the cost class by name among this module's attributes.
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net
def vectorized_result(j, num_classes=10):
    """Return a ``(num_classes, 1)`` column vector with 1.0 at row ``j`` and 0.0 elsewhere.

    ``num_classes`` defaults to 10, the length that was previously
    hard-coded.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
# Activation function
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, applied elementwise by NumPy."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
# Derivative of the sigmoid function
def sigmoid_prime(z):
    """Return ``sigmoid(z) * (1 - sigmoid(z))``."""
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(z)
    return s * (1 - s)
二,demo.py
import mnist_loader
# import network
import network2
# ---- load data -------------------------------------------------------------
# Training set, validation set, test set.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# Uncomment to inspect the loaded data:
# print("training_data")
# print(type(training_data))          # container type
# print(len(training_data))           # number of examples
# print(training_data[0][0].shape)    # each element is a tuple; index 0 is x
# print(training_data[0][1].shape)    # each element is a tuple; index 1 is y
# print(training_data[0])
#
# print("validation_data")
# print(len(validation_data))
#
# print("test_data")
# print(len(test_data))

# ---- network2 --------------------------------------------------------------
# 784 neurons in the first layer, 30 in the second, 10 outputs.
print("666")
net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost)
# net.large_weight_initializer()
# Positional arguments: training set, number of epochs, mini-batch size,
# learning rate. lmbda is the regularization parameter; the validation set
# is used as evaluation data and all four monitors are switched on.
net.SGD(
    training_data,
    30,
    10,
    0.5,
    lmbda=5.0,
    evaluation_data=validation_data,
    monitor_evaluation_cost=True,
    monitor_evaluation_accuracy=True,
    monitor_training_cost=True,
    monitor_training_accuracy=True,
)
################network2#################################################
三,运行结果