下面是数据集下载的地址
http://yann.lecun.com/exdb/mnist/
首先,我们写出读入数据集的函数
def load_data_train():
    """Load the MNIST training set from the local idx files.

    Reads 'train-images.idx3-ubyte' and 'train-labels.idx1-ubyte' from the
    current working directory (download from http://yann.lecun.com/exdb/mnist/).

    Returns:
        imgs:   np.ndarray of shape [num_images, width, height, 1],
                raw pixel bytes 0-255.
        labels: np.ndarray of shape [num_images, 1], integer labels 0-9.
    """
    train_image = 'train-images.idx3-ubyte'
    train_label = 'train-labels.idx1-ubyte'
    # --- training images ---
    with open(train_image, 'rb') as binfile:
        buffers = binfile.read()
    # idx3 header: magic, image count, rows, cols — four big-endian uint32.
    head = struct.unpack_from('>IIII', buffers, 0)
    offset = struct.calcsize('>IIII')
    imgNum = head[1]
    width = head[2]
    height = head[3]
    bits = imgNum * width * height
    bitsString = '>' + str(bits) + 'B'  # all pixels as unsigned bytes
    imgs = struct.unpack_from(bitsString, buffers, offset)
    imgs = np.reshape(imgs, [imgNum, width, height, 1])
    # --- training labels ---
    with open(train_label, 'rb') as binfile:
        buffers = binfile.read()
    # idx1 header: magic, label count — two big-endian uint32.
    head = struct.unpack_from('>II', buffers, 0)
    labelNum = head[1]
    offset = struct.calcsize('>II')
    numString = '>' + str(labelNum) + 'B'
    labels = struct.unpack_from(numString, buffers, offset)
    labels = np.reshape(labels, [labelNum, 1])
    return imgs, labels
def load_data_test():
    """Load the MNIST test set from the local idx files.

    Reads 't10k-images.idx3-ubyte' and 't10k-labels.idx1-ubyte' from the
    current working directory (download from http://yann.lecun.com/exdb/mnist/).

    Returns:
        imgs:   np.ndarray of shape [num_images, width, height, 1],
                raw pixel bytes 0-255.
        labels: np.ndarray of shape [num_images, 1], integer labels 0-9.
    """
    test_image = 't10k-images.idx3-ubyte'
    test_label = 't10k-labels.idx1-ubyte'
    # --- test images ---
    with open(test_image, 'rb') as binfile:
        buffers = binfile.read()
    # idx3 header: magic, image count, rows, cols — four big-endian uint32.
    head = struct.unpack_from('>IIII', buffers, 0)
    offset = struct.calcsize('>IIII')
    imgNum = head[1]
    width = head[2]
    height = head[3]
    bits = imgNum * width * height
    bitsString = '>' + str(bits) + 'B'  # all pixels as unsigned bytes
    imgs = struct.unpack_from(bitsString, buffers, offset)
    imgs = np.reshape(imgs, [imgNum, width, height, 1])
    # --- test labels ---
    with open(test_label, 'rb') as binfile:
        buffers = binfile.read()
    # idx1 header: magic, label count — two big-endian uint32.
    head = struct.unpack_from('>II', buffers, 0)
    labelNum = head[1]
    offset = struct.calcsize('>II')
    numString = '>' + str(labelNum) + 'B'
    labels = struct.unpack_from(numString, buffers, offset)
    labels = np.reshape(labels, [labelNum, 1])
    return imgs, labels
读入数据之后,请注意这里的label都是整数,我们所要使用的是其one-hot表示来计算loss
下面我们先演示一个网络结构
其中,网络有两部分,分别是卷积层和全连接层
卷积层的结构有卷积、非线性激活、池化
"""layer 1"""
net = tf.layers.conv2d(
inputs=net,
filters=16,
kernel_size=(3, 3),
kernel_initializer=w_init,
bias_initializer=b_init,
padding='SAME',
activation=tf.nn.leaky_relu,
name="1/conv"
)
net = tf.layers.max_pooling2d(
inputs=net,
pool_size=[2, 2],
strides=2,
name='1/pooling'
)
上面是一个卷积层的结构,其中,我们定义了变量初始化方法
# Weight initializer: small zero-mean random normals.
w_init = tf.random_normal_initializer(mean=0, stddev=0.02)
# NOTE(review): biases are initialized around 1 rather than the usual 0 —
# confirm this is intentional.
b_init = tf.random_normal_initializer(mean=1, stddev=0.02)
全连接层结构如下
# Flatten the pooled feature maps to [batch_size, num_features].
net = tf.reshape(net, [batch_size, -1])
# Hidden dense layer, 512 units (no activation here as written).
net = tf.layers.dense(
inputs=net,
units=512,
kernel_initializer=w_init,
bias_initializer=b_init,
name='dense1'
)
# Output layer, 10 units (one per digit class).
# NOTE(review): the sigmoid here is questionable — the loss used later,
# softmax_cross_entropy_with_logits, expects raw (unnormalized) logits.
logits = tf.layers.dense(
inputs=net,
units=10,
kernel_initializer=w_init,
bias_initializer=b_init,
name='dense2',
activation=tf.nn.sigmoid
)
全连接层有两层,每层的节点分别有512、10个,经过激活,得到一个10维矢量,最大的值的下标就是我们的预测结果
给出整个网络的架构
def network(input, batch_size, reuse):
    """Build the classifier CNN.

    Three conv/pool stages (16, 32, 64 filters) followed by a 512-unit
    hidden dense layer and a 10-way output layer.

    Args:
        input: float tensor of shape [batch_size, 28, 28, 1].
        batch_size: static batch size, used to flatten the conv features.
        reuse: False on the first call (creates the variables), True on
            subsequent calls to share them.

    Returns:
        logits: raw [batch_size, 10] class scores, suitable for
        tf.nn.softmax_cross_entropy_with_logits.
    """
    net = input
    with tf.variable_scope('network', reuse=reuse):
        w_init = tf.random_normal_initializer(mean=0, stddev=0.02)
        b_init = tf.random_normal_initializer(mean=1, stddev=0.02)
        # Conv stages: 3x3 SAME conv + leaky ReLU, then 2x2/stride-2
        # max-pool.  Variable scopes "1/conv" ... "3/pooling" match the
        # original layer names, so checkpoints stay compatible.
        for layer_id, filters in enumerate((16, 32, 64), start=1):
            net = tf.layers.conv2d(
                inputs=net,
                filters=filters,
                kernel_size=(3, 3),
                kernel_initializer=w_init,
                bias_initializer=b_init,
                padding='SAME',
                activation=tf.nn.leaky_relu,
                name='%d/conv' % layer_id
            )
            net = tf.layers.max_pooling2d(
                inputs=net,
                pool_size=[2, 2],
                strides=2,
                name='%d/pooling' % layer_id
            )
        """dense layers"""
        net = tf.reshape(net, [batch_size, -1])
        # Fix: the hidden dense layer needs a non-linearity; without it,
        # dense1 and dense2 collapse into a single linear map.
        net = tf.layers.dense(
            inputs=net,
            units=512,
            kernel_initializer=w_init,
            bias_initializer=b_init,
            activation=tf.nn.leaky_relu,
            name='dense1'
        )
        # Fix: no sigmoid on the output — softmax_cross_entropy_with_logits
        # expects unnormalized logits, and argmax prediction is unaffected.
        logits = tf.layers.dense(
            inputs=net,
            units=10,
            kernel_initializer=w_init,
            bias_initializer=b_init,
            name='dense2'
        )
    return logits
给出损失函数的定义
# Per-example softmax cross-entropy between the one-hot labels and the
# network output, then averaged over the batch to give a scalar loss.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_train_one_hot, logits=f)
cross_entropy = tf.reduce_mean(cross_entropy)
这里,y_train_one_hot是我们将label转为one-hot编码的值,f是神经网络的输出
评估准确率
# Accuracy: fraction of test examples whose top-scoring class matches the
# label.  Uses tf.argmax — tf.arg_max is deprecated in favor of tf.argmax.
correct_prediction = tf.equal(tf.argmax(test_f, 1), tf.argmax(y_test_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
程序主体
def main():
    """Build the graph, train on MNIST for one epoch, and print test accuracy.

    Relies on module-level `batch_size` and `lr`, and on the `func` module
    providing load_data_train / load_data_test / network.
    """
    # Training placeholders; labels are fed as [batch_size, 1] integers.
    x = tf.placeholder(dtype=tf.float32, shape=[batch_size, 28, 28, 1])
    y = tf.placeholder(dtype=tf.int32, shape=[batch_size, 1])
    # Fix: flatten the indices first so the one-hot labels come out as
    # [batch_size, 10] and match the logits (the test path already did this;
    # without the reshape the labels would be [batch_size, 1, 10]).
    y_train_one_hot = tf.one_hot(indices=tf.reshape(y, [batch_size]), depth=10)
    # Test placeholders cover the whole 10k test set at once.
    x_test = tf.placeholder(dtype=tf.float32, shape=[10000, 28, 28, 1])
    y_test = tf.placeholder(dtype=tf.int32, shape=[10000, 1])
    y_test_one_hot = tf.one_hot(indices=tf.reshape(y_test, [10000]), depth=10)

    train_set_image, train_set_label = func.load_data_train()
    test_set_image, test_set_label = func.load_data_test()

    # First call creates the variables; the test graph reuses them.
    f = func.network(input=x, batch_size=batch_size, reuse=False)
    test_f = func.network(input=x_test, batch_size=10000, reuse=True)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_train_one_hot, logits=f)
    cross_entropy = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

    # Accuracy on the full test set (tf.argmax: tf.arg_max is deprecated).
    correct_prediction = tf.equal(tf.argmax(test_f, 1), tf.argmax(y_test_one_hot, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # One pass over the 60k training images, in order, batch by batch.
        for i in range(60000 // batch_size):
            index = batch_size * i
            train_X = train_set_image[index:index + batch_size]
            train_Y = train_set_label[index:index + batch_size]
            sess.run(train_step, feed_dict={x: train_X, y: train_Y})
            if i % 100 == 0:
                print(i, "test accuracy", sess.run(
                    accuracy,
                    feed_dict={x_test: test_set_image, y_test: test_set_label}))


if __name__ == "__main__":
    main()
最终正确率可达97%