版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u010472607/article/details/81990310
在Eager模式下, 使用TensorFlow实现一个简单的卷积神经网络实验.
测试环境 (共两套: Windows 10 + CPU 版 TensorFlow, 以及 Ubuntu 16.04 + GPU 版 TensorFlow):
win10 x64
anaconda3-5.2_x86_64
python3.6.5
TensorFlow1.10(cpu版)
Ubuntu16.04
anaconda3-5.2_x86_64
python3.6.5
TensorFlow1.10(gpu版)
完整代码
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse
import tensorflow as tf
import tensorflow.contrib.eager as tfe
"""两种方式加载的数据集不同图像部分数据是不同的,
official.mnist: 加载的图像是uint8数据类型编码, /255. 需要归一化
tensorflow.examples.tutorials.mnist 是float类型编码, 无需归一化操作
"""
from tensorflow.examples.tutorials.mnist import input_data
from official.mnist import dataset as mnist_dataset
# Switch TensorFlow to eager mode for the whole script; the training and
# evaluation code below iterates datasets with tfe.Iterator and reads tensor
# values via .numpy().
tf.enable_eager_execution()
def create_model(data_format=None):
    """Build the MNIST handwritten-digit classifier as a Keras Sequential model.

    The network structure matches the models in:
    https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/examples/tutorials/mnist/mnist_deep.py
    and
    https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py
    but is expressed with the tf.keras API.

    Layer stack: reshape the flat 28*28 input into an image, two
    (5x5 conv + ReLU, 2x2 max-pool) stages with 32 and 64 filters, flatten,
    a 1024-unit ReLU dense layer, dropout (rate 0.5) and a final 10-unit
    dense layer that outputs unnormalised logits.

    Args:
        data_format: 'channels_first', 'channels_last' or None. None selects
            the Keras backend default (``tf.keras.backend.image_data_format()``).
            'channels_first' is typically faster on GPUs and 'channels_last'
            on CPUs; see
            https://www.tensorflow.org/performance/performance_guide#data_formats

    Returns:
        A tf.keras.Model.

    Raises:
        ValueError: if ``data_format`` is not one of the accepted values.
    """
    if data_format is None:
        # Resolve the default explicitly so the Reshape target agrees with
        # the layout the convolution/pooling layers will use.
        data_format = tf.keras.backend.image_data_format()

    if data_format == 'channels_first':
        input_shape = [1, 28, 28]
    elif data_format == 'channels_last':
        input_shape = [28, 28, 1]
    else:
        # Explicit exception instead of `assert`, which disappears under -O.
        raise ValueError(
            "data_format must be 'channels_first' or 'channels_last', got %r"
            % (data_format,))

    KL = tf.keras.layers

    def make_pool():
        # A fresh layer object per pooling stage, so no layer instance is
        # inserted into the Sequential model twice.
        return KL.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same",
                               data_format=data_format)

    return tf.keras.Sequential([
        KL.Reshape(target_shape=input_shape, input_shape=(28 * 28,)),
        KL.Conv2D(filters=32, kernel_size=5, padding="same",
                  data_format=data_format, activation=tf.nn.relu),
        make_pool(),
        KL.Conv2D(filters=64, kernel_size=5, padding="same",
                  data_format=data_format, activation=tf.nn.relu),
        make_pool(),
        KL.Flatten(),  # collapse the convolution output to one dimension
        KL.Dense(units=1024, activation=tf.nn.relu),
        KL.Dropout(rate=0.5),
        KL.Dense(units=10),
    ])
def loss(logits, labels):
    """Return the batch-mean sparse softmax cross-entropy.

    Args:
        logits: unnormalised class scores produced by the model.
        labels: class labels in the form accepted by
            ``tf.nn.sparse_softmax_cross_entropy_with_logits``.

    Returns:
        A scalar tensor: the mean of the per-example cross-entropy values.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(per_example)
def compute_accuracy(logits, labels):
    """Return the fraction of examples whose arg-max logit equals the label.

    Args:
        logits: 2-D tensor of class scores; the arg-max is taken along axis 1
            and the size of axis 0 is used as the batch size.
        labels: class labels, cast to int64 before comparison.

    Returns:
        A float32 scalar tensor: (number of correct predictions) / batch size.
    """
    predicted = tf.argmax(input=logits, axis=1, output_type=tf.int64)
    expected = tf.cast(x=labels, dtype=tf.int64)
    num_examples = int(logits.shape[0])
    hits = tf.cast(tf.equal(predicted, expected), dtype=tf.float32)
    return tf.reduce_sum(hits) / num_examples
def train(model, optimizer, dataset, step_counter, log_interval=None):
    """Train ``model`` for one pass over ``dataset``.

    For every batch the forward pass and loss are recorded on a
    ``tf.GradientTape``, gradients are taken with respect to
    ``model.variables`` and applied through ``optimizer``, which also
    increments ``step_counter``.

    Args:
        model: callable model, invoked as ``model(images, training=True)``.
        optimizer: optimizer exposing ``apply_gradients(..., global_step=...)``.
        dataset: dataset yielding ``(images, labels)`` batches.
        step_counter: global-step variable passed to ``apply_gradients``.
        log_interval: if truthy, print the loss and throughput every
            ``log_interval`` batches (including batch 0).
    """
    window_start = time.time()
    steps_in_window = 0  # steps executed since the last log line
    for (batch, (images, labels)) in enumerate(tfe.Iterator(dataset)):
        with tf.GradientTape() as tape:
            logits = model(images, training=True)
            loss_value = loss(logits, labels)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)
        steps_in_window += 1
        if log_interval and batch % log_interval == 0:
            # Divide by the number of steps that really ran in this window:
            # at batch 0 only a single step has been executed, so using
            # log_interval there would overstate the rate. The max() guards
            # against a zero elapsed time on coarse clocks.
            elapsed = time.time() - window_start
            rate = steps_in_window / max(elapsed, 1e-9)
            print('Step #%d\tLoss: %.6f (%d steps/sec)' % (batch, loss_value, rate))
            window_start = time.time()
            steps_in_window = 0
def test(model, dataset):
    """Evaluate ``model`` on ``dataset`` and print the average loss and accuracy.

    Args:
        model: callable model, invoked as ``model(images, training=False)``.
        dataset: dataset yielding ``(images, labels)`` batches.
    """
    loss_metric = tfe.metrics.Mean('loss')
    accuracy_metric = tfe.metrics.Accuracy('accuracy')
    for images, labels in tfe.Iterator(dataset):
        logits = model(images, training=False)
        loss_metric(loss(logits, labels))
        # Compare predicted class ids with the labels, both as int64.
        predicted = tf.argmax(logits, axis=1, output_type=tf.int64)
        expected = tf.cast(labels, tf.int64)
        accuracy_metric(predicted, expected)
    summary = (loss_metric.result(), 100 * accuracy_metric.result())
    print('Test set: Average loss: %.4f, Accuracy: %4f%%\n' % summary)
def run_mnist_eager(cfg):
    """Train and evaluate the MNIST model in eager mode, checkpointing each epoch.

    Args:
        cfg: parsed options; this function reads ``batch_size``, ``lr``,
            ``momentum``, ``model_dir``, ``train_epochs`` and ``log_interval``.
    """
    # Choose the device automatically: GPU with channels_first when a GPU is
    # available, otherwise CPU with channels_last.
    if tf.test.is_gpu_available():
        device, data_format = '/gpu:0', 'channels_first'
    else:
        device, data_format = '/cpu:0', 'channels_last'
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the data through load_mnist(). The alternative loader imported as
    # mnist_dataset (official.mnist) is not used here.
    # Each image is a flat vector of 28*28 = 784 values; each label is a scalar.
    train_ds, test_ds = load_mnist()
    train_ds = train_ds.shuffle(60000, reshuffle_each_iteration=True)
    train_ds = train_ds.batch(cfg.batch_size)
    test_ds = test_ds.batch(cfg.batch_size)

    # Build the model and the optimizer.
    model = create_model(data_format=data_format)
    optimizer = tf.train.MomentumOptimizer(cfg.lr, cfg.momentum)

    # Set up checkpointing and restore the latest checkpoint in model_dir,
    # if there is one.
    checkpoint_prefix = os.path.join(cfg.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(model=model, optimizer=optimizer, step_counter=step_counter)
    latest = tf.train.latest_checkpoint(cfg.model_dir)
    checkpoint.restore(latest)

    # Train and evaluate for the configured number of epochs.
    with tf.device(device):  # explicit placement; required to run on the GPU
        for _ in range(cfg.train_epochs):
            start = time.time()
            train(model, optimizer, train_ds, step_counter, cfg.log_interval)
            end = time.time()
            epoch_number = checkpoint.save_counter.numpy() + 1
            total_steps = step_counter.numpy()
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (epoch_number, total_steps, end - start))
            test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
def arg_parse(argv=None):
    """Define and parse the command-line options of the script.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``lr``, ``momentum``,
        ``data_dir``, ``model_dir``, ``batch_size``, ``train_epochs`` and
        ``log_interval``.
    """
    parser = argparse.ArgumentParser(description="Lenet-5 MNIST 模型")
    parser.add_argument("--lr", dest="lr", help="学习率", default=0.01, type=float)
    # type=float is required here: without it a value given on the command
    # line would reach the optimizer as a string.
    parser.add_argument("--momentum", dest="momentum", help="SGD momentum.", default=0.5, type=float)
    parser.add_argument("--data_dir", dest="data_dir", help="数据集下载/保存目录", default="data/mnist/input_data/")
    parser.add_argument("--model_dir", dest="model_dir", help="模型保存目录", default="data/mnist/checkpoints/")
    parser.add_argument("--batch_size", dest="batch_size", help="训练或测试时 Batch Size", default=100, type=int)
    parser.add_argument("--train_epochs", dest="train_epochs", help="训练时epoch迭代次数", default=4, type=int)
    parser.add_argument("--log_interval", dest="log_interval", help="日志打印间隔", default=10, type=int)
    # Return the converted result.
    return parser.parse_args(argv)
def load_mnist(data_dir="data/mnist/input_data"):
    """Load MNIST via ``input_data.read_data_sets`` as two tf.data datasets.

    Images are passed through unchanged; the module-level note next to the
    imports states that this loader already yields float-encoded images, so
    no /255 normalisation is applied here.

    Args:
        data_dir: directory handed to ``read_data_sets`` as ``train_dir``.
            Defaults to the location that was previously hard-coded.

    Returns:
        A ``(train_ds, test_ds)`` tuple of ``tf.data.Dataset`` objects whose
        elements are ``(image, label)`` pairs. NOTE: ``test_ds`` is built
        from the *validation* split (``mnist.validation``), not from
        ``mnist.test``.
    """
    mnist = input_data.read_data_sets(train_dir=data_dir, one_hot=False,
                                      source_url="http://yann.lecun.com/exdb/mnist/")
    train_split = mnist.train
    val_split = mnist.validation
    # Use an explicit int64 label dtype: plain `int` maps to a
    # platform-dependent NumPy integer width.
    train_ds = tf.data.Dataset.from_tensor_slices((
        train_split.images,
        train_split.labels.astype("int64")))
    test_ds = tf.data.Dataset.from_tensor_slices((
        val_split.images,
        val_split.labels.astype("int64")))
    return train_ds, test_ds
if __name__ == '__main__':
    # Script entry point: parse the command-line options and start the
    # train/evaluate loop with them.
    run_mnist_eager(arg_parse())
模型训练4个epoch, 控制台输出如下(GPU模式下, 此处仅展示最后一个epoch的输出)
Using device /gpu:0, and data format channels_first.
Extracting data/mnist/input_data/train-images-idx3-ubyte.gz
Extracting data/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting data/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting data/mnist/input_data/t10k-labels-idx1-ubyte.gz
...重复部分,同下...
Step #0 Loss: 0.220385 (50 steps/sec)
Step #10 Loss: 0.032230 (103 steps/sec)
Step #20 Loss: 0.116962 (146 steps/sec)
Step #30 Loss: 0.209136 (94 steps/sec)
Step #40 Loss: 0.088716 (103 steps/sec)
Step #50 Loss: 0.069884 (163 steps/sec)
Step #60 Loss: 0.203790 (168 steps/sec)
Step #70 Loss: 0.142821 (156 steps/sec)
Step #80 Loss: 0.094008 (106 steps/sec)
Step #90 Loss: 0.094178 (124 steps/sec)
Step #100 Loss: 0.105072 (99 steps/sec)
Step #110 Loss: 0.081600 (152 steps/sec)
Step #120 Loss: 0.092357 (108 steps/sec)
Step #130 Loss: 0.033834 (142 steps/sec)
Step #140 Loss: 0.091614 (107 steps/sec)
Step #150 Loss: 0.117287 (126 steps/sec)
Step #160 Loss: 0.041859 (130 steps/sec)
Step #170 Loss: 0.107207 (127 steps/sec)
Step #180 Loss: 0.056127 (131 steps/sec)
Step #190 Loss: 0.057065 (100 steps/sec)
Step #200 Loss: 0.145719 (128 steps/sec)
Step #210 Loss: 0.260964 (152 steps/sec)
Step #220 Loss: 0.081878 (101 steps/sec)
Step #230 Loss: 0.101457 (124 steps/sec)
Step #240 Loss: 0.040356 (94 steps/sec)
Step #250 Loss: 0.118761 (96 steps/sec)
Step #260 Loss: 0.237692 (129 steps/sec)
Step #270 Loss: 0.195366 (138 steps/sec)
Step #280 Loss: 0.079028 (114 steps/sec)
Step #290 Loss: 0.091825 (99 steps/sec)
Step #300 Loss: 0.041029 (128 steps/sec)
Step #310 Loss: 0.085999 (105 steps/sec)
Step #320 Loss: 0.072076 (124 steps/sec)
Step #330 Loss: 0.116839 (128 steps/sec)
Step #340 Loss: 0.049044 (127 steps/sec)
Step #350 Loss: 0.181384 (129 steps/sec)
Step #360 Loss: 0.084100 (127 steps/sec)
Step #370 Loss: 0.115539 (100 steps/sec)
Step #380 Loss: 0.099011 (99 steps/sec)
Step #390 Loss: 0.042452 (123 steps/sec)
Step #400 Loss: 0.051751 (130 steps/sec)
Step #410 Loss: 0.144888 (130 steps/sec)
Step #420 Loss: 0.036556 (126 steps/sec)
Step #430 Loss: 0.077765 (129 steps/sec)
Step #440 Loss: 0.092617 (129 steps/sec)
Step #450 Loss: 0.052487 (126 steps/sec)
Step #460 Loss: 0.054671 (129 steps/sec)
Step #470 Loss: 0.093571 (119 steps/sec)
Step #480 Loss: 0.129751 (97 steps/sec)
Step #490 Loss: 0.048160 (126 steps/sec)
Step #500 Loss: 0.047236 (123 steps/sec)
Step #510 Loss: 0.048452 (129 steps/sec)
Step #520 Loss: 0.092106 (123 steps/sec)
Step #530 Loss: 0.044974 (128 steps/sec)
Step #540 Loss: 0.155552 (123 steps/sec)
Train time for epoch #4 (2200 total steps): 4.788206
Test set: Average loss: 0.0667, Accuracy: 98.040000%