TensorFlow Eager模式--手写字符识别

在Eager模式下, 使用TensorFlow实现一个简单的卷积神经网络实验.

测试环境:

win10 x64
anaconda3-5.2_x86_64
python3.6.5
TensorFlow1.10(cpu版)

Ubuntu16.04
anaconda3-5.2_x86_64
python3.6.5
TensorFlow1.10(gpu版)

完整代码

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import argparse

import tensorflow as tf
import tensorflow.contrib.eager as tfe

"""两种方式加载的数据集不同图像部分数据是不同的,
official.mnist： 加载的图像是uint8数据类型编码, /255. 需要归一化
tensorflow.examples.tutorials.mnist 是float类型编码, 无需归一化操作
"""
from tensorflow.examples.tutorials.mnist import input_data
from official.mnist import dataset as mnist_dataset
tf.enable_eager_execution()

def create_model(data_format=None):
    """Build the convolutional network used for MNIST classification.

    The architecture mirrors the networks in:
      https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/examples/tutorials/mnist/mnist_deep.py
    and
      https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py

    The model is assembled with the tf.keras API.

    Args:
        data_format: Either 'channels_first' or 'channels_last'.
            'channels_first' is typically faster on GPUs and
            'channels_last' on CPUs; see
            https://www.tensorflow.org/performance/performance_guide#data_formats

    Returns:
        A tf.keras.Sequential model that takes flattened 28*28 inputs and
        ends in a 10-unit dense layer with no activation (logits).

    Raises:
        ValueError: If data_format is neither 'channels_first' nor
            'channels_last' (this includes the default None).
    """
    # Validate explicitly instead of with `assert`, which is stripped when
    # Python runs with -O and would let a bad value slip through.
    if data_format == 'channels_first':
        input_shape = [1, 28, 28]
    elif data_format == 'channels_last':
        input_shape = [28, 28, 1]
    else:
        raise ValueError(
            "data_format must be 'channels_first' or 'channels_last', got %r"
            % (data_format,))

    KL = tf.keras.layers
    # The same pooling layer object is used after both convolutions.
    maxpool = KL.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same", data_format=data_format)
    return tf.keras.Sequential([
        # Turn each flat 784-vector back into a single-channel 28x28 image.
        KL.Reshape(target_shape=input_shape, input_shape=(28*28, )),
        KL.Conv2D(filters=32, kernel_size=5, padding="same", data_format=data_format, activation=tf.nn.relu),
        maxpool,
        KL.Conv2D(filters=64, kernel_size=5, padding="same", data_format=data_format, activation=tf.nn.relu),
        maxpool,
        KL.Flatten(),  # flatten the convolution output to one dimension
        KL.Dense(units=1024, activation=tf.nn.relu),
        KL.Dropout(rate=0.5),
        KL.Dense(units=10)
    ])

def loss(logits, labels):
    """Return the batch mean of the sparse softmax cross-entropy.

    Args:
        logits: Unnormalized class scores, forwarded as `logits`.
        labels: Class labels, forwarded as `labels` to the sparse
            cross-entropy op.

    Returns:
        The mean of the per-example cross-entropy values.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(per_example)

def compute_accuracy(logits, labels):
    """Return the fraction of rows whose arg-max logit equals the label.

    Args:
        logits: Class scores; the first dimension is used as the batch size
            and the arg-max is taken along axis 1.
        labels: Class labels; cast to int64 before the comparison.

    Returns:
        The number of matching predictions divided by the batch size.
    """
    num_examples = int(logits.shape[0])
    predicted = tf.argmax(input=logits, axis=1, output_type=tf.int64)
    expected = tf.cast(x=labels, dtype=tf.int64)
    # 1.0 where the prediction matches the label, 0.0 elsewhere.
    hits = tf.cast(tf.equal(predicted, expected), dtype=tf.float32)
    return tf.reduce_sum(hits) / num_examples


def train(model, optimizer, dataset, step_counter, log_interval=None):
    """Train `model` for one pass over `dataset`.

    Args:
        model: Callable model; invoked as `model(images, training=True)`.
        optimizer: Optimizer whose `apply_gradients` updates the variables.
        dataset: Dataset yielding `(images, labels)` batches.
        step_counter: Global-step variable passed to `apply_gradients`.
        log_interval: If truthy, print the loss and the measured throughput
            every `log_interval` batches (starting with batch 0).
    """
    last_log_time = time.time()
    steps_since_log = 0
    for (batch, (images, labels)) in enumerate(tfe.Iterator(dataset)):

        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            logits = model(images, training=True)
            loss_value = loss(logits, labels)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)
        steps_since_log += 1

        if log_interval and batch % log_interval == 0:
            # Divide by the steps actually run since the last report: the
            # first report (batch 0) covers one step, not `log_interval`.
            # Clamp the elapsed time so a coarse clock cannot divide by zero.
            elapsed = max(time.time() - last_log_time, 1e-9)
            rate = steps_since_log / elapsed
            print('Step #%d\tLoss: %.6f (%d steps/sec)' % (batch, loss_value, rate))
            last_log_time = time.time()
            steps_since_log = 0


def test(model, dataset):
    """Evaluate `model` on `dataset` and print the mean loss and accuracy.

    Args:
        model: Callable model; invoked as `model(images, training=False)`.
        dataset: Dataset yielding `(images, labels)` batches.
    """
    mean_loss = tfe.metrics.Mean('loss')
    acc_metric = tfe.metrics.Accuracy('accuracy')

    for images, labels in tfe.Iterator(dataset):
        logits = model(images, training=False)
        mean_loss(loss(logits, labels))
        predicted = tf.argmax(logits, axis=1, output_type=tf.int64)
        acc_metric(predicted, tf.cast(labels, tf.int64))

    # Accuracy is scaled by 100 so it prints as a percentage.
    summary = (mean_loss.result(), 100 * acc_metric.result())
    print('Test set: Average loss: %.4f, Accuracy: %4f%%\n' % summary)


def run_mnist_eager(cfg):
    """Train and evaluate the MNIST model, saving a checkpoint every epoch.

    Args:
        cfg: Parsed options. This function reads `batch_size`, `lr`,
            `momentum`, `model_dir`, `train_epochs` and `log_interval`.
    """
    # Pick the device automatically: GPU with channels_first when one is
    # available, otherwise CPU with channels_last.
    if tf.test.is_gpu_available():
        device, data_format = '/gpu:0', 'channels_first'
    else:
        device, data_format = '/cpu:0', 'channels_last'
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the data. (An alternative loader is mnist_dataset.train/test from
    # official.mnist; it is not used here.)
    # Images are presumably flat 28*28 vectors, matching the Reshape input
    # in create_model -- confirm against load_mnist.
    train_ds, test_ds = load_mnist()
    train_ds = train_ds.shuffle(60000, reshuffle_each_iteration=True)
    train_ds = train_ds.batch(cfg.batch_size)
    test_ds = test_ds.batch(cfg.batch_size)

    # Build the model and its optimizer.
    model = create_model(data_format=data_format)
    optimizer = tf.train.MomentumOptimizer(cfg.lr, cfg.momentum)

    # Create the checkpoint object and restore from the newest checkpoint
    # in model_dir, if one exists.
    checkpoint_prefix = os.path.join(cfg.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(model=model, optimizer=optimizer, step_counter=step_counter)
    latest = tf.train.latest_checkpoint(cfg.model_dir)
    checkpoint.restore(latest)

    # Train and evaluate for the configured number of epochs. The original
    # author notes that the tf.device scope is required for GPU use.
    with tf.device(device):
        for _ in range(cfg.train_epochs):
            epoch_start = time.time()
            train(model, optimizer, train_ds, step_counter, cfg.log_interval)
            elapsed = time.time() - epoch_start

            # save_counter counts completed saves, so +1 is the current epoch.
            epoch_number = checkpoint.save_counter.numpy() + 1
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (epoch_number, step_counter.numpy(), elapsed))

            test(model, test_ds)
            checkpoint.save(checkpoint_prefix)

def arg_parse(argv=None):
    """Define and parse the command-line options.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from `sys.argv`.

    Returns:
        An argparse.Namespace with `lr`, `momentum`, `data_dir`,
        `model_dir`, `batch_size`, `train_epochs` and `log_interval`.
    """
    parser = argparse.ArgumentParser(description="Lenet-5 MNIST 模型")
    parser.add_argument("--lr", dest="lr", help="学习率", default=0.01, type=float)
    # type=float is required: without it a momentum given on the command
    # line is kept as a string instead of a number.
    parser.add_argument("--momentum", dest="momentum", help="SGD momentum.", default=0.5, type=float)

    parser.add_argument("--data_dir", dest="data_dir", help="数据集下载/保存目录", default="data/mnist/input_data/")
    parser.add_argument("--model_dir", dest="model_dir", help="模型保存目录", default="data/mnist/checkpoints/")
    parser.add_argument("--batch_size", dest="batch_size", help="训练或测试时 Batch Size", default=100, type=int)
    parser.add_argument("--train_epochs", dest="train_epochs", help="训练时epoch迭代次数", default=4, type=int)
    parser.add_argument("--log_interval", dest="log_interval", help="日志打印间隔", default=10, type=int)

    # Return the parsed namespace.
    return parser.parse_args(argv)

def load_mnist(data_dir="data/mnist/input_data"):
    """Load MNIST and wrap it in two tf.data datasets.

    Args:
        data_dir: Directory the MNIST files are read from (and downloaded
            to). Defaults to the path that was previously hard-coded.

    Returns:
        A `(train_ds, test_ds)` tuple of unbatched tf.data.Dataset objects
        whose elements are `(image, label)` pairs. Note that `test_ds` is
        built from the *validation* split returned by `read_data_sets`,
        not from its test split.
    """
    mnist = input_data.read_data_sets(train_dir=data_dir, one_hot=False,
                                      source_url="http://yann.lecun.com/exdb/mnist/")
    # Named *_split so the module-level train() function is not shadowed.
    train_split = mnist.train
    val_split = mnist.validation

    # Labels are converted with astype(int); images are passed through
    # unchanged (no extra scaling is applied here).
    train_ds = tf.data.Dataset.from_tensor_slices((
        train_split.images,
        train_split.labels.astype(int)))
    test_ds = tf.data.Dataset.from_tensor_slices((
        val_split.images,
        val_split.labels.astype(int)))
    return train_ds, test_ds

if __name__ == '__main__':
    # Script entry point: parse the command-line options and run training.
    run_mnist_eager(arg_parse())

模型训练4个epoch, 控制台输出如下(GPU模式下)

Using device /gpu:0, and data format channels_first.

Extracting data/mnist/input_data/train-images-idx3-ubyte.gz
Extracting data/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting data/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting data/mnist/input_data/t10k-labels-idx1-ubyte.gz
...重复部分,同下...

Step #0 Loss: 0.220385 (50 steps/sec)
Step #10    Loss: 0.032230 (103 steps/sec)
Step #20    Loss: 0.116962 (146 steps/sec)
Step #30    Loss: 0.209136 (94 steps/sec)
Step #40    Loss: 0.088716 (103 steps/sec)
Step #50    Loss: 0.069884 (163 steps/sec)
Step #60    Loss: 0.203790 (168 steps/sec)
Step #70    Loss: 0.142821 (156 steps/sec)
Step #80    Loss: 0.094008 (106 steps/sec)
Step #90    Loss: 0.094178 (124 steps/sec)
Step #100   Loss: 0.105072 (99 steps/sec)
Step #110   Loss: 0.081600 (152 steps/sec)
Step #120   Loss: 0.092357 (108 steps/sec)
Step #130   Loss: 0.033834 (142 steps/sec)
Step #140   Loss: 0.091614 (107 steps/sec)
Step #150   Loss: 0.117287 (126 steps/sec)
Step #160   Loss: 0.041859 (130 steps/sec)
Step #170   Loss: 0.107207 (127 steps/sec)
Step #180   Loss: 0.056127 (131 steps/sec)
Step #190   Loss: 0.057065 (100 steps/sec)
Step #200   Loss: 0.145719 (128 steps/sec)
Step #210   Loss: 0.260964 (152 steps/sec)
Step #220   Loss: 0.081878 (101 steps/sec)
Step #230   Loss: 0.101457 (124 steps/sec)
Step #240   Loss: 0.040356 (94 steps/sec)
Step #250   Loss: 0.118761 (96 steps/sec)
Step #260   Loss: 0.237692 (129 steps/sec)
Step #270   Loss: 0.195366 (138 steps/sec)
Step #280   Loss: 0.079028 (114 steps/sec)
Step #290   Loss: 0.091825 (99 steps/sec)
Step #300   Loss: 0.041029 (128 steps/sec)
Step #310   Loss: 0.085999 (105 steps/sec)
Step #320   Loss: 0.072076 (124 steps/sec)
Step #330   Loss: 0.116839 (128 steps/sec)
Step #340   Loss: 0.049044 (127 steps/sec)
Step #350   Loss: 0.181384 (129 steps/sec)
Step #360   Loss: 0.084100 (127 steps/sec)
Step #370   Loss: 0.115539 (100 steps/sec)
Step #380   Loss: 0.099011 (99 steps/sec)
Step #390   Loss: 0.042452 (123 steps/sec)
Step #400   Loss: 0.051751 (130 steps/sec)
Step #410   Loss: 0.144888 (130 steps/sec)
Step #420   Loss: 0.036556 (126 steps/sec)
Step #430   Loss: 0.077765 (129 steps/sec)
Step #440   Loss: 0.092617 (129 steps/sec)
Step #450   Loss: 0.052487 (126 steps/sec)
Step #460   Loss: 0.054671 (129 steps/sec)
Step #470   Loss: 0.093571 (119 steps/sec)
Step #480   Loss: 0.129751 (97 steps/sec)
Step #490   Loss: 0.048160 (126 steps/sec)
Step #500   Loss: 0.047236 (123 steps/sec)
Step #510   Loss: 0.048452 (129 steps/sec)
Step #520   Loss: 0.092106 (123 steps/sec)
Step #530   Loss: 0.044974 (128 steps/sec)
Step #540   Loss: 0.155552 (123 steps/sec)

Train time for epoch #4 (2200 total steps): 4.788206
Test set: Average loss: 0.0667, Accuracy: 98.040000%

TensorFlow Eager模式--手写字符识别

猜你喜欢