一、TensorFlow Mechanics 101
此 blog 为 tensorflow 官网 TensorFlow Mechanics 101 的学习笔记,改写了其中的部分代码,并做了整理。主要用到了 tensorflow/examples/tutorials/mnist 下的如下代码:mnist.py 和 fully_connected_feed.py。
二、命令行及输入输出参数设置
- 浅蓝色:命令行参数,使用 tf.app.flags.DEFINE_integer/float/string/boolean() 来定义,使用 FLAGS.parameter 来引用
- 红色:各函数的输出结果,以及作为其它函数的输入
- 紫色:fill_feed_dict 函数的输出 feed_dict,用于喂数据和标签
# Global handle used to read flag values anywhere in the program,
# e.g. FLAGS.max_steps.
FLAGS = tf.app.flags.FLAGS

# Register a command-line flag: name, default value, help text.
tf.app.flags.DEFINE_integer(
    'max_steps', 2000, 'Number of steps to run trainer.')
...
...
...
三、设计计算图(6大组件)
1、数据输入设计(Input)
在计算图的设计过程中只使用 placeholder 占位符,并不传入真实数据;只有在执行计算图的时候,才使用 feed_dict 传入真实的数据!
- 输入参数:
FLAGS.input_data_dir, FLAGS.input_units
- 输出:
images_placeholder, labels_placeholder
- 函数封装:
mnist.placeholder_inputs()
# Load the training/validation/test splits. one_hot defaults to False, so the
# labels are integer class ids in the range 0-9.
data_sets = input_data.read_data_sets(FLAGS.input_data_dir)

# Declare the data placeholders; no real data is attached at this point.
images_placeholder, labels_placeholder = mnist.placeholder_inputs(
    input_units=FLAGS.input_units)
# placeholder_inputs as implemented in mnist.py. Unlike the official tutorial,
# it lives in mnist.py and leaves the first dimension of each shape as None,
# so batches of different sizes can be fed.
def placeholder_inputs(input_units):
    """Create the placeholders through which images and labels are fed.

    Args:
        input_units: Width of one input row (second dimension of the images
            placeholder).

    Returns:
        A (images_placeholder, labels_placeholder) tuple: a float32
        placeholder of shape [None, input_units] and an int32 placeholder of
        shape [None].
    """
    image_shape = [None, input_units]
    label_shape = [None]
    images = tf.placeholder(tf.float32, shape=image_shape)
    labels = tf.placeholder(tf.int32, shape=label_shape)
    return images, labels
2、前向网络设计(Inference)
- 输入参数:
images_placeholder, FLAGS.input_units, FLAGS.hidden1, FLAGS.hidden2, FLAGS.num_classes
- 输出:
logits
- 函数封装:
mnist.inference()
# Slightly different from the official tutorial: the input and output layer
# sizes are passed explicitly, which makes the forward network clearer.
with tf.name_scope('Inference'):
    # Fixed: the original call ended with an unbalanced extra ')'.
    logits = mnist.inference(images_placeholder,
                             FLAGS.input_units,
                             FLAGS.hidden1,
                             FLAGS.hidden2,
                             FLAGS.num_classes)
3、损失函数设计(Loss)
- 输入参数:
logits, labels_placeholder
- 输出:
loss
- 函数封装:
mnist.loss()
# Build the loss node inside its own name scope.
with tf.name_scope('Loss'):
    loss = mnist.loss(logits=logits, labels=labels_placeholder)
# loss as defined in mnist.py.
def loss(logits, labels):
    """Return the mean sparse softmax cross-entropy of logits vs. labels.

    Args:
        logits: Unscaled class scores produced by inference().
        labels: Integer class ids (0-9), not one-hot vectors.

    Returns:
        A scalar tensor named 'xentropy_mean'.
    """
    # The MNIST labels fed here are already int32, so no int64 cast is done.
    # Because the labels are integer ids, the sparse variant is used: it
    # treats each label as the index of the true class (the one-hot encoding
    # is implicit), then applies softmax and computes the cross-entropy.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='xentropy')
    mean_loss = tf.reduce_mean(per_example, name='xentropy_mean')
    return mean_loss
4、参数学习算法设计(Train)
- 输入参数:
loss, FLAGS.learning_rate
- 输出:
train_op
- 函数封装:
mnist.training()
,在此函数中添加 loss 统计节点
# Build the training op inside its own name scope.
with tf.name_scope('Train'):
    train_op = mnist.training(loss=loss, learning_rate=FLAGS.learning_rate)
# training as implemented in mnist.py; it also adds the loss summary node.
def training(loss, learning_rate):
    """Create the op that applies one gradient-descent update.

    Args:
        loss: Scalar loss tensor to minimize.
        learning_rate: Step size for gradient descent.

    Returns:
        The training op; running it performs one optimization step.
    """
    # Export the loss as a scalar summary.
    tf.summary.scalar('loss', loss)
    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    # Non-trainable counter handed to minimize() as its global_step, which
    # the optimizer increments after each update.
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    return sgd.minimize(loss, global_step=step_counter)
5、评估节点设计(Evaluate)
- 输入参数:
logits, labels_placeholder
- 输出:
eval_correct
- 函数封装:
mnist.evaluation()
# Evaluation relies on tf.nn.in_top_k, so the logits do not need to go
# through softmax first.
with tf.name_scope('Evaluation'):
    eval_correct = mnist.evaluation(logits=logits, labels=labels_placeholder)
# evaluation as implemented in mnist.py.
def evaluation(logits, labels):
    """Count how many examples in the batch are classified correctly.

    Args:
        logits: Class scores produced by inference().
        labels: Integer class ids.

    Returns:
        A scalar int32 tensor holding the number of correct predictions.
    """
    # One boolean per example: True when the label is the top-1 prediction.
    hits = tf.nn.in_top_k(logits, labels, 1)
    # Cast the booleans to int32 and sum them to get the count.
    hit_count = tf.reduce_sum(tf.cast(hits, tf.int32))
    return hit_count
6、添加模型保存节点(Saver)
# Add the checkpointing node to the graph.
with tf.name_scope('Saver'):
    saver = tf.train.Saver()

# While running the graph: how often a checkpoint is written and how many
# are kept.
if step % FLAGS.save_freq == 0:
    checkpoint_file = os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name)
    # global_step is not passed, so only the latest model is kept.
    saver.save(sess, checkpoint_file)
四、准备喂数据的函数和评估函数
1、定义喂数据和标签的函数
- 输入参数:
data_set, feed_batch_size, images_pl, labels_pl
- 输出:
feed_dict
# Unlike the official tutorial, feed_batch_size is a parameter so that
# different batch sizes can be fed.
def fill_feed_dict(data_set, feed_batch_size, images_pl, labels_pl):
    """Fetch the next batch and map it onto the two placeholders.

    Args:
        data_set: Object whose next_batch(n) returns an (images, labels) pair.
        feed_batch_size: Number of examples to request.
        images_pl: Placeholder used as the key for the images.
        labels_pl: Placeholder used as the key for the labels.

    Returns:
        A dict {images_pl: images, labels_pl: labels} to pass as feed_dict.
    """
    batch_images, batch_labels = data_set.next_batch(feed_batch_size)
    return {images_pl: batch_images, labels_pl: batch_labels}
2、评估函数
- 输入参数:
sess, eval_correct, images_placeholder, labels_placeholder, data_set, eval_batch_size
- 输出:
样本数、正确预测的样本数、准确率
- 好处:
防止测试集太大,一次性输入全部数据造成内存爆掉
# Returns the number of correctly predicted examples.
def evaluation(logits, labels):
    """Build a node that counts the correct top-1 predictions in a batch.

    Args:
        logits: Class scores produced by inference().
        labels: Integer class ids.

    Returns:
        A scalar int32 tensor with the number of correct predictions.
    """
    is_correct = tf.nn.in_top_k(logits, labels, 1)
    # in_top_k yields booleans, so convert them before summing.
    as_int = tf.cast(is_correct, tf.int32)
    return tf.reduce_sum(as_int)
# The node returned by evaluation() is what do_eval() runs to score batches.
eval_correct = mnist.evaluation(logits=logits, labels=labels_placeholder)
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
            data_set, eval_batch_size):
    """Evaluate data_set batch by batch and print the results.

    Only whole batches are evaluated: the trailing
    data_set.num_examples % eval_batch_size examples are not counted.

    Args:
        sess: Session in which eval_correct is run.
        eval_correct: Node returning the number of correct predictions for
            the fed batch.
        images_placeholder: Placeholder that receives the images.
        labels_placeholder: Placeholder that receives the labels.
        data_set: Data set exposing num_examples and next_batch().
        eval_batch_size: Number of examples fed per run.
    """
    steps_per_epoch = data_set.num_examples // eval_batch_size
    num_examples = steps_per_epoch * eval_batch_size
    true_count = 0
    for _ in range(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, eval_batch_size,
                                   images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    # A data set smaller than one batch yields zero evaluated examples;
    # report 0.0 instead of raising ZeroDivisionError.
    precision = float(true_count) / num_examples if num_examples else 0.0
    print('Num examples: %d, Num correct: %d, Precision: %0.04f' %
          (num_examples, true_count, precision))
五、mnist.py 完整代码
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow as tf
def placeholder_inputs(input_units):
    """Create the placeholders through which images and labels are fed.

    The first dimension of both shapes is None, so any batch size can be fed.

    Args:
        input_units: Width of one input row.

    Returns:
        A (images_placeholder, labels_placeholder) tuple: float32 of shape
        [None, input_units] and int32 of shape [None].
    """
    images = tf.placeholder(dtype=tf.float32, shape=[None, input_units])
    labels = tf.placeholder(dtype=tf.int32, shape=[None])
    return images, labels
def inference(images, input_units, hidden1_units, hidden2_units,
              num_classes):
    """Build the forward network: two ReLU hidden layers and a linear output.

    Args:
        images: Input tensor, multiplied by a [input_units, hidden1_units]
            weight matrix.
        input_units: Size of the input layer.
        hidden1_units: Size of the first hidden layer.
        hidden2_units: Size of the second hidden layer.
        num_classes: Size of the output layer.

    Returns:
        The logits tensor produced by the final linear layer.
    """

    def layer_params(fan_in, fan_out):
        # Weights: truncated normal with stddev 1/sqrt(fan_in); biases: zeros.
        init_stddev = 1.0 / math.sqrt(float(fan_in))
        weights = tf.Variable(
            tf.truncated_normal([fan_in, fan_out], stddev=init_stddev),
            name='weights')
        biases = tf.Variable(tf.zeros([fan_out]), name='biases')
        return weights, biases

    # Hidden 1
    with tf.name_scope('hidden1'):
        weights, biases = layer_params(input_units, hidden1_units)
        # A fully connected layer is a matrix multiply plus a bias add.
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)

    # Hidden 2
    with tf.name_scope('hidden2'):
        weights, biases = layer_params(hidden1_units, hidden2_units)
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)

    # Linear
    with tf.name_scope('softmax_linear'):
        weights, biases = layer_params(hidden2_units, num_classes)
        logits = tf.matmul(hidden2, weights) + biases

    return logits
def loss(logits, labels):
    """Return the mean sparse softmax cross-entropy of logits vs. labels.

    Args:
        logits: Unscaled class scores produced by inference().
        labels: Integer class ids (not one-hot vectors).

    Returns:
        A scalar tensor named 'xentropy_mean'.
    """
    # The placeholder created by placeholder_inputs() is already int32, so
    # this cast is not needed for that path; it is kept so callers passing
    # another integer dtype keep working. tf.cast replaces the deprecated
    # tf.to_int64.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='xentropy')
    return tf.reduce_mean(cross_entropy, name='xentropy_mean')
def training(loss, learning_rate):
    """Create the op that applies one gradient-descent update.

    Also registers a scalar summary named 'loss'.

    Args:
        loss: Scalar loss tensor to minimize.
        learning_rate: Step size for gradient descent.

    Returns:
        The training op; running it performs one optimization step.
    """
    tf.summary.scalar('loss', loss)
    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    # Non-trainable counter handed to minimize() as its global_step, which
    # the optimizer increments after each update.
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    return sgd.minimize(loss, global_step=step_counter)
def evaluation(logits, labels):
    """Count how many examples in the batch are classified correctly.

    Args:
        logits: Class scores produced by inference().
        labels: Integer class ids.

    Returns:
        A scalar int32 tensor holding the number of correct predictions.
    """
    top1_hits = tf.nn.in_top_k(logits, labels, 1)
    # Sum the boolean hits as int32 to get the number of true entries.
    num_correct = tf.reduce_sum(tf.cast(top1_hits, tf.int32))
    return num_correct
六、fully_connected_feed.py 完整代码及结果
此程序调用了 mnist.py,而 mnist.py 对计算图中用到的代码进行了函数封装。
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist # 注意此步和官网不一样
# Set up the command-line flags.
print('~~~~~~~~~~~~设置命令行参数~~~~~~~~~~~~~')
# Global handle used to read flag values anywhere in the program,
# e.g. FLAGS.max_steps.
FLAGS = tf.app.flags.FLAGS
# Input-related flags.
# Fixed: the original called the non-existent tf.app.flags.DDEFINE_string.
tf.app.flags.DEFINE_string(
    'input_data_dir',
    'MNIST_data',
    'Directory of dataset in the form of TFRecords.'
)
# Inference-related flags.
tf.app.flags.DEFINE_integer(
    'input_units', 784, 'Number of units in input layer.')
tf.app.flags.DEFINE_integer(
    'hidden1', 128, 'Number of units in hidden layer 1.')
tf.app.flags.DEFINE_integer(
    'hidden2', 32, 'Number of units in hidden layer 2.')
tf.app.flags.DEFINE_integer(
    'num_classes', 10, 'Number of units in output layer.')

# Training-related flags.
tf.app.flags.DEFINE_float(
    'learning_rate', 0.5, 'Initial learning rate.')
tf.app.flags.DEFINE_integer(
    'max_steps', 2001, 'Number of steps to run trainer.')
tf.app.flags.DEFINE_integer(
    'disp_freq', 100,
    'Display the current results every disp_freq iterations.')
tf.app.flags.DEFINE_integer(
    'save_freq', 1000, 'Save the checkpoints every save_freq iterations.')
tf.app.flags.DEFINE_integer(
    'train_batch_size', 100, 'The size of train batch images.')
# Evaluation-related flags.
tf.app.flags.DEFINE_integer(
    'val_batch_size',
    200,
    'The size of validation batch images.'
)
tf.app.flags.DEFINE_integer(
    'test_batch_size',
    # Deliberately not chosen to divide the test set evenly, just to
    # exercise do_eval.
    128,
    # Fixed: the help text was copy-pasted from val_batch_size.
    'The size of test batch images.'
)
# Summary- and Saver-related flags.
tf.app.flags.DEFINE_string(
    'log_dir', 'logs/mnist', 'Directory to put the summary log data.')
tf.app.flags.DEFINE_string(
    'checkpoint_dir', 'checkpoint/mnist',
    'Directory name to save the checkpoints')
tf.app.flags.DEFINE_string(
    'model_name', 'mnist', 'Model name prefix')
# Helper that feeds images and labels.
def fill_feed_dict(data_set, feed_batch_size, images_pl, labels_pl):
    """Fetch the next batch and map it onto the two placeholders.

    Args:
        data_set: Object whose next_batch(n) returns an (images, labels) pair.
        feed_batch_size: Number of examples to request.
        images_pl: Placeholder used as the key for the images.
        labels_pl: Placeholder used as the key for the labels.

    Returns:
        A dict {images_pl: images, labels_pl: labels} to pass as feed_dict.
    """
    batch = data_set.next_batch(feed_batch_size)
    batch_images, batch_labels = batch
    return {images_pl: batch_images, labels_pl: batch_labels}
# Evaluation helper.
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
            data_set, eval_batch_size):
    """Evaluate data_set batch by batch and print the results.

    Only whole batches are evaluated: the trailing
    data_set.num_examples % eval_batch_size examples are not counted.

    Args:
        sess: Session in which eval_correct is run.
        eval_correct: Node returning the number of correct predictions for
            the fed batch.
        images_placeholder: Placeholder that receives the images.
        labels_placeholder: Placeholder that receives the labels.
        data_set: Data set exposing num_examples and next_batch().
        eval_batch_size: Number of examples fed per run.
    """
    steps_per_epoch = data_set.num_examples // eval_batch_size
    num_examples = steps_per_epoch * eval_batch_size
    true_count = 0
    for _ in range(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, eval_batch_size,
                                   images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    # A data set smaller than one batch yields zero evaluated examples;
    # report 0.0 instead of raising ZeroDivisionError.
    precision = float(true_count) / num_examples if num_examples else 0.0
    print('Num examples: %d, Num correct: %d, Precision: %0.04f' %
          (num_examples, true_count, precision))
# Main function.
def main(_):
    """Build the MNIST graph, train it, and periodically save and evaluate.

    The single argument is whatever tf.app.run() passes to main; it is
    unused here.
    """
    # Create the log and checkpoint directories (recursively) if missing.
    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)
    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    # 0. Load the training/validation/test data sets.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir)

    # 1. Input: placeholders feed data into the network; the None first
    #    dimension accepts any batch size.
    with tf.name_scope('Input'):
        images_placeholder, labels_placeholder = mnist.placeholder_inputs(
            FLAGS.input_units)
    # 2. Forward network.
    with tf.name_scope('Inference'):
        logits = mnist.inference(images_placeholder,
                                 FLAGS.input_units,
                                 FLAGS.hidden1,
                                 FLAGS.hidden2,
                                 FLAGS.num_classes)
    # 3. Loss.
    with tf.name_scope('Loss'):
        loss = mnist.loss(logits, labels_placeholder)
    # 4. Training op.
    with tf.name_scope('Train'):
        train_op = mnist.training(loss, FLAGS.learning_rate)
    # 5. Evaluation node.
    with tf.name_scope('Evaluation'):
        eval_correct = mnist.evaluation(logits, labels_placeholder)
    # 6. Checkpointing node.
    with tf.name_scope('Saver'):
        saver = tf.train.Saver()

    print('~~~~~~~~~~~~~开始执行计算图~~~~~~~~~~~~~~~')
    with tf.Session() as sess:
        summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        # Close the writer even if training fails, so the events file is
        # released; the original never closed it.
        try:
            init = tf.global_variables_initializer()
            sess.run(init)

            # Data sets evaluated after each checkpoint, with their batch
            # sizes.
            eval_splits = (
                ('Training Data Eval:', data_sets.train,
                 FLAGS.train_batch_size),
                ('Validation Data Eval:', data_sets.validation,
                 FLAGS.val_batch_size),
                ('Test Data Eval:', data_sets.test,
                 FLAGS.test_batch_size),
            )

            total_loss = 0
            # Start the training loop.
            for step in range(FLAGS.max_steps):
                feed_dict = fill_feed_dict(data_sets.train,
                                           FLAGS.train_batch_size,
                                           images_placeholder,
                                           labels_placeholder)
                _, loss_value, summary_str = sess.run(
                    [train_op, loss, summary], feed_dict=feed_dict)
                total_loss += loss_value

                # Update the events file.
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

                if step % FLAGS.disp_freq == 0:
                    # Step 0 has accumulated one loss value; later reports
                    # average the disp_freq values since the last report.
                    if step == 0:
                        print('Step: %d, Loss_mean: %.3f' %
                              (step, total_loss))
                    else:
                        print('Step: %d, Loss_mean: %.3f' %
                              (step, total_loss / FLAGS.disp_freq))
                    total_loss = 0

                if step % FLAGS.save_freq == 0:
                    checkpoint_file = os.path.join(FLAGS.checkpoint_dir,
                                                   FLAGS.model_name)
                    # global_step is not passed, so only the latest model
                    # is kept.
                    saver.save(sess, checkpoint_file)
                    for title, split, batch_size in eval_splits:
                        print(title)
                        do_eval(sess,
                                eval_correct,
                                images_placeholder,
                                labels_placeholder,
                                split,
                                batch_size)
        finally:
            summary_writer.close()
# Run main() through the TensorFlow app runner when executed as a script.
if __name__ == '__main__':
    tf.app.run(main=main)
# 输出结果如下:
Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
~~~~~~~~~~~开始执行计算图~~~~~~~~~~~~~~
Step: 0, Loss_mean: 2.312
Training Data Eval:
Num examples: 55000, Num correct: 15263, Precision: 0.2775
Validation Data Eval:
Num examples: 5000, Num correct: 1448, Precision: 0.2896
Test Data Eval:
Num examples: 9984, Num correct: 2763, Precision: 0.2767
Step: 100, Loss_mean: 0.913
......
......
......
Step: 1000, Loss_mean: 0.111
Training Data Eval:
Num examples: 55000, Num correct: 53158, Precision: 0.9665
Validation Data Eval:
Num examples: 5000, Num correct: 4813, Precision: 0.9626
Test Data Eval:
Num examples: 9984, Num correct: 9599, Precision: 0.9614
Step: 1100, Loss_mean: 0.093
......
......
......
Step: 2000, Loss_mean: 0.058
Training Data Eval:
Num examples: 55000, Num correct: 54093, Precision: 0.9835
Validation Data Eval:
Num examples: 5000, Num correct: 4883, Precision: 0.9766
Test Data Eval:
Num examples: 9984, Num correct: 9740, Precision: 0.9756
七、参考资料
1、TensorFlow Mechanics 101
2、TensorFlow实现双隐层SoftMax Regression分类器