FCN 图像语义分割训练(从零开始训练 FCN)
前言
大部分博客都是使用了现成的 VGG 权值进行 fine-tuning,但不巧的是,本人有点懒,不想使用现有的 VGG 权值,所以自己写了一个 FCN 进行调参。虽然结果不太理想,但是还是满足了自己的好奇心。(写代码只是为了玩,仅此而已)
以下是效果图:
效果不好大家不要吐槽啦,然后就是准确率:
可以看到结果(灰度图)在一点一点地发生改变。以下每四张图片为一个单元。
大约训练了 30 个小时,最后准确率大概是 70% 左右,惨不忍睹。接下来是网络图
代码也很容易懂,就不多说了。
代码中输出的都是单通道的灰度图,上面的效果图是经过伪彩色变换后得到的。
代码
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np
import os
import shutil
# Restrict TensorFlow to the first GPU; set before tensorflow is imported.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
# Start every run with an empty TensorBoard log directory. Done in-process
# (instead of shelling out to `rm -r`) so it is portable and stays silent when
# the directory does not exist yet.
shutil.rmtree("logs", ignore_errors=True)
import tensorflow as tf
import matplotlib.pyplot as plt
# In[ ]:
# TFRecord files for the dataset splits; Network() reads the train and val ones.
trainPath = '/home/winsoul/disk/Segmentation/SiftFlow/data/GeoLabels/tfrecords/train.tfrecords'
testPath = '/home/winsoul/disk/Segmentation/SiftFlow/data/GeoLabels/tfrecords/test.tfrecords'  # NOTE(review): not referenced in the visible code
valPath = '/home/winsoul/disk/Segmentation/SiftFlow/data/GeoLabels/tfrecords/val.tfrecords'
# Path prefix under which Network() writes its checkpoints.
model_path = '/home/winsoul/disk/Segmentation/SiftFlow/FCN/model/'
# NOTE(review): presumably the reporting interval in training steps - confirm
# the training loop uses this constant rather than a literal.
DisplayStep = 25
# A checkpoint is saved whenever the step counter is a multiple of this value.
ModelSaverStep = 2500
# Arguments passed to tf.train.exponential_decay in Network(): the decay
# period in steps and the multiplicative decay factor.
decay_step = 200
decay_rate = 0.98
# In[ ]:
def read_tfrecords(TFRecordsPath):
    """Add ops to the default graph that read one example from a TFRecord file.

    Only graph nodes are created here; nothing is executed, so no session is
    needed (the previous version opened a `tf.Session` on every call and never
    used it).

    Args:
        TFRecordsPath: path of the TFRecord file to read from.

    Returns:
        A tuple `(image, label)` of tensors: `image` is the 'image' bytes
        decoded as float32 and reshaped to [224, 224, 3]; `label` is the
        'label' bytes decoded as uint8 and reshaped to [224, 224].
    """
    # Every record must carry these three byte-string features. 'name' is
    # parsed but not returned.
    feature = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.string),
        'name': tf.FixedLenFeature([], tf.string),
    }
    filename_queue = tf.train.string_input_producer([TFRecordsPath])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features = feature)

    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, [224, 224, 3])
    label = tf.decode_raw(features['label'], tf.uint8)
    label = tf.reshape(label, [224, 224])
    return image, label
# In[ ]:
def conv_layer(X, k, s, channels_in, channels_out, name = 'CONV', padding = 'SAME'):
    """Create a k x k convolution with bias followed by ReLU.

    Args:
        X: input tensor passed to tf.nn.conv2d.
        k: spatial size of the square kernel.
        s: stride used for both spatial dimensions.
        channels_in: number of input channels of the kernel.
        channels_out: number of output channels of the kernel.
        name: name scope wrapping the created ops.
        padding: padding mode forwarded to tf.nn.conv2d.

    Returns:
        The ReLU-activated output tensor.
    """
    with tf.name_scope(name):
        kernel_shape = [k, k, channels_in, channels_out]
        kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev = 0.1))
        bias = tf.Variable(tf.constant(0.1, shape = [channels_out]))
        pre_activation = tf.nn.conv2d(X, kernel, strides = [1, s, s, 1], padding = padding) + bias
        return tf.nn.relu(pre_activation)
def pool_layer(X, k, s, strr = 'SAME', pool_type = 'MAX', name = 'pool'):
    """Apply k x k max or average pooling with stride s.

    Args:
        X: input tensor.
        k: spatial size of the pooling window.
        s: stride used for both spatial dimensions.
        strr: padding mode forwarded to the pooling op.
        pool_type: 'MAX' selects max pooling; any other value selects
            average pooling.
        name: used both as the name scope and as the op name.

    Returns:
        The pooled tensor.
    """
    pooling_op = tf.nn.max_pool if pool_type == 'MAX' else tf.nn.avg_pool
    window = [1, k, k, 1]
    step = [1, s, s, 1]
    with tf.name_scope(name):
        return pooling_op(X, ksize = window, strides = step, padding = strr, name = name)
def fc_layer(X, neurons_in, neurons_out, last = False, name = 'FC'):
    """Create a fully connected layer with ReLU or softmax activation.

    Args:
        X: input tensor multiplied by the weight matrix via tf.matmul.
        neurons_in: number of rows of the weight matrix.
        neurons_out: number of columns of the weight matrix (and bias size).
        last: when equal to False the layer uses ReLU, otherwise softmax.
        name: name scope wrapping the created ops.

    Returns:
        The activated output tensor.
    """
    with tf.name_scope(name):
        weights = tf.Variable(tf.truncated_normal([neurons_in, neurons_out], stddev = 0.1))
        bias = tf.Variable(tf.constant(0.1, shape = [neurons_out]))
        pre_activation = tf.matmul(X, weights) + bias
        # Equality test (not truthiness) kept on purpose so that non-bool
        # arguments select the same branch as before.
        if last == False:
            return tf.nn.relu(pre_activation)
        return tf.nn.softmax(pre_activation)
# In[ ]:
def conv_transpose_layer(X, k, s, input_shape, output_shape, name = 'CONV_TRAN', padding = 'SAME'):
    """Create a k x k transposed convolution (upsampling) layer with bias.

    No activation is applied. Two defects of the previous version are fixed:
    the bias sum was computed and then overwritten by the raw deconvolution
    output, so the bias variable never took part in the model; and the
    `padding` argument was ignored in favour of a hard-coded "SAME".

    Args:
        X: input tensor.
        k: spatial size of the square kernel.
        s: stride (upsampling factor) for both spatial dimensions.
        input_shape: static shape of `X`; entry 3 must expose `.value` (the
            number of input channels), e.g. the result of `get_shape()`.
        output_shape: static shape of the result, same requirements; it is
            also passed to tf.nn.conv2d_transpose as the output shape.
        name: name scope wrapping the created ops.
        padding: padding mode forwarded to tf.nn.conv2d_transpose.

    Returns:
        The transposed-convolution output with the bias added.
    """
    with tf.name_scope(name):
        in_channels = input_shape[3].value
        out_channels = output_shape[3].value
        # conv2d_transpose expects the filter as [h, w, out_channels, in_channels].
        W = tf.Variable(tf.truncated_normal([k, k, out_channels, in_channels], stddev = 0.1))
        b = tf.Variable(tf.constant(0.1, shape = [out_channels]))
        deconv = tf.nn.conv2d_transpose(X, W, output_shape, strides = [1, s, s, 1], padding = padding)
        return tf.add(deconv, b)
# In[ ]:
def Network(BatchSize, start_learning_rate):
    """Build the FCN segmentation graph from scratch and run the training loop.

    The default graph is reset first, so every call starts with freshly
    initialised weights. The network is a VGG-16-style encoder followed by
    three transposed convolutions with skip connections from pool4 and pool3,
    producing 4 output channels per pixel.

    Changes relative to the previous version:
      * the TensorBoard directories are named after the numeric
        `start_learning_rate` (they used to contain the repr of the
        learning-rate tensor, which is identical for every run);
      * on reporting steps the optimiser runs once instead of twice;
      * the reporting interval comes from `DisplayStep` instead of a literal;
      * the checkpoint directory is created up front;
      * input threads and summary writers are shut down in a `finally`
        clause, so this also happens when the loop is interrupted.

    Args:
        BatchSize: number of examples per batch.
        start_learning_rate: initial learning rate; it is decayed with
            tf.train.exponential_decay using `decay_step` and `decay_rate`.

    Returns:
        None. The loop ends when the coordinator requests a stop or the
        input queues raise OutOfRangeError; other exceptions propagate.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        # ------------------------------------------------------------ inputs
        is_training = tf.placeholder(dtype = tf.bool)
        keep_prob = tf.placeholder(dtype = tf.float32)
        global_step = tf.placeholder(dtype = tf.int32)
        # Never fed: only its static shape is used, as the output shape of the
        # last transposed convolution.
        origin_image = tf.placeholder(tf.uint8, shape=([BatchSize, 224, 224, 4]))
        # Not fed or read below; kept so the graph's node names do not change.
        y_label = tf.placeholder(tf.int32, shape=[None, 224, 224, 1], name="y_label")

        image_train, label_train = read_tfrecords(trainPath)
        image_val, label_val = read_tfrecords(valPath)
        image_train_batch, label_train_batch = tf.train.shuffle_batch([image_train, label_train],
                                                                      batch_size = BatchSize,
                                                                      capacity = BatchSize * 3 + 200,
                                                                      min_after_dequeue = BatchSize)
        image_val_batch, label_val_batch = tf.train.shuffle_batch([image_val, label_val],
                                                                  batch_size = BatchSize,
                                                                  capacity = BatchSize * 3 + 200,
                                                                  min_after_dequeue = BatchSize)
        # `is_training` selects which split feeds the network.
        image_Batch = tf.cond(is_training, lambda: image_train_batch, lambda: image_val_batch)
        label_Batch = tf.cond(is_training, lambda: label_train_batch, lambda: label_val_batch)
        X = tf.identity(image_Batch)
        y = tf.identity(label_Batch)
        y = tf.cast(y, tf.int32)

        # ----------------------------------------------------------- encoder
        conv1_1 = conv_layer(X, 3, 1, 3, 64, "conv1_1")
        conv1_2 = conv_layer(conv1_1, 3, 1, 64, 64, "conv1_2")
        pool1 = pool_layer(conv1_2, 2, 2, "SAME", "MAX", "pool1")

        conv2_1 = conv_layer(pool1, 3, 1, 64, 128, "conv2_1")
        conv2_2 = conv_layer(conv2_1, 3, 1, 128, 128, "conv2_2")
        pool2 = pool_layer(conv2_2, 2, 2, "SAME", "MAX", 'pool2')

        conv3_1 = conv_layer(pool2, 3, 1, 128, 256, "conv3_1")
        conv3_2 = conv_layer(conv3_1, 3, 1, 256, 256, "conv3_2")
        conv3_3 = conv_layer(conv3_2, 3, 1, 256, 256, "conv3_3")
        pool3 = pool_layer(conv3_3, 2, 2, "SAME", "MAX", 'pool3')

        conv4_1 = conv_layer(pool3, 3, 1, 256, 512, "conv4_1")
        conv4_2 = conv_layer(conv4_1, 3, 1, 512, 512, "conv4_2")
        conv4_3 = conv_layer(conv4_2, 3, 1, 512, 512, "conv4_3")
        pool4 = pool_layer(conv4_3, 2, 2, "SAME", "MAX", 'pool4')
        print(pool4)

        conv5_1 = conv_layer(pool4, 3, 1, 512, 512, "conv5_1")
        conv5_2 = conv_layer(conv5_1, 3, 1, 512, 512, "conv5_2")
        conv5_3 = conv_layer(conv5_2, 3, 1, 512, 512, "conv5_3")
        pool5 = pool_layer(conv5_3, 2, 2, "SAME", "MAX", 'pool5')
        print(pool5)

        # Convolutional replacement for the fully connected head; the last
        # layer reduces to 4 channels.
        conv6_1 = conv_layer(pool5, 7, 1, 512, 1024, "conv6_1")
        conv6_2 = conv_layer(conv6_1, 1, 1, 1024, 512, "conv6_2")
        conv6_3 = conv_layer(conv6_2, 1, 1, 512, 4, "conv6_3")
        drop1 = tf.nn.dropout(conv6_3, keep_prob)
        print(drop1)

        # ----------------------------------------------------------- decoder
        # Upsample x2 and fuse with pool4, upsample x2 and fuse with pool3,
        # then upsample x8 to the 224 x 224 x 4 shape of `origin_image`.
        deconv1 = conv_transpose_layer(drop1, 4, 2, conv6_3.get_shape(), pool4.get_shape(), name = 'CONV_TRAN_1')
        fuse1 = tf.add(deconv1, pool4, name = 'fuse_1')
        print(fuse1)

        deconv2 = conv_transpose_layer(fuse1, 4, 2, pool4.get_shape(), pool3.get_shape(), name = 'CONV_TRAN_2')
        fuse2 = tf.add(deconv2, pool3, name = 'fuse_2')
        print(fuse2)

        deconv3 = conv_transpose_layer(fuse2, 16, 8, pool3.get_shape(), origin_image.get_shape(), name = 'CONV_TRAN_3')
        print(deconv3)

        # Per-pixel predicted class index (`axis` replaces the deprecated
        # `dimension` keyword).
        y_result = tf.argmax(deconv3, axis = 3, name = 'y_result')
        print(y_result)
        y_result = tf.cast(y_result, tf.int32)

        # --------------------------------------------------------- summaries
        with tf.name_scope('input'):
            tf.summary.image('input', X, BatchSize)

        with tf.name_scope('output'):
            y_paint = tf.cast(y_result, tf.uint8)
            # Spread class indices 0..3 over the grey range (0, 63, 144, 243).
            y_paint = (y_paint * (y_paint + 6)) * 9
            tf.summary.image('output', tf.image.grayscale_to_rgb(tf.reshape(y_paint, [-1, 224, 224, 1])), BatchSize)

        with tf.name_scope('summaries'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                cross_entropy = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits = deconv3,
                                                                                               labels = y,
                                                                                               name = "cross_entropy")))
                learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, decay_step, decay_rate, staircase = True)
                train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
            corrent_prediction = tf.equal(y_result, y)
            accuracy = tf.reduce_mean(tf.cast(corrent_prediction, 'float', name = 'accuracy'))
            tf.summary.scalar("loss", cross_entropy)
            tf.summary.scalar("accuracy", accuracy)

        # ---------------------------------------------------- session set-up
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord = coord)

        merge_summary = tf.summary.merge_all()
        # Name the run after the numeric start rate so that runs with
        # different rates end up in different TensorBoard directories.
        summary__train_writer = tf.summary.FileWriter("./logs/train" + '_rate:' + str(start_learning_rate), sess.graph)
        summary_val_writer = tf.summary.FileWriter("./logs/test" + '_rate:' + str(start_learning_rate))
        saver = tf.train.Saver()
        # Make sure the checkpoint directory exists before the first save.
        os.makedirs(os.path.join(model_path, 'newModel'), exist_ok = True)

        # ----------------------------------------------------- training loop
        try:
            batch_index = 1
            while not coord.should_stop():
                train_feed = {is_training: True, keep_prob: 0.5, global_step: batch_index}
                if batch_index % DisplayStep == 0:
                    # Fetch the metrics in the same run as the optimiser step
                    # so that a reporting step trains exactly once.
                    summary_train, acc_train, loss_train, _ = sess.run([merge_summary, accuracy, cross_entropy, train_step], feed_dict = train_feed)
                    summary__train_writer.add_summary(summary_train, batch_index)
                    print(str(batch_index) + ' train:' + ' ' + str(acc_train) + ' ' + str(loss_train), end = ' ')

                    # Validation: no optimiser step, dropout disabled.
                    summary_val, acc_val, loss_val = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {is_training: False, keep_prob: 1.0})
                    summary_val_writer.add_summary(summary_val, batch_index)
                    print(' val: ' + ' ' + str(acc_val) + ' ' + str(loss_val))
                else:
                    sess.run(train_step, feed_dict = train_feed)

                if batch_index % ModelSaverStep == 0:
                    saver.save(sess, model_path + '/newModel/Model_rate_1e-5__Step_{:08d}'.format(batch_index))

                batch_index += 1
        except tf.errors.OutOfRangeError:
            print("OutofRangeError!")
        finally:
            # Runs on normal exit and on interruption alike; the session is
            # closed afterwards by the enclosing `with`.
            coord.request_stop()
            summary__train_writer.close()
            summary_val_writer.close()
            coord.join(threads)
# In[ ]:
def main():
    """Train repeatedly, dividing the learning rate by 3 after each run.

    The loop has no exit condition. A KeyboardInterrupt raised while a run is
    in progress is swallowed on purpose: it ends that run and the next one
    starts with the reduced rate.
    """
    current_rate = 1e-5
    while True:
        print('-----------------------------------------------------')
        print('Batch: 16 learning_rate:', current_rate)
        try:
            Network(16, current_rate)
        except KeyboardInterrupt:
            # Ctrl-C means "move on to the next learning rate".
            pass
        current_rate = current_rate / 3


if __name__ == '__main__':
    main()