【深度学习】GoogLeNet 中 inception v1 的 tensorflow 的简单实现(没有使用 slim)

GoogLeNet 中 inception v1 的 tensorflow 的简单实现

前言

网上很多代码使用了 slim 来对代码进行简化，但是无奈笔者比较懒，不想学 slim ，所以就重复造了个轮子，希望对读者有些许帮助。
而且前文说了，笔者比较懒，所以没有构造出完整的 GoogLeNet，只是写了浅浅几层，我的参考如下。

更新：真香，是的没错，我上传了完整的 GoogLeNet 的代码，但是正确率不太理想(管它呢，应该是我的数据集太简单，网络太复杂)。最后附有 tensorboard 曲线
在这里插入图片描述

代码

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import os
import shutil
# Select the GPU and reduce TensorFlow's C++ log output. Both variables are
# set before TensorFlow is imported so they are already in place when it loads.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Start every run with an empty TensorBoard log directory. shutil.rmtree is
# portable (no shell needed) and, with ignore_errors=True, stays silent when
# "logs" does not exist yet.
shutil.rmtree("logs", ignore_errors=True)
import tensorflow as tf
try:
    # get_ipython() is only defined inside IPython/Jupyter; skip the magic
    # when this file is executed as a plain Python script.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib.pyplot as plt 
from PIL import Image
from multiprocessing import Process
import threading
import time


# In[2]:



# TFRecord files holding the training and validation examples.
# NOTE: these are machine-specific absolute paths; adjust them before running.
TrainPath = '/home/winsoul/disk/MyML/data/tfrecord/train.tfrecords'
ValPath = '/home/winsoul/disk/MyML/data/tfrecord/val.tfrecords'


# In[3]:


def read_tfrecord(TFRecordPath):
    """Add ops to the default graph that read one example from a TFRecord file.

    Only graph construction happens here; nothing is executed, so no session
    is opened (the previous version created a ``tf.Session`` that was never
    used and only cost start-up time and device resources).

    Args:
        TFRecordPath: Path of the TFRecord file to read.

    Returns:
        A tuple ``(image, label)`` of tensors: ``image`` is the ``'image'``
        bytes feature decoded as float32 and reshaped to ``[299, 299, 3]``;
        ``label`` is the ``'label'`` int64 feature cast to int32.
    """
    feature = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    # No num_epochs is passed, so the producer is not limited to a fixed
    # number of passes over the file.
    filename_queue = tf.train.string_input_producer([TFRecordPath])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features = feature)
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, [299, 299, 3])
    label = tf.cast(features['label'], tf.int32)
    return image, label


# In[4]:


def conv_layer(X, k, s, channels_in, channels_out, is_training, name='CONV'):
    """Build a k x k convolution + bias + ReLU layer with histogram summaries.

    Args:
        X: Input tensor passed to ``tf.nn.conv2d``.
        k: Side length of the square kernel.
        s: Stride used for both spatial dimensions.
        channels_in: Number of input channels.
        channels_out: Number of output channels (filters).
        is_training: Accepted for interface compatibility; not used by this
            layer (no batch normalisation is applied).
        name: Name scope under which the layer's ops are created.

    Returns:
        The ReLU-activated output tensor.
    """
    kernel_shape = [k, k, channels_in, channels_out]
    stride = [1, s, s, 1]
    with tf.name_scope(name):
        kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1))
        bias = tf.Variable(tf.constant(0.1, shape=[channels_out]))
        pre_activation = tf.nn.bias_add(
            tf.nn.conv2d(X, kernel, strides=stride, padding='SAME'),
            bias,
        )
        activated = tf.nn.relu(pre_activation)
        # Record distributions for TensorBoard.
        for tag, tensor in (('weights', kernel),
                            ('biases', bias),
                            ('activations', activated)):
            tf.summary.histogram(tag, tensor)
        return activated
    
def pool_layer(X, k, s, strr='SAME', pool_type='MAX'):
    """Apply k x k spatial pooling with stride s.

    Args:
        X: Input tensor.
        k: Side length of the pooling window.
        s: Stride used for both spatial dimensions.
        strr: Padding mode forwarded to the pooling op.
        pool_type: ``'MAX'`` selects max pooling; any other value selects
            average pooling.

    Returns:
        The pooled tensor.
    """
    pool_fn = tf.nn.max_pool if pool_type == 'MAX' else tf.nn.avg_pool
    return pool_fn(X,
                   ksize=[1, k, k, 1],
                   strides=[1, s, s, 1],
                   padding=strr)

def fc_layer(X, neurons_in, neurons_out, last = False, name = 'FC'):
    """Build a fully connected layer with histogram summaries.

    Args:
        X: Input tensor that is matrix-multiplied with the weight matrix.
        neurons_in: Number of input features (rows of the weight matrix).
        neurons_out: Number of output units.
        last: Boolean. When true the raw affine output is returned (used for
            the logits layer); otherwise ReLU is applied.
        name: Name scope under which the layer's ops are created.

    Returns:
        The layer output tensor.
    """
    with tf.name_scope(name):
        W = tf.Variable(tf.truncated_normal([neurons_in, neurons_out], stddev = 0.1))
        b = tf.Variable(tf.constant(0.1, shape = [neurons_out]))
        tf.summary.histogram('weights', W)
        tf.summary.histogram('biases', b)
        # Compute the affine transform once; only the activation differs.
        affine = tf.matmul(X, W) + b
        result = affine if last else tf.nn.relu(affine)
        tf.summary.histogram('activations', result)
        return result


# In[5]:


def inception(X, channels_in, core_channels_out, is_training, name = 'inception'):
    """Build a four-branch Inception-style module and concatenate the branches.

    Branches (all convolutions use stride 1 via ``conv_layer``):
      1. 1x1 conv
      2. 1x1 conv -> 3x3 conv
      3. 1x1 conv -> 5x5 conv
      4. 3x3 max-pool (stride 1) -> 1x1 conv

    Args:
        X: Input tensor.
        channels_in: Number of channels of ``X``.
        core_channels_out: Number of output channels of every branch.
        is_training: Forwarded to ``conv_layer``.
        name: Prefix for the name scopes of the branches.

    Returns:
        The four branch outputs concatenated along axis 3, i.e. a tensor
        with ``4 * core_channels_out`` channels.
    """
    with tf.name_scope(name + '_1'):
        conv1_1 = conv_layer(X, 1, 1, channels_in, core_channels_out, is_training, name = name + '_1-conv1_1')
        
    with tf.name_scope(name + '_2'):
        conv2_1 = conv_layer(X, 1, 1, channels_in, core_channels_out, is_training, name = name + '_2-conv2_1')
        conv2_2 = conv_layer(conv2_1, 3, 1, core_channels_out, core_channels_out, is_training, name = name + '_2-conv2_2')
    
    with tf.name_scope(name + '_3'):
        conv3_1 = conv_layer(X, 1, 1, channels_in, core_channels_out, is_training, name = name + '_3-conv3_1')
        conv3_2 = conv_layer(conv3_1, 5, 1, core_channels_out, core_channels_out, is_training, name = name + '_3-conv3_2')
    
    # The fourth branch gets its own '_4' scope (it previously reused '_2',
    # which mislabelled it in the TensorBoard graph).
    with tf.name_scope(name + '_4'):
        pool4_1 = pool_layer(X, 3, 1, strr = 'SAME', pool_type = 'MAX')
        conv4_2 = conv_layer(pool4_1, 1, 1, channels_in, core_channels_out, is_training, name = name + '_4-conv4_2')
    
    result = tf.concat([conv1_1, conv2_2, conv3_2, conv4_2], 3)
    return result


# In[6]:


def Network(BatchSize, learning_rate):
    """Build the shallow Inception-style 5-class classifier and train it.

    The default graph is reset, input pipelines for ``TrainPath`` and
    ``ValPath`` are created, and the model is trained with Adam until the
    queue coordinator requests a stop or an exception (for example
    ``KeyboardInterrupt``) interrupts the loop. Every 10 iterations the
    accuracy and loss on one training batch and one validation batch are
    printed and written to TensorBoard under ``./logs``.

    Args:
        BatchSize: Number of examples per batch.
        learning_rate: Learning rate passed to the Adam optimizer.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        is_training = tf.placeholder(dtype = tf.bool, shape=())
        keep_prob = tf.placeholder('float32', name = 'keep_prob')
        
        image_train, label_train = read_tfrecord(TrainPath) 
        image_val, label_val = read_tfrecord(ValPath) 

        image_train_Batch, label_train_Batch = tf.train.shuffle_batch([image_train, label_train], 
                                                     batch_size = BatchSize, 
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)
        image_val_Batch, label_val_Batch = tf.train.shuffle_batch([image_val, label_val], 
                                                     batch_size = BatchSize, 
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)
        
        # Select the training or validation batch at run time.
        # NOTE(review): the batch tensors are created outside the branch
        # lambdas, so per the tf.cond documentation both pipelines are
        # presumably dequeued on every step regardless of is_training --
        # confirm whether that matters for your data.
        image_Batch = tf.cond(is_training, lambda: image_train_Batch, lambda: image_val_Batch)
        label_Batch = tf.cond(is_training, lambda: label_train_Batch, lambda: label_val_Batch)
        
        label_Batch = tf.one_hot(label_Batch, depth = 5)

        X = tf.identity(image_Batch)
        y = tf.identity(label_Batch)
        
        with tf.name_scope('input_reshape'):
            tf.summary.image('input', X, 32)
            
        # Stem: conv / pool layers that shrink the spatial resolution.
        conv1 = conv_layer(X, 7, 2, 3, 24, is_training, "conv1")
        max_pool1 = pool_layer(conv1, 3, 2)
        conv2 = conv_layer(max_pool1, 1, 1, 24, 16, is_training, "conv2")
        conv3 = conv_layer(conv2, 3, 3, 16, 24, is_training, "conv3")
        
        max_pool2 = pool_layer(conv3, 3, 2)
        
        # Two inception modules; each outputs 4 * core_channels_out channels.
        net1 = inception(max_pool2, 24, 16, is_training)
        print(net1.shape)
        
        max_pool3 = pool_layer(net1, 3, 2)
        print(max_pool3.shape)
        
        net2 = inception(max_pool3, 4 * 16, 20, is_training)
        print(net2.shape)
        
        mean_pool1 = pool_layer(net2, 3, 2, pool_type = 'MEAN')
        print(mean_pool1.shape)

        # Classifier head. The flatten size assumes a 4 x 4 x 80 feature map
        # (the printed shape above should confirm this for 299 x 299 inputs).
        drop1 = tf.nn.dropout(mean_pool1, keep_prob)
        fc1 = fc_layer(tf.reshape(drop1, [-1, 4 * 4 * 80]), 4 * 4 * 80, 256)
        
        drop2 = tf.nn.dropout(fc1, keep_prob)
        y_result = fc_layer(drop2, 256, 5, True)
        
        with tf.name_scope('summaries'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops): 
                cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y_result, labels = y))
                train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
                corrent_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_result, 1))
                accuracy = tf.reduce_mean(tf.cast(corrent_prediction, 'float', name = 'accuracy'))
                tf.summary.scalar("loss", cross_entropy)
                tf.summary.scalar("accuracy", accuracy)
            
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord = coord)
        
        merge_summary = tf.summary.merge_all()
        summary__train_writer = tf.summary.FileWriter("./logs/train" + '_rate:' + str(learning_rate), sess.graph)
        summary_val_writer = tf.summary.FileWriter("./logs/test" + '_rate:' + str(learning_rate))
        
        try:
            batch_index = 0
            while not coord.should_stop():
                sess.run([train_step], feed_dict = {keep_prob: 0.5, is_training: True})
                if batch_index % 10 == 0:
                    # Evaluate only: train_step is deliberately NOT run here.
                    # Previously it was, which applied an extra weight update
                    # with dropout disabled every 10 iterations.
                    summary_train, acc_train, loss_train = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, is_training: True})
                    summary__train_writer.add_summary(summary_train, batch_index) 
                    print(str(batch_index) + ' train:' + '  ' + str(acc_train) + ' ' + str(loss_train))
                    summary_val, acc_val, loss_val = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, is_training: False}) 
                    summary_val_writer.add_summary(summary_val, batch_index) 
                    print(str(batch_index) + '  val: ' + '  ' + str(acc_val) + ' ' + str(loss_val))
                batch_index += 1
        except tf.errors.OutOfRangeError:
            print("OutofRangeError!")
        finally:
            print("Finish")
            # Always shut down the input threads and flush the event files,
            # even when the loop is left through an exception such as
            # KeyboardInterrupt. The session itself is closed by the
            # enclosing ``with`` block.
            coord.request_stop()
            coord.join(threads)
            summary__train_writer.close()
            summary_val_writer.close()


# In[7]:


def main():
    """Train the network with batch size 64 and a fixed learning rate.

    ``Network`` is called again whenever it returns. Pressing Ctrl+C ends the
    loop; previously the ``KeyboardInterrupt`` was swallowed and training
    simply restarted, so the script could not be stopped from the keyboard.
    """
    rate = 0.00001
    while True:
        try:
            Network(64, rate)
        except KeyboardInterrupt:
            print("Interrupted, stopping training.")
            break
#     for rate in (0.00007, 0.00003):
#         try:
#             print("-----------------------------------------------")
#             print(str(rate) + ':')
#             Network(64, rate)
#         except KeyboardInterrupt:
#             pass


# In[ ]:


# Start training only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()

完整GoogLeNet

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import os
import shutil
# Select the GPU and reduce TensorFlow's C++ log output. Both variables are
# set before TensorFlow is imported so they are already in place when it loads.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Start every run with an empty TensorBoard log directory. shutil.rmtree is
# portable (no shell needed) and, with ignore_errors=True, stays silent when
# "logs" does not exist yet.
shutil.rmtree("logs", ignore_errors=True)
import tensorflow as tf
try:
    # get_ipython() is only defined inside IPython/Jupyter; skip the magic
    # when this file is executed as a plain Python script.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib.pyplot as plt 
from PIL import Image
from multiprocessing import Process
import threading
import time


# In[2]:



# TFRecord files holding the training and validation examples.
# NOTE: these are machine-specific absolute paths; adjust them before running.
TrainPath = '/home/winsoul/disk/MyML/data/tfrecord/train.tfrecords'
ValPath = '/home/winsoul/disk/MyML/data/tfrecord/val.tfrecords'


# In[3]:


def read_tfrecord(TFRecordPath):
    """Add ops to the default graph that read one example from a TFRecord file.

    Only graph construction happens here; nothing is executed, so no session
    is opened (the previous version created a ``tf.Session`` that was never
    used and only cost start-up time and device resources).

    Args:
        TFRecordPath: Path of the TFRecord file to read.

    Returns:
        A tuple ``(image, label)`` of tensors: ``image`` is the ``'image'``
        bytes feature decoded as float32 and reshaped to ``[299, 299, 3]``;
        ``label`` is the ``'label'`` int64 feature cast to int32.
    """
    feature = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    # No num_epochs is passed, so the producer is not limited to a fixed
    # number of passes over the file.
    filename_queue = tf.train.string_input_producer([TFRecordPath])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features = feature)
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, [299, 299, 3])
    label = tf.cast(features['label'], tf.int32)
    return image, label


# In[4]:


def conv_layer(X, k, s, channels_in, channels_out, is_training, name='CONV'):
    """Build a k x k convolution + bias + ReLU layer with histogram summaries.

    Args:
        X: Input tensor passed to ``tf.nn.conv2d``.
        k: Side length of the square kernel.
        s: Stride used for both spatial dimensions.
        channels_in: Number of input channels.
        channels_out: Number of output channels (filters).
        is_training: Accepted for interface compatibility; not used by this
            layer (no batch normalisation is applied).
        name: Name scope under which the layer's ops are created.

    Returns:
        The ReLU-activated output tensor.
    """
    kernel_shape = [k, k, channels_in, channels_out]
    stride = [1, s, s, 1]
    with tf.name_scope(name):
        kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1))
        bias = tf.Variable(tf.constant(0.1, shape=[channels_out]))
        pre_activation = tf.nn.bias_add(
            tf.nn.conv2d(X, kernel, strides=stride, padding='SAME'),
            bias,
        )
        activated = tf.nn.relu(pre_activation)
        # Record distributions for TensorBoard.
        for tag, tensor in (('weights', kernel),
                            ('biases', bias),
                            ('activations', activated)):
            tf.summary.histogram(tag, tensor)
        return activated
    
def pool_layer(X, k, s, strr='SAME', pool_type='MAX'):
    """Apply k x k spatial pooling with stride s.

    Args:
        X: Input tensor.
        k: Side length of the pooling window.
        s: Stride used for both spatial dimensions.
        strr: Padding mode forwarded to the pooling op.
        pool_type: ``'MAX'`` selects max pooling; any other value selects
            average pooling.

    Returns:
        The pooled tensor.
    """
    pool_fn = tf.nn.max_pool if pool_type == 'MAX' else tf.nn.avg_pool
    return pool_fn(X,
                   ksize=[1, k, k, 1],
                   strides=[1, s, s, 1],
                   padding=strr)

def fc_layer(X, neurons_in, neurons_out, last = False, name = 'FC'):
    """Build a fully connected layer with histogram summaries.

    Args:
        X: Input tensor that is matrix-multiplied with the weight matrix.
        neurons_in: Number of input features (rows of the weight matrix).
        neurons_out: Number of output units.
        last: Boolean. When true the raw affine output is returned (used for
            the logits layer); otherwise ReLU is applied.
        name: Name scope under which the layer's ops are created.

    Returns:
        The layer output tensor.
    """
    with tf.name_scope(name):
        W = tf.Variable(tf.truncated_normal([neurons_in, neurons_out], stddev = 0.1))
        b = tf.Variable(tf.constant(0.1, shape = [neurons_out]))
        tf.summary.histogram('weights', W)
        tf.summary.histogram('biases', b)
        # Compute the affine transform once; only the activation differs.
        affine = tf.matmul(X, W) + b
        result = affine if last else tf.nn.relu(affine)
        tf.summary.histogram('activations', result)
        return result


# In[5]:


def inception(X, channels_in, core_channels_out, is_training, name = 'inception'):
    """Build a four-branch Inception-style module and concatenate the branches.

    Branches (all convolutions use stride 1 via ``conv_layer``):
      1. 1x1 conv
      2. 1x1 conv -> 3x3 conv
      3. 1x1 conv -> 5x5 conv
      4. 3x3 max-pool (stride 1) -> 1x1 conv

    Args:
        X: Input tensor.
        channels_in: Number of channels of ``X``.
        core_channels_out: Number of output channels of every branch.
        is_training: Forwarded to ``conv_layer``.
        name: Prefix for the name scopes of the branches.

    Returns:
        The four branch outputs concatenated along axis 3, i.e. a tensor
        with ``4 * core_channels_out`` channels.
    """
    with tf.name_scope(name + '_1'):
        conv1_1 = conv_layer(X, 1, 1, channels_in, core_channels_out, is_training, name = name + '_1-conv1_1')
        
    with tf.name_scope(name + '_2'):
        conv2_1 = conv_layer(X, 1, 1, channels_in, core_channels_out, is_training, name = name + '_2-conv2_1')
        conv2_2 = conv_layer(conv2_1, 3, 1, core_channels_out, core_channels_out, is_training, name = name + '_2-conv2_2')
    
    with tf.name_scope(name + '_3'):
        conv3_1 = conv_layer(X, 1, 1, channels_in, core_channels_out, is_training, name = name + '_3-conv3_1')
        conv3_2 = conv_layer(conv3_1, 5, 1, core_channels_out, core_channels_out, is_training, name = name + '_3-conv3_2')
    
    # The fourth branch gets its own '_4' scope (it previously reused '_2',
    # which mislabelled it in the TensorBoard graph).
    with tf.name_scope(name + '_4'):
        pool4_1 = pool_layer(X, 3, 1, strr = 'SAME', pool_type = 'MAX')
        conv4_2 = conv_layer(pool4_1, 1, 1, channels_in, core_channels_out, is_training, name = name + '_4-conv4_2')
    
    result = tf.concat([conv1_1, conv2_2, conv3_2, conv4_2], 3)
    return result


# In[6]:


def Network(BatchSize, learning_rate):
    """Build the nine-module Inception-style 5-class classifier and train it.

    The default graph is reset, input pipelines for ``TrainPath`` and
    ``ValPath`` are created, and the model is trained with Adam until the
    queue coordinator requests a stop or an exception (for example
    ``KeyboardInterrupt``) interrupts the loop. Every 10 iterations the
    accuracy and loss on one training batch and one validation batch are
    printed and written to TensorBoard under ``./logs``.

    Args:
        BatchSize: Number of examples per batch.
        learning_rate: Learning rate passed to the Adam optimizer.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        is_training = tf.placeholder(dtype = tf.bool, shape=())
        keep_prob = tf.placeholder('float32', name = 'keep_prob')
        
        image_train, label_train = read_tfrecord(TrainPath) 
        image_val, label_val = read_tfrecord(ValPath) 

        image_train_Batch, label_train_Batch = tf.train.shuffle_batch([image_train, label_train], 
                                                     batch_size = BatchSize, 
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)
        image_val_Batch, label_val_Batch = tf.train.shuffle_batch([image_val, label_val], 
                                                     batch_size = BatchSize, 
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)
        
        # Select the training or validation batch at run time.
        # NOTE(review): the batch tensors are created outside the branch
        # lambdas, so per the tf.cond documentation both pipelines are
        # presumably dequeued on every step regardless of is_training --
        # confirm whether that matters for your data.
        image_Batch = tf.cond(is_training, lambda: image_train_Batch, lambda: image_val_Batch)
        label_Batch = tf.cond(is_training, lambda: label_train_Batch, lambda: label_val_Batch)
        
        label_Batch = tf.one_hot(label_Batch, depth = 5)

        X = tf.identity(image_Batch)
        y = tf.identity(label_Batch)
        
        with tf.name_scope('input_reshape'):
            tf.summary.image('input', X, 32)
            
        # Stem: conv / pool layers that shrink the spatial resolution.
        conv1 = conv_layer(X, 7, 2, 3, 24, is_training, "conv1")
        max_pool1 = pool_layer(conv1, 3, 2)
        conv2 = conv_layer(max_pool1, 1, 1, 24, 16, is_training, "conv2")
        conv3 = conv_layer(conv2, 3, 3, 16, 24, is_training, "conv3")
        
        max_pool2 = pool_layer(conv3, 3, 2)
        
        # Each inception module outputs 4 * core_channels_out channels, which
        # is the channels_in of the module that follows it.
        net1 = inception(max_pool2, 24, 16, is_training)
        
        net2 = inception(net1, 4 * 16, 20, is_training)
        
        max_pool3 = pool_layer(net2, 3, 2)
        print(max_pool3.shape)
        
        net3 = inception(max_pool3, 4 * 20, 24, is_training)
        net4 = inception(net3, 4 * 24, 32, is_training)
        net5 = inception(net4, 4 * 32, 38, is_training)
        net6 = inception(net5, 4 * 38, 42, is_training)
        net7 = inception(net6, 4 * 42, 56, is_training)
        
        max_pool4 = pool_layer(net7, 3, 2)
        print(max_pool4.shape)
        
        net8 = inception(max_pool4, 4 * 56, 42, is_training)
        net9 = inception(net8, 4 * 42, 38, is_training)
        
        mean_pool1 = pool_layer(net9, 7, 1, pool_type = 'MEAN')
        print(mean_pool1.shape)
        
        # Classifier head. The flatten size assumes a 4 x 4 x 152 feature map
        # (the printed shape above should confirm this for 299 x 299 inputs).
        drop1 = tf.nn.dropout(mean_pool1, keep_prob)
        fc1 = fc_layer(tf.reshape(drop1, [-1, 4 * 4 * 152]), 4 * 4 * 152, 256)
        
        drop2 = tf.nn.dropout(fc1, keep_prob)
        y_result = fc_layer(drop2, 256, 5, True)
        
        with tf.name_scope('summaries'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops): 
                cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y_result, labels = y))
                train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
                corrent_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_result, 1))
                accuracy = tf.reduce_mean(tf.cast(corrent_prediction, 'float', name = 'accuracy'))
                tf.summary.scalar("loss", cross_entropy)
                tf.summary.scalar("accuracy", accuracy)
            
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord = coord)
        
        merge_summary = tf.summary.merge_all()
        summary__train_writer = tf.summary.FileWriter("./logs/train" + '_rate:' + str(learning_rate), sess.graph)
        summary_val_writer = tf.summary.FileWriter("./logs/test" + '_rate:' + str(learning_rate))
        
        try:
            batch_index = 0
            while not coord.should_stop():
                sess.run([train_step], feed_dict = {keep_prob: 0.5, is_training: True})
                if batch_index % 10 == 0:
                    # Evaluate only: train_step is deliberately NOT run here.
                    # Previously it was, which applied an extra weight update
                    # with dropout disabled every 10 iterations.
                    summary_train, acc_train, loss_train = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, is_training: True})
                    summary__train_writer.add_summary(summary_train, batch_index) 
                    print(str(batch_index) + ' train:' + '  ' + str(acc_train) + ' ' + str(loss_train))
                    summary_val, acc_val, loss_val = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, is_training: False}) 
                    summary_val_writer.add_summary(summary_val, batch_index) 
                    print(str(batch_index) + '  val: ' + '  ' + str(acc_val) + ' ' + str(loss_val))
                batch_index += 1
        except tf.errors.OutOfRangeError:
            print("OutofRangeError!")
        finally:
            print("Finish")
            # Always shut down the input threads and flush the event files,
            # even when the loop is left through an exception such as
            # KeyboardInterrupt. The session itself is closed by the
            # enclosing ``with`` block.
            coord.request_stop()
            coord.join(threads)
            summary__train_writer.close()
            summary_val_writer.close()


# In[7]:


def main():
    """Train the network with batch size 64 and a fixed learning rate.

    ``Network`` is called again whenever it returns. Pressing Ctrl+C ends the
    loop; previously the ``KeyboardInterrupt`` was swallowed and training
    simply restarted, so the script could not be stopped from the keyboard.
    """
    rate = 0.00001
    while True:
        try:
            Network(64, rate)
        except KeyboardInterrupt:
            print("Interrupted, stopping training.")
            break
#     for rate in (0.00007, 0.00003):
#         try:
#             print("-----------------------------------------------")
#             print(str(rate) + ':')
#             Network(64, rate)
#         except KeyboardInterrupt:
#             pass


# In[ ]:


# Start training only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()

在这里插入图片描述

西域狂猪

发布了79 篇原创文章 · 获赞 56 · 访问量 50万+

他的留言板关注

【深度学习】GoogLeNet 中 inception v1 的 tensorflow 的简单实现(没有使用 slim)

GoogLeNet 中 inception v1 的 tensorflow 的简单实现

前言

代码

完整GoogLeNet

猜你喜欢