TensorFlow入门：（四）CNN训练CIFAR-100

CIFAR-100数据集有100个类，每个类600张图片，总共有60000张图片。其中50000张是训练集，10000张作为测试集。
CNN的网络结构为：
第一层为：卷积层（64个卷积核）+最大池化层+LRN层
第二层为：卷积层（64个卷积核）+LRN层+最大池化层
第三层为：全连接层（384个隐藏单元）
第四层为：全连接层（192个隐藏单元）
第五层：softmax层（输出100为的向量）
采用CNN训练该数据集的代码如下（网络层数太少了，训练结果top1只有0.2341的准确率）：
#coding=utf-8
import pickle   # 用于序列化和反序列化
import numpy as np  
import os  
import matplotlib.pyplot as plt
import tensorflow as tf
import time
import math
'''
字典形式的数据：
cifar100 data content: 
    { 
    "data" : [(R,G,B, R,G,B ,....),(R,G,B, R,G,B, ...),...]    # 50000张图片，每张： 32 * 32 * 3
    "coarse_labels":[0,...,19],                         # 0~19 super category 
    "filenames":["volcano_s_000012.png",...],   # 文件名
    "batch_label":"", 
    "fine_labels":[0,1...99]          # 0~99 category 
    } 
'''
  
class Cifar100DataReader():  
    def __init__(self,cifar_folder,onehot=True):  
        self.cifar_folder=cifar_folder  
        self.onehot=onehot  
        self.data_label_train=None            # 训练集
        self.data_label_test=None             # 测试集
        self.batch_index=0                    # 训练数据的batch块索引
        self.test_batch_index=0                # 测试数据的batch_size
        f=os.path.join(self.cifar_folder,"train")  # 训练集有50000张图片，100个类，每个类500张
        print ('read: %s'%f  )
        fo = open(f, 'rb')
        self.dic_train = pickle.load(fo,encoding='bytes')
        fo.close()
        self.data_label_train=list(zip(self.dic_train[b'data'],self.dic_train[b'fine_labels']) ) #label 0~99  
        np.random.shuffle(self.data_label_train)           

    
    def dataInfo(self):
        print (self.data_label_train[0:2] )# 每个元素为二元组，第一个是numpy数组大小为32*32*3，第二是label
        print (self.dic_train.keys())
        print (b"coarse_labels:",len(self.dic_train[b"coarse_labels"]))
        print (b"filenames:",len(self.dic_train[b"filenames"]))
        print (b"batch_label:",len(self.dic_train[b"batch_label"]))
        print (b"fine_labels:",len(self.dic_train[b"fine_labels"]))
        print (b"data_shape:",np.shape((self.dic_train[b"data"])))
        print (b"data0:",type(self.dic_train[b"data"][0]))


    # 得到下一个batch训练集，块大小为100
    def next_train_data(self,batch_size=100):  
        """ 
        return list of numpy arrays [na,...,na] with specific batch_size 
                na: N dimensional numpy array  
        """            
        if self.batch_index<len(self.data_label_train)/batch_size:  
            print ("batch_index:",self.batch_index  )
            datum=self.data_label_train[self.batch_index*batch_size:(self.batch_index+1)*batch_size]  
            self.batch_index+=1  
            return self._decode(datum,self.onehot)  
        else:  
            self.batch_index=0  
            np.random.shuffle(self.data_label_train)  
            datum=self.data_label_train[self.batch_index*batch_size:(self.batch_index+1)*batch_size]  
            self.batch_index+=1  
            return self._decode(datum,self.onehot)  
              
    
    # 把一个batch的训练数据转换为可以放入神经网络训练的数据 
    def _decode(self,datum,onehot):  
        rdata=list()     # batch训练数据
        rlabel=list()  
        if onehot:  
            for d,l in datum:  
                rdata.append(np.reshape(np.reshape(d,[3,1024]).T,[32,32,3]))   # 转变形状为：32*32*3
                hot=np.zeros(100)    
                hot[int(l)]=1            # label设为100维的one-hot向量
                rlabel.append(hot)  
        else:  
            for d,l in datum:  
                rdata.append(np.reshape(np.reshape(d,[3,1024]).T,[32,32,3]))  
                rlabel.append(int(l))  
        return rdata,rlabel  
    

    # 得到下一个测试数据 ，供神经网络计算模型误差用         
    def next_test_data(self,batch_size=100):  
        ''''' 
        return list of numpy arrays [na,...,na] with specific batch_size 
                na: N dimensional numpy array  
        '''  
        if self.data_label_test is None:  
            f=os.path.join(self.cifar_folder,"test")  
            print ('read: %s'%f  )
            fo = open(f, 'rb')
            dic_test = pickle.load(fo,encoding='bytes')
            fo.close()           
            data=dic_test[b'data']            
            labels=dic_test[b'fine_labels']   # 0 ~ 99  
            self.data_label_test=list(zip(data,labels) )
            self.batch_index=0

        if self.test_batch_index<len(self.data_label_test)/batch_size:  
            print ("test_batch_index:",self.test_batch_index )
            datum=self.data_label_test[self.test_batch_index*batch_size:(self.test_batch_index+1)*batch_size]  
            self.test_batch_index+=1  
            return self._decode(datum,self.onehot)  
        else:  
            self.test_batch_index=0  
            np.random.shuffle(self.data_label_test)  
            datum=self.data_label_test[self.test_batch_index*batch_size:(self.test_batch_index+1)*batch_size]  
            self.test_batch_index+=1  
            return self._decode(datum,self.onehot)    

    # 显示 9张图像
    def showImage(self):
        rdata,rlabel = self.next_train_data()
        fig = plt.figure()  
        ax = fig.add_subplot(331)
        ax.imshow(rdata[0])
        ax = fig.add_subplot(332)
        ax.imshow(rdata[1]) 
        ax = fig.add_subplot(333)
        ax.imshow(rdata[2]) 
        ax = fig.add_subplot(334)
        ax.imshow(rdata[3]) 
        ax = fig.add_subplot(335)
        ax.imshow(rdata[4]) 
        ax = fig.add_subplot(336)
        ax.imshow(rdata[5]) 
        ax = fig.add_subplot(337)
        ax.imshow(rdata[6]) 
        ax = fig.add_subplot(338)
        ax.imshow(rdata[7]) 
        ax = fig.add_subplot(339)
        ax.imshow(rdata[8]) 
        plt.show()


# 定义卷积神经网络模型
def CNN():
    sess=tf.InteractiveSession()
    
    max_steps=3000    # 最大迭代次数
    batch_size=50   # 每次迭代的样本数量

    # 设置CNN的输入值
    image_holder=tf.placeholder(tf.float32,[batch_size,32,32,3]) # 图像大小：32 * 32 * 3 
    label_holder=tf.placeholder(tf.float32,[batch_size,100])

    # 创建第一个卷积层
    weight1=variable_with_weight_loss(shape=[5,5,3,64],stddev=5e-2,w1=0.0) # 64个5*5*3的卷积核初始化
    kernel1=tf.nn.conv2d(image_holder,weight1,[1,1,1,1],padding='SAME') # 卷积操作,步长为1
    bias1=tf.Variable(tf.constant(0.0,shape=[64]))   # 偏置初始化
    conv1=tf.nn.relu(tf.nn.bias_add(kernel1,bias1))  # 加上偏置，代入激活函数
    pool1=tf.nn.max_pool(conv1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME') # 池化操作，步长为2
    norm1=tf.nn.lrn(pool1,4,bias=1.0,alpha=0.001/9.0,beta=0.75) # LRN层

    # 创建第二个卷积层
    weight2=variable_with_weight_loss(shape=[5,5,64,64],stddev=5e-2,w1=0.0) # 64个5*5*64的卷积核初始化
    kernel2=tf.nn.conv2d(norm1,weight2,[1,1,1,1],padding='SAME') # 卷积操作，步长为1
    bias2=tf.Variable(tf.constant(0.1,shape=[64]))  # 初始化偏置
    conv2=tf.nn.relu(tf.nn.bias_add(kernel2,bias2)) # 加上偏置，代入激活函数
    norm2=tf.nn.lrn(conv2,4,bias=1.0,alpha=0.001/9.0,beta=0.75) # LRN层
    pool2=tf.nn.max_pool(norm2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME') # 池化操作步长为2

    # 全连接层1
    reshape=tf.reshape(pool2,[batch_size,-1])  # 对上一层结果展开为一维
    dim=reshape.get_shape()[1].value            # 获取一维向量的大小
    weight3=variable_with_weight_loss(shape=[dim,384],stddev=0.04,w1=0.004)  # 下一层有384个单元
    bias3=tf.Variable(tf.constant(0.1,shape=[384]))
    local3=tf.nn.relu(tf.matmul(reshape,weight3)+bias3)

    #全连接层2
    weight4=variable_with_weight_loss(shape=[384,192],stddev=0.04,w1=0.004) # 下一层有192个单元
    bias4=tf.Variable(tf.constant(0.1,shape=[192]))
    local4=tf.nn.relu(tf.matmul(local3,weight4)+bias4)

    #最后一层softmax层
    weight5=variable_with_weight_loss(shape=[192,100],stddev=1/192.0,w1=0.0) # 输入100维的向量
    bias5=tf.Variable(tf.constant(0.0,shape=[100]))
    logits=tf.nn.softmax(tf.matmul(local4,weight5)+bias5)

    # 定义损失函数和优化器       
    cross_entropy=tf.reduce_mean(-tf.reduce_sum(label_holder*tf.log(logits),reduction_indices=[1]))
    tf.add_to_collection('losses',cross_entropy)  # 将交叉熵加入损失函数集合losses
    loss=tf.add_n(tf.get_collection('losses'))    # 将losses全部结果相加

    train_op=tf.train.AdamOptimizer(1e-3).minimize(loss)  # 定义优化器
    # top_k_op=tf.nn.in_top_k(logits,label_holder,1)  # 返回一个向量，向量长度为样本点个数
    '''
    函数原型：in_top_k(predictions, targets, k, name=None)
      predictions：预测的结果，预测矩阵大小为样本数×标注的label类的个数的二维矩阵。
      targets：实际的标签，大小为样本数。
      k：每个样本的预测结果的前k个最大的数里面是否包含targets预测中的标签，一般都是取1，
         即取预测最大概率的索引与标签对比。
      top_1_op(k=1)为True的地方top_2_op(k=2)一定为True，top_1_op取样本的最大预测概率的索引与实际标签对比，
    top_2_op取样本的最大和仅次最大的两个预测概率与实际标签对比，如果实际标签在其中则为True，否则为False。
    其他k的取值可以类推。
    '''

    # 开始训练模型
    sess=tf.InteractiveSession()
    tf.global_variables_initializer().run()
    print ("Training begin......")
    cifar100=Cifar100DataReader(cifar_folder="E:/testdata/cifar-100")
    for step in range(max_steps):
        start=time.time()
        image_batch,label_batch=cifar100.next_train_data(batch_size=batch_size)
        train_op.run(feed_dict={image_holder:image_batch,label_holder:label_batch})
    
    print ("training end.")
    print ("caculate precision......")
    

    # 计算测试集上的误差率
    num_example=10000      # 测试集有1000张图片
    num_iter=int(math.ceil(num_example/batch_size))  # 最大迭代次数
    true_count=0
    total_sample_count=num_iter*batch_size  
    step=0
    while step<num_iter:
        test_data,test_label=cifar100.next_test_data(batch_size=batch_size) 
        correction_prediction=tf.equal(tf.argmax(logits,1),tf.argmax(label_holder,1))     
        correction=sess.run([correction_prediction],feed_dict={image_holder:test_data,label_holder:test_label})
        true_count+=np.sum(correction)
        step+=1
    precision=true_count/total_sample_count
    print ("precision:",precision)
  
    # 保存模型
    saver = tf.train.Saver() 
    save_path = saver.save(sess,"./Cifar100/model.ckpt")  
    print("save model:{0} Finished".format(save_path))      
    
  
# 定义损失函数(可以作为上面损失函数的替换)
def loss(logits,labels):
    labels=tf.cast(labels,tf.int64)
    cross_entropy=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='cross_entropy_per_example')
    cross_entropy_mean=tf.reduce_mean(cross_entropy,name='cross_entropy')
    tf.add_to_collection('losses',cross_entropy_mean)
    return tf.add_n(tf.get_colletion('losses'),name='tatol_loss')


# 定义初始化weight的函数，计算weight的L2范数，并作为损失函数中的正则化项
def variable_with_weight_loss(shape,stddev,w1):
    var=tf.Variable(tf.truncated_normal(shape,stddev=stddev))
    if w1 is not None:
        weight_loss=tf.multiply(tf.nn.l2_loss(var),w1,name='weight_loss') # w1表示正则化项的权重
        tf.add_to_collection('losses',weight_loss)  # 把正则化项统一存到collection中，最后加入目标函数
    return var

if __name__=='__main__':    
    CNN()
TensorFlow入门：（四）CNN训练CIFAR-100

猜你喜欢