CIFAR-100数据集有100个类,每个类600张图片,总共有60000张图片。其中50000张是训练集,10000张作为测试集。
CNN的网络结构为:
第一层为:卷积层(64个卷积核)+最大池化层+LRN层
第二层为:卷积层(64个卷积核)+LRN层+最大池化层
第三层为:全连接层(384个隐藏单元)
第四层为:全连接层(192个隐藏单元)
第五层为:softmax层(输出100维的向量)
采用CNN训练该数据集的代码如下(网络层数太少了,训练结果top1只有0.2341的准确率):
#coding=utf-8
"""Train a small CNN on CIFAR-100 using the TensorFlow 1.x graph/session API.

Layout of the pickled CIFAR-100 dictionaries (keys are ``bytes`` because the
files are loaded with ``encoding='bytes'``)::

    {
        "data": [...],             # one flat row of pixel values per image
        "coarse_labels": [...],    # super category, 0~19
        "filenames": ["volcano_s_000012.png", ...],
        "batch_label": "",
        "fine_labels": [...],      # category, 0~99
    }

``Cifar100DataReader._decode`` treats every ``data`` row as three consecutive
1024-value channel planes and converts it to a 32 x 32 x 3 image.
"""
import pickle  # used for (de)serialization of the dataset files
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import time
import math


class Cifar100DataReader():
    """Serve shuffled mini-batches of CIFAR-100 training and test images.

    The training file is read eagerly in ``__init__``; the test file is read
    lazily on the first call to ``next_test_data``.
    """

    def __init__(self, cifar_folder, onehot=True):
        """Load and shuffle the training set.

        Args:
            cifar_folder: directory containing the ``train`` and ``test``
                pickle files.
            onehot: when true, labels are returned as 100-dimensional one-hot
                vectors; otherwise as plain ints.
        """
        self.cifar_folder = cifar_folder
        self.onehot = onehot
        self.data_label_train = None  # list of (flat_image, fine_label) pairs
        self.data_label_test = None   # same structure, filled lazily
        self.batch_index = 0          # index of the next training batch
        self.test_batch_index = 0     # index of the next test batch

        f = os.path.join(self.cifar_folder, "train")
        print('read: %s' % f)
        # NOTE: pickle can execute arbitrary code; only load trusted dataset files.
        with open(f, 'rb') as fo:
            self.dic_train = pickle.load(fo, encoding='bytes')
        self.data_label_train = list(
            zip(self.dic_train[b'data'], self.dic_train[b'fine_labels']))  # label 0~99
        np.random.shuffle(self.data_label_train)

    def dataInfo(self):
        """Print a short summary of the loaded training dictionary."""
        # Each element is a pair: (flat numpy pixel row, fine label).
        print(self.data_label_train[0:2])
        print(self.dic_train.keys())
        print(b"coarse_labels:", len(self.dic_train[b"coarse_labels"]))
        print(b"filenames:", len(self.dic_train[b"filenames"]))
        print(b"batch_label:", len(self.dic_train[b"batch_label"]))
        print(b"fine_labels:", len(self.dic_train[b"fine_labels"]))
        print(b"data_shape:", np.shape((self.dic_train[b"data"])))
        print(b"data0:", type(self.dic_train[b"data"][0]))

    def _next_batch(self, samples, index, batch_size, tag):
        """Slice batch number ``index`` out of ``samples`` and decode it.

        When ``index`` points past the end of ``samples`` the list is
        reshuffled in place and iteration restarts from the first batch.

        Returns:
            ``((images, labels), next_index)``.
        """
        if index * batch_size < len(samples):
            print(tag, index)
        else:
            index = 0
            np.random.shuffle(samples)
        chunk = samples[index * batch_size:(index + 1) * batch_size]
        return self._decode(chunk, self.onehot), index + 1

    def next_train_data(self, batch_size=100):
        """Return the next training batch as ``(images, labels)``.

        ``images`` is a list of 32 x 32 x 3 numpy arrays and ``labels`` a list
        of one-hot vectors or ints, depending on ``self.onehot``. After the
        last batch of an epoch the training set is reshuffled.
        """
        batch, self.batch_index = self._next_batch(
            self.data_label_train, self.batch_index, batch_size, "batch_index:")
        return batch

    def _decode(self, datum, onehot):
        """Convert raw ``(flat_image, label)`` pairs into network inputs.

        Args:
            datum: iterable of ``(flat_image, label)`` pairs.
            onehot: emit 100-dimensional one-hot label vectors when true,
                plain ints otherwise.

        Returns:
            ``(images, labels)`` as two parallel lists.
        """
        images = []
        labels = []
        for flat, label in datum:
            # Three 1024-value channel planes -> 32 x 32 x 3 (height, width, channel).
            images.append(np.reshape(np.reshape(flat, [3, 1024]).T, [32, 32, 3]))
            if onehot:
                hot = np.zeros(100)
                hot[int(label)] = 1
                labels.append(hot)
            else:
                labels.append(int(label))
        return images, labels

    def next_test_data(self, batch_size=100):
        """Return the next test batch as ``(images, labels)``.

        The test file is loaded on first use. The result has the same format
        as ``next_train_data``.
        """
        if self.data_label_test is None:
            f = os.path.join(self.cifar_folder, "test")
            print('read: %s' % f)
            # NOTE: pickle can execute arbitrary code; only load trusted dataset files.
            with open(f, 'rb') as fo:
                dic_test = pickle.load(fo, encoding='bytes')
            self.data_label_test = list(
                zip(dic_test[b'data'], dic_test[b'fine_labels']))  # label 0~99
            # Start the test cursor at the first batch. The training cursor
            # must not be touched here, or training would silently restart.
            self.test_batch_index = 0
        batch, self.test_batch_index = self._next_batch(
            self.data_label_test, self.test_batch_index, batch_size,
            "test_batch_index:")
        return batch

    def showImage(self):
        """Display the first nine images of the next training batch in a 3 x 3 grid."""
        rdata, rlabel = self.next_train_data()
        fig = plt.figure()
        for position in range(9):
            ax = fig.add_subplot(3, 3, position + 1)
            ax.imshow(rdata[position])
        plt.show()


def CNN(cifar_folder="E:/testdata/cifar-100", max_steps=3000, batch_size=50):
    """Build, train, evaluate and save the CIFAR-100 convolutional network.

    Architecture: conv(64) + max-pool + LRN, conv(64) + LRN + max-pool,
    fully connected (384), fully connected (192), 100-way softmax output.

    Args:
        cifar_folder: directory holding the CIFAR-100 ``train``/``test`` files.
        max_steps: number of training iterations.
        batch_size: number of samples per iteration; also fixes the
            placeholder batch dimension.
    """
    # Network inputs: images of 32 x 32 x 3 and one-hot labels of size 100.
    # NOTE(review): images are fed exactly as decoded, without any scaling.
    image_holder = tf.placeholder(tf.float32, [batch_size, 32, 32, 3])
    label_holder = tf.placeholder(tf.float32, [batch_size, 100])

    # First convolutional layer: 64 kernels of 5x5x3, stride 1.
    weight1 = variable_with_weight_loss(shape=[5, 5, 3, 64], stddev=5e-2, w1=0.0)
    kernel1 = tf.nn.conv2d(image_holder, weight1, [1, 1, 1, 1], padding='SAME')
    bias1 = tf.Variable(tf.constant(0.0, shape=[64]))
    conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1))
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME')  # pooling with stride 2
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)  # LRN layer

    # Second convolutional layer: 64 kernels of 5x5x64, stride 1.
    weight2 = variable_with_weight_loss(shape=[5, 5, 64, 64], stddev=5e-2, w1=0.0)
    kernel2 = tf.nn.conv2d(norm1, weight2, [1, 1, 1, 1], padding='SAME')
    bias2 = tf.Variable(tf.constant(0.1, shape=[64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(kernel2, bias2))
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)  # LRN layer
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME')  # pooling with stride 2

    # Fully connected layer 1 (384 units), L2-regularized.
    reshape = tf.reshape(pool2, [batch_size, -1])  # flatten each sample
    dim = reshape.get_shape()[1].value             # flattened feature size
    weight3 = variable_with_weight_loss(shape=[dim, 384], stddev=0.04, w1=0.004)
    bias3 = tf.Variable(tf.constant(0.1, shape=[384]))
    local3 = tf.nn.relu(tf.matmul(reshape, weight3) + bias3)

    # Fully connected layer 2 (192 units), L2-regularized.
    weight4 = variable_with_weight_loss(shape=[384, 192], stddev=0.04, w1=0.004)
    bias4 = tf.Variable(tf.constant(0.1, shape=[192]))
    local4 = tf.nn.relu(tf.matmul(local3, weight4) + bias4)

    # Output layer: 100 raw class scores. The softmax is folded into the loss
    # op below, because taking log(softmax(x)) by hand yields NaN as soon as a
    # probability underflows to zero.
    weight5 = variable_with_weight_loss(shape=[192, 100], stddev=1/192.0, w1=0.0)
    bias5 = tf.Variable(tf.constant(0.0, shape=[100]))
    logits = tf.matmul(local4, weight5) + bias5

    # Loss = mean cross entropy + every regularization term in 'losses'.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label_holder, logits=logits))
    tf.add_to_collection('losses', cross_entropy)
    total_loss = tf.add_n(tf.get_collection('losses'))
    train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)

    # Top-1 correctness per sample. It is built once here; creating it inside
    # the evaluation loop would add new graph nodes on every iteration.
    # (tf.nn.in_top_k(predictions, targets, k) is an alternative; per the
    # original note its targets are the true labels, one per sample.)
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label_holder, 1))

    # A single session, released in all cases.
    sess = tf.InteractiveSession()
    try:
        tf.global_variables_initializer().run()
        print("Training begin......")
        cifar100 = Cifar100DataReader(cifar_folder=cifar_folder)
        for _ in range(max_steps):
            image_batch, label_batch = cifar100.next_train_data(batch_size=batch_size)
            train_op.run(feed_dict={image_holder: image_batch,
                                    label_holder: label_batch})
        print("training end.")

        print("caculate precision......")
        # Evaluate on the test set.
        num_example = 10000  # the test set has 10000 images
        num_iter = int(math.ceil(num_example / batch_size))
        true_count = 0
        total_sample_count = num_iter * batch_size
        for _ in range(num_iter):
            test_data, test_label = cifar100.next_test_data(batch_size=batch_size)
            correction = sess.run(correct_prediction,
                                  feed_dict={image_holder: test_data,
                                             label_holder: test_label})
            true_count += np.sum(correction)
        precision = true_count / total_sample_count
        print("precision:", precision)

        # Save the model; the checkpoint directory must exist beforehand.
        checkpoint_path = "./Cifar100/model.ckpt"
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        saver = tf.train.Saver()
        save_path = saver.save(sess, checkpoint_path)
        print("save model:{0} Finished".format(save_path))
    finally:
        sess.close()


def loss(logits, labels):
    """Alternative loss: sparse softmax cross entropy plus collected weight losses.

    Args:
        logits: unnormalized class scores.
        labels: integer class ids (cast to int64 here), not one-hot vectors.

    Returns:
        The sum of every tensor in the 'losses' collection, including the
        mean cross entropy added by this call.
    """
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # The op name keeps its original spelling so by-name lookups are unaffected.
    return tf.add_n(tf.get_collection('losses'), name='tatol_loss')


def variable_with_weight_loss(shape, stddev, w1):
    """Create a truncated-normal initialized weight and register its L2 penalty.

    Args:
        shape: shape of the weight tensor.
        stddev: standard deviation of the truncated normal initializer.
        w1: weight of the L2 regularization term; ``None`` disables it.

    Returns:
        The created ``tf.Variable``.
    """
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if w1 is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), w1, name='weight_loss')
        # Regularization terms are gathered in a collection and summed into
        # the objective later.
        tf.add_to_collection('losses', weight_loss)
    return var


if __name__ == '__main__':
    CNN()