1. activation function : sigmoid
loss function : mean squared error
optimizer : SGD
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPool2D, Flatten
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from keras.datasets import mnist
def load_data():
    """Load MNIST, subsample 10k training images, flatten and normalize.

    Returns:
        ((x_train, y_train), (x_test, y_test)) where the x arrays are
        float32 of shape (n, 784) scaled to [0, 1] and the y arrays are
        one-hot encoded over the 10 digit classes.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    number = 10000  # subsample so CPU training stays fast
    x_train = x_train[:number]
    y_train = y_train[:number]
    # Flatten 28x28 images into 784-dim float vectors scaled to [0, 1].
    # (Removed the redundant no-op self-assignments from the original.)
    x_train = x_train.reshape(number, 28 * 28).astype('float32') / 255
    x_test = x_test.reshape(x_test.shape[0], 28 * 28).astype('float32') / 255
    # One-hot encode integer labels for use with categorical losses.
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)
    return (x_train, y_train), (x_test, y_test)
(x_train, y_train), (x_test, y_test) = load_data()

# Fully-connected classifier: three sigmoid hidden layers (733 units each)
# and a softmax output over the 10 digit classes.
model = Sequential()
model.add(Dense(input_dim=28 * 28, units=733, activation='sigmoid'))
for _ in range(2):
    model.add(Dense(units=733, activation='sigmoid'))
model.add(Dense(units=10, activation='softmax'))

# Loss function and optimizer; the optimizer governs how the learning
# rate is applied during gradient descent.
model.compile(loss='mse', optimizer=SGD(lr=0.1), metrics=['accuracy'])

# Train the network.
model.fit(x_train, y_train, batch_size=100, epochs=20)

# Report accuracy on both the training and test splits.
train_result = model.evaluate(x_train, y_train)
print('\n train Acc: ', train_result[1])
result = model.evaluate(x_test, y_test)
print('\n Test Acc: ', result[1])
结果:
train Acc: 0.1127
Test Acc: 0.1135
问题出在 loss function 上: MSE 确实不适合分类问题, 那将 loss function 换一下再试一下
2. activation function : sigmoid
loss function : cross entropy
optimizer : SGD
结果:
train Acc: 0.8352
Test Acc: 0.8275
3. 多层 layer ----> Deep
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPool2D, Flatten
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from keras.datasets import mnist
def load_data():
    """Load MNIST, subsample 10k training images, flatten and normalize.

    Returns:
        ((x_train, y_train), (x_test, y_test)) where the x arrays are
        float32 of shape (n, 784) scaled to [0, 1] and the y arrays are
        one-hot encoded over the 10 digit classes.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    number = 10000  # subsample so CPU training stays fast
    x_train = x_train[:number]
    y_train = y_train[:number]
    # Flatten 28x28 images into 784-dim float vectors scaled to [0, 1].
    # (Removed the redundant no-op self-assignments from the original.)
    x_train = x_train.reshape(number, 28 * 28).astype('float32') / 255
    x_test = x_test.reshape(x_test.shape[0], 28 * 28).astype('float32') / 255
    # One-hot encode integer labels for use with categorical losses.
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)
    return (x_train, y_train), (x_test, y_test)
(x_train, y_train), (x_test, y_test) = load_data()

# Deep fully-connected classifier: an input layer plus ten extra sigmoid
# hidden layers (733 units each), ending in a 10-way softmax.
model = Sequential()
model.add(Dense(input_dim=28 * 28, units=733, activation='sigmoid'))
extra_hidden = [Dense(units=733, activation='sigmoid') for _ in range(10)]
for layer in extra_hidden:
    model.add(layer)
model.add(Dense(units=10, activation='softmax'))

# Cross-entropy loss with plain SGD; the optimizer governs how the
# learning rate is applied.
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.1), metrics=['accuracy'])

# Train the network.
model.fit(x_train, y_train, batch_size=100, epochs=20)

# Report accuracy on both the training and test splits.
train_result = model.evaluate(x_train, y_train)
print('\n train Acc: ', train_result[1])
result = model.evaluate(x_test, y_test)
print('\n Test Acc: ', result[1])
结果如何呢?
train Acc: 0.1032
Test Acc: 0.101
这是什么原因呢? 是因为 sigmoid 导致了 gradient vanishing, 我们将 activation function 换成 ReLU
4. activation function : ReLU
loss function : cross entropy
optimizer : SGD
结果:
train Acc: 0.9998
Test Acc: 0.9556
5.结果已经比较好了, 现在我们回到 3 中, 看一下test acc在每个epoch中的大小:
(这里先取消掉deep,cpu运行太慢了)
Epoch1 : 0.1295
Epoch2 : 0.2712
Epoch3 : 0.5532
Epoch4 : 0.7416
Epoch5 : 0.8222
......
Epoch20 : 0.9204
6.我们尝试把SGD改为adam
Epoch1 : 0.7334
Epoch2 : 0.9244
Epoch3 : 0.9514
......
Epoch20 : 0.9644
可见,收敛的地方其实差不多,但是adam的上升速度快很多
7.如果我们故意加一些noise, 那结果会如何呢?
# load_data()
# add noise
x_test = np.random.normal(x_test)
return (x_train, y_train), (x_test, y_test)
结果:
train Acc: 1.0
Test Acc: 0.4299
其实,已经overfitting了
8.那我们试一下dropout
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPool2D, Flatten
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from keras.datasets import mnist
def load_data():
    """Load MNIST, subsample 10k training images, flatten, normalize,
    and corrupt the test inputs with Gaussian noise.

    Returns:
        ((x_train, y_train), (x_test, y_test)) where the x arrays are
        float32 of shape (n, 784), the y arrays are one-hot encoded over
        10 classes, and x_test has unit-variance Gaussian noise applied
        (each clean pixel value is used as the mean of the noisy sample).
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    number = 10000  # subsample so CPU training stays fast
    x_train = x_train[:number]
    y_train = y_train[:number]
    # Flatten 28x28 images into 784-dim float vectors scaled to [0, 1].
    # (Removed the redundant no-op self-assignments from the original.)
    x_train = x_train.reshape(number, 28 * 28).astype('float32') / 255
    x_test = x_test.reshape(x_test.shape[0], 28 * 28).astype('float32') / 255
    # One-hot encode integer labels for use with categorical losses.
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)
    # Deliberately add noise to the test set to provoke overfitting:
    # np.random.normal(x_test) draws N(x_test, 1) per element.
    x_test = np.random.normal(x_test)
    return (x_train, y_train), (x_test, y_test)
(x_train, y_train), (x_test, y_test) = load_data()

# Classifier with heavy dropout (rate 0.7) after every hidden layer to
# combat the overfitting induced by the noisy test set.
# NOTE(review): the first hidden layer uses ReLU while the deeper ones
# use sigmoid — confirm this mix is intentional.
model = Sequential()
model.add(Dense(input_dim=28 * 28, units=733, activation='relu'))
model.add(Dropout(0.7))
for act in ('sigmoid', 'sigmoid'):
    model.add(Dense(units=733, activation=act))
    model.add(Dropout(0.7))
model.add(Dense(units=10, activation='softmax'))

# Cross-entropy loss with the Adam optimizer.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the network.
model.fit(x_train, y_train, batch_size=100, epochs=20)

# Report accuracy on both the training and test splits.
train_result = model.evaluate(x_train, y_train)
print('\n train Acc: ', train_result[1])
result = model.evaluate(x_test, y_test)
print('\n Test Acc: ', result[1])
结果:
train Acc: 0.9923
Test Acc: 0.5604
可见,training 的 performance 有所下降, 而testing 的 performance 是有所上升的