[TF进阶] 多层神经网络

多层神经网络——解决非线性问题

实例28：用线性逻辑回归分析肿瘤的良性or恶性

1. 生成样本类

# -*- coding: utf-8 -*-
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle


#模拟数据点   
def generate(sample_size, mean, cov, diff,regression):   
    num_classes = 2 #len(diff)
    samples_per_class = int(sample_size/2)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)
    
    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean+d, cov, samples_per_class)
        Y1 = (ci+1)*np.ones(samples_per_class)
    
        X0 = np.concatenate((X0,X1))
        Y0 = np.concatenate((Y0,Y1))
        
    if regression==False: #one-hot  0 into the vector "1 0
        print("ssss")
        class_ind = [Y0==class_number for class_number in range(num_classes)]
        Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    X, Y = shuffle(X0, Y0)
    
    return X,Y

input_dim = 2                    
np.random.seed(10)
num_classes =2
mean = np.random.randn(num_classes)
cov = np.eye(num_classes) 
X, Y = generate(1000, mean, cov, [3.0],True)
colors = ['r' if l == 0 else 'b' for l in Y[:]]
plt.scatter(X[:,0], X[:,1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
lab_dim = 1

2. 构建网络结构

# tf Graph Input
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
# Set model weights
W = tf.Variable(tf.random_normal([input_dim,lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")

output =tf.nn.sigmoid( tf.matmul(input_features, W) + b)
cross_entropy = -(input_lables * tf.log(output) + (1 - input_lables) * tf.log(1 - output))
ser= tf.square(input_lables - output)
loss = tf.reduce_mean(cross_entropy)
err = tf.reduce_mean(ser)
optimizer = tf.train.AdamOptimizer(0.04) #尽量用这个--收敛快，会动态调节梯度
train = optimizer.minimize(loss)  # let the optimizer train

3. 设置参数进行训练

maxEpochs = 50
minibatchSize = 25

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(maxEpochs):
        sumerr=0
        for i in range(np.int32(len(Y)/minibatchSize)):
            x1 = X[i*minibatchSize:(i+1)*minibatchSize,:]
            y1 = np.reshape(Y[i*minibatchSize:(i+1)*minibatchSize],[-1,1])
            tf.reshape(y1,[-1,1])
            _,lossval, outputval,errval = sess.run([train,loss,output,err], feed_dict={input_features: x1, input_lables:y1})
            sumerr =sumerr+errval

        print ("Epoch:", '%04d' % (epoch+1), "cost=","{:.9f}".format(lossval),"err=",sumerr/np.int32(len(Y)/minibatchSize))
        
    # Graphic display
    train_X, train_Y = generate(100, mean, cov, [3.0],True)
    colors = ['r' if l == 0 else 'b' for l in train_Y[:]]
    plt.scatter(train_X[:,0], train_X[:,1], c=colors)
    #plt.scatter(train_X[:, 0], train_X[:, 1], c=train_Y)
    #plt.colorbar()


#    x1w1+x2*w2+b=0
#    x2=-x1* w1/w2-b/w2
    x = np.linspace(-1,8,200) 
    y=-x*(sess.run(W)[0]/sess.run(W)[1])-sess.run(b)/sess.run(W)[1]
    plt.plot(x,y, label='Fitted line')
    plt.legend()
    plt.show()

Epoch: 0001 cost= 0.427363694 err= 0.3149222269654274
Epoch: 0002 cost= 0.275242448 err= 0.12916538752615453
Epoch: 0003 cost= 0.198488832 err= 0.07311406331136823
Epoch: 0004 cost= 0.155583516 err= 0.049487294629216194
Epoch: 0005 cost= 0.129410699 err= 0.03783809146843851
Epoch: 0006 cost= 0.111637138 err= 0.031109510990791022
Epoch: 0007 cost= 0.098720580 err= 0.026801466860342772
Epoch: 0008 cost= 0.088899851 err= 0.023845877964049578
Epoch: 0009 cost= 0.081172511 err= 0.02171302803326398
Epoch: 0010 cost= 0.074926406 err= 0.02011334609705955
Epoch: 0011 cost= 0.069766864 err= 0.018876566260587424
Epoch: 0012 cost= 0.065427810 err= 0.01789664597599767
Epoch: 0013 cost= 0.061723653 err= 0.017104446739540435
Epoch: 0014 cost= 0.058520988 err= 0.016453136381460353
Epoch: 0015 cost= 0.055721492 err= 0.015909987088525666
Epoch: 0016 cost= 0.053251043 err= 0.015451466900412925
Epoch: 0017 cost= 0.051052839 err= 0.015060284163337202
Epoch: 0018 cost= 0.049082574 err= 0.014723470641183668
Epoch: 0019 cost= 0.047305182 err= 0.01443111448752461
Epoch: 0020 cost= 0.045692459 err= 0.014175528672058135
Epoch: 0021 cost= 0.044221617 err= 0.013950665328593459
Epoch: 0022 cost= 0.042873926 err= 0.013751706171024124
Epoch: 0023 cost= 0.041633889 err= 0.013574765577504876
Epoch: 0024 cost= 0.040488526 err= 0.013416683129617014
Epoch: 0025 cost= 0.039426997 err= 0.01327486191294156
Epoch: 0026 cost= 0.038439859 err= 0.013147142445814098
Epoch: 0027 cost= 0.037519407 err= 0.01303173367632553
Epoch: 0028 cost= 0.036658742 err= 0.012927120527456282
Epoch: 0029 cost= 0.035852008 err= 0.012832018547487677
Epoch: 0030 cost= 0.035094090 err= 0.012745325943251373
Epoch: 0031 cost= 0.034380447 err= 0.012666120514040813
Epoch: 0032 cost= 0.033707298 err= 0.012593578745509149
Epoch: 0033 cost= 0.033071052 err= 0.012527015041268897
Epoch: 0034 cost= 0.032468583 err= 0.012465806912223343
Epoch: 0035 cost= 0.031897344 err= 0.012409434546134435
Epoch: 0036 cost= 0.031354845 err= 0.012357419628096977
Epoch: 0037 cost= 0.030838819 err= 0.012309352662123274
Epoch: 0038 cost= 0.030347236 err= 0.012264869653517963
Epoch: 0039 cost= 0.029878592 err= 0.012223649651059532
Epoch: 0040 cost= 0.029431008 err= 0.012185401207898395
Epoch: 0041 cost= 0.029003244 err= 0.012149863801823813
Epoch: 0042 cost= 0.028593836 err= 0.012116818057256751
Epoch: 0043 cost= 0.028201800 err= 0.012086044963871246
Epoch: 0044 cost= 0.027825732 err= 0.012057367098532269
Epoch: 0045 cost= 0.027464826 err= 0.012030600460639107
Epoch: 0046 cost= 0.027118132 err= 0.012005612059874693
Epoch: 0047 cost= 0.026784733 err= 0.011982245713625161
Epoch: 0048 cost= 0.026463941 err= 0.01196038762536773
Epoch: 0049 cost= 0.026155062 err= 0.01193991965665191
Epoch: 0050 cost= 0.025857275 err= 0.01192073398942739

实例29：线性多分类

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap 
    
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y,start,end):
    ohe = OneHotEncoder()
    a = np.linspace(start,end-1,end-start)
    b =np.reshape(a,[-1,1]).astype(np.int32)
    ohe.fit(b)
    c=ohe.transform(y).toarray()  
    return c     
#
    
def generate(sample_size, num_classes, diff,regression=False):
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)  
    
    #len(diff)
    samples_per_class = int(sample_size/num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)
    
    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean+d, cov, samples_per_class)
        Y1 = (ci+1)*np.ones(samples_per_class)
    
        X0 = np.concatenate((X0,X1))
        Y0 = np.concatenate((Y0,Y1))
        #print(X0, Y0)
    
  
    if regression==False: #one-hot  0 into the vector "1 0
        Y0 = np.reshape(Y0,[-1,1])        
        #print(Y0.astype(np.int32))
        Y0 = onehot(Y0.astype(np.int32),0,num_classes)
        #print(Y0)
    X, Y = shuffle(X0, Y0)
    #print(X, Y)
    return X,Y    

 
# Ensure we always get the same amount of randomness
np.random.seed(10)

input_dim = 2
num_classes =3 
X, Y = generate(2000,num_classes,  [[3.0],[3.0,0]],False)
aa = [np.argmax(l) for l in Y]
colors =['r' if l == 0 else 'b' if l==1 else 'y' for l in aa[:]]

plt.scatter(X[:,0], X[:,1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()

lab_dim = num_classes
# tf Graph Input
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
# Set model weights
W = tf.Variable(tf.random_normal([input_dim,lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")
output = tf.matmul(input_features, W) + b

z = tf.nn.softmax( output )

a1 = tf.argmax(tf.nn.softmax( output ), axis=1)#按行找出最大索引，生成数组
b1 = tf.argmax(input_lables, axis=1)
err = tf.count_nonzero(a1-b1) #两个数组相减，不为0的就是错误个数

cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=input_lables,logits=output)
loss = tf.reduce_mean(cross_entropy)#对交叉熵取均值很有必要



optimizer = tf.train.AdamOptimizer(0.04) #尽量用这个--收敛快，会动态调节梯度
train = optimizer.minimize(loss)  # let the optimizer train

maxEpochs = 50
minibatchSize = 25

# 启动session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for epoch in range(maxEpochs):
        sumerr=0
        for i in range(np.int32(len(Y)/minibatchSize)):
            x1 = X[i*minibatchSize:(i+1)*minibatchSize,:]
            y1 = Y[i*minibatchSize:(i+1)*minibatchSize,:]

            _,lossval, outputval,errval = sess.run([train,loss,output,err], feed_dict={input_features: x1, input_lables:y1})
            sumerr =sumerr+(errval/minibatchSize)

        print ("Epoch:", '%04d' % (epoch+1), "cost=","{:.9f}".format(lossval),"err=",sumerr/minibatchSize)
 
    train_X, train_Y = generate(200,num_classes,  [[3.0],[3.0,0]],False)
    aa = [np.argmax(l) for l in train_Y]        
    colors =['r' if l == 0 else 'b' if l==1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:,0], train_X[:,1], c=colors)
    
    x = np.linspace(-1,8,200) 

    y=-x*(sess.run(W)[0][0]/sess.run(W)[1][0])-sess.run(b)[0]/sess.run(W)[1][0]
    plt.plot(x,y, label='first line',lw=3)

    y=-x*(sess.run(W)[0][1]/sess.run(W)[1][1])-sess.run(b)[1]/sess.run(W)[1][1]
    plt.plot(x,y, label='second line',lw=2)

    y=-x*(sess.run(W)[0][2]/sess.run(W)[1][2])-sess.run(b)[2]/sess.run(W)[1][2]
    plt.plot(x,y, label='third line',lw=1)
    
    plt.legend()
    plt.show() 
    print(sess.run(W),sess.run(b))
    

    train_X, train_Y = generate(200,num_classes,  [[3.0],[3.0,0]],False)
    aa = [np.argmax(l) for l in train_Y]        
    colors =['r' if l == 0 else 'b' if l==1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:,0], train_X[:,1], c=colors)    
    
    nb_of_xs = 200
    xs1 = np.linspace(-1, 8, num=nb_of_xs)
    xs2 = np.linspace(-1, 8, num=nb_of_xs)
    xx, yy = np.meshgrid(xs1, xs2) # create the grid
    # Initialize and fill the classification plane
    classification_plane = np.zeros((nb_of_xs, nb_of_xs))
    for i in range(nb_of_xs):
        for j in range(nb_of_xs):
            #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
            classification_plane[i,j] = sess.run(a1, feed_dict={input_features: [[ xx[i,j], yy[i,j] ]]} )
    
    
    # Create a color map to show the classification colors of each grid point
    cmap = ListedColormap([
            colorConverter.to_rgba('r', alpha=0.30),
            colorConverter.to_rgba('b', alpha=0.30),
            colorConverter.to_rgba('y', alpha=0.30)])
    # Plot the classification plane with decision boundary and input samples
    plt.contourf(xx, yy, classification_plane, cmap=cmap)
    plt.show()

Epoch: 0001 cost= 0.464984536 err= 1.0191999999999997
Epoch: 0002 cost= 0.355924010 err= 0.3856
Epoch: 0003 cost= 0.335220724 err= 0.3408000000000002
Epoch: 0004 cost= 0.332817644 err= 0.32320000000000015
Epoch: 0005 cost= 0.336272120 err= 0.3168000000000002
Epoch: 0006 cost= 0.341775596 err= 0.3088000000000002
Epoch: 0007 cost= 0.347882152 err= 0.3024000000000002
Epoch: 0008 cost= 0.353986651 err= 0.2992000000000002
Epoch: 0009 cost= 0.359829128 err= 0.29280000000000017
Epoch: 0010 cost= 0.365304261 err= 0.2896000000000002
Epoch: 0011 cost= 0.370378375 err= 0.2896000000000002
Epoch: 0012 cost= 0.375052452 err= 0.2832000000000002
Epoch: 0013 cost= 0.379343629 err= 0.2864000000000002
Epoch: 0014 cost= 0.383275449 err= 0.28480000000000016
Epoch: 0015 cost= 0.386874378 err= 0.2832000000000002
Epoch: 0016 cost= 0.390166640 err= 0.2816000000000002
Epoch: 0017 cost= 0.393177480 err= 0.2816000000000002
Epoch: 0018 cost= 0.395930529 err= 0.2816000000000002
Epoch: 0019 cost= 0.398447692 err= 0.2816000000000002
Epoch: 0020 cost= 0.400749117 err= 0.2800000000000002
Epoch: 0021 cost= 0.402853549 err= 0.27680000000000016
Epoch: 0022 cost= 0.404777676 err= 0.27680000000000016
Epoch: 0023 cost= 0.406537145 err= 0.27680000000000016
Epoch: 0024 cost= 0.408146054 err= 0.27680000000000016
Epoch: 0025 cost= 0.409617245 err= 0.27680000000000016
Epoch: 0026 cost= 0.410962522 err= 0.27680000000000016
Epoch: 0027 cost= 0.412192762 err= 0.27680000000000016
Epoch: 0028 cost= 0.413317710 err= 0.27680000000000016
Epoch: 0029 cost= 0.414346457 err= 0.27680000000000016
Epoch: 0030 cost= 0.415287137 err= 0.27680000000000016
Epoch: 0031 cost= 0.416147500 err= 0.27680000000000016
Epoch: 0032 cost= 0.416933984 err= 0.27680000000000016
Epoch: 0033 cost= 0.417653352 err= 0.27520000000000017
Epoch: 0034 cost= 0.418310970 err= 0.27520000000000017
Epoch: 0035 cost= 0.418912292 err= 0.27520000000000017
Epoch: 0036 cost= 0.419462293 err= 0.27520000000000017
Epoch: 0037 cost= 0.419964910 err= 0.27520000000000017
Epoch: 0038 cost= 0.420424640 err= 0.27680000000000016
Epoch: 0039 cost= 0.420844853 err= 0.27680000000000016
Epoch: 0040 cost= 0.421229184 err= 0.27680000000000016
Epoch: 0041 cost= 0.421580553 err= 0.2784000000000002
Epoch: 0042 cost= 0.421901524 err= 0.2784000000000002
Epoch: 0043 cost= 0.422195137 err= 0.2784000000000002
Epoch: 0044 cost= 0.422463447 err= 0.2784000000000002
Epoch: 0045 cost= 0.422708929 err= 0.2784000000000002
Epoch: 0046 cost= 0.422933191 err= 0.2784000000000002
Epoch: 0047 cost= 0.423138201 err= 0.2784000000000002
Epoch: 0048 cost= 0.423325777 err= 0.2784000000000002
Epoch: 0049 cost= 0.423497021 err= 0.2784000000000002
Epoch: 0050 cost= 0.423653603 err= 0.2784000000000002

[[-1.2782756   1.7228742   1.8240309 ]
 [-0.46404395  2.5893972  -0.4549423 ]] [ 6.951592  -8.270903  -1.3872018]

实例30：使用带隐藏层解决非线性问题

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np

# 网络结构：2维输入 --> 2维隐藏层 --> 1维输出

learning_rate = 1e-4
n_input  = 2
n_label  = 1
n_hidden = 2


x = tf.placeholder(tf.float32, [None,n_input])
y = tf.placeholder(tf.float32, [None, n_label])

weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
	} 
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
    }    


layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
#y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
#y_pred = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))#局部最优解

#y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))

#Leaky relus  40000次 ok
layer2 =tf.add(tf.matmul(layer_1, weights['h2']),biases['h2'])
y_pred = tf.maximum(layer2,0.01*layer2)
  
loss=tf.reduce_mean((y_pred-y)**2)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#生成数据
X=[[0,0],[0,1],[1,0],[1,1]]
Y=[[0],[1],[1],[0]]
X=np.array(X).astype('float32')
Y=np.array(Y).astype('int16')

#加载
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

#训练
for i in range(10000):
    sess.run(train_step,feed_dict={x:X,y:Y} )

     
#计算预测值
print(sess.run(y_pred,feed_dict={x:X}))
#输出：已训练100000次

       
#查看隐藏层的输出
print(sess.run(layer_1,feed_dict={x:X}))

[[0.49758607]
 [0.50016683]
 [0.49955258]
 [0.5021333 ]]
[[0.5226183  0.        ]
 [0.53049105 0.        ]
 [0.52861726 0.        ]
 [0.53649    0.        ]]

练习：异或one hot

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np

# 网络结构：2维输入 --> 2维隐藏层 --> 1维输出

learning_rate = 1e-4
n_input  = 2
n_label  = 2#1
n_hidden = 100


x = tf.placeholder(tf.float32, [None,n_input])
y = tf.placeholder(tf.float32, [None, n_label])

weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.truncated_normal([n_hidden, n_label], stddev=0.1))
	} 
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
    }    


layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))

#1 softmax 方法
#y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2'])) 
#cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=y,logits=y_pred)
#loss = tf.reduce_mean(cross_entropy)

#2 sigmoid方法+平方差
y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
loss=tf.reduce_mean((y_pred-y)**2)

#3 relu方法+平方差
#layer2 =tf.add(tf.matmul(layer_1, weights['h2']),biases['h2'])
#y_pred = tf.maximum(layer2,0.01*layer2) 
#loss=tf.reduce_mean((y_pred-y)**2)
 

train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#生成数据
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
X=np.array(X).astype('float32')
Y=np.array(Y).astype('int16')

#加载
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

#训练
for i in range(10000):
    sess.run(train_step,feed_dict={x:X,y:Y} )

     
#计算预测值
print(sess.run(y_pred,feed_dict={x:X}))
#输出：已训练100000次

[[0.95220375 0.03481147]
 [0.01518062 0.98357517]
 [0.0157361  0.9843861 ]
 [0.98463315 0.0158883 ]]

实例31：mnist多层分类

# -*- coding: utf-8 -*-
import tensorflow as tf
# 导入 MINST 数据集
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/data/", one_hot=True)

#参数设置
learning_rate = 0.001
training_epochs = 25
batch_size = 100
display_step = 1

# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 784 # MNIST data 输入 (img shape: 28*28)
n_classes = 10  # MNIST 列别 (0-9 ，一共10类)

# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])

# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer
    
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# 构建模型
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# 初始化变量
init = tf.global_variables_initializer()


# 启动session
with tf.Session() as sess:
    sess.run(init)

    # 启动循环开始训练
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # 遍历全部数据集
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # 显示训练中的详细信息
        if epoch % display_step == 0:
            print ("Epoch:", '%04d' % (epoch+1), "cost=", \
                "{:.9f}".format(avg_cost))
    print (" Finished!")

    # 测试 model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # 计算准确率
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

Epoch: 0001 cost= 168.430218674
Epoch: 0002 cost= 42.537414488
Epoch: 0003 cost= 26.428106686
Epoch: 0004 cost= 18.280389462
Epoch: 0005 cost= 13.288486609
Epoch: 0006 cost= 9.804760788
Epoch: 0007 cost= 7.270819309
Epoch: 0008 cost= 5.431077717
Epoch: 0009 cost= 4.041842815
Epoch: 0010 cost= 2.940616167
Epoch: 0011 cost= 2.287564471
Epoch: 0012 cost= 1.553566692
Epoch: 0013 cost= 1.299858189
Epoch: 0014 cost= 0.973526390
Epoch: 0015 cost= 0.778381801
Epoch: 0016 cost= 0.659176175
Epoch: 0017 cost= 0.654275050
Epoch: 0018 cost= 0.509398232
Epoch: 0019 cost= 0.451068804
Epoch: 0020 cost= 0.460308700
Epoch: 0021 cost= 0.436099133
Epoch: 0022 cost= 0.353821884
Epoch: 0023 cost= 0.376541839
Epoch: 0024 cost= 0.340227318
Epoch: 0025 cost= 0.289853257
 Finished!
Accuracy: 0.9525

实例32：过拟合

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap 
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y,start,end):
    ohe = OneHotEncoder()
    a = np.linspace(start,end-1,end-start)
    b =np.reshape(a,[-1,1]).astype(np.int32)
    ohe.fit(b)
    c=ohe.transform(y).toarray()  
    return c  
    
def generate(sample_size, num_classes, diff,regression=False):
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)  
    
    #len(diff)
    samples_per_class = int(sample_size/num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)
    
    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean+d, cov, samples_per_class)
        Y1 = (ci+1)*np.ones(samples_per_class)
    
        X0 = np.concatenate((X0,X1))
        Y0 = np.concatenate((Y0,Y1))

  
    if regression==False: #one-hot  0 into the vector "1 0
        Y0 = np.reshape(Y0,[-1,1])        
        #print(Y0.astype(np.int32))
        Y0 = onehot(Y0.astype(np.int32),0,num_classes)
        #print(Y0)
    X, Y = shuffle(X0, Y0)
    #print(X, Y)
    return X,Y   
    
# Ensure we always get the same amount of randomness
np.random.seed(10)

input_dim = 2
num_classes =4 
X, Y = generate(320,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
Y=Y%2
#colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
#plt.scatter(X[:,0], X[:,1], c=colors)
xr=[]
xb=[]
for(l,k) in zip(Y[:],X[:]):
    if l == 0.0 :
        xr.append([k[0],k[1]])        
    else:
        xb.append([k[0],k[1]])
xr =np.array(xr)
xb =np.array(xb)      
plt.scatter(xr[:,0], xr[:,1], c='r',marker='+')
plt.scatter(xb[:,0], xb[:,1], c='b',marker='o')

plt.show() 
Y=np.reshape(Y,[-1,1])

learning_rate = 1e-4
n_input  = 2
n_label  = 1
#n_hidden = 2#欠拟合
n_hidden = 200


x = tf.placeholder(tf.float32, [None,n_input])
y = tf.placeholder(tf.float32, [None, n_label])

weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
	} 
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
    }    


layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
#y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
#y_pred = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))#局部最优解

#y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))

#Leaky relus  40000次 ok
layer2 =tf.add(tf.matmul(layer_1, weights['h2']),biases['h2'])
y_pred = tf.maximum(layer2,0.01*layer2)
 
 
loss=tf.reduce_mean((y_pred-y)**2)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#加载
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
    
for i in range(20000):#  
  
  _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y})

  if i % 1000 == 0:
    print ("Step:", i, "Current loss:", loss_val)

#colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
#plt.scatter(X[:,0], X[:,1], c=colors)
xr=[]
xb=[]
for(l,k) in zip(Y[:],X[:]):
    if l == 0.0 :
        xr.append([k[0],k[1]])        
    else:
        xb.append([k[0],k[1]])
xr =np.array(xr)
xb =np.array(xb)      
plt.scatter(xr[:,0], xr[:,1], c='r',marker='+')
plt.scatter(xb[:,0], xb[:,1], c='b',marker='o')

    
nb_of_xs = 200
xs1 = np.linspace(-3, 10, num=nb_of_xs)
xs2 = np.linspace(-3, 10, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i,j] = sess.run(y_pred, feed_dict={x: [[ xx[i,j], yy[i,j] ]]} )
        classification_plane[i,j] = int(classification_plane[i,j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show() 


xTrain, yTrain = generate(12,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
yTrain=yTrain%2
#colors = ['r' if l == 0.0 else 'b' for l in yTrain[:]]
#plt.scatter(xTrain[:,0], xTrain[:,1], c=colors)

xr=[]
xb=[]
for(l,k) in zip(yTrain[:],xTrain[:]):
    if l == 0.0 :
        xr.append([k[0],k[1]])        
    else:
        xb.append([k[0],k[1]])
xr =np.array(xr)
xb =np.array(xb)      
plt.scatter(xr[:,0], xr[:,1], c='r',marker='+')
plt.scatter(xb[:,0], xb[:,1], c='b',marker='o')


#plt.show() 
yTrain=np.reshape(yTrain,[-1,1])           
print ("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))          

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i,j] = sess.run(y_pred, feed_dict={x: [[ xx[i,j], yy[i,j] ]]} )
        classification_plane[i,j] = int(classification_plane[i,j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

Step: 0 Current loss: 0.5018894
Step: 1000 Current loss: 0.10373752
Step: 2000 Current loss: 0.086291894
Step: 3000 Current loss: 0.077173755
Step: 4000 Current loss: 0.07151143
Step: 5000 Current loss: 0.06800972
Step: 6000 Current loss: 0.066351056
Step: 7000 Current loss: 0.0651166
Step: 8000 Current loss: 0.06417427
Step: 9000 Current loss: 0.06318717
Step: 10000 Current loss: 0.06220935
Step: 11000 Current loss: 0.0611071
Step: 12000 Current loss: 0.060015637
Step: 13000 Current loss: 0.05878272
Step: 14000 Current loss: 0.05735097
Step: 15000 Current loss: 0.056046672
Step: 16000 Current loss: 0.054749347
Step: 17000 Current loss: 0.053585578
Step: 18000 Current loss: 0.05233752
Step: 19000 Current loss: 0.051035363

loss:
 0.11412191

实例33：通过正则化改善过拟合

实例34：通过数据增强改善过拟合

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap 
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y,start,end):
    ohe = OneHotEncoder()
    a = np.linspace(start,end-1,end-start)
    b =np.reshape(a,[-1,1]).astype(np.int32)
    ohe.fit(b)
    c=ohe.transform(y).toarray()  
    return c  
    
def generate(sample_size, num_classes, diff,regression=False):
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)  
    
    #len(diff)
    samples_per_class = int(sample_size/num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)
    
    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean+d, cov, samples_per_class)
        Y1 = (ci+1)*np.ones(samples_per_class)
    
        X0 = np.concatenate((X0,X1))
        Y0 = np.concatenate((Y0,Y1))

  
    if regression==False: #one-hot  0 into the vector "1 0
        Y0 = np.reshape(Y0,[-1,1])        
        #print(Y0.astype(np.int32))
        Y0 = onehot(Y0.astype(np.int32),0,num_classes)
        #print(Y0)
    X, Y = shuffle(X0, Y0)
    #print(X, Y)
    return X,Y   
    
# Ensure we always get the same amount of randomness
np.random.seed(10)

input_dim = 2
num_classes =4 
X, Y = generate(120,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
Y=Y%2
colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
plt.scatter(X[:,0], X[:,1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show() 
Y=np.reshape(Y,[-1,1])

learning_rate = 1e-4
n_input  = 2
n_label  = 1
n_hidden = 200


x = tf.placeholder(tf.float32, [None,n_input])
y = tf.placeholder(tf.float32, [None, n_label])

weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
	} 
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
    }    


layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))

#Leaky relus  
layer2 =tf.add(tf.matmul(layer_1, weights['h2']),biases['h2'])
y_pred = tf.maximum(layer2,0.01*layer2)

reg = 0.01 
loss=tf.reduce_mean((y_pred-y)**2)+tf.nn.l2_loss(weights['h1'])*reg+tf.nn.l2_loss(weights['h2'])*reg
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#加载
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
    
for i in range(20000):

    X, Y = generate(1000,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
    Y=Y%2
    Y=np.reshape(Y,[-1,1])
  
    _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y})
    
    if i % 1000 == 0:
        print ("Step:", i, "Current loss:", loss_val)

colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
plt.scatter(X[:,0], X[:,1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")

    
nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i,j] = sess.run(y_pred, feed_dict={x: [[ xx[i,j], yy[i,j] ]]} )
        classification_plane[i,j] = int(classification_plane[i,j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show() 


xTrain, yTrain = generate(12,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
yTrain=yTrain%2
colors = ['r' if l == 0.0 else 'b' for l in yTrain[:]]
plt.scatter(xTrain[:,0], xTrain[:,1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
#plt.show() 
yTrain=np.reshape(yTrain,[-1,1])           
print ("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))          

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i,j] = sess.run(y_pred, feed_dict={x: [[ xx[i,j], yy[i,j] ]]} )
        classification_plane[i,j] = int(classification_plane[i,j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

Step: 0 Current loss: 0.38450366
Step: 1000 Current loss: 0.13549063
Step: 2000 Current loss: 0.11430303
Step: 3000 Current loss: 0.105851315
Step: 4000 Current loss: 0.1019517
Step: 5000 Current loss: 0.100453615
Step: 6000 Current loss: 0.09993585
Step: 7000 Current loss: 0.09968082
Step: 8000 Current loss: 0.09953847
Step: 9000 Current loss: 0.09942692
Step: 10000 Current loss: 0.099354245
Step: 11000 Current loss: 0.09930407
Step: 12000 Current loss: 0.09926791
Step: 13000 Current loss: 0.09924292
Step: 14000 Current loss: 0.099226475
Step: 15000 Current loss: 0.09921448
Step: 16000 Current loss: 0.099205494
Step: 17000 Current loss: 0.09919693
Step: 18000 Current loss: 0.099191815
Step: 19000 Current loss: 0.099188045

loss:
 0.09049854

实例35：dropout

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap 
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y,start,end):
    ohe = OneHotEncoder()
    a = np.linspace(start,end-1,end-start)
    b =np.reshape(a,[-1,1]).astype(np.int32)
    ohe.fit(b)
    c=ohe.transform(y).toarray()  
    return c  
    
def generate(sample_size, num_classes, diff,regression=False):
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)  
    
    #len(diff)
    samples_per_class = int(sample_size/num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)
    
    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean+d, cov, samples_per_class)
        Y1 = (ci+1)*np.ones(samples_per_class)
    
        X0 = np.concatenate((X0,X1))
        Y0 = np.concatenate((Y0,Y1))

  
    if regression==False: #one-hot  0 into the vector "1 0
        Y0 = np.reshape(Y0,[-1,1])        
        #print(Y0.astype(np.int32))
        Y0 = onehot(Y0.astype(np.int32),0,num_classes)
        #print(Y0)
    X, Y = shuffle(X0, Y0)
    #print(X, Y)
    return X,Y   
    
# Ensure we always get the same amount of randomness
np.random.seed(10)

input_dim = 2
num_classes =4 
X, Y = generate(120,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
Y=Y%2
#colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
#plt.scatter(X[:,0], X[:,1], c=colors)
xr=[]
xb=[]
for(l,k) in zip(Y[:],X[:]):
    if l == 0.0 :
        xr.append([k[0],k[1]])        
    else:
        xb.append([k[0],k[1]])
xr =np.array(xr)
xb =np.array(xb)      
plt.scatter(xr[:,0], xr[:,1], c='r',marker='+')
plt.scatter(xb[:,0], xb[:,1], c='b',marker='o')
plt.show() 
Y=np.reshape(Y,[-1,1])

learning_rate = 0.01#1e-4
n_input  = 2
n_label  = 1
n_hidden = 200


x = tf.placeholder(tf.float32, [None,n_input])
y = tf.placeholder(tf.float32, [None, n_label])

weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
	} 
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
    }    


layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))

keep_prob = tf.placeholder("float")
layer_1_drop = tf.nn.dropout(layer_1, keep_prob)


#Leaky relus
layer2 =tf.add(tf.matmul(layer_1_drop, weights['h2']),biases['h2'])
y_pred = tf.maximum(layer2,0.01*layer2)

reg = 0.01 
#loss=tf.reduce_mean((y_pred-y)**2)+tf.nn.l2_loss(weights['h1'])*reg+tf.nn.l2_loss(weights['h2'])*reg
loss=tf.reduce_mean((y_pred-y)**2)

global_step = tf.Variable(0, trainable=False)
decaylearning_rate = tf.train.exponential_decay(learning_rate, global_step,1000, 0.9)
#train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
train_step = tf.train.AdamOptimizer(decaylearning_rate).minimize(loss,global_step=global_step)

#加载
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
    
for i in range(20000):

    X, Y = generate(1000,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
    Y=Y%2
    Y=np.reshape(Y,[-1,1])
  
    _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y,keep_prob:0.6})
   
    if i % 1000 == 0:
        print ("Step:", i, "Current loss:", loss_val)

#colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
#plt.scatter(X[:,0], X[:,1], c=colors)
xr=[]
xb=[]
for(l,k) in zip(Y[:],X[:]):
    if l == 0.0 :
        xr.append([k[0],k[1]])        
    else:
        xb.append([k[0],k[1]])
xr =np.array(xr)
xb =np.array(xb)      
plt.scatter(xr[:,0], xr[:,1], c='r',marker='+')
plt.scatter(xb[:,0], xb[:,1], c='b',marker='o')

    
nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i,j] = sess.run(y_pred, feed_dict={x: [[ xx[i,j], yy[i,j] ]],keep_prob:1.0} )
        classification_plane[i,j] = int(classification_plane[i,j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show() 


xTrain, yTrain = generate(12,num_classes,  [[3.0,0],[3.0,3.0],[0,3.0]],True)
yTrain=yTrain%2
#colors = ['r' if l == 0.0 else 'b' for l in yTrain[:]]
#plt.scatter(xTrain[:,0], xTrain[:,1], c=colors)


xr=[]
xb=[]
for(l,k) in zip(yTrain[:],xTrain[:]):
    if l == 0.0 :
        xr.append([k[0],k[1]])        
    else:
        xb.append([k[0],k[1]])
xr =np.array(xr)
xb =np.array(xb)      
plt.scatter(xr[:,0], xr[:,1], c='r',marker='+')
plt.scatter(xb[:,0], xb[:,1], c='b',marker='o')


#plt.show() 
yTrain=np.reshape(yTrain,[-1,1])           
print ("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain,keep_prob:1.0}))          

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i,j] = sess.run(y_pred, feed_dict={x: [[ xx[i,j], yy[i,j] ]],keep_prob:1.0} )
        classification_plane[i,j] = int(classification_plane[i,j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

Step: 0 Current loss: 0.45869648
Step: 1000 Current loss: 0.093665764
Step: 2000 Current loss: 0.09341683
Step: 3000 Current loss: 0.091144115
Step: 4000 Current loss: 0.094939746
Step: 5000 Current loss: 0.09450153
Step: 6000 Current loss: 0.09326039
Step: 7000 Current loss: 0.08985673
Step: 8000 Current loss: 0.08905965
Step: 9000 Current loss: 0.09108193
Step: 10000 Current loss: 0.09060887
Step: 11000 Current loss: 0.091397986
Step: 12000 Current loss: 0.09049434
Step: 13000 Current loss: 0.09125102
Step: 14000 Current loss: 0.08975915
Step: 15000 Current loss: 0.092092544
Step: 16000 Current loss: 0.092546806
Step: 17000 Current loss: 0.090509295
Step: 18000 Current loss: 0.09183584
Step: 19000 Current loss: 0.09077683

改错：xorerr1 对bias初始化错误

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(55)
np.random.seed(55)

input_data = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # XOR input
output_data = [[0.], [1.], [1.], [0.]]  # XOR output


hidden_nodes =2

n_input = tf.placeholder(tf.float32, shape=[None, 2], name="n_input")
n_output = tf.placeholder(tf.float32, shape=[None, 1], name="n_output")

# hidden layer's bias neuron
#b_hidden = tf.Variable(0.1, name="hidden_bias")  
b_hidden = tf.Variable(tf.random_normal([2]), name="hidden_bias")  

W_hidden = tf.Variable(tf.random_normal([2, hidden_nodes]), name="hidden_weights")

hidden = tf.sigmoid(tf.matmul(n_input, W_hidden) + b_hidden)

################
# output layer #
################
W_output = tf.Variable(tf.random_normal([hidden_nodes, 1]), name="output_weights")  # output layer's weight matrix


#不影响
#b_output =  tf.Variable(0.1, name="output_bias")#
b_output =  tf.Variable(tf.random_normal([2]), name="output_bias")#



#output = tf.nn.relu(tf.matmul(hidden, W_output)+b_output)  # 出来的都是nan calc output layer's activation
output = tf.nn.tanh(tf.matmul(hidden, W_output)+b_output)  # 出来的都是nan calc output layer's activation

#softmax
y = tf.matmul(hidden, W_output)+b_output
output = tf.nn.softmax(tf.matmul(hidden, W_output)+b_output)



#交叉熵
loss = -(n_output * tf.log(output) + (1 - n_output) * tf.log(1 - output))


optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)  # let the optimizer train

#####################
# train the network #
#####################
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 2001):  
        # run the training operation
        cvalues = sess.run([train, loss, W_hidden, b_hidden, W_output],
                       feed_dict={n_input: input_data, n_output: output_data})

    # print some debug stuff
        if epoch % 200 == 0:
            print("")
            print("step: {:>3}".format(epoch))
            print("loss: {}".format(cvalues[1]))
            # print("b_hidden: {}".format(cvalues[3]))
            # print("W_hidden: {}".format(cvalues[2]))
            # print("W_output: {}".format(cvalues[4]))


    print("")
    print("input: {} | output: {}".format(input_data[0], sess.run(output, feed_dict={n_input: [input_data[0]]})))
    print("input: {} | output: {}".format(input_data[1], sess.run(output, feed_dict={n_input: [input_data[1]]})))
    print("input: {} | output: {}".format(input_data[2], sess.run(output, feed_dict={n_input: [input_data[2]]})))
    print("input: {} | output: {}".format(input_data[3], sess.run(output, feed_dict={n_input: [input_data[3]]})))

step:   0
loss: [[0.5389002 0.8756036]
 [0.8756037 0.5389002]
 [0.8756037 0.5389001]
 [0.5389002 0.8756036]]

step: 200
loss: [[0.693099   0.69319534]
 [0.69319534 0.693099  ]
 [0.69319534 0.693099  ]
 [0.693099   0.69319534]]

step: 400
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 600
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 800
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 1000
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 1200
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 1400
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 1600
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 1800
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

step: 2000
loss: [[0.6931468  0.69314754]
 [0.69314754 0.6931468 ]
 [0.69314754 0.6931468 ]
 [0.6931468  0.69314754]]

input: [0.0, 0.0] | output: [[0.49999982 0.5000002 ]]
input: [0.0, 1.0] | output: [[0.49999982 0.5000002 ]]
input: [1.0, 0.0] | output: [[0.49999982 0.5000002 ]]
input: [1.0, 1.0] | output: [[0.49999982 0.5000002 ]]

改错：xorerr2 对bias初始化错误

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(55)
np.random.seed(55)

input_data = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # XOR input
output_data = [[0.], [1.], [1.], [0.]]  # XOR output


hidden_nodes =2

n_input = tf.placeholder(tf.float32, shape=[None, 2], name="n_input")
n_output = tf.placeholder(tf.float32, shape=[None, 1], name="n_output")

# hidden layer's bias neuron
#b_hidden = tf.Variable(0.1, name="hidden_bias")  
b_output =  tf.Variable(tf.random_normal([2]), name="hidden_bias")#

W_hidden = tf.Variable(tf.random_normal([2, hidden_nodes]), name="hidden_weights")

hidden = tf.sigmoid(tf.matmul(n_input, W_hidden) + b_hidden)

################
# output layer #
################
W_output = tf.Variable(tf.random_normal([hidden_nodes, 1]), name="output_weights")  # output layer's weight matrix


#不影响
#b_output =  tf.Variable(0.1, name="output_bias")
b_output =  tf.Variable(tf.random_normal([2]), name="output_bias")#


output = tf.nn.tanh(tf.matmul(hidden, W_output)+b_output)  # 

#softmax
y = tf.matmul(hidden, W_output)+b_output
output = tf.nn.softmax(tf.matmul(hidden, W_output)+b_output)



#交叉熵
loss = -(n_output * tf.log(output) + (1 - n_output) * tf.log(1 - output))



optimizer = tf.train.AdamOptimizer(0.01) 
train = optimizer.minimize(loss)  # let the optimizer train

#####################
# train the network #
#####################
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 2001):  
        # run the training operation
        cvalues = sess.run([train, loss, W_hidden, b_hidden, W_output],
                       feed_dict={n_input: input_data, n_output: output_data})

    # print some debug stuff
        if epoch % 200 == 0:
            print("")
            print("step: {:>3}".format(epoch))
            print("loss: {}".format(cvalues[1]))
            # print("b_hidden: {}".format(cvalues[3]))
            # print("W_hidden: {}".format(cvalues[2]))
            # print("W_output: {}".format(cvalues[4]))


    print("")
    print("input: {} | output: {}".format(input_data[0], sess.run(output, feed_dict={n_input: [input_data[0]]})))
    print("input: {} | output: {}".format(input_data[1], sess.run(output, feed_dict={n_input: [input_data[1]]})))
    print("input: {} | output: {}".format(input_data[2], sess.run(output, feed_dict={n_input: [input_data[2]]})))
    print("input: {} | output: {}".format(input_data[3], sess.run(output, feed_dict={n_input: [input_data[3]]})))

# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(55)
np.random.seed(55)

input_data = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # XOR input
output_data = [[0.], [1.], [1.], [0.]]  # XOR output


hidden_nodes =2

n_input = tf.placeholder(tf.float32, shape=[None, 2], name="n_input")
n_output = tf.placeholder(tf.float32, shape=[None, 1], name="n_output")

# hidden layer's bias neuron
#b_hidden = tf.Variable(0.1, name="hidden_bias")  
b_output =  tf.Variable(tf.random_normal([2]), name="hidden_bias")#

W_hidden = tf.Variable(tf.random_normal([2, hidden_nodes]), name="hidden_weights")

hidden = tf.sigmoid(tf.matmul(n_input, W_hidden) + b_hidden)

################
# output layer #
################
W_output = tf.Variable(tf.random_normal([hidden_nodes, 1]), name="output_weights")  # output layer's weight matrix


#不影响
#b_output =  tf.Variable(0.1, name="output_bias")
b_output =  tf.Variable(tf.random_normal([2]), name="output_bias")#


output = tf.nn.tanh(tf.matmul(hidden, W_output)+b_output)  

#cross_entropy = -tf.reduce_sum(n_output * tf.log(output))# 
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=n_output, logits=output)

loss = tf.reduce_mean(cross_entropy)  # mean the cross_entropy

optimizer = tf.train.AdamOptimizer(0.01)
train = optimizer.minimize(loss)  # let the optimizer train

#####################
# train the network #
#####################
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 2001):  
        # run the training operation
        cvalues = sess.run([train, loss, W_hidden, b_hidden, W_output],
                       feed_dict={n_input: input_data, n_output: output_data})

    # print some debug stuff
        if epoch % 200 == 0:
            print("")
            print("step: {:>3}".format(epoch))
            print("loss: {}".format(cvalues[1]))
            # print("b_hidden: {}".format(cvalues[3]))
            # print("W_hidden: {}".format(cvalues[2]))
            # print("W_output: {}".format(cvalues[4]))


    print("")
    print("input: {} | output: {}".format(input_data[0], sess.run(output, feed_dict={n_input: [input_data[0]]})))
    print("input: {} | output: {}".format(input_data[1], sess.run(output, feed_dict={n_input: [input_data[1]]})))
    print("input: {} | output: {}".format(input_data[2], sess.run(output, feed_dict={n_input: [input_data[2]]})))
    print("input: {} | output: {}".format(input_data[3], sess.run(output, feed_dict={n_input: [input_data[3]]})))

gdtop818

发布了261 篇原创文章 · 获赞 137 · 访问量 20万+

私信关注

[TF进阶] 多层神经网络

多层神经网络——解决非线性问题

实例28：用线性逻辑回归分析肿瘤的良性or恶性

1. 生成样本类

2. 构建网络结构

3. 设置参数进行训练

实例29：线性多分类

实例30：使用带隐藏层解决非线性问题

练习：异或one hot

实例31：mnist多层分类

实例32：过拟合

实例33：通过正则化改善过拟合

实例34：通过数据增强改善过拟合

实例35：dropout

改错：xorerr1 对bias初始化错误

改错：xorerr2 对bias初始化错误

猜你喜欢