% ex4.m
%% Machine Learning Online Class - Exercise 4 Neural Network Learning
%% Initialization
clear ; close all; clc
%% Setup the parameters you will use for this exercise
input_layer_size = 400; % 20x20 Input Images of Digits
hidden_layer_size = 25; % 25 hidden units
num_labels = 10; % 10 labels, from 1 to 10
% (note that we have mapped "0" to label 10)
%% =========== Part 1: Loading and Visualizing Data =============
load('ex4data1.mat');
m = size(X, 1);
% Randomly select 100 data points to display
sel = randperm(size(X, 1));
sel = sel(1:100);
displayData(X(sel, :));
%% ================ Part 2: Loading Parameters ================
% Load the weights into variables Theta1 and Theta2
load('ex4weights.mat');
% Unroll parameters
nn_params = [Theta1(:) ; Theta2(:)];
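% Optional sanity check (not in the original exercise): Theta1 is 25 x 401
% and Theta2 is 10 x 26, so nn_params should hold 25*401 + 10*26 = 10285
% elements.
assert(numel(nn_params) == hidden_layer_size * (input_layer_size + 1) + ...
       num_labels * (hidden_layer_size + 1));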
%% ================ Part 3: Compute Cost (Feedforward) ================
% Weight regularization parameter (we set this to 0 here).
lambda = 0;
J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);
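% Print the cost for a quick check; with the pretrained weights in
% ex4weights.mat the exercise's expected value is about 0.287629.
fprintf('Cost at loaded parameters (lambda = 0): %f\n', J);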
%% =============== Part 4: Implement Regularization ===============
% Weight regularization parameter (we set this to 1 here).
lambda = 1;
J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);
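% Expected value from the exercise for these weights: about 0.383770.
fprintf('Cost at loaded parameters (lambda = 1): %f\n', J);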
%% ================ Part 5: Sigmoid Gradient ================
g = sigmoidGradient([-1 -0.5 0 0.5 1]);
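% Display the gradient values; sigmoidGradient(0) should be exactly 0.25,
% the maximum of the sigmoid's derivative.
fprintf('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n');
fprintf('%f ', g);
fprintf('\n');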
%% ================ Part 6: Initializing Parameters ================
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size);
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels);
% Unroll parameters
initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)];
%% =============== Part 7: Implement Backpropagation ===============
% Check gradients by running checkNNGradients
checkNNGradients;
%% =============== Part 8: Implement Regularization ===============
% Check gradients by running checkNNGradients
lambda = 3;
checkNNGradients(lambda);
% Also output the costFunction debugging values
debug_J = nnCostFunction(nn_params, input_layer_size, ...
                         hidden_layer_size, num_labels, X, y, lambda);
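% Expected value from the exercise at lambda = 3: about 0.576051.
fprintf('Cost at (fixed) debugging parameters (lambda = 3): %f\n', debug_J);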
%% =================== Part 9: Training NN ===================
% After you have completed the assignment, change the MaxIter to a larger
% value to see how more training helps.
options = optimset('MaxIter', 400);
% You should also try different values of lambda
lambda = 1;
% Create "short hand" for the cost function to be minimized
costFunction = @(p) nnCostFunction(p, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, X, y, lambda);
% Now, costFunction is a function that takes in only one argument (the
% neural network parameters)
[nn_params, cost] = fmincg(costFunction, initial_nn_params, options);
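% Report the final training cost. This assumes fmincg returns the cost
% history per iteration, as the course-provided version of fmincg does.
fprintf('Final cost after training: %f\n', cost(end));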
% Obtain Theta1 and Theta2 back from nn_params
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));
Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));
%% ================= Part 10: Visualize Weights =================
displayData(Theta1(:, 2:end));
%% ================= Part 11: Implement Predict =================
pred = predict(Theta1, Theta2, X);
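% Training-set accuracy: the fraction of examples whose predicted label
% matches y. (With lambda = 1 the course notes report roughly 95% after
% 50 iterations; more iterations push this higher.)
fprintf('Training Set Accuracy: %f\n', mean(double(pred == y)) * 100);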
function g = sigmoidGradient(z)
%SIGMOIDGRADIENT returns the gradient of the sigmoid function
%evaluated at z
%   g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
%   evaluated at z. The computation is element-wise, so z may be a scalar,
%   a vector, or a matrix.
g = sigmoid(z) .* (1 - sigmoid(z));
end
function W = randInitializeWeights(L_in, L_out)
%RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
%incoming connections and L_out outgoing connections
% W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
% of a layer with L_in incoming connections and L_out outgoing
% connections.
%
% Note that W should be set to a matrix of size(L_out, 1 + L_in) as
% the first column of W handles the "bias" terms
%
% Initialize W uniformly in [-epsilon_init, epsilon_init]; random (rather
% than zero) initialization breaks the symmetry between hidden units
epsilon_init = 0.12;
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
end
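% Aside: 0.12 is the fixed range the exercise suggests. The exercise also
% mentions a heuristic that scales the range with the layer sizes, e.g.
%   epsilon_init = sqrt(6) / sqrt(L_in + L_out);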
function [J, grad] = nnCostFunction(nn_params, ...
                                    input_layer_size, ...
                                    hidden_layer_size, ...
                                    num_labels, ...
                                    X, y, lambda)
%NNCOSTFUNCTION Implements the neural network cost function for a two layer
%neural network which performs classification
%   [J, grad] = NNCOSTFUNCTION(nn_params, input_layer_size, hidden_layer_size, ...
%   num_labels, X, y, lambda) computes the cost and gradient of the neural network. The
% parameters for the neural network are "unrolled" into the vector
% nn_params and need to be converted back into the weight matrices.
%
% The returned parameter grad should be an "unrolled" vector of the
% partial derivatives of the neural network.
%
% Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
% for our 2 layer neural network
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));
Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));
% Setup some useful variables
m = size(X, 1);
% Accumulators for the cost and gradients
J = 0;
Theta1_grad = zeros(size(Theta1));
Theta2_grad = zeros(size(Theta2));

% Forward pass to the hidden layer for all m examples at once
X = [ones(m, 1) X];               % prepend bias column: m x 401
a2_all = sigmoid(X * Theta1');    % hidden-layer activations: m x 25
a2_all = [ones(m, 1) a2_all];     % prepend bias column: m x 26
% Unregularized cost: sum the cross-entropy loss over all m examples
for i = 1:m
  hx = sigmoid(a2_all(i, :) * Theta2');  % hypothesis: 1 x 10
  hy = (y(i) == (1:num_labels));         % one-hot encoding of y(i): 1 x 10
  J = J - hy * log(hx') - (1 - hy) * log(1 - hx');
end
J = (1/m) * J;
% Regularization term: sum of squared weights, excluding bias columns
r = 0;
for i = 1:hidden_layer_size
  for j = 2:input_layer_size + 1
    r = r + Theta1(i, j)^2;
  end
end
for i = 1:num_labels
  for j = 2:hidden_layer_size + 1
    r = r + Theta2(i, j)^2;
  end
end
J = J + (lambda/(2*m)) * r;
% Backpropagation: accumulate the gradients over all m examples
for i = 1:m
  a1 = X(i, :);                             % input with bias: 1 x 401
  z2 = a1 * Theta1';                        % hidden pre-activation: 1 x 25
  a2 = [1 sigmoid(z2)];                     % hidden activation with bias: 1 x 26
  z3 = a2 * Theta2';                        % output pre-activation: 1 x 10
  a3 = sigmoid(z3);                         % output activation: 1 x 10
  d3 = a3 - (y(i) == (1:num_labels));       % output-layer error: 1 x 10
  d2 = d3 * Theta2;                         % back-propagated error: 1 x 26
  d2 = d2(:, 2:end) .* sigmoidGradient(z2); % drop the bias term: 1 x 25
  Theta2_grad = Theta2_grad + d3' * a2;     % 10 x 26
  Theta1_grad = Theta1_grad + d2' * a1;     % 25 x 401
end
Theta1_grad = (1/m) * Theta1_grad;
Theta2_grad = (1/m) * Theta2_grad;
% Add regularization to the gradients, skipping the bias columns
Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + (lambda/m) * Theta2(:, 2:end);
Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + (lambda/m) * Theta1(:, 2:end);
% Unroll gradients
grad = [Theta1_grad(:) ; Theta2_grad(:)];
end
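% ---------------------------------------------------------------------
% Optional: a fully vectorized variant of nnCostFunction. This is a
% sketch for comparison only, not part of the original exercise. It
% assumes the course-provided sigmoid helper and the sigmoidGradient
% defined above, and it relies on implicit broadcasting (Octave, or
% MATLAB R2016b and later).
% ---------------------------------------------------------------------
function [J, grad] = nnCostFunctionVec(nn_params, input_layer_size, ...
                                       hidden_layer_size, num_labels, ...
                                       X, y, lambda)
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));
Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));
m = size(X, 1);

Y = (y == (1:num_labels));          % one-hot labels: m x num_labels

% Forward pass for all m examples at once
A1 = [ones(m, 1) X];                % m x (input_layer_size + 1)
Z2 = A1 * Theta1';                  % m x hidden_layer_size
A2 = [ones(m, 1) sigmoid(Z2)];      % m x (hidden_layer_size + 1)
A3 = sigmoid(A2 * Theta2');         % m x num_labels

% Cross-entropy cost plus regularization (bias columns excluded)
J = (1/m) * sum(sum(-Y .* log(A3) - (1 - Y) .* log(1 - A3)));
J = J + (lambda/(2*m)) * (sum(sum(Theta1(:, 2:end).^2)) + ...
                          sum(sum(Theta2(:, 2:end).^2)));

% Backward pass
D3 = A3 - Y;                                          % m x num_labels
D2 = (D3 * Theta2(:, 2:end)) .* sigmoidGradient(Z2);  % m x hidden_layer_size
Theta2_grad = (1/m) * (D3' * A2);
Theta1_grad = (1/m) * (D2' * A1);
Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + (lambda/m) * Theta2(:, 2:end);
Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + (lambda/m) * Theta1(:, 2:end);

grad = [Theta1_grad(:) ; Theta2_grad(:)];
end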