Neural Network fails on mnist

Question

I coded a neural network in python to solve the mnist task. But the error rate changes really little (6th digit after comma) after one epoch and the network hasn't learnd much after 10000 epochs... Can you help me what I've done wrong and how to improve my code to solve mnist? I set learning rate eta to 0.05.

import numpy as np
import pickle
import time

class FeedForwardNetwork():

    def __init__(self, input_dim, hidden_dim, output_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.input_layer = np.array([])
        self.hidden_layer = np.array([])
        self.output_layer = np.array([])
        self.weights_input_hidden = (2 * np.random.random((input_dim, hidden_dim)) - 1)/1000
        self.weights_hidden_output = (2* np.random.random((hidden_dim, output_dim)) - 1)/1000

        self.validation_data = np.array([])
        self.validation_data_solution = np.array([])

    def _tanh(self, x, deriv=False):
        if not deriv:
            return np.tanh(x)
        return 1-np.tanh(x)**2

    def _softmax(self, x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    def set_training_data(self, training_data_input, training_data_target):
        """Splits the data up into training and validation data with a ratio of 0.75/0.25 and sets the data for training."""
        if len(training_data_input) != len(training_data_target):
            raise Exception("Number of training examples and training targets does not match!")
        len_training_data = int((len(training_data_input)/100*75)//1)
        self.input_layer = training_data_input[:len_training_data]
        self.output_layer = training_data_target[:len_training_data]
        self.validation_data = np.array([training_data_input[len_training_data:]])
        self.validation_data_solution = np.array([training_data_target[len_training_data:]])

    def save(self, filename):
        """Saves the weights into a pickle file."""
        with open(filename, "wb") as network_file:
            pickle.dump(self.weights_input_hidden, network_file)
            pickle.dump(self.weights_hidden_output, network_file)

    def load(self, filename):
        """Loads network weights from a pickle file."""
        with open(filename, "rb") as network_file:
            weights_input_hidden = pickle.load(network_file)
            weights_hidden_output = pickle.load(network_file)

        if len(weights_input_hidden) != len(self.weights_input_hidden):
            raise Exception("File contains weights that does not match the current networks size!")
        if len(weights_hidden_output) != len(self.weights_hidden_output):
            raise Exception("File contains weights that does not match the current networks size!")

        self.weights_input_hidden = weights_input_hidden
        self.weights_hidden_output = weights_hidden_output

    def measure_error(self, input_data, output_data):
        return 1/2 * np.sum((output_data - self.activate(input_data))**2)

    def forward_propagate(self, input_data):
        """Proceds the input data from input neurons up to output neurons and returns the output layer"""
        input_layer = input_data
        self.hidden_layer = self.__tanh(np.dot(input_layer, self.weights_input_hidden))
        output_layer = self.__tanh(np.dot(self.hidden_layer, self.weights_hidden_output))
        return output_layer

    def activate(self, input_data):
        """Sends the given input through the net and returns the net's prediction."""
        return self.forward_propagate(input_data)

    def back_propagate(self, input_data, output_data, eta):
        """Calculates the difference between target output and output and adjust the weights to fit the target output better.
           The parameter eta is the learning rate."""
        num_of_samples = len(input_data)
        output_layer = self.forward_propagate(input_data)
        output_layer_error = output_data - output_layer
        output_layer_delta = output_layer_error * self.__tanh(output_layer, deriv=True)
        #How much did each hidden neuron contribute to the output error?
        #Multiplys delta term with weights
        hidden_layer_error = output_layer_delta.dot(self.weights_hidden_output.T)

        #If the prediction is good, the second term will be small and the change will be small
        #Ex: target: 1 -> Slope will be 1 so the second term will be big
        hidden_layer_delta = hidden_layer_error * self.__tanh(self.hidden_layer, deriv=True)
        #The both lines return a matrix. A row stands for all weights connected to one neuron.
        #E.g. [1, 2, 3] -> Weights to Neuron A
        #     [4, 5, 6] -> Weights to Neuron B
        hidden_weights_change = self.input_layer.T.dot(hidden_layer_delta)/num_of_samples
        output_weights_change = self.hidden_layer.T.dot(output_layer_delta)/num_of_samples

        self.weights_hidden_output += (output_weights_change * eta) / num_of_samples
        self.weights_input_hidden += (hidden_weights_change * eta) / num_of_samples

    def batch_train(self, epochs, eta, patience=10):
        """Trains the network in batch mode that means the weigts are updated after showing all training examples.
           Eta is the learning rate and patience is the number of epochs that the validation error is allowed to increase before aborting."""
        validation_error = self.measure_error(self.validation_data, self.validation_data_solution)
        for epoch in range(epochs):
            self.back_propagate(self.input_layer, self.output_layer, eta)
            validation_error_new = self.measure_error(self.validation_data, self.validation_data_solution)
            if  validation_error_new < validation_error:
                validation_error = validation_error_new
            else:
                patience -= 1
                if patience == 0:
                    print("Abort Training. Overfitting has started! Epoch: {0}. Error: {1}".format(epoch, validation_error_new))
                    return
            print("Epoch: {0}, Error: {1}".format(epoch, validation_error))
            self.save("Network_Mnist.net")

Thank you!

Epoch: 1813, Error: 7499.944371111551 Epoch: 1814, Error: 7499.944368765047

Lifu Huang Lifu Huang · Accepted Answer · 2016-05-08T05:47:36

I guess you might want to add a softmax layer with cross-entropy error. Tanh will output negative value when input is negative, it is clearly not what you want for the output layer since probability should be within range [0, 1].

This is a toy feed forward NN I implemented, which might be helpful for you.

Neural Network fails on mnist

1 Answers