2
votes

I'm new to this site, so I apologize in advance if I'm doing anything wrong in this post.

I'm currently trying out machine learning, and I'm learning neural networks. I'm currently using http://neuralnetworksanddeeplearning.com/. However, I don't fully understand everything, and all of the code is written in Python (I'm more comfortable with JavaScript).

I've created a program that works for simple data. However, for more complicated data (handwritten digits recognition with MNIST data), the accuracy rate isn't nearly as high as the website above says it will be, by using a neural network of 784 input neurons, 10-400 hidden neurons in the hidden layer (only one hidden layer and tried several possible number of neurons), and 10 output neurons with hundreds of iterations. I think that there is an error with my back propagation step (i.e. the train step, I'm including the other functions here as reference) that prevents it from learning fast enough (BTW, I'm using the cross-entropy as my cost function). I would really appreciate if anyone can help me find the error. Thanks in advance.

Below is the code. The weights are arranged in an array of arrays of arrays (weight[i][j][k] is the weight between the jth neurons in the ith layer and the kth neuron in the (i+1)th layer). Similarly, bias[i][j] is the bias of the (i+1)th layer for the jth neuron. The training data is formatted as an array of objects with keys of inputs and outputs (see example below).

/**
 * A fully-connected feed-forward neural network with sigmoid activations,
 * trained by mini-batch stochastic gradient descent using the cross-entropy
 * cost function.
 *
 * Layout: weights[i][j][k] is the weight between neuron j of layer i and
 * neuron k of layer i+1; biases[i][j] is the bias of neuron j in layer i+1
 * (the input layer has no biases).
 */
class NeuralNetwork {
  /**
   * @param {number[]} layers - Neuron counts per layer, e.g. [784, 30, 10].
   * @throws {Error} If layers is not an array of at least 2 positive integers.
   */
  constructor(layers) {
    // Check if layers is a valid argument
    // Initialize neural network
    if (!Array.isArray(layers) || layers.length < 2) {
      throw Error("Layers must be specified as an array of length at least 2");
    }
    this.weights = [];
    this.biases = [];
    for (let i = 0, l = layers.length; i < l; ++i) {
      let currentLayer = layers[i];
      if (typeof currentLayer === "number" && Number.isInteger(currentLayer) && currentLayer > 0) {
        // Fan-out per neuron = size of the next layer. This is undefined for
        // the last layer, but the (i < l - 1) guards below keep it unused there.
        let numWeights = layers[i + 1];
        if (i < l - 1) {
          this.weights.push([]);
        }
        if (i) {
          // biases[i - 1] holds the biases of layer i (the input layer has none).
          this.biases.push([]);
        }

        // Seed weights and biases uniformly in [-1, 1)
        for (let j = 0; j < currentLayer; ++j) {
          if (i < l - 1) {
            let weights = [];
            for (let k = 0; k < numWeights; ++k) {
              weights.push(Math.random() * 2 - 1);
            }
            this.weights[i].push(weights);
          }
          if (i) {
            this.biases[i - 1].push(Math.random() * 2 - 1);
          }
        }
      } else {
        throw Error("Array used to specify NeuralNetwork layers must consist solely of positive integers");
      }
    }
    // Logistic sigmoid and its derivative, expressed through the sigmoid itself.
    this.activation = (x) => 1 / (1 + Math.exp(-x));
    this.activationDerivative = (x) => this.activation(x) * (1 - this.activation(x));
    // NOTE: Object.freeze is shallow — the nested weight/bias arrays remain
    // mutable, which is what allows train() to update them in place.
    Object.freeze(this);
    console.log("Successfully initialized NeuralNetwork");
    return this;
  }
  /**
   * Forward propagation.
   * @param {number[]} input - One activation value per input neuron.
   * @param {boolean} [training] - When true, returns the activations of every
   *   layer as arrays of {before, after} pairs (pre-/post-activation values),
   *   which train() needs for backpropagation. Otherwise returns only the
   *   output layer's activations as plain numbers.
   * @returns {number[]|Array<Array<{before: number, after: number}>>}
   */
  run(input, training) {
    let currentInput;
    if (training) {
      // Input layer: pre- and post-activation are the raw inputs themselves.
      currentInput = [input.map((a) => {return {before: a, after: a}})];
    } else {
      currentInput = [...input];
    }
    for (let i = 0, l = this.weights.length; i < l; ++i) {
      let newInput = [];
      for (let j = 0, m = this.weights[i][0].length, n = (training ? currentInput[i] : currentInput).length; j < m; ++j) {
        // Weighted sum of the previous layer plus this neuron's bias.
        let sum = this.biases[i][j];
        for (let k = 0; k < n; ++k) {
          sum += (training ? currentInput[i][k].after : currentInput[k]) * this.weights[i][k][j];
        }
        if (training) {
          newInput.push({
            before: sum,
            after: this.activation(sum)
          });
        } else {
          newInput.push(this.activation(sum));
        }
      }
      if (training) {
        currentInput.push(newInput);
      } else {
        currentInput = newInput;
      }
    }
    return currentInput;
  }
  /**
   * Mini-batch gradient-descent training (backpropagation) with a
   * cross-entropy cost.
   * @param {Array<{input: number[], output: number[]}>} data - Training set.
   * @param {number|Function} [learningRate=0.1] - Constant rate, or a function
   *   (iteration, lastAverageCost) => rate evaluated once per iteration.
   * @param {number} [batch=50] - Mini-batch size; accumulated changes are
   *   applied (averaged over `batch`) every `batch` samples.
   * @param {number} [iterations=10000] - Full passes over `data`.
   * @returns {NeuralNetwork} this, for chaining.
   */
  train(data, learningRate = 0.1, batch = 50, iterations = 10000) {
    console.log("Initialized training");
    // BUG FIX: startTime was never declared, so the summary log at the end of
    // this method threw a ReferenceError after every training run.
    let startTime = Date.now(),
        length = data.length,
        totalCost = 0,
        learningRateFunction = typeof learningRate === "function",
        batchCount = 0,
        weightChanges = [],
        biasChanges = [];
    for (let i = 0; i < iterations; ++i) {
      let rate = learningRateFunction ? learningRate(i, totalCost) : learningRate;
      totalCost = 0;
      for (let j = 0, l = length; j < l; ++j) {
        let currentData = data[j],
            result = this.run(currentData.input, true),
            outputLayer = result[result.length - 1],
            outputLayerError = [],
            errors = [];
        // Output-layer delta for cross-entropy + sigmoid is simply (a - y);
        // the activation derivative cancels out of the gradient.
        for (let k = 0, m = outputLayer.length; k < m; ++k) {
          let currentOutputNeuron = outputLayer[k];
          outputLayerError.push(currentOutputNeuron.after - currentData.output[k]);
          totalCost -= Math.log(currentOutputNeuron.after) * currentData.output[k] + Math.log(1 - currentOutputNeuron.after) * (1 - currentData.output[k]);
        }
        errors.push(outputLayerError);
        // Back-propagate deltas through the hidden layers (these shadowed
        // i/j indices are local to this pass and intentional).
        for (let k = result.length - 1; k > 1; --k) {
          let previousErrors = errors[0],
              newErrors = [],
              currentLayerWeights = this.weights[k - 1],
              previousResult = result[k - 1];
          for (let i = 0, n = currentLayerWeights.length; i < n; ++i) {
            let sum = 0,
                currentNeuronWeights = currentLayerWeights[i];
            for (let j = 0, o = currentNeuronWeights.length; j < o; ++j) {
              sum += currentNeuronWeights[j] * previousErrors[j];
            }
            newErrors.push(sum * this.activationDerivative(previousResult[i].before));
          }
          errors.unshift(newErrors);
        }
        // Accumulate weight/bias changes for this sample into the batch totals.
        for (let k = 0, n = this.biases.length; k < n; ++k) {
          if (!weightChanges[k]) weightChanges[k] = [];
          if (!biasChanges[k]) biasChanges[k] = [];
          let currentLayerWeights = this.weights[k],
              currentLayerBiases = this.biases[k],
              currentLayerErrors = errors[k],
              currentLayerResults = result[k],
              currentLayerWeightChanges = weightChanges[k],
              currentLayerBiasChanges = biasChanges[k];
          for (let i = 0, o = currentLayerBiases.length; i < o; ++i) {
            let change = rate * currentLayerErrors[i];
            for (let j = 0, p = currentLayerWeights.length; j < p; ++j) {
              if (!currentLayerWeightChanges[j]) currentLayerWeightChanges[j] = [];
              currentLayerWeightChanges[j][i] = (currentLayerWeightChanges[j][i] || 0) - change * currentLayerResults[j].after;
            }
            currentLayerBiasChanges[i] = (currentLayerBiasChanges[i] || 0) - change;
          }
        }
        ++batchCount;
        // Flush a batch when full, or at the very last sample of training.
        if (batchCount % batch === 0 || (i === iterations - 1 && j === l - 1)) {
          for (let k = 0, n = this.weights.length; k < n; ++k) {
            let currentLayerWeights = this.weights[k],
                currentLayerBiases = this.biases[k],
                currentLayerWeightChanges = weightChanges[k],
                currentLayerBiasChanges = biasChanges[k];
            for (let i = 0, o = currentLayerWeights.length; i < o; ++i) {
              let currentNeuronWeights = currentLayerWeights[i],
                  currentNeuronWeightChanges = currentLayerWeightChanges[i];
              for (let j = 0, p = currentNeuronWeights.length; j < p; ++j) {
                currentNeuronWeights[j] += currentNeuronWeightChanges[j] / batch;
              }
              currentLayerBiases[i] += currentLayerBiasChanges[i] / batch;
            }
          }
          weightChanges = [];
          biasChanges = [];
        }
      }
      totalCost /= length;
    }
    console.log(`Training ended due to iterations reached\nIterations: ${iterations} times\nTime spent: ${Date.now() - startTime} ms`);
    return this;
  }
}

Example

Tests if a point is inside a circle. For this example, the neural network performs well. However, for more complicated examples such as handwriting recognition, the neural network performs really badly (best I can get for a single neural network is 70% accuracy, compared to the 96% accuracy stated in the website even when using similar parameters).

// Build 1000 random points in the unit square, labelled by whether they fall
// inside the unit circle (quarter-circle for x, y drawn from [0, 1)).
const trainingData = Array.from({ length: 1000 }, () => {
  const x = Math.random();
  const y = Math.random();
  return { input: [x, y], output: [Number(Math.hypot(x, y) < 1)] };
});
const brain = new NeuralNetwork([2, 5, 5, 1]);
brain.train(trainingData.slice(0, 700), 0.1, 10, 500); // Accuracy rate 95.33% on the remaining 300 entries in trainingData
1
@desertnaut Thank you for your feedback. I have removed from my question some of the code that is unnecessary to check. Is this better now? — WebWorker

1 Answer

0
votes

Ok, I guess I'm going to answer my own question. So, I don't think there is an error in my code and it's perfectly fine to use (albeit really, really inefficient) if anyone wants to.

The reason why my runs on the MNIST data did not give accurate answers come from the fact that I did not process the data at first. The raw data gave the darkness of the 28*28 pixels in the range of [0, 255], which I used directly as the input for each of the training data. The correct procedure here would be to convert this into the range of [0, 1] or [-1, 1].

The reason the [0, 255] range does not work as well is that the second layer of neurons (the first hidden layer) receives inputs that are very large in magnitude — strongly positive or strongly negative.

When the backpropagation algorithm computes the gradient, the change computed for each weight is very small, because it is proportional to the slope of the activation function at the neuron's input (the derivative of the logistic function is exp(-x)/(1+exp(-x))², which is close to 0 for inputs of large magnitude, positive or negative). Thus, the neural network takes a very long time to train and, in my case, was not able to learn the data well.

With the correct method, I am able to achieve around 90% accuracy for a 784*200*10 neural network in a fairly short time, though it still is not nearly as accurate as what the author says he is able to achieve using an even simpler algorithm in the link mentioned in the question.