I'm new to this site, so I apologize in advance if I'm doing anything wrong in this post.
I'm currently trying out machine learning and learning about neural networks, working through http://neuralnetworksanddeeplearning.com/. However, I don't fully understand everything, and all of the code there is written in Python (I'm more comfortable with JavaScript).
I've created a program that works for simple data. However, for more complicated data (handwritten digit recognition with the MNIST data), the accuracy isn't nearly as high as the website above says it should be. I'm using a network with 784 input neurons, a single hidden layer (I've tried anywhere from 10 to 400 hidden neurons), and 10 output neurons, trained for hundreds of iterations. I think there is an error in my backpropagation step (i.e. the train step; I'm including the other functions here as reference) that prevents it from learning fast enough. By the way, I'm using cross-entropy as my cost function. I would really appreciate it if anyone could help me find the error. Thanks in advance.
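For reference, the cost I'm computing in train is the cross-entropy from the book, C = -(1/n) * Σ [y * ln(a) + (1 - y) * ln(1 - a)], and since the output layer is sigmoid I take the output-layer error to simply be delta = a - y; that's what the totalCost and outputLayerError lines below are meant to implement.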
Below is the code. The weights are arranged in an array of arrays of arrays: weight[i][j][k] is the weight between the jth neuron in the ith layer and the kth neuron in the (i+1)th layer. Similarly, bias[i][j] is the bias of the jth neuron in the (i+1)th layer. The training data is formatted as an array of objects with the keys input and output (see the example below).
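Just to illustrate the layout (with made-up numbers), a hypothetical [2, 3, 1] network and its training data would look roughly like this:
let weights = [
    [ // layer 0 -> layer 1
        [0.1, -0.3, 0.7], // from input neuron 0 to each of the 3 hidden neurons
        [0.5, 0.2, -0.6]  // from input neuron 1 to each of the 3 hidden neurons
    ],
    [ // layer 1 -> layer 2
        [0.4], // from hidden neuron 0 to the output neuron
        [-0.8],
        [0.05]
    ]
];
let biases = [
    [0.1, -0.2, 0.3], // biases of the 3 hidden neurons (layer 1)
    [0.6]             // bias of the output neuron (layer 2)
];
let trainingData = [
    {input: [0.2, 0.9], output: [1]},
    {input: [0.7, 0.4], output: [0]}
];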
class NeuralNetwork {
    constructor(layers) {
        // Check if layers is a valid argument
        // Initialize neural network
        if (!Array.isArray(layers) || layers.length < 2) {
            throw Error("Layers must be specified as an array of length at least 2");
        }
        this.weights = [];
        this.biases = [];
        for (let i = 0, l = layers.length; i < l; ++i) {
            let currentLayer = layers[i];
            if (typeof currentLayer === "number" && Number.isInteger(currentLayer) && currentLayer > 0) {
                let numWeights = layers[i + 1];
                if (i < l - 1) {
                    this.weights.push([]);
                }
                if (i) {
                    this.biases.push([]);
                }
                // Seed weights and biases
                for (let j = 0; j < currentLayer; ++j) {
                    if (i < l - 1) {
                        let weights = [];
                        for (let k = 0; k < numWeights; ++k) {
                            weights.push(Math.random() * 2 - 1);
                        }
                        this.weights[i].push(weights);
                    }
                    if (i) {
                        this.biases[i - 1].push(Math.random() * 2 - 1);
                    }
                }
            } else {
                throw Error("Array used to specify NeuralNetwork layers must consist solely of positive integers");
            }
        }
        this.activation = (x) => 1 / (1 + Math.exp(-x));
        this.activationDerivative = (x) => this.activation(x) * (1 - this.activation(x));
        Object.freeze(this);
        console.log("Successfully initialized NeuralNetwork");
        return this;
    }
    run(input, training) {
        // Forward propagation
        let currentInput;
        if (training) {
            currentInput = [input.map((a) => {return {before: a, after: a}})];
        } else {
            currentInput = [...input];
        }
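        // In training mode every layer's result keeps both the weighted input ("before")
        // and the activation ("after"), so that train() can reuse them for backpropagation.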
        for (let i = 0, l = this.weights.length; i < l; ++i) {
            let newInput = [];
            for (let j = 0, m = this.weights[i][0].length, n = (training ? currentInput[i] : currentInput).length; j < m; ++j) {
                let sum = this.biases[i][j];
                for (let k = 0; k < n; ++k) {
                    sum += (training ? currentInput[i][k].after : currentInput[k]) * this.weights[i][k][j];
                }
                if (training) {
                    newInput.push({
                        before: sum,
                        after: this.activation(sum)
                    });
                } else {
                    newInput.push(this.activation(sum));
                }
            }
            if (training) {
                currentInput.push(newInput);
            } else {
                currentInput = newInput;
            }
        }
        return currentInput;
    }
    train(data, learningRate = 0.1, batch = 50, iterations = 10000) {
        // Backward propagation
        console.log("Initialized training");
        let startTime = Date.now(), // used by the final log message
            length = data.length,
            totalCost = 0,
            learningRateFunction = typeof learningRate === "function",
            batchCount = 0,
            weightChanges = [],
            biasChanges = [];
        for (let i = 0; i < iterations; ++i) {
            let rate = learningRateFunction ? learningRate(i, totalCost) : learningRate;
            totalCost = 0;
            for (let j = 0, l = length; j < l; ++j) {
                let currentData = data[j],
                    result = this.run(currentData.input, true),
                    outputLayer = result[result.length - 1],
                    outputLayerError = [],
                    errors = [];
                for (let k = 0, m = outputLayer.length; k < m; ++k) {
                    let currentOutputNeuron = outputLayer[k];
                    outputLayerError.push(currentOutputNeuron.after - currentData.output[k]);
                    totalCost -= Math.log(currentOutputNeuron.after) * currentData.output[k] + Math.log(1 - currentOutputNeuron.after) * (1 - currentData.output[k]);
                }
                errors.push(outputLayerError);
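                // Propagate the error backwards: the error of a hidden neuron is the weighted
                // sum of the errors of the next layer, times the sigmoid derivative at that neuron.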
                for (let k = result.length - 1; k > 1; --k) {
                    let previousErrors = errors[0],
                        newErrors = [],
                        currentLayerWeights = this.weights[k - 1],
                        previousResult = result[k - 1];
                    for (let i = 0, n = currentLayerWeights.length; i < n; ++i) {
                        let sum = 0,
                            currentNeuronWeights = currentLayerWeights[i];
                        for (let j = 0, o = currentNeuronWeights.length; j < o; ++j) {
                            sum += currentNeuronWeights[j] * previousErrors[j];
                        }
                        newErrors.push(sum * this.activationDerivative(previousResult[i].before));
                    }
                    errors.unshift(newErrors);
                }
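                // Accumulate -rate * gradient for this example (dC/dw = a_in * delta, dC/db = delta);
                // the accumulated changes are only applied once per mini-batch below.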
                for (let k = 0, n = this.biases.length; k < n; ++k) {
                    if (!weightChanges[k]) weightChanges[k] = [];
                    if (!biasChanges[k]) biasChanges[k] = [];
                    let currentLayerWeights = this.weights[k],
                        currentLayerBiases = this.biases[k],
                        currentLayerErrors = errors[k],
                        currentLayerResults = result[k],
                        currentLayerWeightChanges = weightChanges[k],
                        currentLayerBiasChanges = biasChanges[k];
                    for (let i = 0, o = currentLayerBiases.length; i < o; ++i) {
                        let change = rate * currentLayerErrors[i];
                        for (let j = 0, p = currentLayerWeights.length; j < p; ++j) {
                            if (!currentLayerWeightChanges[j]) currentLayerWeightChanges[j] = [];
                            currentLayerWeightChanges[j][i] = (currentLayerWeightChanges[j][i] || 0) - change * currentLayerResults[j].after;
                        }
                        currentLayerBiasChanges[i] = (currentLayerBiasChanges[i] || 0) - change;
                    }
                }
                ++batchCount;
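                // At the end of each mini-batch (or on the very last example), apply the
                // accumulated changes, averaged over the batch size, and reset the accumulators.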
                if (batchCount % batch === 0 || (i === iterations - 1 && j === l - 1)) {
                    for (let k = 0, n = this.weights.length; k < n; ++k) {
                        let currentLayerWeights = this.weights[k],
                            currentLayerBiases = this.biases[k],
                            currentLayerWeightChanges = weightChanges[k],
                            currentLayerBiasChanges = biasChanges[k];
                        for (let i = 0, o = currentLayerWeights.length; i < o; ++i) {
                            let currentNeuronWeights = currentLayerWeights[i],
                                currentNeuronWeightChanges = currentLayerWeightChanges[i];
                            for (let j = 0, p = currentNeuronWeights.length; j < p; ++j) {
                                currentNeuronWeights[j] += currentNeuronWeightChanges[j] / batch;
                            }
                            currentLayerBiases[i] += currentLayerBiasChanges[i] / batch;
                        }
                    }
                    weightChanges = [];
                    biasChanges = [];
                }
            }
            totalCost /= length;
        }
        console.log(`Training ended due to iterations reached\nIterations: ${iterations} times\nTime spent: ${Date.now() - startTime} ms`);
        return this;
    }
}
Example
Tests whether a point lies inside the unit circle. For this example, the neural network performs well. However, for more complicated examples such as handwriting recognition, it performs really badly (the best I can get with a single network is about 70% accuracy, compared to the 96% accuracy stated on the website, even when using similar parameters).
let trainingData = [];
for (let i = 0; i < 1000; ++i) {
    let [x, y] = [Math.random(), Math.random()];
    trainingData.push({input: [x, y], output: [Number(Math.hypot(x, y) < 1)]});
}
let brain = new NeuralNetwork([2, 5, 5, 1]);
brain.train(trainingData.slice(0, 700), 0.1, 10, 500); // Accuracy rate 95.33% on the remaining 300 entries in trainingData
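For reference, I measure that accuracy with a quick check roughly like this (not part of the class; I just threshold the single output at 0.5):
let testData = trainingData.slice(700);
let correct = 0;
for (let i = 0; i < testData.length; ++i) {
    // run() without the training flag returns a plain array of output activations
    let prediction = brain.run(testData[i].input)[0] >= 0.5 ? 1 : 0;
    if (prediction === testData[i].output[0]) ++correct;
}
console.log(`Accuracy: ${(100 * correct / testData.length).toFixed(2)}%`);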