So I made a simple neural network for MNIST (784 input neurons, 30 hidden neurons, and 10 output neurons), but the cost function (MSE) always increases to 4.5 and never decreases, and the output neurons eventually all just output 1. Here's the code:
np.set_printoptions(suppress=True)
epochs = 50
batch = 60000
learning_rate = 3
B1 = np.random.randn(30, 1)
B2 = np.random.randn(10, 1)
W1 = np.random.randn(784, 30)
W2 = np.random.randn(30, 10)
for i in range(epochs):
X, Y = shuffle(X, Y)
c_B1 = np.zeros(B1.shape)
c_B2 = np.zeros(B2.shape)
c_W1 = np.zeros(W1.shape)
c_W2 = np.zeros(W2.shape)
for b in range(0, np.size(X, 0), batch):
inputs = X[b:b+batch]
outputs = Y[b:b+batch]
Z1 = nn_forward(inputs, W1.T, B1)
A1 = sigmoid(Z1)
Z2 = nn_forward(A1, W2.T, B2)
A2 = sigmoid(Z2)
e_L = (outputs - A2) * d_sig(Z2)
e_1 = np.multiply(np.dot(e_L, W2.T), d_sig(Z1))
d_B2 = np.sum(e_L, axis=0)
d_B1 = np.sum(e_1, axis=0)
d_W2 = np.dot(A1.T, e_L)
d_W1 = np.dot(inputs.T, e_1)
d_B2 = d_B2.reshape((np.size(B2, 0), 1))
d_B1 = d_B1.reshape((np.size(B1, 0), 1))
c_B1 = np.add(c_B1, d_B1)
c_B2 = np.add(c_B2, d_B2)
c_W1 = np.add(c_W1, d_W1)
c_W2 = np.add(c_W2, d_W2)
B1 = np.subtract(B1, (learning_rate/batch) * c_B1)
B2 = np.subtract(B2, (learning_rate/batch) * c_B2)
W1 = np.subtract(W1, (learning_rate/batch) * c_W1)
W2 = np.subtract(W2, (learning_rate/batch) * c_W2)
print(i, cost(outputs, A2))
What am I doing wrong?