I made a fully connected neural network in numpy, based on Welch Labs' videos, but when I try to train it the gradients seem to explode right from the start, which is weird. I'm posting the whole code below, which is testable as-is in Python 3+. Only costFunctionPrime seems to be breaking the gradient descent, but I have no idea what is happening. Can someone smarter than me help? (I've also put a small standalone gradient-check sketch right after the code.)
EDIT: the trng_input and trng_output below are not the ones I actually use; they are just placeholders, and the real training runs on a much bigger dataset.
import numpy as np
import random

# placeholder data: 100 samples with 7 inputs and 2 targets each
trng_input = [[random.random() for _ in range(7)] for _ in range(100)]
trng_output = [[random.random() for _ in range(2)] for _ in range(100)]

def relu(x):
    return x * (x > 0)

def reluprime(x):
    return (x > 0).astype(x.dtype)

class Neural_Net():
    def __init__(self, data_input, data_output):
        self.data_input = data_input
        self.trng_output = data_output
        self.bias = 0
        self.nodes = np.array([7, 2])   # layer sizes: 7 inputs -> 2 outputs
        self.LR = 0.01                  # learning rate
        self.weightinit()
        self.training(1000, self.LR)

    def randomweight(self, n):
        # n random weights drawn uniformly from [-1, 1]
        output = []
        for i in range(n):
            output.append(random.uniform(-1, 1))
        return output

    def weightinit(self):
        # one weight matrix per pair of consecutive layers
        self.weights = []
        for n in range(len(self.nodes) - 1):
            temp = []
            for _ in range(self.nodes[n] + self.bias):
                temp.append(self.randomweight(self.nodes[n + 1]))
            self.weights.append(temp)
        self.weights = [np.array(w) for w in self.weights]

    def forward(self, data):
        # store pre-activations (Z) and activations (A) for backprop
        self.Z = []
        self.A = [np.array(data)]
        for layer in range(len(self.weights)):
            self.Z.append(np.dot(self.A[layer], self.weights[layer]))
            self.A.append(relu(self.Z[layer]))
        self.output = self.A[-1]
        return self.output

    def costFunction(self):
        # squared-error cost summed over the training set (one value per output node)
        self.totalcost = 0.5 * sum((self.trng_output - self.output) ** 2)
        return self.totalcost

    def costFunctionPrime(self):
        # backpropagation: compute dCost/dW for every weight matrix
        self.forward(self.data_input)
        self.delta = [[] for x in range(len(self.weights))]
        self.DcostDw = [[] for x in range(len(self.weights))]
        for layer in reversed(range(len(self.weights))):
            Zprime = reluprime(self.Z[layer])
            if layer == len(self.weights) - 1:
                # output layer: delta = dCost/dA * dA/dZ
                self.delta[layer] = np.multiply(-(self.trng_output - self.output), Zprime)
            else:
                # hidden layers: propagate the next layer's delta backwards
                self.delta[layer] = np.dot(self.delta[layer + 1], self.weights[layer + 1].T) * Zprime
            self.DcostDw[layer] = np.dot(self.A[layer].T, self.delta[layer])
        return self.DcostDw

    def backprop(self, LR):
        # gradient descent step: W <- W - LR * dCost/dW
        self.DcostDw = (np.array(self.DcostDw) * LR).tolist()
        self.weights = (np.array(self.weights) - np.array(self.DcostDw)).tolist()

    def training(self, iteration, LR):
        for i in range(iteration):
            self.costFunctionPrime()
            self.backprop(LR)
            # note: "/" is true division in Python 3, so this condition is always
            # True and the cost is printed on every iteration
            if (i / 1000.0) == (i / 1000):
                print(self.costFunction())
                print(sum(self.costFunction()) / len(self.costFunction()))

NN = Neural_Net(trng_input, trng_output)
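Since I suspect costFunctionPrime, here is the standalone gradient-check sketch mentioned above. It is not part of my network, just a rough sanity check of the last-layer backprop formula (dCost/dW = A.T · delta with delta = -(y - yhat) * relu'(Z)), using the same 7 -> 2 shape and the same 0.5 * squared-error cost, and comparing the analytic gradient against central finite differences:

import numpy as np

# same relu / reluprime as above, repeated so this snippet runs on its own
def relu(x):
    return x * (x > 0)

def reluprime(x):
    return (x > 0).astype(x.dtype)

rng = np.random.RandomState(0)
X = rng.rand(5, 7)                 # 5 samples, 7 inputs
Y = rng.rand(5, 2)                 # 5 samples, 2 targets
W = rng.uniform(-1, 1, (7, 2))     # single 7 -> 2 weight matrix

def cost(W):
    A = relu(np.dot(X, W))
    return 0.5 * np.sum((Y - A) ** 2)

# analytic gradient, same formula as the last-layer branch of costFunctionPrime
Z = np.dot(X, W)
A = relu(Z)
delta = -(Y - A) * reluprime(Z)
grad_analytic = np.dot(X.T, delta)

# numerical gradient by central differences
eps = 1e-5
grad_numeric = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        Wp, Wm = W.copy(), W.copy()
        Wp[i, j] += eps
        Wm[i, j] -= eps
        grad_numeric[i, j] = (cost(Wp) - cost(Wm)) / (2 * eps)

print(np.max(np.abs(grad_analytic - grad_numeric)))  # should be tiny, around 1e-9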
As asked, this is the expected result (the result I got using the sigmoid activation function):
As you can see, the numbers go down, so the network is training.
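For reference, the sigmoid version only swaps the activation and its derivative; roughly, the drop-in replacements for relu/reluprime look like this (the derivative takes the pre-activation Z, same convention as reluprime):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoidprime(x):
    # derivative of sigmoid, evaluated at the pre-activation x
    s = sigmoid(x)
    return s * (1.0 - s)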
This is the result using the relu activation function:
Here the network is stuck and isn't training; it never trains when I use the relu activation function, and I would like to understand why.