2
votes

I have a simple neural network with 2 input neurons, 3 hidden neurons and 1 output neuron. hidden layer has bias.

I'm not used matrix operations to doing feed forward and backpropagation. when I run training function on a simple linear dataset, the error raises up and the predication result is wrong.

import random
from math import exp,pow,tanh

def random_weight():
    return random.random()

def sigmoid(x):
    return 1.0 / (1.0 + exp(-x))

def sigmoid_drv(x):
    return sigmoid(x)*(1.0-sigmoid(x))

w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
b_I = 1

activation = sigmoid
activation_drv = sigmoid_drv

def predict(x1,x2):
    global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
    a1_I = w11_I*x1 + w12_I*x2 + b_I
    z1_I = activation(a1_I)
    a2_I = w21_I*x1 + w22_I*x2 + b_I
    z2_I = activation(a2_I)
    a3_I = w31_I*x1 + w32_I*x2 + b_I
    z3_I = activation(a3_I)
    a1_II = w11_II*z1_I + w12_II*z2_I + w13_II*z3_I
    z1_II = activation(a1_II)
    return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

def train(x1,x2,y,alpha):
    global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
    a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1,x2)
    error = 0.5 * pow(y-z1_II,2)
    delta = y-z1_II * activation_drv(a1_II)

    w11_II += delta * z1_I * alpha
    w12_II += delta * z2_I * alpha
    w13_II += delta * z3_I * alpha

    w11_I += delta * w11_II * activation_drv(a1_I) * x1 * alpha
    w12_I += delta * w11_II * activation_drv(a1_I) * x2 * alpha
    w21_I += delta * w12_II * activation_drv(a2_I) * x1 * alpha
    w22_I += delta * w12_II * activation_drv(a2_I) * x2 * alpha
    w31_I += delta * w13_II * activation_drv(a3_I) * x1 * alpha
    w32_I += delta * w13_II * activation_drv(a3_I) * x2 * alpha

    b_I += (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha
    return error

data = [
    [0,0,0],
    [0,1,1],
    [1,0,1],
    [1,1,1],
]

for i in range(0,10):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += train(j[0],j[1],j[2],0.01)
    print(err)

print("-"*30)
for j in data:
    _, _, _, _, _, _, _, res = predict(j[0],j[1])
    print(j[0],",",j[1],"=",res)

For example the result of the code is:

0.363894453262
0.366966815948
0.366406041572
0.369982058232
0.36988850637
0.375869833099
0.378106172616
0.380456639936
0.37901554717
0.383723920259
------------------------------
(0, ',', 0, '=', 0.8439871540493414)
(0, ',', 1, '=', 0.861714406183168)
(1, ',', 0, '=', 0.8515477541104413)
(1, ',', 1, '=', 0.8676931366534011)

---------------- UPDATE ----------------

I change codes to this :

import random
from math import exp,pow

def random_weight():
    return random.random()

def sigmoid(x):
    return 1.0 / (1.0 + exp(-x))

def sigmoid_drv(x):
    return sigmoid(x)*(1.0-sigmoid(x))

w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
b_I = random_weight()

activation = sigmoid
activation_drv = sigmoid_drv

def predict(x1,x2):
    global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
    a1_I = w11_I*x1 + w12_I*x2 + b_I
    z1_I = activation(a1_I)
    a2_I = w21_I*x1 + w22_I*x2 + b_I
    z2_I = activation(a2_I)
    a3_I = w31_I*x1 + w32_I*x2 + b_I
    z3_I = activation(a3_I)
    a1_II = w11_II*z1_I + w12_II*z2_I + w13_II*z3_I
    z1_II = activation(a1_II)
    return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

def train(x1,x2,y,alpha):
    global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
    a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1,x2)
    error = 0.5 * pow(z1_II-y,2)
    delta = z1_II-y * activation_drv(a1_II)

    d_w11_II = delta * z1_I * alpha 
    d_w12_II = delta * z2_I * alpha
    d_w13_II = delta * z3_I * alpha

    d_w11_I = delta * w11_II * activation_drv(a1_I) * x1 * alpha
    d_w12_I = delta * w11_II * activation_drv(a1_I) * x2 * alpha
    d_w21_I = delta * w12_II * activation_drv(a2_I) * x1 * alpha
    d_w22_I = delta * w12_II * activation_drv(a2_I) * x2 * alpha
    d_w31_I = delta * w13_II * activation_drv(a3_I) * x1 * alpha
    d_w32_I = delta * w13_II * activation_drv(a3_I) * x2 * alpha
    d_b_I = (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha

    w11_II -= d_w11_II
    w12_II -= d_w12_II
    w13_II -= d_w13_II

    w11_I -= d_w11_I
    w12_I -= d_w12_I
    w21_I -= d_w21_I
    w22_I -= d_w22_I
    w31_I -= d_w31_I
    w32_I -= d_w32_I
    b_I -= d_b_I

    return error

data = [
    [0,0,0],
    [0,1,0],
    [1,0,0],
    [1,1,1],
]
for i in range(0,10):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += train(j[0],j[1],j[2],0.01)
    print(err)

print("-"*30)
for j in data:
    _, _, _, _, _, _, _, res = predict(j[0],j[1])
    print(j[0],",",j[1],"=",res)

I'm subtract weight errors with weights now. Error of network reduces. But prediction is still wrong.

The result of above code:

0.7793443881847488
0.7577581315356949
0.7432698222320477
0.7316129719356839
0.7160385688813552
0.6943522088277978
0.6862277294774705
0.6656984495700775
0.6584361784187711
0.6410006126876817
------------------------------
0 , 0 = 0.6049212721996029
0 , 1 = 0.6227402202339664
1 , 0 = 0.6139758543180651
1 , 1 = 0.6293581473456563
2
What is the error message?mkrieger1
There is no error message. Code execute correctly. The "Error" of neural network not approaching to zero. There is mathematical problems in this code.pedram
Ah sorry, I misunderstood "the error rises up".mkrieger1
Ok, but what were the results of your own debugging? Which parts of your code did you determine to be obviously correct and about which parts are you unsure?mkrieger1
The prediction is working correct. I think the problem is calculating error of weights and updating them. Maybe in chain rule.pedram

2 Answers

1
votes

One possible error is in the calculation of delta:

delta = z1_II-y * activation_drv(a1_II)

Add braces and change this to:

delta = (z1_II-y) * activation_drv(a1_II)
1
votes

I found the problem the sigmoid function was not good for this network. I change it to tanh and prediction results is correct now.

the final code :

import random
from math import exp,pow

class ANN:
    def random_weight(self):
        return random.random()

    def sigmoid(self,x):
        return 1.0 / (1.0 + exp(-x))

    def sigmoid_drv(self,x):
        return self.sigmoid(x)*(1.0-self.sigmoid(x))  

    def tanh(self, x):
        return (exp(x) - exp(-x)) / (exp(x) + exp(-x))

    def tanh_drv(self,x):
        return 1 - pow(self.tanh(x),2)

    def __init__(self):
        self.w11_I = self.random_weight()
        self.w12_I = self.random_weight()
        self.w21_I = self.random_weight()
        self.w22_I = self.random_weight()
        self.w31_I = self.random_weight()
        self.w32_I = self.random_weight()
        self.w11_II = self.random_weight()
        self.w12_II = self.random_weight()
        self.w13_II = self.random_weight()
        self.b_I = self.random_weight()
        self.activation = self.tanh
        self.activation_drv = self.tanh_drv

    def predict(self,x1,x2):
        a1_I = self.w11_I*x1 + self.w12_I*x2 + self.b_I
        z1_I = self.activation(a1_I)
        a2_I = self.w21_I*x1 + self.w22_I*x2 + self.b_I
        z2_I = self.activation(a2_I)
        a3_I = self.w31_I*x1 + self.w32_I*x2 + self.b_I
        z3_I = self.activation(a3_I)
        a1_II = self.w11_II*z1_I + self.w12_II*z2_I + self.w13_II*z3_I
        z1_II = self.activation(a1_II)
        return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

    def train(self,x1,x2,y,alpha):
        a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = self.predict(x1,x2)
        error = 0.5 * pow(z1_II-y,2)
        delta = (z1_II-y) * self.activation_drv(a1_II)

        d_w11_II = delta * z1_I * alpha 
        d_w12_II = delta * z2_I * alpha
        d_w13_II = delta * z3_I * alpha

        d_w11_I = delta * self.w11_II * self.activation_drv(a1_I) * x1 * alpha
        d_w12_I = delta * self.w11_II * self.activation_drv(a1_I) * x2 * alpha
        d_w21_I = delta * self.w12_II * self.activation_drv(a2_I) * x1 * alpha
        d_w22_I = delta * self.w12_II * self.activation_drv(a2_I) * x2 * alpha
        d_w31_I = delta * self.w13_II * self.activation_drv(a3_I) * x1 * alpha
        d_w32_I = delta * self.w13_II * self.activation_drv(a3_I) * x2 * alpha
        d_b_I = (delta * self.w11_II * self.activation_drv(a1_I) + delta * self.w12_II * self.activation_drv(a2_I) + delta * self.w13_II * self.activation_drv(a3_I)) * alpha

        self.w11_II -= d_w11_II
        self.w12_II -= d_w12_II
        self.w13_II -= d_w13_II

        self.w11_I -= d_w11_I
        self.w12_I -= d_w12_I
        self.w21_I -= d_w21_I
        self.w22_I -= d_w22_I
        self.w31_I -= d_w31_I
        self.w32_I -= d_w32_I
        self.b_I -= d_b_I

        return error


model = ANN()

data = [
    [0,0,0],
    [0,1,0],
    [1,0,0],
    [1,1,1],
]
for i in range(0,200):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += model.train(j[0],j[1],j[2],0.1)
    print(err)

print("-"*30)
for j in data:
    _, _, _, _, _, _, _, res = model.predict(j[0],j[1])
    print(j[0],",",j[1],"=",res)

Result of code :

...
0.1978539306282795
0.19794670251861882
0.19745074826953185
0.19529942727878868
0.19779970636626873
0.19661596298810918
------------------------------
0 , 0 = -0.24217968147818447
0 , 1 = 0.236033934015224
1 , 0 = 0.24457439328909888
1 , 1 = 0.5919949310028919