I'm trying to implement a recurrent neural network in C, but it doesn't work. I have read some documents on the Internet, but I don't understand the complex mathematics, so I have adapted the computation from a multi-layer perceptron.
During the first few learning steps the output of my network is a number, but soon after that the output becomes "not a number" (-1,#IND00).
1. Computation.
My first problem is the computation of values, errors, and weight changes.
I compute a forward link between two neurons N1->N2 as follows (see the sketch after this list):
- forward pass:
(value of N2) += (value of N1) * (weight of link N1->N2)
- backward pass:
(error of N1) += (error of N2) * (weight of link N1->N2)
and, for output neurons: (error) = (value of neuron) - (target output)
- weight change:
(new weight) = (old weight) - derivative(value of N2) * (error of N2) * (value of N1) * learning_rate
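To be unambiguous, here is a minimal, self-contained sketch of what I mean for a single forward link N1->N2, where N2 is a hidden (tanh) neuron; the variable names, the sample values, and the dtanh() helper are only placeholders for this example, not my actual data structures:

#include <math.h>
#include <stdio.h>

/* Derivative of tanh expressed from the already-activated value v = tanh(x):
   d tanh(x)/dx = 1 - v*v */
static double dtanh(double v) { return 1.0 - v * v; }

int main(void) {
    double lr = 0.03;          /* learning rate                                */
    double w  = 0.1;           /* weight of the link N1->N2                    */
    double n1_value = 0.5;     /* value of N1 (already activated)              */
    double n2_value = 0.0;     /* value of N2, accumulated below               */
    double n1_error = 0.0;     /* error of N1, accumulated below               */
    double n2_error = 0.2;     /* error of N2, assumed already backpropagated  */

    /* forward pass: N2 accumulates the weighted value of N1 */
    n2_value += n1_value * w;
    n2_value  = tanh(n2_value);   /* applied once, after all incoming links    */

    /* backward pass: N1 accumulates the weighted error of N2
       (for an output neuron the error starts as value - target) */
    n1_error += n2_error * w;

    /* weight change: gradient-descent step on the link N1->N2 */
    w -= dtanh(n2_value) * n2_error * n1_value * lr;

    printf("new weight = %f, error of N1 = %f\n", w, n1_error);
    return 0;
}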
In each neuron I store the previous value and the previous error, computed during the previous forward and backward passes, because the recurrent links cannot be computed during the same passes as the forward links.
I compute a recurrent link between two neurons N2->N1 as follows (see the sketch after this list):
- forward pass:
(value of N1) += (previous value of N2) * (weight of link N2->N1)
The final value of N1, accumulated from all its forward and recurrent links, is then passed through a sigmoid function (tanh), except for the output neurons.
- backward pass:
(error of N2) += (previous error of N1) * (weight of link N2->N1)
- weight change:
(new weight) = (old weight) - derivative(value of N1) * (error of N1) * (previous value of N2) * learning_rate
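Written the same way for a single recurrent link N2->N1, using the value and error stored during the previous passes (what value_prev and error_prev hold in my code); again the names and sample values are only placeholders:

#include <math.h>
#include <stdio.h>

int main(void) {
    double lr = 0.03;             /* learning rate                                        */
    double w  = 0.1;              /* weight of the recurrent link N2->N1                  */
    double n2_value_prev = 0.4;   /* value of N2 stored after the previous forward pass   */
    double n1_error_prev = 0.1;   /* error of N1 stored after the previous backward pass  */
    double n1_value = 0.0;        /* value of N1 for the current forward pass             */
    double n1_error = 0.2;        /* error of N1 from the current backward pass           */
    double n2_error = 0.0;        /* error of N2 for the current backward pass            */

    /* forward pass: N1 accumulates the weighted *previous* value of N2 */
    n1_value += n2_value_prev * w;
    n1_value  = tanh(n1_value);   /* applied once, after all incoming links of N1 */

    /* backward pass: N2 accumulates the weighted *previous* error of N1 */
    n2_error += n1_error_prev * w;

    /* weight change: tanh derivative at the value of N1, times the current
       error of N1, times the previous value of N2 */
    w -= (1.0 - n1_value * n1_value) * n1_error * n2_value_prev * lr;

    printf("new weight = %f, error of N2 = %f\n", w, n2_error);
    return 0;
}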
I don't know whether this computation is correct, or whether it can result in a working network.
2. Multi-Layer Perceptron.
My second problem is that when I disable the computation of recurrent links, my network should behave like a multi-layer perceptron. But even though my network learns, it performs poorly and needs, on average, more training cycles than a multi-layer perceptron I found on the Internet. So there is something wrong in my implementation.
There is also a strange phenomenon: I backpropagate the error by multiplying the error of a neuron by the weight of the incoming link/synapse and adding this product to the error of the neuron before the link/synapse, just like in the perceptron I found on the Internet. But when I add the error without multiplying it by the weight, the network works better and performs about as well as that perceptron.
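For reference, the textbook rule I am trying to reproduce accumulates, for each hidden neuron, the weighted errors of the neurons it feeds (the tanh derivative is then applied in the weight update, as in my code). Here is a minimal sketch of that rule, using a hypothetical helper that is not part of my program:

/* Standard backpropagated error for one hidden neuron j:
   the weighted sum of the errors of the nb_next neurons it feeds,
   where w[k] is the weight of the link j -> k.
   (hypothetical helper, not part of my program) */
static double hidden_error(const double *w, const double *next_error, int nb_next) {
    double e = 0.0;
    int k;
    for (k = 0; k < nb_next; k++) {
        e += w[k] * next_error[k];      /* textbook: multiply by the link weight */
        /* the variant that behaves better for me would be: e += next_error[k];  */
    }
    return e;
}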
3. Source code.
Here is the source code. The first file is my implementation, with the main function that tests my network against the multi-layer perceptron I found on the Internet. The perceptron is defined in the second file, mlp.h.
The computation of recurrent links is disabled, so it should work like a multi-layer perceptron. If you don't want to read the whole code, you can just look at the functions rnnset(), rnnsetstart(), and rnnlearn() to see the forward and backward passes; it is in these 3 functions that the recurrent links are disabled (commented lines/blocks). rnnsetstart() must be called before rnnset(), in order to store the value of the last forward pass in the neuron variable value_prev; the call order is sketched below.
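For clarity, this is the call order used in main() below, for one training step and one evaluation step:

/* one training step */
rnnsetstart(netrnn);            /* copy each neuron's value into value_prev      */
rnnset(netrnn, &inc);           /* forward pass on the input inc                 */
rnnlearn(netrnn, &outc, 0.03);  /* backward pass and weight update, target outc  */

/* one evaluation step */
rnnsetstart(netrnn);
rnnset(netrnn, &inc);
rnnget(netrnn, &outc);          /* read the output layer into outc               */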
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <malloc.h>
#include <stdlib.h>
#include "mlp.h"
typedef struct _neuron NEURON;
struct _neuron {
int layer;
double * weight;
int nbsynapsesin;
NEURON ** synapsesin;
double bias;
double value;
double value_prev;
double error;
double error_prev;
};
typedef struct _rnn RNN;
struct _rnn {
int * layersize;
int nbneurons;
NEURON * n;
};
typedef struct _config CONFIG;
struct _config {
int nbneurons;
int * layersize;
int nbsynapses;
int * synapses;
};
CONFIG * createconfig(int * layersize) {
CONFIG * conf = (CONFIG*)malloc(sizeof(CONFIG));
int i;
conf->nbneurons = 0;
for(i=1; i<layersize[0]+1; i++) conf->nbneurons += layersize[i];
conf->layersize = (int*)malloc((layersize[0]+1)*sizeof(int));
for(i=0; i<layersize[0]+1; i++) conf->layersize[i] = layersize[i];
conf->nbsynapses = 0;
for(i=1; i<layersize[0]; i++) conf->nbsynapses += layersize[i] * layersize[i+1];
conf->nbsynapses *= 2;
conf->synapses = (int*)malloc(2*conf->nbsynapses*sizeof(int));
// creation of the synapses:
int j,k=0,l,k2=0,k3=0;
for(i=1;i<layersize[0];i++) {
k3 += layersize[i];
for(j=0; j<layersize[i]; j++) {
for(l=0; l<layersize[i+1]; l++) {
// forward link/synapse:
conf->synapses[k] = k2+j;
k++;
conf->synapses[k] = k3+l;
k++;
// Recurrent link/synapse:
conf->synapses[k] = k3+l;
k++;
conf->synapses[k] = k2+j;
k++;
}
}
k2 += layersize[i];
}
return conf;
}
void freeconfig(CONFIG* conf) {
free(conf->synapses);
free(conf->layersize);
free(conf);
}
RNN * creaternn(CONFIG * conf) {
RNN * net = (RNN*)malloc(sizeof(RNN));
net->nbneurons = conf->nbneurons;
net->layersize = (int*)malloc((conf->layersize[0]+1)*sizeof(int));
int i;
for(i=0; i<conf->layersize[0]+1; i++) net->layersize[i] = conf->layersize[i];
net->n = (NEURON*)malloc(conf->nbneurons*sizeof(NEURON));
int j=0,k=0;
for(i=0; i<conf->nbneurons; i++) {
if(k==0) { k = conf->layersize[j+1]; j++; }
net->n[i].layer = j-1;
net->n[i].nbsynapsesin = 0;
k--;
}
k=0;
for(i=0; i<conf->nbsynapses; i++) {
k++;
net->n[conf->synapses[k]].nbsynapsesin++;
k++;
}
for(i=0; i<conf->nbneurons; i++) {
net->n[i].weight = (double*)malloc(net->n[i].nbsynapsesin*sizeof(double));
net->n[i].synapsesin = (NEURON**)malloc(net->n[i].nbsynapsesin*sizeof(NEURON*));
net->n[i].nbsynapsesin = 0;
}
// Link the incoming synapses with the neurons:
k=0;
for(i=0; i<conf->nbsynapses; i++) {
k++;
net->n[conf->synapses[k]].synapsesin[net->n[conf->synapses[k]].nbsynapsesin] = &(net->n[conf->synapses[k-1]]);
net->n[conf->synapses[k]].nbsynapsesin++;
k++;
}
// Initialization of the values, errors, and weights:
for(i=0; i<net->nbneurons; i++) {
for(j=0; j<net->n[i].nbsynapsesin; j++) {
net->n[i].weight[j] = 1.0 * (double)rand() / RAND_MAX - 1.0/2;
}
net->n[i].bias = 1.0 * (double)rand() / RAND_MAX - 1.0/2;
net->n[i].value = 0.0;
net->n[i].value_prev = 0.0;
net->n[i].error_prev = 0.0;
net->n[i].error = 0.0;
}
return net;
}
void freernn(RNN * net) {
int i;
for(i=0; i<net->nbneurons; i++) {
free(net->n[i].weight);
free(net->n[i].synapsesin);
}
free(net->n);
free(net->layersize);
free(net);
}
void rnnget(RNN * net, double * out) {
int i,k=0;
for(i=net->nbneurons-1; i>net->nbneurons-net->layersize[net->layersize[0]]-1; i--) { out[k] = net->n[i].value; k++; }
}
void rnnset(RNN * net, double * in) {
int i,j,k;
double v;
NEURON *ni,*nj;
// For each neuron:
for(i=0; i<net->nbneurons; i++) {
ni = &(net->n[i]);
if(i<net->layersize[1]) ni->value = in[i]; else ni->value = ni->bias;
// For each incoming synapse:
for(j=0; j<ni->nbsynapsesin; j++) {
nj = ni->synapsesin[j];
// If it is a forward link/synapse:
if(ni->layer > nj->layer) ni->value += nj->value * ni->weight[j];
// Uncomment the following line to activate recurrent links computation:
//else ni->value += nj->value_prev * ni->weight[j];
}
// If NOT the output layer, then tanh the value:
if(ni->layer != net->layersize[0]-1) ni->value = tanh(ni->value);
}
}
void rnnsetstart(RNN * net) {
int i,j;
NEURON *ni,*nj;
// For each neuron, update value_prev:
for(i=0; i<net->nbneurons; i++) {
ni = &(net->n[i]);
// If NOT the output layer, then the value is already computed by tanh:
if(ni->layer != net->layersize[0]-1) {
ni->value_prev = ni->value;
} else {
ni->value_prev = tanh(ni->value);
}
}
}
void rnnlearn(RNN * net, double * out, double learningrate) {
int i,j,k;
k=0;
NEURON *ni,*nj;
// Initialize error to zero for the output layer:
for(i=net->nbneurons-1; i>=net->nbneurons-net->layersize[net->layersize[0]]; i--) net->n[i].error = 0.0;
// Compute the error for output neurons:
for(i=net->nbneurons-1; i>=0; i--) {
ni = &(net->n[i]);
// If ni is an output neuron, update the error:
if(ni->layer == net->layersize[0]-1) {
ni->error += ni->value - out[k];
k++;
} else {
ni->error = 0.0;
}
// Uncomment the following block to activate recurrent links computation:
/*
// For each incoming synapse from output layer:
for(j=0; j<ni->nbsynapsesin; j++) {
nj = ni->synapsesin[j];
// If neuron nj is in output layer, then update the error:
if(nj->layer == net->layersize[0]-1) nj->error += ni->error_prev * ni->weight[j];
}
*/
}
// Compute error for all other neurons:
for(i=net->nbneurons-1; i>=0; i--) {
ni = &(net->n[i]);
// For each input synapse NOT from output layer:
for(j=0; j<ni->nbsynapsesin; j++) {
nj = ni->synapsesin[j];
// If neuron nj is NOT in output layer, then update the error:
if(nj->layer != net->layersize[0]-1) {
// If it is a forward link/synapse:
if(ni->layer > nj->layer) nj->error += ni->error * ni->weight[j];
// Uncomment the following line to activate recurrent links computation:
//else nj->error += ni->error_prev * ni->weight[j];
}
}
}
// Update weights:
for(i=0; i<net->nbneurons; i++) {
ni = &(net->n[i]);
double wchange,derivative;
// For the output layer:
if(ni->layer == net->layersize[0]-1) {
derivative = ni->error * learningrate;
// For each incoming synapse:
for(j=0; j<ni->nbsynapsesin; j++) {
nj = ni->synapsesin[j];
wchange = derivative;
// If it is a forward link/synapse:
if(ni->layer > nj->layer) wchange *= nj->value;
else wchange *= nj->value_prev;
ni->weight[j] -= wchange;
if(ni->weight[j] > 5) ni->weight[j] = 5;
if(ni->weight[j] < -5) ni->weight[j] = -5;
}
ni->bias -= derivative;
if(ni->bias > 5) ni->bias = 5;
if(ni->bias < -5) ni->bias = -5;
// For the other layers:
} else {
derivative = 1.0 - ni->value * ni->value;
derivative *= ni->error * learningrate;
// For each incoming synapse:
for(j=0; j<ni->nbsynapsesin; j++) {
nj = ni->synapsesin[j];
wchange = derivative;
// If it is a forward link/synapse:
if(ni->layer > nj->layer) wchange *= nj->value;
else wchange *= nj->value_prev;
ni->weight[j] -= wchange;
}
ni->bias -= derivative;
}
}
// Update error_prev:
for(i=0; i<net->nbneurons; i++) net->n[i].error_prev = net->n[i].error;
}
int main() {
srand(time(NULL));
int layersize[] = {1, 25, 12, 1};
int layersize_netrnn[] = { 4, 1, 25, 12, 1 };
mlp * netmlp = create_mlp (4, layersize);
CONFIG * configrnn = createconfig(layersize_netrnn);
RNN * netrnn = creaternn(configrnn);
double inc,outc;
double global_error = 1;
double global_error2 = 1;
int iter,i1=0,i2=0;
//////////////////////////////////////////////////////
// Training of the Multi-Layer Perceptron:
//////////////////////////////////////////////////////
while(global_error > 0.005 && i1<1000) {
for (iter=0; iter < 100; iter++) {
inc = 1.0*rand()/(RAND_MAX+1.0);
outc = inc*inc;
set_mlp(netmlp,&inc);
learn_mlp(netmlp,&outc,0.03);
}
global_error = 0;
int k;
for (k=0; k < 100; k++) {
inc = 1.0*rand()/(RAND_MAX+1.0);
outc = inc*inc;
set_mlp(netmlp,&inc);
get_mlp(netmlp,&outc);
mlp_float desired_out = inc*inc;
global_error += (desired_out - outc)*(desired_out - outc);
}
global_error /= 100;
global_error = sqrt(global_error);
i1++;
}
//////////////////////////////////////////////////////
// Training of the Recurrent Neural Network:
//////////////////////////////////////////////////////
while(global_error2 > 0.005 && i2<1000) {
for (iter=0; iter < 100; iter++) {
inc = 1.0*rand()/(RAND_MAX+1.0);
outc = inc*inc;
rnnsetstart(netrnn);
rnnset(netrnn,&inc);
double outc2;
rnnlearn(netrnn,&outc,0.03);
}
global_error2 = 0;
int k;
for (k=0; k < 100; k++) {
inc = 1.0*rand()/(RAND_MAX+1.0);
outc = inc*inc;
double desired_out = inc*inc;
rnnsetstart(netrnn);
rnnset(netrnn,&inc);
rnnget(netrnn,&outc);
global_error2 += (desired_out - outc)*(desired_out - outc);
}
global_error2 /= 100;
global_error2 = sqrt(global_error2);
if(!isnormal(global_error2)) global_error2 = 100;
i2++;
}
//////////////////////////////////////////////////////
// Test of performance for both networks:
//////////////////////////////////////////////////////
global_error = 0;
global_error2 = 0;
int k;
for (k=0; k < 10000; k++) {
inc = 1.0*rand()/(RAND_MAX+1.0);
outc = inc*inc;
double desired_out = inc*inc;
rnnsetstart(netrnn);
rnnset(netrnn,&inc);
rnnget(netrnn,&outc);
global_error2 += (desired_out - outc)*(desired_out - outc);
set_mlp(netmlp,&inc);
get_mlp(netmlp,&outc);
global_error += (desired_out - outc)*(desired_out - outc);
}
global_error /= 10000;
global_error = sqrt(global_error);
printf("\n MLP: i: %5d error: %f",i1,global_error);
global_error2 /= 10000;
global_error2 = sqrt(global_error2);
printf("\n RNN: i: %5d error: %f",i2,global_error2);
free_mlp(netmlp);
freeconfig(configrnn);
freernn(netrnn);
}
And the file mlp.h:
typedef double mlp_float;
typedef struct {
mlp_float *synaptic_weight;
mlp_float *neuron_value;
mlp_float *neuron_error_value;
mlp_float *input_neuron;
mlp_float *output_neuron;
mlp_float *output_error_value;
int *layer_index;
int *layer_size;
int *synapse_index;
int layer_number;
int neuron_number;
int synapse_number;
int input_layer_size;
int output_layer_size;
} mlp;
static mlp_float MAGICAL_WEIGHT_NUMBER = 1.0f;
static mlp_float MAGICAL_LEARNING_NUMBER = 0.4f;
void reinit_mlp(mlp * network) {
int i;
for (i = 0; i < network->synapse_number; i++) {
network->synaptic_weight[i] = /*0.001;*/MAGICAL_WEIGHT_NUMBER * (mlp_float)rand() / RAND_MAX - MAGICAL_WEIGHT_NUMBER/2;
}
}
mlp *create_mlp(int layer_number, int *layer_size) {
mlp *network = (mlp*)malloc(sizeof * network);
network->layer_number = layer_number;
network->layer_size = (int*)malloc(sizeof * network->layer_size * network->layer_number);
network->layer_index = (int*)malloc(sizeof * network->layer_index * network->layer_number);
int i;
network->neuron_number = 0;
for (i = 0; i < layer_number; i++) {
network->layer_size[i] = layer_size[i];
network->layer_index[i] = network->neuron_number;
network->neuron_number += layer_size[i];
}
network->neuron_value = (mlp_float*)malloc(sizeof * network->neuron_value * network->neuron_number);
network->neuron_error_value = (mlp_float*)malloc(sizeof * network->neuron_error_value * network->neuron_number);
network->input_layer_size = layer_size[0];
network->output_layer_size = layer_size[layer_number-1];
network->input_neuron = network->neuron_value;
network->output_neuron = &network->neuron_value[network->layer_index[layer_number-1]];
network->output_error_value = &network->neuron_error_value[network->layer_index[layer_number-1]];
network->synapse_index = (int*)malloc(sizeof * network->synapse_index * (network->layer_number-1));
network->synapse_number = 0;
for (i = 0; i < layer_number - 1; i++) {
network->synapse_index[i] = network->synapse_number;
network->synapse_number += (network->layer_size[i]+1) * network->layer_size[i+1];
}
network->synaptic_weight = (mlp_float*)malloc(sizeof * network->synaptic_weight * network->synapse_number);
for (i = 0; i < network->synapse_number; i++) {
network->synaptic_weight[i] = MAGICAL_WEIGHT_NUMBER * (mlp_float)rand() / RAND_MAX - MAGICAL_WEIGHT_NUMBER/2;
}
return network;
}
void free_mlp (mlp *network) {
free(network->layer_size);
free(network->layer_index);
free(network->neuron_value);
free(network->neuron_error_value);
free(network->synapse_index);
free(network->synaptic_weight);
free(network);
}
void set_mlp (mlp * network, mlp_float *vector) {
if (vector != NULL) {
int i;
for (i = 0; i < network->input_layer_size; i++) {
network->input_neuron[i] = vector[i];
}
}
int i;
int synapse_index;
synapse_index = 0;
for (i = 1; i < network->layer_number; i++) {
int j;
for (j = network->layer_index[i]; j < network->layer_index[i] + network->layer_size[i]; j++) {
mlp_float weighted_sum = 0.0;
int k;
for (k = network->layer_index[i-1]; k < network->layer_index[i-1] + network->layer_size[i-1]; k++) {
weighted_sum += network->neuron_value[k] * network->synaptic_weight[synapse_index];
synapse_index++;
}
weighted_sum += network->synaptic_weight[synapse_index];
synapse_index++;
network->neuron_value[j] = weighted_sum;
if (i != network->layer_number - 1) network->neuron_value[j] = tanh(network->neuron_value[j]);
}
}
}
void get_mlp (mlp *network, mlp_float *vector) {
int i;
for (i = 0; i < network->output_layer_size; i++) {
vector[i] = network->output_neuron[i];
}
}
void learn_mlp (mlp *network, mlp_float *desired_out, mlp_float learning_rate) {
int i;
mlp_float global_error = 0;
int synapse_index = network->synapse_index[network->layer_number-2];
for (i = 0; i < network->output_layer_size; i++) {
network->output_error_value[i] = network->output_neuron[i] - desired_out[i];
int j;
for (j = network->layer_index[network->layer_number-2]; j < network->layer_index[network->layer_number-2] + network->layer_size[network->layer_number-2]; j++) {
mlp_float weightChange;
weightChange = learning_rate * network->output_error_value[i] * network->neuron_value[j];
network->synaptic_weight[synapse_index] -= weightChange;
if (network->synaptic_weight[synapse_index] > 5) network->synaptic_weight[synapse_index] = 5;
if (network->synaptic_weight[synapse_index] < -5) network->synaptic_weight[synapse_index] = -5;
synapse_index++;
}
mlp_float weightChange;
weightChange = learning_rate * network->output_error_value[i];
network->synaptic_weight[synapse_index] -= weightChange;
if (network->synaptic_weight[synapse_index] > 5) network->synaptic_weight[synapse_index] = 5;
if (network->synaptic_weight[synapse_index] < -5) network->synaptic_weight[synapse_index] = -5;
synapse_index++;
}
for (i = network->layer_number - 2; i > 0; i--) {
int j;
int jj= 0;
int synapse_index = network->synapse_index[i-1];
for (j = network->layer_index[i]; j < network->layer_index[i] + network->layer_size[i]; j++,jj++) {
int k;
int synapse_index2 = network->synapse_index[i] + jj;
network->neuron_error_value[j] = 0;
for (k = network->layer_index[i+1]; k < network->layer_index[i+1] + network->layer_size[i+1]; k++) {
network->neuron_error_value[j] += network->synaptic_weight[synapse_index2] * network->neuron_error_value[k];
synapse_index2+=network->layer_size[i]+1;
}
for (k = network->layer_index[i-1]; k < network->layer_index[i-1] + network->layer_size[i-1]; k++) {
mlp_float weightChange;
weightChange = 1.0 - network->neuron_value[j] * network->neuron_value[j];
weightChange *= network->neuron_error_value[j] * learning_rate;
weightChange *= network->neuron_value[k];
network->synaptic_weight[synapse_index] -= weightChange;
synapse_index++;
}
mlp_float weightChange;
weightChange = 1.0 - network->neuron_value[j] * network->neuron_value[j];
weightChange *= network->neuron_error_value[j] * learning_rate;
network->synaptic_weight[synapse_index] -= weightChange;
synapse_index++;
}
}
}
void get_mlp_inputs (mlp *network, mlp_float *vector) {
if (vector != NULL) {
int i;
for (i = 0; i < network->input_layer_size; i++) {
vector[i] = network->input_neuron[i];
}
}
}