I built a CNN using Theano. The code for the convolution and hidden layers is:
class HiddenLayer(Layer):
    """Fully connected layer computing ``non_linearity(inp_vector . W^T + b)``.

    ``n_in``, ``n_out``, ``W`` and ``b`` are forwarded unchanged to
    ``Layer.__init__``. ``self.W``, ``self.b`` and ``self.non_lins`` are
    presumably provided by ``Layer`` (not visible here -- confirm).
    """

    def __init__(self, n_in, n_out, inp_vector=None, non_linearity='sigmoid', W=None, b=None):
        """Build the symbolic output expression of the layer.

        Args:
            n_in: forwarded to ``Layer.__init__`` (presumably the input width).
            n_out: forwarded to ``Layer.__init__`` (presumably the output width).
            inp_vector: symbolic input; a fresh ``T.dmatrix()`` when omitted.
            non_linearity: key used to look up the activation in
                ``self.non_lins``.
            W: optional weights, forwarded to ``Layer.__init__``.
            b: optional bias, forwarded to ``Layer.__init__``.
        """
        Layer.__init__(self, n_in, n_out, W, b)
        # A default written in the signature is evaluated only once, which
        # made every layer built without an explicit input share the same
        # symbolic variable. Create the default per instance instead.
        if inp_vector is None:
            inp_vector = T.dmatrix()
        self.inp = inp_vector
        # W is transposed before the product, so it is presumably stored as
        # (n_out, n_in) -- confirm against Layer.
        pre_activation = inp_vector.dot(self.W.transpose()) + self.b
        self.output = self.non_lins[non_linearity](pre_activation)
class ConvolutionLayer(Layer):
    """Convolution -> max-pool -> bias -> non-linearity, optionally flattened.

    ``self.W``, ``self.b`` and ``self.non_lins`` are presumably provided by
    ``Layer`` (not visible here -- confirm).
    """

    def __init__(self, W_shape, b_shape, image_shape, inp_vector=None, maxpool=(2, 2),
                 non_linearity='tanh', W=None, b=None, flatten=False, batch=1):
        """Build the symbolic output expression of the layer.

        Args:
            W_shape: shape of the filter bank; passed to ``conv2d`` as
                ``filter_shape`` and used to draw the initial filters.
            b_shape: shape of the bias vector used to draw the initial bias.
            image_shape: shape of the input, passed to ``conv2d``.
            inp_vector: symbolic input; a fresh ``T.tensor4()`` when omitted.
            maxpool: pooling factor handed to ``max_pool_2d``.
            non_linearity: key used to look up the activation in
                ``self.non_lins``.
            W: optional filters to use instead of a random initialisation.
                Presumably a shared variable, like the one built here.
            b: optional bias to use instead of a random initialisation.
            flatten: when true, the output is flattened with
                ``flatten(batch)``.
            batch: value passed straight to ``flatten()``. In Theano that
                argument is the number of dimensions of the result, not a
                batch size: ``2`` keeps the leading axis and collapses the
                rest, ``1`` yields a single vector.
        """
        # A default written in the signature is evaluated only once and would
        # be shared by every instance; create it per instance instead.
        if inp_vector is None:
            inp_vector = T.tensor4()
        # Only initialise randomly when the caller supplied nothing.
        # Previously W and b were overwritten unconditionally, so parameters
        # passed by the caller were silently discarded.
        # NOTE(review): the filters are drawn with unit variance and no
        # fan-in scaling, which may push tanh/sigmoid units into saturation
        # -- consider a scaled initialisation.
        if W is None:
            W = theano.shared(numpy.random.standard_normal(W_shape))
        if b is None:
            b = theano.shared(numpy.random.random(b_shape))
        Layer.__init__(self, 0, 0, W, b)
        self.inp = inp_vector
        conv_out = convop.conv2d(input=inp_vector, filters=self.W,
                                 filter_shape=W_shape, image_shape=image_shape)
        pooled = downsample.max_pool_2d(conv_out, maxpool, ignore_border=True)
        # dimshuffle('x', 0, 'x', 'x') makes the 1-D bias broadcastable
        # against the 4-D pooled tensor along its second axis.
        activated = self.non_lins[non_linearity](pooled + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output = activated.flatten(batch) if flatten else activated
My neural network has 5 layers. But for checking what was going wrong, I stripped off all the other hidden layers and just kept the one which is connected to the convolution layer. I observed that the output of the convolution layer was fine, but after passing it through the fully connected layer, it became:
(2, 5000)
[[-1. -1. -1. ..., -1. -1. -1.]
[-1. -1. -1. ..., -1. -1. -1.]]
Here, (2, 5000) is the shape of the output matrix of the hidden layer. The network implementation is:
# First convolution stage: 2 filters of 5x5 over a batch of two 3-channel
# 480x640 images, followed by 4x4 max-pooling.
layer0 = machinebrain.ConvolutionLayer(
    W_shape=(2, 3, 5, 5),
    b_shape=(2,),
    image_shape=(2, 3, 480, 640),
    maxpool=(4, 4),
)

# Second convolution stage, fed by layer0. The declared 119x159 input is
# consistent with a border-less 5x5 convolution and 4x4 pooling of 480x640:
# (480 - 5 + 1) / 4 = 119 and (640 - 5 + 1) / 4 = 159.
layer1 = machinebrain.ConvolutionLayer(
    W_shape=(3, 2, 5, 5),
    b_shape=(3,),
    image_shape=(2, 2, 119, 159),
    inp_vector=layer0.output,
    maxpool=(2, 2),
    flatten=True,
    batch=2,
)

# Fully connected tanh layer on the flattened feature maps. By the same
# arithmetic layer1 yields 3 maps of 57x77, and 3 * 57 * 77 = 13167 = n_in.
layer3 = machinebrain.HiddenLayer(
    n_in=13167,
    n_out=5000,
    inp_vector=layer1.output,
    non_linearity='tanh',
)
Any idea what might be causing the output of the convolution layer to change to all -1.'s after passing through the hidden layer?