I am currently designing a NoisyNet layer, as proposed here: "Noisy Networks for Exploration", in Tensorflow and get the dimensionality error as indicated in the title, while the dimensions of the two tensors to be multiplied element-wise in line filtered_output = keras.layers.merge.Multiply()([output, actions_input])
should (in principle) be compatible with each other according to the printed output when printing the dimensions of both tensors involved, filtered_output
and actions_input
, where both tensors seem to be of dimension shape=(1, 4)
.
I am using Tensorflow 1.12.0 in Python3.
The relevant code looks as follows:
import numpy as np
import tensorflow as tf
import keras
class NoisyLayer(keras.layers.Layer):
def __init__(self, in_shape=(1,2592), out_units=256, activation=tf.identity):
super(NoisyLayer, self).__init__()
self.in_shape = in_shape
self.out_units = out_units
self.mu_interval = 1.0/np.sqrt(float(self.out_units))
self.sig_0 = 0.5
self.activation = activation
self.assign_resampling()
def build(self, input_shape):
# Initializer
self.mu_initializer = tf.initializers.random_uniform(minval=-self.mu_interval, maxval=self.mu_interval) # Mu-initializer
self.si_initializer = tf.initializers.constant(self.sig_0/np.sqrt(float(self.out_units))) # Sigma-initializer
# Weights
self.w_mu = tf.Variable(initial_value=self.mu_initializer(shape=(self.in_shape[-1], self.out_units), dtype='float32'), trainable=True) # (1,2592)x(2592,4) = (1,4)
self.w_si = tf.Variable(initial_value=self.si_initializer(shape=(self.in_shape[-1], self.out_units), dtype='float32'), trainable=True)
# Biases
self.b_mu = tf.Variable(initial_value=self.mu_initializer(shape=(self.in_shape[0], self.out_units), dtype='float32'), trainable=True)
self.b_si = tf.Variable(initial_value=self.si_initializer(shape=(self.in_shape[0], self.out_units), dtype='float32'), trainable=True)
def call(self, inputs, resample_noise_flag):
if resample_noise_flag:
self.assign_resampling()
# Putting it all together
self.w = tf.math.add(self.w_mu, tf.math.multiply(self.w_si, self.w_eps))
self.b = tf.math.add(self.b_mu, tf.math.multiply(self.b_si, self.q_eps))
return self.activation(tf.linalg.matmul(inputs, self.w) + self.b)
def assign_resampling(self):
self.p_eps = self.f(self.resample_noise([self.in_shape[-1], 1]))
self.q_eps = self.f(self.resample_noise([1, self.out_units]))
self.w_eps = self.p_eps * self.q_eps # Cartesian product of input_noise x output_noise
def resample_noise(self, shape):
return tf.random.normal(shape, mean=0.0, stddev=1.0, seed=None, name=None)
def f(self, x):
return tf.math.multiply(tf.math.sign(x), tf.math.sqrt(tf.math.abs(x)))
frames_input = tf.ones((1, 84, 84, 4)) # Toy input
conv1 = keras.layers.Conv2D(16, (8, 8), strides=(4, 4), activation="relu")(frames_input)
conv2 = keras.layers.Conv2D(32, (4, 4), strides=(2, 2), activation="relu")(conv1)
flattened = keras.layers.Flatten()(conv2)
actionspace_size = 4
# NoisyNet
hidden = NoisyLayer(activation=tf.nn.relu)(inputs=flattened, resample_noise_flag=True)
output = NoisyLayer(in_shape=(1,256), out_units=actionspace_size)(inputs=hidden, resample_noise_flag=True)
actions_input = tf.ones((1,actionspace_size))
print('hidden:\n', hidden)
print('output:\n', output)
print('actions_input:\n', actions_input)
filtered_output = keras.layers.merge.Multiply()([output, actions_input])
The output, when I run the code, looks as follows:
hidden:
Tensor("noisy_layer_5/Relu:0", shape=(1, 256), dtype=float32)
output:
Tensor("noisy_layer_6/Identity:0", shape=(1, 4), dtype=float32)
actions_input:
Tensor("ones_5:0", shape=(1, 4), dtype=float32)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-f6df621eacab> in <module>()
68 print('actions_input:\n', actions_input)
69
---> 70 filtered_output = keras.layers.merge.Multiply()([output, actions_input])
2 frames
/usr/local/lib/python3.6/dist-packages/keras/layers/merge.py in _compute_elemwise_op_output_shape(self, shape1, shape2)
59 raise ValueError('Operands could not be broadcast '
60 'together with shapes ' +
---> 61 str(shape1) + ' ' + str(shape2))
62 output_shape.append(i)
63 return tuple(output_shape)
ValueError: Operands could not be broadcast together with shapes (2592,) (4,)
Particularly, I am wondering where the number 2592
in Operands could not be broadcast together with shapes (2592,) (4,)
comes from, since the number coincides with the length of the flattened input tensor flattened
to the first noisy layer, but is -as it seems to me- not part of the output dimension of the second noisy layer output
anymore, which in turn serves as the input to the erroneous line indicated above.
Does anyone know what's going wrong?
Thanks in advance, Daniel