1
votes

I am building a model using tensorflow. I trained my model and it worked normally. Then, I modified my code and when I try to train my model, I am getting a AlreadyExistError. I restart my Jupyter Notebook but I’m still getting the same error. I need some help please. here is my piece of code where I build the network and train it. The problem occurs in the last line.

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout, Dense, Flatten, LSTM, MaxPooling1D, Bidirectional
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping, TensorBoard

model = Sequential()

model.add(Conv1D(32, kernel_size=3, activation='elu', padding='same',
                 input_shape=(vector_size, 1)))
model.add(Conv1D(32, kernel_size=3, activation='elu', padding='same'))
model.add(Conv1D(32, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPooling1D(pool_size=3))

model.add(Bidirectional(LSTM(512, dropout=0.2, recurrent_dropout=0.3)))

model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))

model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001, decay=1e-6), metrics=['accuracy'])

tensorboard = TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=True)

model.summary()
model.fit(np.array(x_train), np.array(y_train), batch_size=batch_size, epochs=no_epochs,
         validation_data=(np.array(x_test), np.array(y_test)),  callbacks=[tensorboard, EarlyStopping(min_delta=0.0001, patience=3)])

Train on 90000 samples, validate on 10000 samples Epoch 1/10
500/90000 [..............................] - ETA: 2:00:49 /anaconda3/lib/python3.7/site-packages/keras/callbacks/callbacks.py:846: RuntimeWarning: Early stopping conditioned on metric val_loss which is not available. Available metrics are: (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning --------------------------------------------------------------------------- AlreadyExistsError Traceback (most recent call last) in 1 model.fit(np.array(x_train), np.array(y_train), batch_size=batch_size, epochs=no_epochs, ----> 2 validation_data=(np.array(x_test), np.array(y_test)), callbacks=[tensorboard, EarlyStopping(min_delta=0.0001, patience=3)]) 3 print('You can continue')

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs) 817 max_queue_size=max_queue_size, 818 workers=workers, --> 819 use_multiprocessing=use_multiprocessing) 820 821 def evaluate(self,

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs) 340 mode=ModeKeys.TRAIN, 341 training_context=training_context, --> 342 total_epochs=epochs) 343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN) 344

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs) 126 step=step, mode=mode, size=current_batch_size) as batch_logs: 127 try: --> 128 batch_outs = execution_function(iterator) 129 except (StopIteration, errors.OutOfRangeError): 130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn) 96 # numpy translates Tensors to values in Eager mode. 97 return nest.map_structure(_non_none_constant_value, ---> 98 distributed_function(input_fn)) 99 100 return execution_function

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in call(self, *args, **kwds) 566 xla_context.Exit() 567 else: --> 568 result = self._call(*args, **kwds) 569 570 if tracing_count == self._get_tracing_count():

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds) 630 # Lifting succeeded, so variables are initialized and we can run the 631 # stateless function. --> 632 return self._stateless_fn(*args, **kwds) 633 else: 634 canon_args, canon_kwds = \

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, *args, **kwargs) 2361 with self._lock:
2362 graph_function, args, kwargs = self._maybe_define_function(args, kwargs) -> 2363 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access 2364 2365 @property

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs) 1609 if isinstance(t, (ops.Tensor, 1610
resource_variable_ops.BaseResourceVariable))), -> 1611 self.captured_inputs) 1612 1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None):

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1690 # No tape is watching; skip to running the function.
1691 return self._build_call_outputs(self._inference_function.call( -> 1692 ctx, args, cancellation_manager=cancellation_manager)) 1693
forward_backward = self._select_forward_and_backward_functions(
1694 args,

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager) 543 inputs=args, 544 attrs=("executor_type", executor_type, "config_proto", config), --> 545 ctx=ctx) 546 else: 547 outputs = execute.execute_with_cancellation(

/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name) 65 else: 66 message = e.message ---> 67 six.raise_from(core._status_to_exception(e.code, message), None) 68 except TypeError as e: 69 keras_symbolic_tensors = [

/anaconda3/lib/python3.7/site-packages/six.py in raise_from(value, from_value)

AlreadyExistsError: Resource __per_step_0/sequential/bidirectional/forward_lstm/while_grad/body/_429/gradients/AddN_13/tmp_var/N10tensorflow19TemporaryVariableOp6TmpVarE [[{{node sequential/bidirectional/forward_lstm/while_grad/body/_429/gradients/AddN_13/tmp_var}}]] [Op:__inference_distributed_function_12060]

Function call stack: distributed_function

1
Can you give the error stack trace ?Orphee Faucoz
I added it by modifying the postMax

1 Answers

0
votes

You must have a problem on same architectures from previous run being seen while training.

This should reset the keras session :

from tensorflow.keras import backend
backend.clear_session()