Some of the feature columns' data type is list
. And their length can be different. I want to encode this column as a multi-hot categorical feature and feed it to tf.estimator
. I tried the following but the error Unable to get element as bytes
showed. I think this is a common practice in deep learning especially recommendation systems e.g. Deep & Wide model. I found a relevant question here but it doesn't show how to feed to the estimator.
import pandas as pd
import tensorflow as tf
OUTDIR = "./data"
data = {"x": [["a", "c"], ["a", "b"], ["b", "c"]], "y": ["x", "y", "z"]}
df = pd.DataFrame(data)
Y = df["y"]
X = df.drop("y", axis=1)
indicator_features = [
tf.feature_column.indicator_column(
categorical_column=tf.feature_column.categorical_column_with_vocabulary_list(
key="x", vocabulary_list=["a", "b", "c"]
)
)
]
model = tf.estimator.LinearClassifier(
feature_columns=indicator_features, model_dir=OUTDIR
)
training_input_fn = tf.estimator.inputs.pandas_input_fn(
x=X, y=Y, batch_size=64, shuffle=True, num_epochs=None
)
model.train(input_fn=training_input_fn)
The following error:
INFO:tensorflow:Using default config. INFO:tensorflow:Using config: {'_model_dir': 'testalg', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1} INFO:tensorflow:Calling model_fn. INFO:tensorflow:Done calling model_fn. INFO:tensorflow:Create CheckpointSaverHook. INFO:tensorflow:Graph was finalized. INFO:tensorflow:Running local_init_op. INFO:tensorflow:Done running local_init_op. INFO:tensorflow:Error reported to Coordinator: , Unable to get element as bytes. INFO:tensorflow:Saving checkpoints for 0 into testalg/model.ckpt. ------------------------------------------------------- InternalError Traceback (most recent call last) /home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args) 1321 try: -> 1322 return fn(*args) 1323 except errors.OpError as e:
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata) 1306 return self._call_tf_sessionrun( -> 1307 options, feed_dict, fetch_list, target_list, run_metadata) 1308
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata) 1408 self._session, options, feed_dict, fetch_list, target_list, -> 1409 run_metadata) 1410 else:
InternalError: Unable to get element as bytes.
During handling of the above exception, another exception occurred:
InternalError Traceback (most recent call last) in () 44 45 ---> 46 model.train(input_fn=training_input_fn)
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners) 364 365 saving_listeners = _check_listeners_type(saving_listeners) --> 366 loss = self._train_model(input_fn, hooks, saving_listeners) 367 logging.info('Loss for final step: %s.', loss) 368 return self
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners) 1117
return self._train_model_distributed(input_fn, hooks, saving_listeners) 1118 else: -> 1119 return self._train_model_default(input_fn, hooks, saving_listeners) 1120 1121 def _train_model_default(self, input_fn, hooks, saving_listeners):/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1133 return self._train_with_estimator_spec(estimator_spec, worker_hooks, 1134
hooks, global_step_tensor, -> 1135 saving_listeners) 1136 1137 def _train_model_distributed(self, input_fn, hooks, saving_listeners):/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks, global_step_tensor, saving_listeners) 1334 loss = None 1335 while not mon_sess.should_stop(): -> 1336 _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss]) 1337 return loss 1338
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in exit(self, exception_type, exception_value, traceback) 687 if exception_type in [errors.OutOfRangeError, StopIteration]: 688 exception_type = None --> 689 self._close_internal(exception_type) 690 # exit should return True to suppress an exception. 691 return exception_type is None
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in _close_internal(self, exception_type) 724 if self._sess is None: 725 raise RuntimeError('Session is already closed.') --> 726 self._sess.close() 727 finally: 728 self._sess = None
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in close(self) 972 if self._sess: 973 try: --> 974 self._sess.close() 975 except _PREEMPTION_ERRORS: 976 pass
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in close(self) 1116 self._coord.join( 1117
stop_grace_period_secs=self._stop_grace_period_secs, -> 1118 ignore_live_threads=True) 1119 finally: 1120 try:/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/coordinator.py in join(self, threads, stop_grace_period_secs, ignore_live_threads) 387 self._registered_threads = set() 388 if self._exc_info_to_raise: --> 389 six.reraise(*self._exc_info_to_raise) 390 elif stragglers: 391 if ignore_live_threads:
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb) 683 value = tp() 684 if value.traceback is not tb: --> 685 raise value.with_traceback(tb) 686 raise value 687
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py in _run(self, sess, enqueue_op, feed_fn, coord) 92 try: 93 feed_dict = None if feed_fn is None else feed_fn() ---> 94 sess.run(enqueue_op, feed_dict=feed_dict) 95 except (errors.OutOfRangeError, errors.CancelledError): 96 # This exception indicates that a queue was closed.
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata) 898 try: 899 result = self._run(None, fetches, feed_dict, options_ptr, --> 900 run_metadata_ptr) 901 if run_metadata: 902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1133 if final_fetches or final_targets or (handle and feed_dict_tensor): 1134 results = self._do_run(handle, final_targets, final_fetches, -> 1135 feed_dict_tensor, options, run_metadata) 1136 else: 1137 results = []/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata) 1314 if handle is None: 1315 return self._do_call(_run_fn, feeds, fetches, targets, options, -> 1316 run_metadata) 1317 else: 1318 return self._do_call(_prun_fn, handle, feeds, fetches)
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args) 1333 except KeyError: 1334 pass -> 1335 raise type(e)(node_def, op, message) 1336 1337 def _extend_graph(self):
InternalError: Unable to get element as bytes.