TensorForest estimator raises ValueError when fitting the training data

Question

Code:

from sklearn import cross_validation as cv
import numpy as np
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.tensor_forest.python import tensor_forest

# Feature matrix: one row per sample, seven feature columns.
X = np.array([
    [74., 166., 331., 161., 159., 181., 180.],
    [437., 427., 371., 361., 393., 465., 464.],
    [546., 564., 588., 595., 536., 537., 520.],
    [89., 89., 87., 87., 108., 113., 111.],
    [75., 90., 74., 89., 130., 140., 135.],
])
# Target matrix: one row per sample, seven output columns.
Y = np.array([
    [51., 43., 29., 43., 43., 41., 42.],
    [22., 23., 26., 27., 25., 19., 19.],
    [7., 7., 5., 5., 9., 8., 10.],
    [55., 54., 55., 53., 51., 51., 51.],
    [58., 57., 57., 58., 55., 55., 55.],
])
# Hold out half of the samples; the fixed random_state keeps the split stable.
train_X, test_X, train_Y, test_Y = cv.train_test_split(
    X, Y, test_size=0.50, random_state=42)

def build_estimator():
    """Build an SKCompat-wrapped TensorForestEstimator using TrainingLossForest.

    The forest is configured with 7 classes, 7 features, 30 trees and at most
    100 nodes; ``model_dir`` is left as ``None``.

    Returns:
        An ``estimator.SKCompat`` wrapper around the forest estimator, so it
        can be trained with ``fit(x=..., y=..., batch_size=...)``.
    """
    # Imported here because the snippet's module-level imports do not bring
    # `random_forest` into scope, yet it is needed to build the estimator.
    from tensorflow.contrib.tensor_forest.client import random_forest

    params = tensor_forest.ForestHParams(
        num_classes=7, num_features=7, num_trees=30, max_nodes=100)
    # TrainingLossForest is the only builder that was ever in effect; an
    # earlier RandomForestGraphs assignment was overwritten before use.
    graph_builder_class = tensor_forest.TrainingLossForest
    forest = random_forest.TensorForestEstimator(
        params, graph_builder_class=graph_builder_class, model_dir=None)
    return estimator.SKCompat(forest)
# Build the estimator, cast features and targets to float32, then train.
est = build_estimator()
train_X = train_X.astype(np.float32)
train_Y = train_Y.astype(np.float32)
est = est.fit(x=train_X, y=train_Y, batch_size=100)

Both my input and my output have shape [number_of_samples, 7]. The same data runs perfectly with the scikit-learn random forest classifier. However, with tf.contrib.learn I get the following error when fitting the estimator:

INFO:tensorflow:Constructing forest with params = INFO:tensorflow:{'valid_leaf_threshold': 1, 'split_after_samples': 250, 'num_output_columns': 8, 'feature_bagging_fraction': 1.0, 'split_initializations_per_input': 1, 'bagged_features': None, 'min_split_samples': 5, 'max_nodes': 100, 'num_features': 7, 'num_trees': 30, 'num_splits_to_consider': 7, 'base_random_seed': 0, 'num_outputs': 1, 'dominate_fraction': 0.99, 'max_fertile_nodes': 50, 'bagged_num_features': 7, 'dominate_method': 'bootstrap', 'bagging_fraction': 1.0, 'regression': False, 'num_classes': 7}

ValueError Traceback (most recent call last) in <module>() ----> 1 est = est.fit(x=train_X, y=train_Y, batch_size = 100)

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in fit(self, x, y, batch_size, steps, max_steps, monitors) 1351
steps=steps, 1352 max_steps=max_steps, -> 1353 monitors=all_monitors) 1354 return self 1355

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/client/random_forest.pyc in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps) 262 elif input is not None: 263 self._estimator.fit(input_fn=input_fn, steps=steps, monitors=monitors, --> 264 max_steps=max_steps) 265 else: 266 raise ValueError('fit: Must provide either both x and y or input_fn.')

/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.pyc in new_func(*args, **kwargs) 278 _call_location(), decorator_utils.get_qualified_name(func), 279 func.__module__, arg_name, date, instructions) --> 280 return func(*args, **kwargs) 281 new_func.__doc__ = _add_deprecated_arg_notice_to_docstring( 282 func.__doc__, date, instructions)

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps) 424 hooks.append(basic_session_run_hooks.StopAtStepHook(steps, max_steps)) 425 --> 426 loss = self._train_model(input_fn=input_fn, hooks=hooks) 427 logging.info('Loss for final step: %s.', loss) 428 return self

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _train_model(self, input_fn, hooks) 932 features, labels = input_fn() 933 self._check_inputs(features, labels) --> 934 model_fn_ops = self._call_legacy_get_train_ops(features, labels) 935 ops.add_to_collection(ops.GraphKeys.LOSSES, model_fn_ops.loss) 936 all_hooks.extend([

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _call_legacy_get_train_ops(self, features, labels) 1001 1002 def _call_legacy_get_train_ops(self, features, labels): -> 1003 train_ops = self._get_train_ops(features, labels) 1004 if isinstance(train_ops, model_fn_lib.ModelFnOps): # Default signature 1005 return train_ops

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _get_train_ops(self, features, labels) 1160 ModelFnOps object. 1161 """ -> 1162 return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN) 1163 1164 def _get_eval_ops(self, features, labels, metrics):

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _call_model_fn(self, features, labels, mode) 1131 if 'model_dir' in model_fn_args: 1132 kwargs['model_dir'] = self.model_dir -> 1133 model_fn_results = self._model_fn(features, labels, **kwargs) 1134 1135 if isinstance(model_fn_results, model_fn_lib.ModelFnOps):

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/client/random_forest.pyc in _model_fn(features, labels) 128 if labels is not None: 129 training_loss = graph_builder.training_loss( --> 130 features, labels, name=LOSS_NAME) 131 training_graph = control_flow_ops.group( 132 graph_builder.training_graph(

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.pyc in training_loss(self, features, labels, name) 559 560 def training_loss(self, features, labels, name='training_loss'): --> 561 return array_ops.identity(self._get_loss(features, labels), name=name) 562 563

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.pyc in _get_loss(self, features, labels) 548 self._loss = control_flow_ops.cond( 549 self.average_size() > 0, _average_loss, --> 550 lambda: constant_op.constant(sys.maxsize, dtype=dtypes.float32)) 551 552 return self._loss

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.pyc in cond(pred, fn1, fn2, name) 1757 context_t = CondContext(pred, pivot_1, branch=1) 1758 context_t.Enter() -> 1759 orig_res, res_t = context_t.BuildCondBranch(fn1) 1760 context_t.ExitResult(res_t) 1761 context_t.Exit()

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.pyc in BuildCondBranch(self, fn) 1658 def BuildCondBranch(self, fn): 1659 """Add the subgraph defined by fn() to the graph.""" -> 1660 r = fn() 1661 original_r = r 1662 result = []

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.pyc in _average_loss() 544 probs = self.inference_graph(features) 545 return math_ops.reduce_sum(self.loss_fn( --> 546 probs, labels)) / math_ops.to_float(array_ops.shape(labels)[0]) 547 548 self._loss = control_flow_ops.cond(

/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.pyc in _loss(probs, targets) 508 def _loss(probs, targets): 509 if targets.get_shape().ndims > 1: --> 510 targets = array_ops.squeeze(targets, squeeze_dims=[1]) 511 one_hot_labels = array_ops.one_hot( 512 math_ops.to_int32(targets),

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.pyc in squeeze(input, axis, name, squeeze_dims) 2270 if np.isscalar(axis): 2271 axis = [axis] -> 2272 return gen_array_ops._squeeze(input, axis, name) 2273 2274

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.pyc in _squeeze(input, squeeze_dims, name) 3496 """ 3497 result = _op_def_lib.apply_op("Squeeze", input=input, -> 3498 squeeze_dims=squeeze_dims, name=name) 3499 return result 3500

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.pyc in apply_op(self, op_type_name, name, **keywords) 761 op = g.create_op(op_type_name, inputs, output_types, name=scope, 762 input_types=input_types, attrs=attr_protos, --> 763 op_def=op_def) 764 if output_structure: 765 outputs = op.outputs

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.pyc in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device) 2395
original_op=self._default_original_op, op_def=op_def) 2396 if compute_shapes: -> 2397 set_shapes_for_outputs(ret) 2398 self._add_op(ret) 2399
self._record_op_seen_by_control_dependencies(ret)

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.pyc in set_shapes_for_outputs(op) 1755 shape_func = _call_cpp_shape_fn_and_require_op 1756 -> 1757 shapes = shape_func(op) 1758 if shapes is None: 1759 raise RuntimeError(

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.pyc in call_with_requiring(op) 1705 1706 def call_with_requiring(op): -> 1707 return call_cpp_shape_fn(op, require_shape_fn=True) 1708 1709 _call_cpp_shape_fn_and_require_op = call_with_requiring

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.pyc in call_cpp_shape_fn(op, input_tensors_needed, input_tensors_as_shapes_needed, debug_python_shape_fn, require_shape_fn) 608 res = _call_cpp_shape_fn_impl(op, input_tensors_needed, 609 input_tensors_as_shapes_needed, --> 610 debug_python_shape_fn, require_shape_fn) 611 if not isinstance(res, dict): 612 # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.pyc in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, debug_python_shape_fn, require_shape_fn) 673 missing_shape_fn = True 674 else: --> 675 raise ValueError(err.message) 676 677 if missing_shape_fn:

ValueError: Can not squeeze dim[1], expected a dimension of 1, got 7 for 'cond/Squeeze' (op: 'Squeeze') with input shapes: [?,7].

Would you mind including some fake data which reproduces the issue in your question (i.e. make the code snippet self-contained)? — Allen Lavoie

Gilbert Hendry Gilbert Hendry · Accepted Answer · 2017-07-07T18:01:37

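Right now, TrainingLossForest assumes a classification problem: its default loss squeezes the labels down to a single column and one-hot encodes them, which is why labels of shape [?, 7] fail with the "Can not squeeze dim[1]" error above. I will fix this internally, but for now you can work around it with something like: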

from tensorflow.contrib.losses.python.losses import loss_ops

def _loss_fn(values, targets):
  """Return the mean squared error between the forest output and the targets.

  Args:
    values: the forest's inference output, passed as the first argument.
    targets: the training labels, passed as the second argument.
  """
  mse = loss_ops.mean_squared_error(values, targets)
  return mse

def _builder_class(params, **kwargs):
  """Create a TrainingLossForest that uses ``_loss_fn`` as its loss.

  Args:
    params: forest hyperparameters, forwarded as the first positional argument.
    **kwargs: any further keyword arguments, forwarded unchanged.

  Returns:
    The constructed ``tensor_forest.TrainingLossForest`` instance.
  """
  forest_graphs = tensor_forest.TrainingLossForest(
      params, loss_fn=_loss_fn, **kwargs)
  return forest_graphs

# Pass the factory function where a graph builder class is expected; the `...`
# presumably stands for the estimator's other arguments (e.g. params).
TensorForestEstimator(..., graph_builder_class=_builder_class)

You can substitute whatever loss function you want (MSE is used here as an example). Alternatively, simply using graph_builder_class=tensor_forest.RandomForestGraphs should suffice; in that case, however, the loss is the number of nodes, so training stops when the forest stops growing or reaches max_nodes, which may not be what you want.

TensorForest estimator raises ValueError when fitting the training data

1 Answer