0
votes

When restoring a metagraph from disk, TensorFlow complains that it is attempting to create an iterator on the GPU from a handle defined on the CPU.

I'm trying to create a graph that uses tf.Data pipelines with a placeholder string to define the iterator (so that I can swap datasets). I can successfully create a graph which seemingly works on the GPU. However, after I restore the graph from disk, I get an error when trying to bind the dataset handle to the iterator (I think):

"Attempted create an iterator on device "...GPU:0" from handle defined on device "CPU:0" [[{{node IteratorFromStringHandleV2}} = IteratorFromStringHandleV2output_shapes=[....], output_types=[...], _device="...GPU:0"]]

I've tried explicitly defining where I would like objects placed with with tf.device("/GPU:0"): guards, specifically around where I create the dataset iterator, but that has a different error: "Cannot assign a device for operation TensorSliceDataset: Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available"

I found a similar problem here, When use Dataset API, got device placement error with tensorflow >= 1.11

I'm using tf-1.12 (and I cannot use a higher version, unfortunately).

# this is the code which creates the graph

import tensorflow as tf
import numpy as np

def _bytestring_feature(byteStringList):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=byteStringList));

def _int64_feature(intList):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=intList));

def _float_feature(intList):
    return tf.train.Feature(float_list=tf.train.FloatList(value=intList));

def toTFrecord(tfrec_filewriter, img, label):
    feature={
        'image': _bytestring_feature([img.tostring()]),
        'class': _int64_feature([label])
    }
    return tf.train.Example(features=tf.train.Features(feature=feature));

# generate data and save it to disk:

print('generating data')
nPartitions=5; # number of file partitions
for p in range(nPartitions):
    filename='./tfrec_'+'{:02d}-{}.tfrec'.format(p,nPartitions)
    with tf.python_io.TFRecordWriter(filename) as outFile:
        # generate some data for this partition
        for i in range(10):
            example=toTFrecord(outFile, (p*100+i)*np.ones((32,32), np.float32), (p*100+i));
            outFile.write(example.SerializeToString());
print('...complete')

# make the network
handle=tf.placeholder(tf.string, shape=[], name='handle')
with tf.device("/GPU:0"):
    iter=tf.data.Iterator.from_string_handle(handle, (tf.float32, tf.int64), (tf.TensorShape([tf.Dimension(None), tf.Dimension(32), tf.Dimension(32)]), tf.TensorShape([tf.Dimension(None)])))
    img,label=iter.get_next()
    network=tf.layers.conv2d(inputs=tf.reshape(img, [-1, tf.shape(img)[1], tf.shape(img)[2], 1]), filters=4, kernel_size=[3,3], dilation_rate=[1,1], padding='same', activation=None, name='networkConv')

with tf.Session(config=tf.ConfigProto(log_device_placement=True, allow_soft_placement=False)) as sess:
    sess.run(tf.global_variables_initializer())

    saver=tf.train.Saver(keep_checkpoint_every_n_hours=0.5, max_to_keep=1000)
    tf.add_to_collection('network', network)
    tf.add_to_collection('handle', handle)
    saver.save(sess, './demoSession')
#......
# and this is a separate process which restores the graph for training:

import tensorflow as tf
import numpy as np
import glob

def readTFrecord(example):
    features={
        'image': tf.io.FixedLenFeature([], tf.string),
        'class': tf.io.FixedLenFeature([], tf.int64)
    };
    example=tf.parse_example(example, features)
    return tf.reshape(tf.decode_raw(example['image'], tf.float32), [-1, 32, 32]), example['class'] 

filenames=glob.glob('./tfrec*.tfrec')
ds=tf.data.TFRecordDataset(filenames)
ds=ds.shuffle(5000).batch(4).prefetch(4).map(readTFrecord, num_parallel_calls=2)

with tf.Session(config=tf.ConfigProto(log_device_placement=True, allow_soft_placement=False)) as sess:
    new_saver=tf.train.import_meta_graph('demoSession.meta', clear_devices=False)
    new_saver.restore(sess, 'demoSession')
    network=tf.get_collection('network')[0]
    handle=tf.get_collection('handle')[0]

    #with tf.device("/GPU:0"):
    dsIterator=ds.make_initializable_iterator()
    dsHandle=sess.run(dsIterator.string_handle())

    sess.run(dsIterator.initializer)

    out=sess.run(network, feed_dict={handle:dsHandle})
    print(out.shape)

I expect it to work, Mr Bond. Unfortunately, it says that it cannot

tensorflow.python.framework.errors_impl.InvalidArgumentError: Attempted create an iterator on device "/job:localhost/replica:0/task:0/device:GPU:0" from handle defined on device "/job:localhost/replica:0/task:0/device:CPU:0" [[{{node IteratorFromStringHandleV2}} = IteratorFromStringHandleV2output_shapes=[[?,32,32], [?]], output_types=[DT_FLOAT, DT_INT64], _device="/job:localhost/replica:0/task:0/device:GPU:0"]]

1

1 Answers

0
votes

It looks like it might be that I need to add

iter=tf.data.Iterator.from_string_handle(...) saveable_obj = tf.contrib.data.make_saveable_from_iterator(iter) ... tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable_obj)

my initial test seems to work :-D

edit: actually, it progresses past the error I describe above, but it raises another error when I try to create a new save state, so I suspect it's not the actual answer =/