After using datasets from Torchvision, I am trying to load in Pytorch a HDF5 file instead, with no success. I read I should define my own Dataset and Dataloader classes containing getitem to enable indexing and len to return the length of the dataset. Plus, that I should define transform because the default option of pytorch expect PIL images. I tried doing that, but I get the error "ValueError: not enough values to unpack (expected 2, got 1)" What am I doing wrong?
#PyTorch packages
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms, datasets
from torch import optim
from torch.autograd import Variable
from torch.utils import data
import h5py
import numpy as np
import matplotlib.pyplot as plt
torch.manual_seed(0)
#open training file
with h5py.File('train_catvnoncat.h5', 'r') as hdf:
ls = list(hdf.keys())
print('List of datasets in this file: \n', ls)
data = hdf.get('dataset')
dataset1 = np.array(data)
print('Shape of dataset1: \n', dataset1.shape)
length = len(h5py.File('train_catvnoncat.h5', 'r'))
print(length)
#image size (64,64,3) 64*64*3=12,288.
#209 training examples
#50 test examples
# Example of a picture
#image size (64,64,3) 64*64*3=12,288.
#209 training examples
#50 test examples
#Def the dataloader for h5 files:
class HDF5Dataset(Dataset):
def __init__(self, h5_path):
self.h5_path = '/Users/teff/Downloads/'
self.train = train_catvnoncat.h5(h5_path, 'r')
self.train = test_catvnoncat.h5(h5_path, 'r')
self.length = len(h5py.File(h5_path, 'r'))
# self.transform = transform_hdf5 #I need to define the "transformToTensor"
def __getitem__(self, index): #to enable indexing
record = self.train[str(index)]
return (
record['X'].value,
record['y'].value,
)
def __len__(self): #returns the lenght of the dataset
return self.length
train_loader = torch.utils.data.DataLoader('train_catvnoncat.h5', shuffle=True)
test_loader = torch.utils.data.DataLoader('test_catvnoncat.h5', shuffle=True)