I am new to PyTorch and, to get some hands-on experience with it, I am working on a simple text-generation project. I am taking the idea from this Keras tutorial and converting it to PyTorch: https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

I have 10 timesteps and 990 samples. Each of the 990 samples contains 10 values, the (scaled) integer indices of the characters in that sequence. The corresponding output sample is the same sequence shifted forward by one character (i.e. not including the first character); for example, if my input sample is "Hello Worl", the corresponding output is "ello World". My input size (number of features) is 1, since I want to feed in one character at a time, so my final input shape is (990, 10, 1). I then convert the output tensor into one-hot vectors, so its final shape is (9900, 42), where 42 is the number of elements in each one-hot vector. When I run the network, the output also has shape (9900, 42), so this is the output for all my timesteps, one row per timestep. But when I calculate the loss, I get this error:
multi-target not supported
Can you help me understand what I did wrong? Thanks. Below is my code:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

# `file` is the raw text and `unique` is the list of its distinct characters (loading code not shown)
# The original file contains 163780 characters; only the first 1000 characters are used here
# There are 42 unique characters
char2int = {char: value for (value, char) in enumerate(unique)}
int2char = {value: char for (value, char) in enumerate(unique)}
learning_rate = 0.01
num_epochs = 5
input_size = 1 #The number of input neurons (features) to our RNN
units = 100
num_layers = 2
num_classes = len(unique) #The number of output neurons
timesteps = 10
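# Build the sliding-window dataset: each input sample is a window of 10 consecutive
# characters, and its target is the same window shifted forward by one character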
datax = []
datay = []
for index in range(0, len(file) - timesteps, 1):
    prev_letters = file[index:index + timesteps]
    output = file[index + 1: index + timesteps + 1]
    # Convert the 10 previous characters to their integers and put them in a list. Append that list to the dataset
    datax.append([char2int[c] for c in prev_letters])
    datay.append([char2int[c] for c in output])
print('There are {} Sequences in the dataset'.format(len(datax)))
#There are 990 Sequences in the dataset
x = np.array(datax)
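# Scale the integer indices to the range [0, 1) by dividing by the vocabulary size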
x = x / float(len(unique))
x = torch.FloatTensor(x)
x = x.view(x.size(0), timesteps, input_size)
print(x.shape) #torch.Size([990, 10, 1])
y = torch.LongTensor(datay)
print(y.shape) #torch.Size([990, 10])
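# Convert the (990, 10) integer targets into one-hot vectors, one row per target character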
y_one_hot = torch.zeros(y.shape[0] * y.shape[1], num_classes)
index = y.long()
index = index.view(-1,1) #The expected shape for the scatter function
y_one_hot.scatter_(1, index, 1) # scatter_(dim, index, src): write a 1 at the class index in each row
y_one_hot = y_one_hot.view(-1, num_classes) # Make the tensor of shape(rows, cols)
y_one_hot = y_one_hot.long()
print(y_one_hot.shape)
#torch.Size([9900, 42])
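# Wrap the input and target tensors in Variables before feeding them to the network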
inputs = Variable(x)
labels = Variable(y_one_hot)
class TextGenerator(nn.Module):
    def __init__(self, input_size, units, num_layers, num_classes, timesteps):
        super(TextGenerator, self).__init__()
        self.units = units
        self.num_layers = num_layers
        self.timesteps = timesteps
        self.input_size = input_size
        # When batch_first=True, inputs are of shape (batch_size/samples, sequence_length, input_dimension)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=units, num_layers=num_layers, batch_first=True)
        # The output layer
        self.fc = nn.Linear(units, num_classes)

    def forward(self, x):
        # Initialize the hidden state
        h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.units))
        # Initialize the cell state
        c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.units))
        out, _ = self.lstm(x, (h0, c0))
        # Reshape the output from (samples, timesteps, output_features) to a shape appropriate for the FC layer
        out = out.contiguous().view(-1, self.units)
        out = self.fc(out)
        return out
net = TextGenerator(input_size,units,num_layers,num_classes,timesteps)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
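# Run a single forward pass over the full dataset, then compute the loss (the error is raised by loss_fn)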
out = net(inputs)
print(out.shape) # torch.Size([9900, 42])
loss_fn(out,labels)