I can't figure out what I am doing wrong when training an RNN. I am trying to train an RNN to compute the AND operation over sequences (to learn how it works on a simple task). But my network is not learning: the loss stays the same and the model can't even overfit the training data. Can you please help me find the problem?
Data I am using:
# Toy dataset: variable-length sequences of 0/1 values.
data = [
    [1, 1, 1, 1, 0, 0, 1, 1, 1],
    [1, 1, 1, 1],
    [0, 0, 1, 1],
    [0, 0, 0, 0, 0, 0, 0],
    [1, 1, 1, 1, 1, 1, 1],
    [1, 1],
    [0],
    [1],
    [1, 0],
]

# One label per sequence: the logical AND of its elements
# (1 only when every element is 1, otherwise 0).
labels = [int(all(sequence)) for sequence in data]
Code for NN:
class AndRNN(nn.Module):
    """Recurrent classifier that maps a sequence to two class scores.

    A stacked ``nn.RNN`` reads the sequence; the RNN output at the last time
    step is passed through two linear layers that emit 2 logits per batch
    element.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size: int = 1, hidden_size: int = 10,
                 num_layers: int = 5) -> None:
        super().__init__()
        # Kept on the instance so initHidden() always matches the RNN's
        # configuration instead of repeating the sizes by hand.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers)
        # NOTE(review): there is no activation between the two Linear layers,
        # so together they are equivalent to a single affine map.
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 30),
            nn.Linear(30, 2),
        )

    def forward(self, input, hidden=None):
        """Run the RNN over ``input`` and classify from the last time step.

        Args:
            input: Tensor laid out as (seq_len, batch, input_size).
            hidden: Initial hidden state laid out as
                (num_layers, batch, hidden_size). When ``None``, ``nn.RNN``
                starts from an all-zero state.

        Returns:
            A ``(logits, hidden)`` tuple: logits of shape (batch, 2) and the
            final hidden state returned by the RNN.
        """
        x, hidden = self.rnn(input, hidden)
        # x[-1] selects the RNN output at the final time step.
        return self.fc(x[-1]), hidden

    def initHidden(self, batch_size: int = 1):
        """Return an all-zero initial hidden state for ``batch_size`` sequences."""
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)
Training loop:
# Train `net` on one sequence at a time and report loss/accuracy per epoch.
# NOTE(review): `net` is not defined in this snippet; presumably it is an
# AndRNN instance created earlier -- confirm.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for e in range(20):
    # Reset the counters every epoch; a single running `correct` across all
    # epochs makes the reported accuracy grow past 1.
    correct = 0
    total_loss = 0.0
    for sequence, target in zip(data, labels):
        # Lay the sequence out as (seq_len, batch=1, features=1).
        tensor = torch.FloatTensor(sequence).view(-1, 1, 1)
        label = torch.LongTensor([target])
        hidden = net.initHidden()
        optimizer.zero_grad()
        out, hidden = net(tensor, hidden)
        # Predicted class = index of the larger of the two logits.
        if out.argmax(dim=1).item() == target:
            correct += 1
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()
        # .item() extracts the Python float; indexing a 0-dim tensor with
        # [0] raises on current PyTorch.
        total_loss += loss.item()
    # Mean loss and accuracy over the samples seen in this epoch.
    print("Loss ", total_loss / len(data), "Accuracy ", correct / len(data))
The shape of `tensor` will be (sequence_len, 1, 1), where the middle dimension is the batch size; this is the layout the PyTorch docs specify for RNN input.