In PyTorch, the output of an LSTM contains the hidden state of the last layer of the stack at every time step, while the returned hidden state (h_n) and cell state (c_n) contain the final hidden state and cell state of every layer in the stack.
import torch
import torch.nn as nn

torch.manual_seed(1)
inputs = [torch.randn(1, 3) for _ in range(5)]  # a sequence of 5 steps, each of shape (1, 3): batch size 1, input size 3
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))  # initial h and c of shape (1, 1, 3) -> (num_layers * num_directions, batch, hidden_size)
# The batch size of h and c must match the batch size of the input, and their
# hidden size must match the hidden_size the LSTM was constructed with.
lstm = nn.LSTM(3, 3)  # input_size=3, hidden_size=3
for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print('out:', out)
    print('hidden:', hidden)
Output
out: tensor([[[-0.1124, -0.0653, 0.2808]]], grad_fn=<StackBackward>)
hidden: (tensor([[[-0.1124, -0.0653, 0.2808]]], grad_fn=<StackBackward>), tensor([[[-0.2883, -0.2846, 2.0720]]], grad_fn=<StackBackward>))
out: tensor([[[ 0.1675, -0.0376, 0.4402]]], grad_fn=<StackBackward>)
hidden: (tensor([[[ 0.1675, -0.0376, 0.4402]]], grad_fn=<StackBackward>), tensor([[[ 0.4394, -0.1226, 1.5611]]], grad_fn=<StackBackward>))
out: tensor([[[0.3699, 0.0150, 0.1429]]], grad_fn=<StackBackward>)
hidden: (tensor([[[0.3699, 0.0150, 0.1429]]], grad_fn=<StackBackward>), tensor([[[0.8432, 0.0618, 0.9413]]], grad_fn=<StackBackward>))
out: tensor([[[0.1795, 0.0296, 0.2957]]], grad_fn=<StackBackward>)
hidden: (tensor([[[0.1795, 0.0296, 0.2957]]], grad_fn=<StackBackward>), tensor([[[0.4541, 0.1121, 0.9320]]], grad_fn=<StackBackward>))
out: tensor([[[0.1365, 0.0596, 0.3931]]], grad_fn=<StackBackward>)
hidden: (tensor([[[0.1365, 0.0596, 0.3931]]], grad_fn=<StackBackward>), tensor([[[0.3430, 0.1948, 1.0255]]], grad_fn=<StackBackward>))
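Since out here is just the last (and only) layer's hidden state at each step, out at the final step matches h_n exactly. A minimal sketch of that check, with the same toy sizes as above but feeding the whole 5-step sequence in one call instead of stepping:

import torch
import torch.nn as nn

torch.manual_seed(1)
lstm = nn.LSTM(3, 3)
seq = torch.randn(5, 1, 3)   # (seq_len, batch, input_size)
h0 = torch.randn(1, 1, 3)    # (num_layers * num_directions, batch, hidden_size)
c0 = torch.randn(1, 1, 3)
out, (h_n, c_n) = lstm(seq, (h0, c0))
print(out.shape)  # torch.Size([5, 1, 3]) - one hidden state per time step
print(h_n.shape)  # torch.Size([1, 1, 3]) - final hidden state only
print(torch.allclose(out[-1], h_n[0]))  # True: last step of out == final hidden state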
Multi-Layered LSTM
import torch
import torch.nn as nn

torch.manual_seed(1)
num_layers = 2
inputs = [torch.randn(1, 3) for _ in range(5)]
hidden = (torch.randn(num_layers, 1, 3),
          torch.randn(num_layers, 1, 3))  # (num_layers * num_directions, batch, hidden_size)
lstm = nn.LSTM(input_size=3, hidden_size=3, num_layers=num_layers)
for i in inputs:
    # Step through the sequence one element at a time.
    # After each step, hidden contains the updated hidden and cell states.
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print('out:', out)
    print('hidden:', hidden)
Output
out: tensor([[[-0.0819, 0.1214, -0.2586]]], grad_fn=<StackBackward>)
hidden: (tensor([[[-0.2625, 0.4415, -0.4917]],
[[-0.0819, 0.1214, -0.2586]]], grad_fn=<StackBackward>), tensor([[[-2.5740, 0.7832, -0.9211]],
[[-0.2803, 0.5175, -0.5330]]], grad_fn=<StackBackward>))
out: tensor([[[-0.1298, 0.2797, -0.0882]]], grad_fn=<StackBackward>)
hidden: (tensor([[[-0.3818, 0.3306, -0.3020]],
[[-0.1298, 0.2797, -0.0882]]], grad_fn=<StackBackward>), tensor([[[-2.3980, 0.6347, -0.6592]],
[[-0.3643, 0.9301, -0.1326]]], grad_fn=<StackBackward>))
out: tensor([[[-0.1630, 0.3187, 0.0728]]], grad_fn=<StackBackward>)
hidden: (tensor([[[-0.5612, 0.3134, -0.0782]],
[[-0.1630, 0.3187, 0.0728]]], grad_fn=<StackBackward>), tensor([[[-1.7555, 0.6882, -0.3575]],
[[-0.4571, 1.2094, 0.1061]]], grad_fn=<StackBackward>))
out: tensor([[[-0.1723, 0.3274, 0.1546]]], grad_fn=<StackBackward>)
hidden: (tensor([[[-0.5112, 0.1597, -0.0901]],
[[-0.1723, 0.3274, 0.1546]]], grad_fn=<StackBackward>), tensor([[[-1.4417, 0.5892, -0.2489]],
[[-0.4940, 1.3620, 0.2255]]], grad_fn=<StackBackward>))
out: tensor([[[-0.1847, 0.2968, 0.1333]]], grad_fn=<StackBackward>)
hidden: (tensor([[[-0.3256, 0.3217, -0.1899]],
[[-0.1847, 0.2968, 0.1333]]], grad_fn=<StackBackward>), tensor([[[-1.7925, 0.6096, -0.4432]],
[[-0.5147, 1.4031, 0.2014]]], grad_fn=<StackBackward>))
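With two layers, out still carries only the top layer, while h_n stacks the final hidden state of each layer; its last row is the top layer, so it matches out at the final step. A minimal sketch of that check, again with the toy sizes above and the whole sequence in one call:

import torch
import torch.nn as nn

torch.manual_seed(1)
num_layers = 2
lstm = nn.LSTM(input_size=3, hidden_size=3, num_layers=num_layers)
seq = torch.randn(5, 1, 3)
h0 = torch.randn(num_layers, 1, 3)
c0 = torch.randn(num_layers, 1, 3)
out, (h_n, c_n) = lstm(seq, (h0, c0))
print(out.shape)  # torch.Size([5, 1, 3]) - top layer only, every time step
print(h_n.shape)  # torch.Size([2, 1, 3]) - one final state per layer
print(torch.allclose(out[-1], h_n[-1]))  # True: h_n[-1] is the top layer's last step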
Bi-Directional Multi-Layered LSTM
import torch
import torch.nn as nn

torch.manual_seed(1)
num_layers = 2
is_bidirectional = True
inputs = [torch.randn(1, 3) for _ in range(5)]
hidden = (torch.randn(4, 1, 3),
          torch.randn(4, 1, 3))  # 4 = num_layers * num_directions = 2 * 2
lstm = nn.LSTM(input_size=3, hidden_size=3, num_layers=num_layers,
               bidirectional=is_bidirectional)
for i in inputs:
    # Step through the sequence one element at a time.
    # After each step, hidden contains the updated hidden and cell states.
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print('out:', out)
    print('hidden:', hidden)
# out dim per step -> (seq_len, batch, num_directions * hidden_size) -> (1, 1, 2 * 3)
# hidden dim       -> (num_layers * num_directions, batch, hidden_size) -> (2 * 2, 1, 3)
# cell state dim   -> (num_layers * num_directions, batch, hidden_size) -> (2 * 2, 1, 3)
Output
out: tensor([[[-0.4620, 0.1115, -0.1087, 0.1646, 0.0173, -0.2196]]],
grad_fn=<CatBackward>)
hidden: (tensor([[[ 0.5187, 0.2656, -0.2543]],
[[ 0.4175, 0.0539, 0.0633]],
[[-0.4620, 0.1115, -0.1087]],
[[ 0.1646, 0.0173, -0.2196]]], grad_fn=<StackBackward>), tensor([[[ 1.1546, 0.4012, -0.4119]],
[[ 0.7999, 0.2632, 0.2587]],
[[-1.4196, 0.2075, -0.3148]],
[[ 0.6605, 0.0243, -0.5783]]], grad_fn=<StackBackward>))
out: tensor([[[-0.1860, 0.1359, -0.2719, 0.0815, 0.0061, -0.0980]]],
grad_fn=<CatBackward>)
hidden: (tensor([[[ 0.2945, 0.0842, -0.1580]],
[[ 0.2766, -0.1873, 0.2416]],
[[-0.1860, 0.1359, -0.2719]],
[[ 0.0815, 0.0061, -0.0980]]], grad_fn=<StackBackward>), tensor([[[ 0.5453, 0.1281, -0.2497]],
[[ 0.9706, -0.3592, 0.4834]],
[[-0.3706, 0.2681, -0.6189]],
[[ 0.2029, 0.0121, -0.3028]]], grad_fn=<StackBackward>))
out: tensor([[[ 0.1095, 0.1520, -0.3238, 0.0283, 0.0387, -0.0820]]],
grad_fn=<CatBackward>)
hidden: (tensor([[[ 0.1427, 0.0859, -0.2926]],
[[ 0.1536, -0.2343, 0.0727]],
[[ 0.1095, 0.1520, -0.3238]],
[[ 0.0283, 0.0387, -0.0820]]], grad_fn=<StackBackward>), tensor([[[ 0.2386, 0.1646, -0.4102]],
[[ 0.2636, -0.4828, 0.1889]],
[[ 0.1967, 0.2848, -0.7155]],
[[ 0.0735, 0.0702, -0.2859]]], grad_fn=<StackBackward>))
out: tensor([[[ 0.2346, 0.1576, -0.4006, -0.0053, 0.0256, -0.0653]]],
grad_fn=<CatBackward>)
hidden: (tensor([[[ 0.1706, 0.0147, -0.0341]],
[[ 0.1835, -0.3951, 0.2506]],
[[ 0.2346, 0.1576, -0.4006]],
[[-0.0053, 0.0256, -0.0653]]], grad_fn=<StackBackward>), tensor([[[ 0.3422, 0.0269, -0.0475]],
[[ 0.4235, -0.9144, 0.5655]],
[[ 0.4589, 0.2807, -0.8332]],
[[-0.0133, 0.0507, -0.1996]]], grad_fn=<StackBackward>))
out: tensor([[[ 0.2774, 0.1639, -0.4460, -0.0228, 0.0086, -0.0369]]],
grad_fn=<CatBackward>)
hidden: (tensor([[[ 0.2147, -0.0191, 0.0677]],
[[ 0.2516, -0.4591, 0.3327]],
[[ 0.2774, 0.1639, -0.4460]],
[[-0.0228, 0.0086, -0.0369]]], grad_fn=<StackBackward>), tensor([[[ 0.4414, -0.0299, 0.0889]],
[[ 0.6360, -1.2360, 0.7229]],
[[ 0.5692, 0.2843, -0.9375]],
[[-0.0569, 0.0177, -0.1039]]], grad_fn=<StackBackward>))
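In the bi-directional case it is easy to mix up which half of out belongs to which direction. A minimal sketch (toy sizes as above; the names forward_last and backward_last are just for illustration) that views h_n with separate layer and direction axes, then checks where each direction's final state sits in out:

import torch
import torch.nn as nn

torch.manual_seed(1)
num_layers, num_directions, batch, hidden_size = 2, 2, 1, 3
lstm = nn.LSTM(input_size=3, hidden_size=hidden_size,
               num_layers=num_layers, bidirectional=True)
seq = torch.randn(5, batch, 3)
out, (h_n, c_n) = lstm(seq)  # initial states default to zeros

# h_n is (num_layers * num_directions, batch, hidden_size); view it as
# (num_layers, num_directions, batch, hidden_size) to index by direction.
h_n = h_n.view(num_layers, num_directions, batch, hidden_size)
forward_last = h_n[-1, 0]   # top layer, forward direction
backward_last = h_n[-1, 1]  # top layer, backward direction

# In out, the first hidden_size features are the forward stream and the rest
# are the backward stream; the backward stream's final state is at time step 0.
print(torch.allclose(out[-1, :, :hidden_size], forward_last))   # True
print(torch.allclose(out[0, :, hidden_size:], backward_last))   # True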