I'm just getting started with PyTorch and wanted to run through a few toy problems. In the following case, I'm noticing a significant difference in how long it takes the model to train and then issue one batch of predictions.
This is the PyTorch implementation. On the GPU, it takes ~17 seconds on my machine. The same model on the CPU takes ~11 seconds.
class LR(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(2, 20)
        self.linear2 = torch.nn.Linear(20, 1)

    def forward(self, x):
        x = torch.nn.functional.relu(self.linear1(x))
        x = torch.nn.functional.relu(self.linear2(x))
        return x
def fit_torch(df_train, df_test):
    # Wrap the train/test DataFrames in DataLoaders with random sampling.
    sampler_tr = torch.utils.data.SubsetRandomSampler(df_train.index)
    train = torch.utils.data.DataLoader(
        torch.tensor(df_train.values, dtype=torch.float),
        batch_size=batch_size, sampler=sampler_tr)

    sampler_te = torch.utils.data.SubsetRandomSampler(df_test.index)
    test = torch.utils.data.DataLoader(
        torch.tensor(df_test.values, dtype=torch.float),
        batch_size=batch_size, sampler=sampler_te)

    model = LR()
    model = model.to(device)

    loss = torch.nn.MSELoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for _ in range(1000):
        for train_data in train:
            train_data = train_data.to(device)
            x_train = train_data[:, :2]
            y_train = train_data[:, 2]
            optim.zero_grad()
            pred = model(x_train)
            loss_val = loss(pred.squeeze(), y_train)
            loss_val.backward()
            optim.step()

    model.eval()
    with torch.no_grad():
        for test_data in test:
            test_data = test_data.to(device)
            pred = model(test_data[:, :2].float())
            break  # stop after one batch of predictions, mirroring the Keras call
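One thing I wanted to rule out (the sketch below is mine, not part of the timed code above): since the model is tiny, the per-batch train_data.to(device) copy and the Python-level DataLoader loop may dominate on the GPU. Here is a minimal variant that moves the whole training set to the device once and slices batches on-device; fit_torch_preloaded, x_all, and y_all are names I made up.

# Sketch (assumption, not the original timed code): pre-load the whole
# training set onto the GPU once, then index batches on-device. This
# removes the per-batch host-to-device copy inside the epoch loop.
def fit_torch_preloaded(df_train):
    data = torch.tensor(df_train.values, dtype=torch.float, device=device)
    x_all, y_all = data[:, :2], data[:, 2]

    model = LR().to(device)
    loss = torch.nn.MSELoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.001)

    model.train()
    n = len(data)
    for _ in range(1000):
        perm = torch.randperm(n, device=device)  # shuffle indices on-device
        for i in range(0, n, batch_size):
            idx = perm[i:i + batch_size]
            optim.zero_grad()
            pred = model(x_all[idx])
            loss_val = loss(pred.squeeze(), y_all[idx])
            loss_val.backward()
            optim.step()
    return model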
This is the Keras implementation. It takes approximately 9 seconds to run.
def fit_tf(df_train, df_test):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(20, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='relu'))
    model.compile(loss='mse', optimizer='adam')
    model.fit(
        df_train.values[:, :2],
        df_train.values[:, 2],
        batch_size=batch_size, epochs=1000, verbose=0)
    model.predict(df_test.iloc[:batch_size].values[:, :2])
The dataset and main functions.
import time

import numpy as np
import pandas as pd
import tensorflow as tf
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

device = torch.device('cuda:0')
scaler = MinMaxScaler()
batch_size = 64

def create_dataset():
    # 1,000 samples of z = 4x + 3y + 10, min-max scaled to [0, 1].
    dataset = []
    random_x = np.random.randint(10, 1000, 1000)
    random_y = np.random.randint(10, 1000, 1000)
    for x, y in zip(random_x, random_y):
        dataset.append((x, y, 4 * x + 3 * y + 10))
    np.random.shuffle(dataset)
    df = pd.DataFrame(dataset)
    df = pd.DataFrame(scaler.fit_transform(df))
    return df

def __main__():
    df = create_dataset()
    df_train, df_test = train_test_split(df)
    start_time = time.time()
    fit_torch(df_train.reset_index(drop=True), df_test.reset_index(drop=True))
    print(time.time() - start_time)
    start_time = time.time()
    fit_tf(df_train.reset_index(drop=True), df_test.reset_index(drop=True))
    print(time.time() - start_time)

if __name__ == '__main__':
    __main__()
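A caveat on the timing itself: CUDA ops run asynchronously, so time.time() can be read before queued GPU work has actually finished. A sketch (my own addition, reusing the device global above) of synchronizing before reading the clock:

# Sketch: synchronize the GPU before reading the clock, so queued CUDA
# work is actually finished when the elapsed time is computed.
start_time = time.time()
fit_torch(df_train.reset_index(drop=True), df_test.reset_index(drop=True))
if device.type == 'cuda':
    torch.cuda.synchronize(device)
print(time.time() - start_time)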
Comments:

Szymon Maszke: model.predict(df_test.iloc[:batch_size].values[:, :2]), hence you are using a single batch in testing in Keras, while using the whole test set in PyTorch. Also, what is your batch_size exactly? And what is your GPU?

BasioMeusPuga: batch_size is 64 (from the last segment of code). Also, the PyTorch evaluation loop breaks after the first batch. I also ran it without the testing and the difference persists regardless (8.1 s for Keras on GPU and 16.5 s for PyTorch on GPU). This is on a Quadro RTX 5000.
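Following up on the comment about the difference persisting without the testing step: a per-step micro-benchmark can help isolate the cost of a single forward/backward/update. This is only a sketch of mine, reusing the LR class and batch_size from above; step_time is a name I made up.

# Sketch: time one training step on CPU vs GPU for this tiny model, with a
# warm-up phase and explicit synchronization around the measured region.
def step_time(device, iters=1000):
    model = LR().to(device)
    loss_fn = torch.nn.MSELoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    x = torch.rand(batch_size, 2, device=device)
    y = torch.rand(batch_size, device=device)
    for _ in range(10):  # warm-up: first CUDA calls include one-time setup cost
        optim.zero_grad()
        loss_fn(model(x).squeeze(), y).backward()
        optim.step()
    if device.type == 'cuda':
        torch.cuda.synchronize(device)
    start = time.time()
    for _ in range(iters):
        optim.zero_grad()
        loss_fn(model(x).squeeze(), y).backward()
        optim.step()
    if device.type == 'cuda':
        torch.cuda.synchronize(device)
    return (time.time() - start) / iters

print('cpu :', step_time(torch.device('cpu')))
print('cuda:', step_time(torch.device('cuda:0')))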