PyTorch - Tips and Tricks
-
ALWAYS OVERFIT A SINGLE BATCH
-
Create Tensors directly on the target device (GPU)
- Creating tensors on the CPU and then moving them to the GPU is inefficient; create them directly on the target device instead.
-
Use nn.Sequential where possible
import torch.nn as nn
class ExampleModel(nn.Module):
    """Toy MLP (2 -> 16 -> 16 -> 3) with every layer held as a named attribute."""

    def __init__(self):
        super().__init__()
        in_features, out_features, width = 2, 3, 16
        # Submodules are registered in the same order as the explicit
        # definition, so seeded parameter initialization is reproducible.
        self.input_layer = nn.Linear(in_features, width)
        self.input_activation = nn.ReLU()
        self.mid_layer = nn.Linear(width, width)
        self.mid_activation = nn.ReLU()
        self.output_layer = nn.Linear(width, out_features)

    def forward(self, x):
        """Apply each layer/activation to *x* in registration order."""
        stages = (
            self.input_layer,
            self.input_activation,
            self.mid_layer,
            self.mid_activation,
            self.output_layer,
        )
        for stage in stages:
            x = stage(x)
        return x
We can refactor the above code to use an nn.Sequential container instead:
import torch.nn as nn
class ExampleSequentialModel(nn.Module):
    """The same toy MLP (2 -> 16 -> 16 -> 3), expressed as one nn.Sequential."""

    def __init__(self):
        super().__init__()
        in_features, out_features, width = 2, 3, 16
        # The module list mirrors the explicit version layer-for-layer,
        # so both variants initialize identically under the same seed.
        pipeline = [
            nn.Linear(in_features, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, out_features),
        ]
        self.layers = nn.Sequential(*pipeline)

    def forward(self, x):
        """Run *x* through the sequential pipeline and return the result."""
        return self.layers(x)
- Use model.eval() before model evaluation