PyTorch - Tips and Tricks

  1. ALWAYS OVERFIT A SINGLE BATCH
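A quick sanity check before training on the full dataset: take one fixed batch and train on it repeatedly. If the pipeline (model, loss, optimizer, data handling) is correct, the loss should drop close to zero; if it does not, there is a bug to find before any longer run. Below is a minimal sketch with a hypothetical toy model and a synthetic batch:

import torch
import torch.nn as nn

# Hypothetical toy model and a single synthetic batch, just to illustrate the idea
model = nn.Sequential(nn.Linear(2, 16), nn.ReLU(), nn.Linear(16, 3))
x = torch.randn(32, 2)          # one fixed batch of inputs
y = torch.randint(0, 3, (32,))  # one fixed batch of class targets

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

# Train on the same batch over and over; the loss should approach zero
for step in range(500):
    optimizer.zero_grad()
    loss = loss_fn(model(x), y)
    loss.backward()
    optimizer.step()
    if step % 100 == 0:
        print(f"step {step}: loss = {loss.item():.4f}")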

  2. Create Tensors directly on the target device (GPU)

    • Creating tensors on the CPU and moving them to the GPU afterwards is inefficient; allocate them directly on the target device instead.
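
For example (a minimal sketch; the tensor size is arbitrary):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Slower: the tensor is first allocated in CPU memory, then copied to the GPU
a = torch.rand(1000, 1000).to(device)

# Faster: the tensor is allocated directly on the target device
b = torch.rand(1000, 1000, device=device)
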
  3. Use Sequential Layers where possible

import torch.nn as nn

class ExampleModel(nn.Module):
    def __init__(self):
        super().__init__()

        input_size = 2
        output_size = 3
        hidden_size = 16

        # Define layers and activations explicitly
        self.input_layer = nn.Linear(input_size, hidden_size)
        self.input_activation = nn.ReLU()

        self.mid_layer = nn.Linear(hidden_size, hidden_size)
        self.mid_activation = nn.ReLU()

        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Pass through each layer and activation explicitly
        z = self.input_layer(x)
        z = self.input_activation(z)

        z = self.mid_layer(z)
        z = self.mid_activation(z)

        out = self.output_layer(z)
        return out

We can refactor the above code to use nn.Sequential instead:

import torch.nn as nn

class ExampleSequentialModel(nn.Module):
    def __init__(self):
        super().__init__()

        input_size = 2
        output_size = 3
        hidden_size = 16

        # Use nn.Sequential for defining the network
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        # Simply pass input through the sequential layers
        out = self.layers(x)
        return out
  4. Use model.eval() before model evaluation

    • model.eval() switches layers such as Dropout and BatchNorm to their evaluation behaviour; forgetting it can give misleading validation results.
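
A minimal sketch, using a hypothetical model and a stand-in validation batch:

import torch
import torch.nn as nn

# Hypothetical model containing layers whose behaviour differs between train and eval
model = nn.Sequential(
    nn.Linear(2, 16),
    nn.BatchNorm1d(16),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(16, 3)
)

x_val = torch.randn(8, 2)  # stand-in validation batch

model.eval()                 # switch Dropout/BatchNorm to evaluation behaviour
with torch.no_grad():        # also disable gradient tracking during evaluation
    predictions = model(x_val)

model.train()                # switch back before resuming training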