For reference, the two parts are: 1. 05. Going Modular: Part 1 (cell mode) - this notebook is run as a traditional Jupyter Notebook/Google Colab notebook and is a condensed version of notebook 04. 2. 05. Going Modular: Part 2 (script mode) - this notebook is the same as number 1 but with added functionality to turn each of the major sections into Python scripts, such as data_setup.py and train.py.
Why two parts?
Because sometimes the best way to learn something is to see how it differs from something else.
If you run each notebook side-by-side you’ll see how they differ and that’s where the key learnings are.
What is cell mode?
A cell mode notebook is a regular notebook run exactly how we’ve been running them through the course.
Some cells contain text and others contain code.
What’s the difference between this notebook (Part 1) and the script mode notebook (Part 2)?
This notebook, 05. PyTorch Going Modular: Part 1 (cell mode), runs a cleaned up version of the most useful code from section 04. PyTorch Custom Datasets.
Running this notebook end-to-end will result in recreating the image classification model we built in notebook 04 (TinyVGG) trained on images of pizza, steak and sushi.
The main difference between this notebook (Part 1) and Part 2 is that each section in Part 2 (script mode) has an extra subsection (e.g. 2.1, 3.1, 4.1) for turning cell code into script code.
Now we’ll turn the image dataset into PyTorch Datasets and DataLoaders.
from torchvision import datasets, transforms

# Minimal preprocessing pipeline: resize every image to 64x64, then
# convert it to a float tensor.
data_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# ImageFolder infers the class labels from the subdirectory names.
train_data = datasets.ImageFolder(
    root=train_dir,                # target folder of images
    transform=data_transform,      # transforms applied to the images
    target_transform=None,         # transforms applied to the labels (none needed)
)
test_data = datasets.ImageFolder(root=test_dir, transform=data_transform)

print(f"Train data:\n{train_data}\nTest data:\n{test_data}")
Train data:
Dataset ImageFolder
Number of datapoints: 225
Root location: data/pizza_steak_sushi/train
StandardTransform
Transform: Compose(
Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=None)
ToTensor()
)
Test data:
Dataset ImageFolder
Number of datapoints: 75
Root location: data/pizza_steak_sushi/test
StandardTransform
Transform: Compose(
Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=None)
ToTensor()
)
# Class names as a list, in label-index order (inferred from folder names).
class_names = train_data.classes
class_names
['pizza', 'steak', 'sushi']
# Class names can also be retrieved as a mapping of name -> label index.
class_dict = train_data.class_to_idx
class_dict
{'pizza': 0, 'steak': 1, 'sushi': 2}
# Number of samples in each split.
len(train_data), len(test_data)
(225, 75)
from torch.utils.data import DataLoader

# Wrap the Datasets in DataLoaders so we can iterate over batches.
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=1,    # samples per batch
    num_workers=1,   # subprocesses used for data loading (higher = more)
    shuffle=True,    # reshuffle the training data every epoch
)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=1,
    num_workers=1,
    shuffle=False,   # evaluation data doesn't usually need shuffling
)
train_dataloader, test_dataloader
(<torch.utils.data.dataloader.DataLoader at 0x7f853747bbe0>,
<torch.utils.data.dataloader.DataLoader at 0x7f853747b550>)
# Pull one batch and inspect its shape. With batch_size=1 above, the
# leading dimension is 1 — change batch_size and re-run to see the effect.
img, label = next(iter(train_dataloader))
print(f"Image shape: {img.shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label.shape}")
import torch
from torch import nn


class TinyVGG(nn.Module):
    """Creates the TinyVGG architecture.

    Replicates the TinyVGG architecture from the CNN explainer website in
    PyTorch. See the original architecture here:
    https://poloclub.github.io/cnn-explainer/

    Args:
      input_shape: An integer indicating number of input channels.
      hidden_units: An integer indicating number of hidden units between layers.
      output_shape: An integer indicating number of output units.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()
        # First conv block: two 3x3 "valid" convolutions followed by 2x2 max-pooling.
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,   # 3x3 square kernel slid over the image
                      stride=1,        # default
                      padding=0),      # no padding ("valid" convolution)
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # stride defaults to kernel_size
        )
        # Second conv block: identical structure, channel count unchanged.
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=0),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Classifier head: flatten the feature map and project to class logits.
        # Each conv block shrinks the spatial size; for 64x64 inputs the feature
        # map entering the classifier is hidden_units x 13 x 13.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units * 13 * 13,
                      out_features=output_shape),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Chain the blocks in one expression to leverage operator fusion.
        return self.classifier(self.conv_block_2(self.conv_block_1(x)))
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed the RNG so the freshly initialised weights are reproducible.
torch.manual_seed(42)
model_0 = TinyVGG(
    input_shape=3,  # number of color channels (3 for RGB)
    hidden_units=10,
    output_shape=len(train_data.classes),
).to(device)
model_0
To test our model let’s do a single forward pass (pass a sample batch from the training set through our model).
# 1. Grab one batch of images and labels from the DataLoader.
img_batch, label_batch = next(iter(train_dataloader))

# 2. Take the first image and add a batch dimension so its shape fits the model.
img_single, label_single = img_batch[0].unsqueeze(dim=0), label_batch[0]
print(f"Single image shape: {img_single.shape}\n")

# 3. Forward pass on the single image in eval/inference mode (no gradients).
model_0.eval()
with torch.inference_mode():
    pred = model_0(img_single.to(device))

# 4. Convert model logits -> prediction probabilities -> predicted label.
print(f"Output logits:\n{pred}\n")
print(f"Output prediction probabilities:\n{torch.softmax(pred, dim=1)}\n")
print(f"Output prediction label:\n{torch.argmax(torch.softmax(pred, dim=1), dim=1)}\n")
print(f"Actual label:\n{label_single}")
from typing import Tuple


def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
    """Trains a PyTorch model for a single epoch.

    Puts the model into training mode, then for every batch runs the
    forward pass, computes the loss, and takes an optimizer step.

    Args:
      model: A PyTorch model to be trained.
      dataloader: A DataLoader instance for the model to be trained on.
      loss_fn: A PyTorch loss function to minimize.
      optimizer: A PyTorch optimizer to help minimize the loss function.
      device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
      A tuple of training loss and training accuracy metrics, in the form
      (train_loss, train_accuracy). For example: (0.1112, 0.8743).
    """
    model.train()

    # Running totals, averaged over batches at the end.
    total_loss, total_acc = 0.0, 0.0

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        # Forward pass and loss.
        logits = model(X)
        loss = loss_fn(logits, y)
        total_loss += loss.item()

        # Backpropagate and update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Batch accuracy: fraction of correct argmax predictions.
        predicted = torch.argmax(torch.softmax(logits, dim=1), dim=1)
        total_acc += (predicted == y).sum().item() / len(logits)

    # Average the accumulated metrics per batch.
    num_batches = len(dataloader)
    return total_loss / num_batches, total_acc / num_batches
Now we’ll do test_step().
def test_step(model: torch.nn.Module, dataloader: torch.utils.data.DataLoader, loss_fn: torch.nn.Module, device: torch.device) -> Tuple[float, float]:"""Tests a PyTorch model for a single epoch. Turns a target PyTorch model to "eval" mode and then performs a forward pass on a testing dataset. Args: model: A PyTorch model to be tested. dataloader: A DataLoader instance for the model to be tested on. loss_fn: A PyTorch loss function to calculate loss on the test data. device: A target device to compute on (e.g. "cuda" or "cpu"). Returns: A tuple of testing loss and testing accuracy metrics. In the form (test_loss, test_accuracy). For example: (0.0223, 0.8985) """# Put model in eval mode model.eval() # Setup test loss and test accuracy values test_loss, test_acc =0, 0# Turn on inference context managerwith torch.inference_mode():# Loop through DataLoader batchesfor batch, (X, y) inenumerate(dataloader):# Send data to target device X, y = X.to(device), y.to(device)# 1. Forward pass test_pred_logits = model(X)# 2. Calculate and accumulate loss loss = loss_fn(test_pred_logits, y) test_loss += loss.item()# Calculate and accumulate accuracy test_pred_labels = test_pred_logits.argmax(dim=1) test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))# Adjust metrics to get average loss and accuracy per batch test_loss = test_loss /len(dataloader) test_acc = test_acc /len(dataloader)return test_loss, test_acc
And we’ll combine train_step() and test_step() into train().
from typing import Dict, List

from tqdm.auto import tqdm


def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch model through train_step() and test_step()
    functions for a number of epochs, training and testing the model in the
    same epoch loop, and printing and storing evaluation metrics throughout.

    Args:
      model: A PyTorch model to be trained and tested.
      train_dataloader: A DataLoader instance for the model to be trained on.
      test_dataloader: A DataLoader instance for the model to be tested on.
      optimizer: A PyTorch optimizer to help minimize the loss function.
      loss_fn: A PyTorch loss function to calculate loss on both datasets.
      epochs: An integer indicating how many epochs to train for.
      device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
      A dictionary of training and testing loss as well as training and
      testing accuracy metrics. Each metric has a value in a list for each
      epoch, in the form:
        {train_loss: [...], train_acc: [...],
         test_loss: [...], test_acc: [...]}
      For example, if training for epochs=2:
        {train_loss: [2.0616, 1.0537], train_acc: [0.3945, 0.3945],
         test_loss: [1.2641, 1.5706], test_acc: [0.3400, 0.2973]}
    """
    # Per-epoch metric history, filled in as we go.
    results: Dict[str, List[float]] = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": [],
    }

    # One training pass and one evaluation pass per epoch, with a progress bar.
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        # Report this epoch's metrics.
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # Record this epoch's metrics.
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    return results
5. Creating a function to save the model
Let’s setup a function to save our model to a directory.
from pathlib import Path


def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str) -> None:
    """Saves a PyTorch model's state_dict to a target directory.

    Args:
      model: A target PyTorch model to save.
      target_dir: A directory for saving the model to (created if missing).
      model_name: A filename for the saved model. Should include either
        ".pth" or ".pt" as the file extension.

    Raises:
      ValueError: If model_name does not end with ".pth" or ".pt".

    Example usage:
      save_model(model=model_0,
                 target_dir="models",
                 model_name="05_going_modular_tingvgg_model.pth")
    """
    # Create the target directory (and any missing parents).
    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True, exist_ok=True)

    # Validate the extension with a real exception: `assert` is stripped
    # when Python runs with -O, so it must not be used for input validation.
    if not model_name.endswith((".pth", ".pt")):
        raise ValueError("model_name should end with '.pt' or '.pth'")
    model_save_path = target_dir_path / model_name

    # Persist only the learned parameters (state_dict), not the whole object.
    print(f"[INFO] Saving model to: {model_save_path}")
    torch.save(obj=model.state_dict(), f=model_save_path)
6. Train, evaluate and save the model
Let’s leverage the functions we’ve got above to train, test and save a model to file.
from timeit import default_timer as timer

# Seed both CPU and GPU RNGs for reproducible weight init and shuffling.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Number of training epochs.
NUM_EPOCHS = 5

# Recreate a fresh TinyVGG instance on the target device.
model_0 = TinyVGG(
    input_shape=3,  # number of color channels (3 for RGB)
    hidden_units=10,
    output_shape=len(train_data.classes),
).to(device)

# Multi-class classification loss and the Adam optimizer.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_0.parameters(), lr=0.001)

# Time the full training run.
start_time = timer()
model_0_results = train(model=model_0,
                        train_dataloader=train_dataloader,
                        test_dataloader=test_dataloader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=NUM_EPOCHS,
                        device=device)
end_time = timer()
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

# Persist the trained weights to disk.
save_model(model=model_0,
           target_dir="models",
           model_name="05_going_modular_cell_mode_tinyvgg_model.pth")