diff --git a/beginner_source/introyt/introyt1_tutorial.py b/beginner_source/introyt/introyt1_tutorial.py
index c01befb40c..0962334307 100644
--- a/beginner_source/introyt/introyt1_tutorial.py
+++ b/beginner_source/introyt/introyt1_tutorial.py
@@ -18,21 +18,27 @@
+.. note::
+ The video above was recorded with an earlier version of PyTorch.
+ The code samples in this tutorial have been updated to use modern
+ PyTorch APIs. Where the video differs from the code
+ below, follow the written tutorial.
+
PyTorch Tensors
---------------
Follow along with the video beginning at `03:50 `__.
-First, we’ll import pytorch.
+First, we'll import PyTorch.
"""
import torch
######################################################################
-# Let’s see a few basic tensor manipulations. First, just a few of the
+# Let's see a few basic tensor manipulations. First, just a few of the
# ways to create tensors:
-#
+#
z = torch.zeros(5, 3)
print(z)
@@ -43,10 +49,10 @@
# Above, we create a 5x3 matrix filled with zeros, and query its datatype
# to find out that the zeros are 32-bit floating point numbers, which is
# the default PyTorch.
-#
+#
# What if you wanted integers instead? You can always override the
# default:
-#
+#
i = torch.ones((5, 3), dtype=torch.int16)
print(i)
@@ -55,10 +61,10 @@
######################################################################
# You can see that when we do change the default, the tensor helpfully
# reports this when printed.
-#
-# It’s common to initialize learning weights randomly, often with a
+#
+# It's common to initialize learning weights randomly, often with a
# specific seed for the PRNG for reproducibility of results:
-#
+#
torch.manual_seed(1729)
r1 = torch.rand(2, 2)
@@ -79,7 +85,7 @@
# PyTorch tensors perform arithmetic operations intuitively. Tensors of
# similar shapes may be added, multiplied, etc. Operations with scalars
# are distributed over the tensor:
-#
+#
ones = torch.ones(2, 3)
print(ones)
@@ -98,8 +104,8 @@
######################################################################
-# Here’s a small sample of the mathematical operations available:
-#
+# Here's a small sample of the mathematical operations available:
+#
r = (torch.rand(2, 2) - 0.5) * 2 # values between -1 and 1
print('A random matrix, r:')
@@ -115,9 +121,9 @@
# ...and linear algebra operations like determinant and singular value decomposition
print('\nDeterminant of r:')
-print(torch.det(r))
+print(torch.linalg.det(r))
print('\nSingular value decomposition of r:')
-print(torch.svd(r))
+print(torch.linalg.svd(r))
# ...and statistical and aggregate operations:
print('\nAverage and standard deviation of r:')
@@ -127,16 +133,22 @@
##########################################################################
-# There’s a good deal more to know about the power of PyTorch tensors,
-# including how to set them up for parallel computations on GPU - we’ll be
+# There's a good deal more to know about the power of PyTorch tensors,
+# including how to set them up for parallel computations on GPU - we'll be
# going into more depth in another video.
-#
+#
+# .. note::
+# Linear algebra operations in PyTorch live in the ``torch.linalg``
+# module. Functions like ``torch.linalg.det()``, ``torch.linalg.svd()``,
+# and ``torch.linalg.eigh()`` follow NumPy conventions and are the
+# recommended API for new code.
+#
# PyTorch Models
# --------------
#
# Follow along with the video beginning at `10:00 `__.
#
-# Let’s talk about how we can express models in PyTorch
+# Let's talk about how we can express models in PyTorch
#
import torch # for all things PyTorch
@@ -149,33 +161,33 @@
# :alt: le-net-5 diagram
#
# *Figure: LeNet-5*
-#
+#
# Above is a diagram of LeNet-5, one of the earliest convolutional neural
# nets, and one of the drivers of the explosion in Deep Learning. It was
# built to read small images of handwritten numbers (the MNIST dataset),
# and correctly classify which digit was represented in the image.
-#
-# Here’s the abridged version of how it works:
-#
+#
+# Here's the abridged version of how it works:
+#
# - Layer C1 is a convolutional layer, meaning that it scans the input
# image for features it learned during training. It outputs a map of
# where it saw each of its learned features in the image. This
-# “activation map” is downsampled in layer S2.
-# - Layer C3 is another convolutional layer, this time scanning C1’s
+# "activation map" is downsampled in layer S2.
+# - Layer C3 is another convolutional layer, this time scanning C1's
# activation map for *combinations* of features. It also puts out an
# activation map describing the spatial locations of these feature
# combinations, which is downsampled in layer S4.
# - Finally, the fully-connected layers at the end, F5, F6, and OUTPUT,
# are a *classifier* that takes the final activation map, and
# classifies it into one of ten bins representing the 10 digits.
-#
+#
# How do we express this simple neural network in code?
-#
+#
class LeNet(nn.Module):
def __init__(self):
- super(LeNet, self).__init__()
+ super().__init__()
# 1 input image channel (black & white), 6 output channels, 5x5 square convolution
# kernel
self.conv1 = nn.Conv2d(1, 6, 5)
@@ -207,8 +219,8 @@ def num_flat_features(self, x):
############################################################################
# Looking over this code, you should be able to spot some structural
# similarities with the diagram above.
-#
-# This demonstrates the structure of a typical PyTorch model:
+#
+# This demonstrates the structure of a typical PyTorch model:
#
# - It inherits from ``torch.nn.Module`` - modules may be nested - in fact,
# even the ``Conv2d`` and ``Linear`` layer classes inherit from
@@ -221,10 +233,10 @@ def num_flat_features(self, x):
# and various functions to generate an output.
# - Other than that, you can build out your model class like any other
# Python class, adding whatever properties and methods you need to
-# support your model’s computation.
-#
-# Let’s instantiate this object and run a sample input through it.
-#
+# support your model's computation.
+#
+# Let's instantiate this object and run a sample input through it.
+#
net = LeNet()
print(net) # what does the object tell us about itself?
@@ -241,37 +253,37 @@ def num_flat_features(self, x):
##########################################################################
# There are a few important things happening above:
-#
+#
# First, we instantiate the ``LeNet`` class, and we print the ``net``
# object. A subclass of ``torch.nn.Module`` will report the layers it has
# created and their shapes and parameters. This can provide a handy
# overview of a model if you want to get the gist of its processing.
-#
+#
# Below that, we create a dummy input representing a 32x32 image with 1
# color channel. Normally, you would load an image tile and convert it to
# a tensor of this shape.
-#
+#
# You may have noticed an extra dimension to our tensor - the *batch
# dimension.* PyTorch models assume they are working on *batches* of data
# - for example, a batch of 16 of our image tiles would have the shape
-# ``(16, 1, 32, 32)``. Since we’re only using one image, we create a batch
+# ``(16, 1, 32, 32)``. Since we're only using one image, we create a batch
# of 1 with shape ``(1, 1, 32, 32)``.
-#
+#
# We ask the model for an inference by calling it like a function:
-# ``net(input)``. The output of this call represents the model’s
+# ``net(input)``. The output of this call represents the model's
# confidence that the input represents a particular digit. (Since this
-# instance of the model hasn’t learned anything yet, we shouldn’t expect
+# instance of the model hasn't learned anything yet, we shouldn't expect
# to see any signal in the output.) Looking at the shape of ``output``, we
# can see that it also has a batch dimension, the size of which should
# always match the input batch dimension. If we had passed in an input
# batch of 16 instances, ``output`` would have a shape of ``(16, 10)``.
-#
+#
# Datasets and Dataloaders
# ------------------------
#
# Follow along with the video beginning at `14:00 `__.
#
-# Below, we’re going to demonstrate using one of the ready-to-download,
+# Below, we're going to demonstrate using one of the ready-to-download,
# open-access datasets from TorchVision, how to transform the images for
# consumption by your model, and how to use the DataLoader to feed batches
# of data to your model.
@@ -284,19 +296,23 @@ def num_flat_features(self, x):
import torch
import torchvision
-import torchvision.transforms as transforms
+from torchvision.transforms import v2
-transform = transforms.Compose(
- [transforms.ToTensor(),
- transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
+transform = v2.Compose(
+ [v2.ToImage(),
+ v2.ToDtype(torch.float32, scale=True),
+ v2.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
##########################################################################
-# Here, we specify two transformations for our input:
+# Here, we specify three transformations for our input:
#
-# - ``transforms.ToTensor()`` converts images loaded by Pillow into
-# PyTorch tensors.
-# - ``transforms.Normalize()`` adjusts the values of the tensor so
+# - ``v2.ToImage()`` converts images loaded by Pillow into the
+# ``TVTensor`` image type used by torchvision v2.
+# - ``v2.ToDtype(torch.float32, scale=True)`` converts pixel values to
+# float32 and scales them from [0, 255] to [0.0, 1.0]. (This replaces
+# the older ``transforms.ToTensor()``.)
+# - ``v2.Normalize()`` adjusts the values of the tensor so
# that their average is zero and their standard deviation is 1.0. Most
# activation functions have their strongest gradients around x = 0, so
# centering our data there can speed learning.
@@ -306,27 +322,29 @@ def num_flat_features(self, x):
# few lines of code::
#
# from torch.utils.data import ConcatDataset
-# transform = transforms.Compose([transforms.ToTensor()])
+# transform = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])
# trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
# download=True, transform=transform)
#
-# # stack all train images together into a tensor of shape
+# # stack all train images together into a tensor of shape
# # (50000, 3, 32, 32)
# x = torch.stack([sample[0] for sample in ConcatDataset([trainset])])
-#
-# # get the mean of each channel
+#
+# # get the mean of each channel
# mean = torch.mean(x, dim=(0,2,3)) # tensor([0.4914, 0.4822, 0.4465])
-# std = torch.std(x, dim=(0,2,3)) # tensor([0.2470, 0.2435, 0.2616])
-#
-#
+# std = torch.std(x, dim=(0,2,3)) # tensor([0.2470, 0.2435, 0.2616])
+#
+#
# There are many more transforms available, including cropping, centering,
-# rotation, and reflection.
-#
-# Next, we’ll create an instance of the CIFAR10 dataset. This is a set of
+# rotation, and reflection. See
+# `torchvision.transforms.v2 <https://docs.pytorch.org/vision/stable/transforms.html>`_
+# for the full list.
+#
+# Next, we'll create an instance of the CIFAR10 dataset. This is a set of
# 32x32 color image tiles representing 10 classes of objects: 6 of animals
# (bird, cat, deer, dog, frog, horse) and 4 of vehicles (airplane,
# automobile, ship, truck):
-#
+#
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
@@ -334,9 +352,9 @@ def num_flat_features(self, x):
##########################################################################
# .. note::
-# When you run the cell above, it may take a little time for the
+# When you run the cell above, it may take a little time for the
# dataset to download.
-#
+#
# This is an example of creating a dataset object in PyTorch. Downloadable
# datasets (like CIFAR-10 above) are subclasses of
# ``torch.utils.data.Dataset``. ``Dataset`` classes in PyTorch include the
@@ -344,17 +362,17 @@ def num_flat_features(self, x):
# as utility dataset classes such as ``torchvision.datasets.ImageFolder``,
# which will read a folder of labeled images. You can also create your own
# subclasses of ``Dataset``.
-#
+#
# When we instantiate our dataset, we need to tell it a few things:
#
-# - The filesystem path to where we want the data to go.
+# - The filesystem path to where we want the data to go.
# - Whether or not we are using this set for training; most datasets
# will be split into training and test subsets.
-# - Whether we would like to download the dataset if we haven’t already.
+# - Whether we would like to download the dataset if we haven't already.
# - The transformations we want to apply to the data.
-#
+#
# Once your dataset is ready, you can give it to the ``DataLoader``:
-#
+#
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=2)
@@ -362,16 +380,16 @@ def num_flat_features(self, x):
##########################################################################
# A ``Dataset`` subclass wraps access to the data, and is specialized to
-# the type of data it’s serving. The ``DataLoader`` knows *nothing* about
+# the type of data it's serving. The ``DataLoader`` knows *nothing* about
# the data, but organizes the input tensors served by the ``Dataset`` into
# batches with the parameters you specify.
-#
-# In the example above, we’ve asked a ``DataLoader`` to give us batches of
+#
+# In the example above, we've asked a ``DataLoader`` to give us batches of
# 4 images from ``trainset``, randomizing their order (``shuffle=True``),
# and we told it to spin up two workers to load data from disk.
-#
-# It’s good practice to visualize the batches your ``DataLoader`` serves:
-#
+#
+# It's good practice to visualize the batches your ``DataLoader`` serves:
+#
import matplotlib.pyplot as plt
import numpy as np
@@ -392,19 +410,19 @@ def imshow(img):
# show images
imshow(torchvision.utils.make_grid(images))
# print labels
-print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
+print(' '.join(f'{classes[labels[j]]:>5s}' for j in range(4)))
########################################################################
# Running the above cell should show you a strip of four images, and the
# correct label for each.
-#
+#
# Training Your PyTorch Model
# ---------------------------
#
# Follow along with the video beginning at `17:10 `__.
#
-# Let’s put all the pieces together, and train a model:
+# Let's put all the pieces together, and train a model:
#
#%matplotlib inline
@@ -415,7 +433,7 @@ def imshow(img):
import torch.optim as optim
import torchvision
-import torchvision.transforms as transforms
+from torchvision.transforms import v2
import matplotlib
import matplotlib.pyplot as plt
@@ -423,14 +441,15 @@ def imshow(img):
#########################################################################
-# First, we’ll need training and test datasets. If you haven’t already,
+# First, we'll need training and test datasets. If you haven't already,
# run the cell below to make sure the dataset is downloaded. (It may take
# a minute.)
-#
+#
-transform = transforms.Compose(
- [transforms.ToTensor(),
- transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+transform = v2.Compose(
+ [v2.ToImage(),
+ v2.ToDtype(torch.float32, scale=True),
+ v2.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
@@ -447,8 +466,8 @@ def imshow(img):
######################################################################
-# We’ll run our check on the output from ``DataLoader``:
-#
+# We'll run our check on the output from ``DataLoader``:
+#
import matplotlib.pyplot as plt
import numpy as np
@@ -469,18 +488,18 @@ def imshow(img):
# show images
imshow(torchvision.utils.make_grid(images))
# print labels
-print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
+print(' '.join(f'{classes[labels[j]]:>5s}' for j in range(4)))
##########################################################################
-# This is the model we’ll train. If it looks familiar, that’s because it’s
+# This is the model we'll train. If it looks familiar, that's because it's
# a variant of LeNet - discussed earlier in this video - adapted for
# 3-color images.
-#
+#
class Net(nn.Module):
def __init__(self):
- super(Net, self).__init__()
+ super().__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
@@ -503,7 +522,7 @@ def forward(self, x):
######################################################################
# The last ingredients we need are a loss function and an optimizer:
-#
+#
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
@@ -511,19 +530,19 @@ def forward(self, x):
##########################################################################
# The loss function, as discussed earlier in this video, is a measure of
-# how far from our ideal output the model’s prediction was. Cross-entropy
+# how far from our ideal output the model's prediction was. Cross-entropy
# loss is a typical loss function for classification models like ours.
-#
+#
# The **optimizer** is what drives the learning. Here we have created an
# optimizer that implements *stochastic gradient descent,* one of the more
# straightforward optimization algorithms. Besides parameters of the
# algorithm, like the learning rate (``lr``) and momentum, we also pass in
# ``net.parameters()``, which is a collection of all the learning weights
# in the model - which is what the optimizer adjusts.
-#
+#
# Finally, all of this is assembled into the training loop. Go ahead and
# run this cell, as it will likely take a few minutes to execute:
-#
+#
for epoch in range(2): # loop over the dataset multiple times
@@ -544,8 +563,7 @@ def forward(self, x):
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
- print('[%d, %5d] loss: %.3f' %
- (epoch + 1, i + 1, running_loss / 2000))
+ print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
running_loss = 0.0
print('Finished Training')
@@ -556,31 +574,31 @@ def forward(self, x):
# passes over the training dataset. Each pass has an inner loop that
# **iterates over the training data** (line 4), serving batches of
# transformed input images and their correct labels.
-#
+#
# **Zeroing the gradients** (line 9) is an important step. Gradients are
# accumulated over a batch; if we do not reset them for every batch, they
# will keep accumulating, which will provide incorrect gradient values,
# making learning impossible.
-#
+#
# In line 12, we **ask the model for its predictions** on this batch. In
# the following line (13), we compute the loss - the difference between
# ``outputs`` (the model prediction) and ``labels`` (the correct output).
-#
+#
# In line 14, we do the ``backward()`` pass, and calculate the gradients
# that will direct the learning.
-#
+#
# In line 15, the optimizer performs one learning step - it uses the
# gradients from the ``backward()`` call to nudge the learning weights in
# the direction it thinks will reduce the loss.
-#
+#
# The remainder of the loop does some light reporting on the epoch number,
# how many training instances have been completed, and what the collected
# loss is over the training loop.
-#
+#
# **When you run the cell above,** you should see something like this:
-#
+#
# .. code-block:: sh
-#
+#
# [1, 2000] loss: 2.235
# [1, 4000] loss: 1.940
# [1, 6000] loss: 1.713
@@ -594,20 +612,20 @@ def forward(self, x):
# [2, 10000] loss: 1.284
# [2, 12000] loss: 1.267
# Finished Training
-#
+#
# Note that the loss is monotonically descending, indicating that our
# model is continuing to improve its performance on the training dataset.
-#
+#
# As a final step, we should check that the model is actually doing
-# *general* learning, and not simply “memorizing” the dataset. This is
+# *general* learning, and not simply "memorizing" the dataset. This is
# called **overfitting,** and usually indicates that the dataset is too
# small (not enough examples for general learning), or that the model has
# more learning parameters than it needs to correctly model the dataset.
-#
+#
# This is the reason datasets are split into training and test subsets -
# to test the generality of the model, we ask it to make predictions on
-# data it hasn’t trained on:
-#
+# data it hasn't trained on:
+#
correct = 0
total = 0
@@ -619,13 +637,11 @@ def forward(self, x):
total += labels.size(0)
correct += (predicted == labels).sum().item()
-print('Accuracy of the network on the 10000 test images: %d %%' % (
- 100 * correct / total))
+print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.0f} %')
#########################################################################
# If you followed along, you should see that the model is roughly 50%
-# accurate at this point. That’s not exactly state-of-the-art, but it’s
-# far better than the 10% accuracy we’d expect from a random output. This
+# accurate at this point. That's not exactly state-of-the-art, but it's
+# far better than the 10% accuracy we'd expect from a random output. This
# demonstrates that some general learning did happen in the model.
-#