An Introduction to PyTorch
Daniel Yukimura
yukimura@impa.br
Class Overview
- A modular approach.
- Deep Learning Software.
- PyTorch basics: Tensors and Autograd.
- Representing NNs computationally.
- Handling Data.
- Optimization.
- Training Loop.
- Validation Loop.
Modularity
Breaking a complex system (software) into smaller, simpler pieces.
- Deep Learning is highly modular.
- Easy maintenance.
- Reusability.
- Efficiency.
- Robustness.
Deep Learning Frameworks
- Caffe (UC Berkeley) → Caffe2 (Facebook)
- Torch (NYU / Facebook) → PyTorch (Facebook)
- Theano (U Montreal) → TensorFlow (Google)
Others:
- MXNet (Amazon)
- CNTK (Microsoft)
- PaddlePaddle (Baidu)
- ...
PyTorch Basics
Installation and Setup
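PyTorch can typically be installed with pip (e.g. pip install torch torchvision; see pytorch.org for the exact command for your OS and CUDA version). A quick sanity check after installing:
import torch
print(torch.__version__)          # installed PyTorch version
print(torch.cuda.is_available())  # True if a CUDA-enabled GPU is usable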
PyTorch Basics
Tensors
import torch
# 2x2 tensor built from a regular (nested) Python list
T = torch.tensor([[1, 2], [3, 4]])
# Constructors similar to NumPy's
T_zeros = torch.zeros(100, 100)
T_ones = torch.ones(100, 100)
T_random = torch.randn(100, 100)
# dtype sets the data type
T_int = torch.tensor([[1, 2], [3, 4]], dtype=torch.int)
T_double = torch.randn(3, 5, 7, dtype=torch.double)  # same as torch.float64
print(T.dtype)
print(T.size())
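For T above, the two prints produce:
# torch.int64       (integer lists default to 64-bit integers)
# torch.Size([2, 2])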
PyTorch Basics
Tensors
import torch
x = torch.randn(3, 3, dtype=torch.float64)
y = torch.randn(3, 3, dtype=torch.float64)
# Addition and subtraction
z = x + y
z = torch.add(x, y)
w = x - y
w = torch.sub(x, y)
# Element-wise multiplication and division
u = x * y
u = torch.mul(x, y)
u = x / y
u = torch.div(x, y)
# In-place operations (the trailing _ modifies the tensor itself)
y.add_(x)
x.mul_(y)
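Note that * is element-wise; matrix products use torch.matmul (or the @ operator):
M = torch.matmul(x, y)
M = x @ y  # equivalent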
PyTorch Basics
Tensors
import torch
x = torch.randn(6, 6, dtype=torch.float64)
y = torch.randn(6, 6, dtype=torch.float64)
# Indexing and slicing
x_row = x[3, :]       # fourth row
x_cln = x[:, 3]       # fourth column
x_e = x[0, 0].item()  # .item() extracts a Python number
# Reshape
z = y.view(36)     # flatten to 36 elements
w = y.view(-1, 9)  # 4x9 tensor; -1 infers the dimension
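A related point: view requires a contiguous tensor, while reshape works in either case (copying only when needed):
w2 = y.reshape(-1, 9)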
PyTorch Basics
Tensors
import torch
import numpy as np
x = torch.randn(10)
y = x.numpy()            # shares memory with x (CPU tensors only)
a = np.ones(5)
b = torch.from_numpy(a)  # shares memory with a
c = torch.tensor(a)      # copies the data instead
PyTorch Basics
Tensors
import torch
import cv2
import matplotlib.pyplot as plt
# 1200x1200 image (cv2 loads images as BGR NumPy arrays)
cat = cv2.imread('cat.jpg')
cat_T = torch.from_numpy(cat)
plt.imshow(cat)
# Top half of the image (rows 0-599)
half_cat = cat_T[0:600]
plt.imshow(half_cat.numpy())
# Left half (columns 0-599)
column_half_cat = cat_T[:, 0:600]
plt.imshow(column_half_cat.numpy())
# Black out the center square
cat_T[400:800, 400:800] = 0
plt.imshow(cat_T.numpy())
PyTorch Basics
Tensors
import torch
device = torch.device("cuda")  # requires a CUDA-capable GPU
x = torch.randn(5, 5, device=device)
device2 = torch.device("cuda:2")  # a specific GPU, if several are available
x2 = torch.randn(5, 5, device=device2)
y = torch.ones(5, 5)
y = y.to(device)  # .to() returns a copy; it does not move y in place
# Operations must be done on the same device
z = x + y
# z2 = x2 + y  # would fail: x2 lives on cuda:2, y on cuda:0
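Moving results back to the CPU (needed, e.g., before calling .numpy()):
z_cpu = z.to("cpu")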
PyTorch Basics
Autograd
import torch
x = torch.randn(3, requires_grad=True)
y = torch.randn(3, requires_grad=True)
z = torch.randn(3, requires_grad=False)
p = x + y
g = p * z
g = g.mean()
g.backward()   # computes dg/dx and dg/dy
print(x.grad)  # equals z / 3, since g = mean((x + y) * z)
# Three ways to stop tracking gradients:
x.requires_grad_(False)  # note the trailing _
y = y.detach()           # detach() returns a new tensor
with torch.no_grad():
    ...
PyTorch Basics
Autograd
import torch
W = torch.randn(4, requires_grad=True)
for epoch in range(3):
    out = (W * 5).sum()
    out.backward()
    # Gradients accumulate across backward() calls,
    # so they must be reset at every iteration
    print(W.grad)  # tensor([5., 5., 5., 5.]) each time, thanks to zero_()
    W.grad.zero_()
PyTorch Basics
Layers - torch.nn
import torch
L = torch.nn.Linear(10, 25)  # maps 10 features to 25
x = torch.randn(3, 10)       # batch of 3 samples
y = L(x)
print(y.size())              # torch.Size([3, 25])
model = torch.nn.Sequential(L,
                            torch.nn.ReLU(),
                            torch.nn.Linear(25, 5))
y = model(x)
print(y.size())              # torch.Size([3, 5])
PyTorch Basics
Layers - torch.nn
- nn.Linear(...)
- nn.Conv2d(...)
- nn.LSTM(...)
- nn.ReLU(...)
- nn.Sigmoid(...)
- nn.Dropout(...)
- nn.Module(...)
- nn.Sequential(...)
- nn.Softmax(...)
- nn.CrossEntropyLoss(...)
- nn.MSELoss(...)
- nn.Transformer(...)
Building Neural Networks
with PyTorch
[Diagram: input data flows through the model's layers and comes out as a prediction.]
Building Neural Networks
with PyTorch
import torch
import torch.nn as nn
class TwoLayerNN(nn.Module):
    def __init__(self, dim_in, dim_hidden, dim_out):
        super(TwoLayerNN, self).__init__()
        self.linear1 = nn.Linear(dim_in, dim_hidden)
        self.act = nn.ReLU()
        self.linear2 = nn.Linear(dim_hidden, dim_out)

    def forward(self, x):
        h = self.linear1(x)
        h = self.act(h)
        y = self.linear2(h)
        return y

n_batch, dim_in, dim_hidden, dim_out = 32, 100, 50, 10
x = torch.randn(n_batch, dim_in)
model = TwoLayerNN(dim_in, dim_hidden, dim_out)
pred = model(x)  # shape: (32, 10)
Handling Data
import torch
import torchvision
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import os
mnist_data = MNIST(os.getcwd(),
                   download=True,
                   transform=transforms.ToTensor())
data_loader = DataLoader(mnist_data,
                         batch_size=4,
                         shuffle=True)
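Each iteration over the loader yields a mini-batch:
images, labels = next(iter(data_loader))
print(images.size())  # torch.Size([4, 1, 28, 28])
print(labels.size())  # torch.Size([4])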
Handling Data
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, ..., transform=None):
        ...
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data = self.data[idx]
        ...
        sample = {'image': image, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample
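A minimal concrete version of this skeleton, assuming the data is simply a Python list of (image, label) pairs (the class name PairListDataset is just for illustration):
from torch.utils.data import Dataset

class PairListDataset(Dataset):
    def __init__(self, pairs, transform=None):
        self.data = pairs  # hypothetical: list of (image, label) tuples
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, label = self.data[idx]
        sample = {'image': image, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample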
Loss Functions
import torch
import torch.nn.functional as F
x, y = mnist_data[0]  # x: 1x28x28 image tensor, y: integer label
dim_in, dim_hidden, dim_out = x.size(1) * x.size(2), 100, 10
model = TwoLayerNN(dim_in, dim_hidden, dim_out)
pred = model(x.view(x.size(0), -1))  # flatten the image to a 1x784 batch
loss = F.cross_entropy(pred, torch.tensor([y]))
- nn.MSELoss
- nn.NLLLoss
- nn.functional.l1_loss
- nn.functional.cross_entropy
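Each functional loss also has a module counterpart; for example, nn.CrossEntropyLoss is the module form of F.cross_entropy:
criterion = torch.nn.CrossEntropyLoss()
loss = criterion(pred, torch.tensor([y]))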
Optimizer
import torch
import torch.nn.functional as F
import torch.optim as optim
dim_in, dim_hidden, dim_out = x.size(1) * x.size(2), 100, 10
model = TwoLayerNN(dim_in, dim_hidden, dim_out)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
for batch in data_loader:
    x, y = batch
    pred = model(x.view(x.size(0), -1))
    loss = F.cross_entropy(pred, y)  # y is already a tensor of labels
    optimizer.zero_grad()  # reset accumulated gradients
    loss.backward()        # compute new gradients
    optimizer.step()       # update the parameters
- optim.SGD
- optim.Adam
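Swapping in Adam only changes one line (lr=1e-3 is a common default, not a prescription):
optimizer = optim.Adam(model.parameters(), lr=1e-3)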
Validation Loop
import os
import torch
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
mnist_data = MNIST(os.getcwd(), download=True, transform=transforms.ToTensor())
train, val = random_split(mnist_data, [55000, 5000])
data_train = DataLoader(train)
data_val = DataLoader(val)
...
# No gradients are needed for validation
with torch.no_grad():
    for batch in data_val:
        x, y = batch
        loss = F.cross_entropy(model(x.view(x.size(0), -1)), y)
        print(loss)
...
# accuracy
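Expanding the accuracy hint above, a minimal sketch of computing validation accuracy:
correct, total = 0, 0
with torch.no_grad():
    for x, y in data_val:
        pred = model(x.view(x.size(0), -1))
        correct += (pred.argmax(dim=1) == y).sum().item()
        total += y.size(0)
print(correct / total)  # fraction of correctly classified validation samples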
Thanks