Here we will use the torchvision package, which complements the PyTorch deep learning framework and is mainly used to build computer vision models.
Torchvision is mainly composed of the following parts:
torchvision.datasets: some functions for loading data and common data set interfaces;
torchvision.models: common model structures (including pre-trained models), such as AlexNet, VGG, ResNet, etc.;
torchvision.transforms: commonly used image transformations, such as clipping, rotation, etc;
torchvision.utils: some other useful methods.
# Import the needed packages.
# %matplotlib inline  (notebook magic: render matplotlib figures inline)
from IPython import display
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import time
import sys

sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)
print(torchvision.__version__)
# Load the Fashion-MNIST training and test sets, converting images to tensors.
mnist_train = torchvision.datasets.FashionMNIST(
    root='/home/kesci/input/FashionMNIST2065',
    train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(
    root='/home/kesci/input/FashionMNIST2065',
    train=False, download=True, transform=transforms.ToTensor())
class torchvision.datasets.FashionMNIST(root, train=True, transform=None, target_transform=None, download=False)
root (string) - the root directory of the dataset, which holds the processed/training.pt and processed/test.pt files.
train (bool, optional) - if set to True, create the dataset from training.pt, otherwise from test.pt.
download (bool, optional) - if set to True, download the data from the Internet and place it in the root directory. If the data already exists in the root directory, it will not be downloaded again.
transform (callable, optional) - a function or transform that takes a PIL image as input and returns the transformed data. For example: transforms.RandomCrop.
target_transform (callable, optional) - a function or transform that takes a target as input and transforms it.
# Show the dataset type and the number of training / test samples.
print(type(mnist_train))
print(len(mnist_train), len(mnist_test))
# We can access any sample by subscript; each sample is a (feature, label) pair.
# bug fix: the original unpacked the dataset itself instead of indexing it.
feature, label = mnist_train[0]
print(feature.shape, label)  # feature shape: Channel x Height x Width
If the dataset is loaded without a transform, each sample's feature is a PIL image, and we can inspect its type parameters:
# Without a transform, each sample's feature is a PIL image.
mnist_PIL = torchvision.datasets.FashionMNIST(
    root='/home/kesci/input/FashionMNIST2065', train=True, download=True)
# bug fix: index the dataset to get one (feature, label) pair.
PIL_feature, label = mnist_PIL[0]
print(PIL_feature)
<PIL.Image.Image image mode=L size=28x28 at 0x7F57E8736F28>
# This function has been saved in package d2lzh for later use.
def get_fashion_mnist_labels(labels):
    """Map numeric Fashion-MNIST labels to their text names.

    labels: iterable of values convertible to int, each in [0, 9].
    Returns a list with the corresponding text label for each entry.
    """
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
def show_fashion_mnist(images, labels):
    """Plot a row of Fashion-MNIST images with their text labels.

    images: iterable of tensors reshapeable to 28x28; labels: matching titles.
    """
    d2l.use_svg_display()
    # The underscore means we ignore (do not use) the first return value.
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        # Hide axis ticks: these are images, not charts.
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
# Collect the first ten samples and display them.
# bug fix: initialize X and y as empty lists, and append the feature
# (index 0) and the label (index 1) separately, not the whole pair.
X, y = [], []
for i in range(10):
    X.append(mnist_train[i][0])  # the i-th feature
    y.append(mnist_train[i][1])  # the i-th label
show_fashion_mnist(X, get_fashion_mnist_labels(y))
# Read data in mini-batches.
batch_size = 256   # batch size
num_workers = 4    # number of worker processes for data loading
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
Let's see how long it takes to read one full pass of the training data.
# Time one full pass over the training data loader.
start = time.time()
for X, y in train_iter:
    continue
print('%.2f sec' % (time.time() - start))
# Imports for the from-scratch softmax-regression implementation.
import torch
import torchvision
import numpy as np
import sys

sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)
print(torchvision.__version__)
batch_size = 256
# load_data_fashion_mnist is a wrapper around DataLoader.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
num_inputs = 784   # each input image has 28 * 28 = 784 pixels
print(28 * 28)
num_outputs = 10   # ten output classes

# Define the weight matrix and the bias vector.
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)),
                 dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)

# Enable gradient tracking on both parameters for backpropagation.
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
X = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(X.sum(dim=0, keepdim=True))   # dim=0: sum down each column, keep the column axis
print(X.sum(dim=1, keepdim=True))   # dim=1: sum across each row, keep the row axis
print(X.sum(dim=0, keepdim=False))  # dim=0: sum down each column, drop the axis
print(X.sum(dim=1, keepdim=False))  # dim=1: sum across each row, drop the axis
Sum by line when dim = 1
def softmax(X):
    """Row-wise softmax: exponentiate, then normalize each row to sum to 1.

    X: 2-D tensor of shape (batch, classes).
    Returns a tensor of the same shape whose rows are probability vectors.
    """
    X_exp = X.exp()                             # element-wise exponential
    partition = X_exp.sum(dim=1, keepdim=True)  # row sums, used as the denominator
    # print("X size is ", X_exp.size())
    # print("partition size is ", partition, partition.size())
    return X_exp / partition  # broadcasting divides each row by its own sum
The two commented sentences show the shape difference between X and partition, using the broadcast mechanism.
The results are as follows
# Sanity check: each row of the softmax output should sum to 1.
X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, '\n', X_prob.sum(dim=1))
* The parameter X below holds a batch of input images. The view() function flattens each image into a row vector of length num_inputs so it can be multiplied by the weight matrix W; the bias b is then added, and the result is passed through the softmax function to obtain the output y_hat.
def net(X):
    """Softmax-regression model: flatten X to (batch, num_inputs), apply the
    affine map XW + b, then softmax to get class probabilities."""
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)
# gather picks, in each row of y_hat, the probability at the index given by y.
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))
def cross_entropy(y_hat, y):
    """Cross-entropy loss: -log of the predicted probability of the true class.

    y_hat: (batch, classes) predicted probabilities; y: (batch,) class indices.
    Returns a (batch, 1) tensor of per-sample losses.
    """
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))
When our model is trained for model prediction, the accuracy defined here will be used.
def accuracy(y_hat, y):
    """Fraction of rows where the argmax prediction equals the true label.

    argmax along dim=1 gives the predicted class per row; comparing with y
    yields 0/1 values whose mean is the accuracy.
    """
    return (y_hat.argmax(dim=1) == y).float().mean().item()

print(accuracy(y_hat, y))
# Saved in the d2lzh_pytorch package for later use; it is refined further
# in the "image augmentation" section.
def evaluate_accuracy(data_iter, net):
    """Average classification accuracy of `net` over all batches of `data_iter`."""
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        # bug fix: y.shape is a Size object; y.shape[0] is the batch size.
        n += y.shape[0]
    return acc_sum / n

print(evaluate_accuracy(test_iter, net))
num_epochs, lr = 5, 0.1

# Saved in the d2lzh_pytorch package for later use.
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train `net` for `num_epochs`, printing loss and accuracy each epoch.

    If `optimizer` is None, plain mini-batch SGD (d2l.sgd) is applied to
    `params` with learning rate `lr`; otherwise the given optimizer is used.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Clear gradients before backpropagation.
            if optimizer is not None:
                optimizer.zero_grad()
            # bug fix: a list has no .grad; check the first parameter's grad.
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            # bug fix: count samples with y.shape[0], not the Size object.
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size,
          [W, b], lr)
Now that our model is trained, we can use it for prediction. Given a series of images (shown in the third row of output), let's compare their true labels (first line of text output) with the model's predictions (second line of text output).
# bug fix: iterator.next() is Python 2 syntax; use the built-in next().
X, y = next(iter(test_iter))
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
# Title each image with its true label over the predicted label.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(X[0:9], titles[0:9])
# Imports for the concise (nn.Module-based) implementation.
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys

sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
num_inputs = 784
num_outputs = 10

class LinearNet(nn.Module):
    """Softmax-regression network: flatten the image, then one linear layer."""
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        # x has shape (batch, 1, 28, 28); flatten all but the batch dimension.
        # bug fix: the first view() argument must be x.shape[0], not x.shape.
        y = self.linear(x.view(x.shape[0], -1))
        return y

# net = LinearNet(num_inputs, num_outputs)

class FlattenLayer(nn.Module):
    """Flatten any (batch, *, *, ...) tensor to (batch, features)."""
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        # bug fix: use x.shape[0] to keep the batch dimension.
        return x.view(x.shape[0], -1)

from collections import OrderedDict

net = nn.Sequential(
    # FlattenLayer(),
    # LinearNet(num_inputs, num_outputs)
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))])
    # Or equivalently, use our own LinearNet(num_inputs, num_outputs).
)
# Initialize the linear layer: small random weights, zero bias.
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
# bug fix: the pasted interpreter output below was not valid Python.
# Expected output:
# Parameter containing:
# tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
# Cross-entropy loss (combines log-softmax and NLL in one numerically
# stable operation).
loss = nn.CrossEntropyLoss()
# Function prototype:
# class torch.nn.CrossEntropyLoss(weight=None, size_average=None,
#     ignore_index=-100, reduce=None, reduction='mean')
# Mini-batch stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
# Function prototype:
# class torch.optim.SGD(params, lr=<required>, momentum=0, dampening=0,
#     weight_decay=0, nesterov=False)
num_epochs = 5
# params and lr are None because the optimizer already holds them.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)