[Hands-on deep learning note] Day 02: the Fashion-MNIST dataset & two ways to implement softmax regression

softmax regression

1, Get the Fashion-MNIST training set and read the data

Here we will use the torchvision package, which accompanies the PyTorch deep learning framework and is mainly used to build computer vision models.
torchvision consists mainly of the following parts:
torchvision.datasets: functions for loading data and interfaces for common datasets;
torchvision.models: common model architectures (including pre-trained models), such as AlexNet, VGG, ResNet, etc.;
torchvision.transforms: common image transformations, such as cropping, rotation, etc.;
torchvision.utils: other useful utilities.
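As a quick illustration of how these pieces fit together, here is a minimal sketch (the normalization values and the ./data path are placeholders for this example only) that composes two transforms and passes them to a dataset:

import torchvision
import torchvision.transforms as transforms

# Build a transform pipeline: convert PIL images to tensors, then normalize them.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Every dataset in torchvision.datasets accepts such a transform.
dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform)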

1. Import packages

# import needed package
%matplotlib inline
from IPython import display
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
import time

import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)
print(torchvision.__version__)

2. Get the dataset

mnist_train = torchvision.datasets.FashionMNIST(root='/home/kesci/input/FashionMNIST2065', train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='/home/kesci/input/FashionMNIST2065', train=False, download=True, transform=transforms.ToTensor())

class torchvision.datasets.FashionMNIST(root, train=True, transform=None, target_transform=None, download=False)

root (string) - the root directory of the dataset, which holds the processed/training.pt and processed/test.pt files.
train (bool, optional) - if set to True, create the dataset from training.pt, otherwise from test.pt.
download (bool, optional) - if set to True, download the data from the Internet and put it in the root directory. If the data already exists in the root directory, it will not be downloaded again.
transform (callable, optional) - a function/transform that takes a PIL image as input and returns the transformed data. For example: transforms.RandomCrop.
target_transform (callable, optional) - a function/transform that takes the target as input and transforms it.
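For example, here is a sketch that uses both transform and target_transform (the one-hot target_transform is purely illustrative, not part of the original note):

# Convert images to tensors and turn integer labels into one-hot vectors.
mnist_onehot = torchvision.datasets.FashionMNIST(
    root='/home/kesci/input/FashionMNIST2065',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
    target_transform=lambda y: torch.zeros(10).scatter_(0, torch.tensor([y]), 1.0))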

# show result 
print(type(mnist_train))
print(len(mnist_train), len(mnist_test))

<class 'torchvision.datasets.mnist.FashionMNIST'>
60000 10000

# We can access any sample by its index
feature, label = mnist_train[0]
print(feature.shape, label)  # Channel x Height x Width

If we load the dataset without any transform, each sample is a PIL image, and we can inspect it directly:

mnist_PIL = torchvision.datasets.FashionMNIST(root='/home/kesci/input/FashionMNIST2065', train=True, download=True)
PIL_feature, label = mnist_PIL[0]
print(PIL_feature)

<PIL.Image.Image image mode=L size=28x28 at 0x7F57E8736F28>
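Applying transforms.ToTensor() to this PIL image by hand shows what the transform in the earlier call does: it returns a float tensor of shape (1, 28, 28) (channel x height x width) with pixel values scaled to [0, 1].

to_tensor = transforms.ToTensor()
tensor_feature = to_tensor(PIL_feature)
print(tensor_feature.shape, tensor_feature.min().item(), tensor_feature.max().item())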

# This function has been saved in the d2lzh1981 package for later use
# Purpose: convert numeric labels into text labels
# It returns the text corresponding to each label (the text labels are stored in the list text_labels)
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
# Display a few samples from the dataset
def show_fashion_mnist(images, labels):
    d2l.use_svg_display()
    # Here "UU" means we ignore (do not use) variables
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
X, y = [], []
for i in range(10):
    X.append(mnist_train[i][0]) # Add the i-th feature to X
    y.append(mnist_train[i][1]) # Add the i-th label to y
show_fashion_mnist(X, get_fashion_mnist_labels(y))
# Read data
batch_size = 256	# Batch size
num_workers = 4		# Number of worker processes used to load the data
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

Let's time how long it takes to read one pass over the training data:

start = time.time()
for X, y in train_iter:
    continue
print('%.2f sec' % (time.time() - start))

2, Softmax implementation from scratch

Import packages

import torch
import torchvision
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)
print(torchvision.__version__)

Get the training and test datasets

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# load_data_fashion_mnist is a wrapper around torch.utils.data.DataLoader

Model parameter initialization

num_inputs = 784	# Each input image has 28 * 28 = 784 pixels, flattened into a feature vector
print(28*28)
num_outputs = 10	# There are 10 output classes
# Next, define the weights and biases
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)
# Enable gradient tracking on both parameters so that backpropagation can compute their gradients
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)

Operating along dimensions of a multi-dimensional Tensor

X = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(X.sum(dim=0, keepdim=True))  # dim=0: sum over rows (down each column); keepdim=True keeps that dimension in the result
print(X.sum(dim=1, keepdim=True))  # dim=1: sum along each row; keepdim=True keeps that dimension in the result
print(X.sum(dim=0, keepdim=False)) # dim=0: sum over rows; the summed dimension is dropped from the result
print(X.sum(dim=1, keepdim=False)) # dim=1: sum along each row; the summed dimension is dropped from the result

When dim=1, the summation runs along each row.
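For the X defined above, the four calls print (shapes (1, 3), (2, 1), (3,) and (2,) respectively):

tensor([[5, 7, 9]])
tensor([[ 6],
        [15]])
tensor([5, 7, 9])
tensor([ 6, 15])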

Define the softmax operation
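For a batch X with one example per row, softmax turns each row of arbitrary real-valued scores into a probability distribution:

softmax(X)_{ij} = exp(X_{ij}) / Σ_k exp(X_{ik})

Every entry of the result lies in (0, 1) and each row sums to 1. The implementation below follows this definition directly.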

def softmax(X):
    X_exp = X.exp()				# Element-wise exponential
    partition = X_exp.sum(dim=1, keepdim=True) 	# Row-wise sum of the exponentials, used as the denominator
    # print("X size is ", X_exp.size())
    # print("partition size is ", partition, partition.size())
    return X_exp / partition  # The broadcast mechanism is applied here

The two commented lines show the shape difference between X_exp and partition; the division relies on broadcasting.
The results are as follows:

X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, '\n', X_prob.sum(dim=1))

softmax regression model


* The input X is a batch of images with shape (batch, 1, 28, 28). The view() function reshapes it into a (batch, 784) matrix, so each sample becomes a row vector of 784 features that can be multiplied by the weight matrix W, added to b, and passed through the softmax function to get the output y_hat.

def net(X):
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)

Define loss function

y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))
def cross_entropy(y_hat, y):
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))
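For the y_hat and y defined above, gather(1, y.view(-1, 1)) picks out the predicted probability of the true class in each row: y_hat[0, 0] = 0.1 and y_hat[1, 2] = 0.5. The cross-entropy loss is therefore -log(0.1) ≈ 2.3026 for the first example and -log(0.5) ≈ 0.6931 for the second, returned as a column tensor of shape (2, 1).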

Define accuracy

The accuracy defined here will be used later when the trained model makes predictions.

def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()
    # For each row, take the index of the largest value in y_hat and compare it with the true label y: a match counts as 1, a mismatch as 0; the mean of these is the accuracy
print(accuracy(y_hat, y))
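For this example, y_hat.argmax(dim=1) is tensor([2, 2]); compared with y = tensor([0, 2]), only the second prediction is correct, so the printed accuracy is 0.5.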
# This function has been saved in the d2lzh1981 package for later use. It will be improved step by step; its complete implementation is described in the "image augmentation" section
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
print(evaluate_accuracy(test_iter, net))

Train the model

num_epochs, lr = 5, 0.1

# This function has been saved in the d2lzh1981 package for later use
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            
            # Gradient clearing
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            
            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step() 
            
            
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
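When no optimizer is passed in, train_ch3 falls back to d2l.sgd for the parameter update. A minimal sketch of what such a mini-batch SGD step looks like (the actual helper in d2lzh1981 may differ in details):

def sgd(params, lr, batch_size):
    # Mini-batch stochastic gradient descent: update each parameter in place
    # using its accumulated gradient. Because the loss above is summed over the
    # batch, dividing by batch_size turns the update into a per-sample average.
    for param in params:
        param.data -= lr * param.grad / batch_size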

model prediction

Now that the model is trained, we can use it to make predictions. Even though its accuracy is not high, we can still demonstrate how it classifies images: given a series of images (the image row of the output), we compare their true labels (the first line of each title) with the model's predictions (the second line of each title).

X, y = next(iter(test_iter))

true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

d2l.show_fashion_mnist(X[0:9], titles[0:9])

3, Concise implementation of softmax

# Loading various packages or modules
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)

Initialize parameters and get data

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

Define network model

num_inputs = 784
num_outputs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
    def forward(self, x): # Shape of x: (batch, 1, 28, 28)
        y = self.linear(x.view(x.shape[0], -1))
        return y
    
# net = LinearNet(num_inputs, num_outputs)

class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x): # Shape of x: (batch, *, *,...)
        return x.view(x.shape[0], -1)

from collections import OrderedDict
net = nn.Sequential(
        # FlattenLayer(),
        # LinearNet(num_inputs, num_outputs) 
        OrderedDict([
           ('flatten', FlattenLayer()),
           ('linear', nn.Linear(num_inputs, num_outputs))]) # or use our own LinearNet(num_inputs, num_outputs) defined above
        )
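Because the layers are registered under the names given in the OrderedDict, they can be accessed as attributes such as net.linear, which the parameter initialization below relies on. Printing the model shows roughly:

print(net)
# Sequential(
#   (flatten): FlattenLayer()
#   (linear): Linear(in_features=784, out_features=10, bias=True)
# )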

Initialize model parameters

init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

Define loss function

loss = nn.CrossEntropyLoss() # Its function prototype is shown below
# class torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
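Note that nn.CrossEntropyLoss combines LogSoftmax and NLLLoss in a single, numerically more stable step, which is why the network above outputs raw scores (logits) without an explicit softmax layer. A small sketch of that equivalence (random logits and targets, for illustration only):

import torch
import torch.nn.functional as F
from torch import nn

logits = torch.randn(4, 10)          # raw, unnormalized scores for 4 samples
target = torch.randint(0, 10, (4,))  # true class indices

loss_a = nn.CrossEntropyLoss()(logits, target)
loss_b = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(loss_a, loss_b))  # expected: True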

Define optimization function

optimizer = torch.optim.SGD(net.parameters(), lr=0.1) # Here is the function prototype
# class torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)

Train the model

num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)