1. Implementing a linear regression model from scratch
0. Preparations
#Import visual packages and base packages %matplotlib inline import torch from IPython import display from matplotlib import pyplot as plt import numpy as np import random print(torch.__version__)
1. Generate data set
Use a linear model to generate a synthetic data set of 1000 samples that follows the linear relationship:
$\text{price} = w_1 \cdot \text{area} + w_2 \cdot \text{age} + b$
# Number of input features per sample (area and age).
num_inputs = 2
# Number of samples in the synthetic data set.
num_examples = 1000
# Ground-truth weights and bias used to generate the labels.
true_w = [2, -3.4]
true_b = 4.2

# Features are drawn from a standard normal distribution.
features = torch.randn(num_examples, num_inputs, dtype=torch.float32)
# Noise-free linear response: w1 * x1 + w2 * x2 + b.
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# Perturb the labels with Gaussian noise (mean 0, std 0.01).
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                       dtype=torch.float32)
2. Read data set
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    The sample order is reshuffled on every call, so each pass over the
    generator visits every sample exactly once in a random order.

    Args:
        batch_size: Maximum number of samples per mini-batch.
        features: Tensor whose first dimension indexes the samples.
        labels: Tensor whose first dimension indexes the samples.

    Yields:
        Tuples ``(X, y)`` holding the rows of ``features`` and ``labels``
        selected for the current mini-batch.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for start in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so the last batch may be
        # smaller than batch_size without any explicit bound check.
        batch_indices = torch.tensor(indices[start:start + batch_size],
                                     dtype=torch.long)
        yield (features.index_select(0, batch_indices),
               labels.index_select(0, batch_indices))
3. Initialize model parameters
# Weights start as small Gaussian noise (std 0.01); the bias starts at zero.
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)),
                 dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

# Track gradients on both parameters so backward() populates .grad.
w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
4. Define the model
def linreg(X, w, b):
    """Linear regression model: matrix product of ``X`` and ``w`` plus ``b``."""
    weighted = torch.mm(X, w)
    return weighted + b
5. Define loss function
def squared_loss(y_hat, y):
    """Return the per-sample halved squared error ``(y_hat - y) ** 2 / 2``.

    No reduction is applied: the result has the same shape as ``y_hat``,
    and the caller decides whether to sum or average it.

    Args:
        y_hat: Predictions.
        y: Targets; reshaped to the shape of ``y_hat`` before subtracting.
    """
    # Reshape y so the subtraction is element-wise instead of broadcasting.
    return (y_hat - y.view(y_hat.size())) ** 2 / 2
`y_hat` is the predicted value of `y`; `y.view(y_hat.size())` reshapes `y` to match the shape of `y_hat`.
This is needed because `y_hat` is the product of `X` and `w`, i.e. of shapes [n, m] and [m, 1], where n is the number of samples and m is the number of features, so `y_hat` has shape [n, 1] while `y` has shape [n].
6. Define optimization function
Stochastic gradient descent
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic gradient descent step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.

    Args:
        params: Iterable of tensors whose ``.grad`` has been populated by a
            backward pass.
        lr: Learning rate.
        batch_size: Divisor applied to every gradient.
    """
    # no_grad() keeps the in-place update out of the autograd graph without
    # going through the legacy .data attribute.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # Parameter did not take part in the backward pass.
                continue
            param -= lr * param.grad / batch_size
7. training
# Hyperparameters.
lr = 0.03
num_epochs = 5
# batch_size is used below but was never defined in this section; 10 matches
# the "read 10 samples" note on the data iterator.
batch_size = 10

net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    # X holds the features and y the labels of one mini-batch.
    for X, y in data_iter(batch_size, features, labels):
        # Sum the per-sample losses; sgd() divides by batch_size afterwards.
        batch_loss = loss(net(X, w, b), y).sum()
        batch_loss.backward()
        sgd([w, b], lr, batch_size)
        # Gradients accumulate across backward() calls, so reset them.
        w.grad.zero_()
        b.grad.zero_()
    # Evaluation only: no autograd graph is needed for the epoch summary.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))
2. Implementing softmax regression from scratch
0. Preparations
#Import base package import torch import torchvision import numpy as np import sys sys.path.append("/home/kesci/input") import d2lzh1981 as d2l
1. Obtain training set data and test set data
# Mini-batch size passed to the Fashion-MNIST loader.
batch_size = 256
# The loader returns a pair that is unpacked into the training and test
# iterators; the data set is read from the given root directory.
train_iter, test_iter = d2l.load_data_fashion_mnist(
    batch_size, root='/home/kesci/input/FashionMNIST2065')
2. Model parameter initialization
# Input and output sizes of the softmax regression model. The model flattens
# every sample to num_inputs values before multiplying by W.
num_inputs = 784
num_outputs = 10

# Weights start as small Gaussian noise (std 0.01); biases start at zero.
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)),
                 dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)

# Track gradients on both parameters so they can be trained.
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
3.softmax regression model
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Each row is exponentiated and divided by its sum, so every row of the
    result is non-negative and sums to 1.
    """
    # Subtracting the row maximum leaves the result unchanged but keeps
    # exp() from overflowing on large scores.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = shifted.exp()
    return X_exp / X_exp.sum(dim=1, keepdim=True)


def net(X):
    """Softmax regression: flatten ``X``, apply the linear layer, then softmax.

    ``X`` is reshaped to ``(-1, num_inputs)`` and combined with the
    module-level parameters ``W`` and ``b``.
    """
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)
4. Define loss function
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy loss.

    Args:
        y_hat: 2-D tensor of predicted probabilities, one row per sample.
        y: Integer tensor of class indices, one per sample.

    Returns:
        A column tensor holding ``-log`` of the probability that each row of
        ``y_hat`` assigns to that row's class index.
    """
    # gather(1, ...) picks, from every row, the entry at that row's label.
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))
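`y.view(-1, 1)` reshapes `y` into a column tensor with one row per sample (for example, 2 rows and 1 column when `y` holds two labels).
`gather` then picks, from each row of `y_hat`, the entry at that row's label index; for example, with `y = [0, 2]` it takes the first element of the first row and the third element of the second row of `y_hat`.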
5. Define accuracy
def accuracy(y_hat, y):
    """Return the fraction of rows of ``y_hat`` whose argmax equals ``y``."""
    predicted = y_hat.argmax(dim=1)
    hits = (predicted == y).float()
    return hits.mean().item()
6. Training model
num_epochs, lr = 5, 0.1


def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        net: Callable mapping ``X`` to per-class scores, one row per sample.
    """
    correct, total = 0.0, 0
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for X, y in data_iter:
            correct += (net(X).argmax(dim=1) == y).float().sum().item()
            total += y.shape[0]
    return correct / total


# This function has been saved in the d2lzh pytorch package for later use
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        train_iter: Iterable of ``(X, y)`` training mini-batches.
        test_iter: Iterable of ``(X, y)`` test mini-batches.
        loss: Callable ``loss(y_hat, y)``; its result is summed per batch.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``d2l.sgd`` when no optimizer is given.
        params: Parameters updated by ``d2l.sgd`` when no optimizer is given.
        lr: Learning rate forwarded to ``d2l.sgd``.
        optimizer: Optional optimizer; when given, it replaces ``d2l.sgd``.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))


train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size,
          [W, b], lr)
7. Prediction model
# Take the first mini-batch from the test iterator. The built-in next() is
# used because iterator objects are not guaranteed to expose a .next() method.
X, y = next(iter(test_iter))

# Convert true and predicted class indices to label strings via d2l.
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
# Each title shows the true label above the predicted one.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

# Show the first nine samples with their titles.
d2l.show_fashion_mnist(X[0:9], titles[0:9])

# 3, Implementation of multi-layer perceptron from scratch
0. Preparations
import torch import numpy as np import sys sys.path.append("/home/kesci/input") import d2lzh1981 as d2l print(torch.__version__)
1. Get training set
# Mini-batch size passed to the Fashion-MNIST loader.
batch_size = 256
# The loader returns a pair that is unpacked into the training and test
# iterators; the data set is read from the given root directory.
train_iter, test_iter = d2l.load_data_fashion_mnist(
    batch_size, root='/home/kesci/input/FashionMNIST2065')
2. Define model parameters
# Layer sizes: flattened input, output classes, hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights start as small Gaussian noise (std 0.01); biases start at zero.
W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)),
                  dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)),
                  dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)

# Track gradients on every parameter so they can be trained.
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)


# 3. Define activation function
def relu(X):
    """ReLU activation: element-wise maximum of ``X`` and zero."""
    return torch.max(input=X, other=torch.tensor(0.0))
4. Define network
def net(X):
    """Multi-layer perceptron with one hidden ReLU layer.

    ``X`` is flattened to ``(-1, num_inputs)``, passed through the hidden
    layer (``W1``, ``b1``) with ReLU, then through the output layer
    (``W2``, ``b2``). The raw output scores are returned; no softmax is
    applied here.
    """
    X = X.view((-1, num_inputs))
    H = relu(torch.matmul(X, W1) + b1)
    return torch.matmul(H, W2) + b2
5. Define loss function
# Cross-entropy criterion from torch.nn, applied to the raw scores that net
# returns (net itself applies no softmax).
# NOTE(review): with default arguments this criterion presumably averages
# over the batch rather than summing - confirm against the PyTorch docs.
loss = torch.nn.CrossEntropyLoss()
6. training
# NOTE(review): the learning rate is presumably this large because the loss
# is already averaged over the batch and d2l.sgd divides the gradient by
# batch_size again - confirm against d2l.sgd before changing it.
num_epochs, lr = 5, 100.0

# train_ch3 is the routine written out in the softmax section; here the copy
# packaged in d2l is used, with params updated through its SGD path.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params, lr)

# qq_41238579 Published 23 original articles, won praise 6, visited 5355 Private letter follow