Recently, I have been studying deep learning together with some friends. The book "Dive into Deep Learning" is highly recommended.
This is my first blog post and the first lesson of my study: a simple linear regression implemented from scratch. The process is fairly simple, so let's go straight to the code.
import torch
import numpy as np
import random
# matplotlib is only needed for the commented-out visualization below.
# from matplotlib import pyplot as plt
# from torch import cuda  # NOTE(review): unused — kept here for reference only

# ---- Generate the synthetic data set ----
# Number of input features per sample.
num_inputs = 2
# Number of samples.
num_example = 2000
# Ground-truth model parameters: weight vector and bias.
true_w = [2, -3.4]
true_b = 4.2

# Draw random features from a standard normal distribution.
features = torch.randn(num_example, num_inputs, dtype=torch.float32)
# print(features)

# Generate labels from the true linear model.
# BUG FIX: the original wrote `true_w * features[:, 0]`, which multiplies the
# whole Python list by a tensor column (shape (2,) vs (2000,) broadcast error);
# each weight must be indexed individually.
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# print(labels.size())

# Add Gaussian observation noise (mean 0, std 0.01) to the labels.
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                       dtype=torch.float32)

# Visualize the sample data (optional).
# plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)
# plt.show()


def data_iter(batch_size, features, labels):
    """Yield (feature, label) mini-batches of the data set in random order.

    The last batch may be smaller than ``batch_size`` when the number of
    samples is not evenly divisible by it.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # read the samples in random order
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)


# Smoke test: take out and print one mini-batch of features and labels.
batch_size = 100
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
# Initialize the model parameters.
# The parameter sizes are consistent with the data generated above.
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)),
                 dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)
# Track gradients for both model parameters.
w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)


def linreg(X, w, b):
    """Linear model: returns X @ w + b."""
    return torch.mm(X, w) + b


def squared_loss(y_hat, y):
    """Per-sample halved squared error; y is reshaped to match y_hat."""
    return (y_hat - y.view(y_hat.size())) ** 2 / 2


def sgd(params, lr, batch_size):
    """Mini-batch stochastic gradient descent.

    Updates each parameter in place through ``.data`` so the update step
    itself is not tracked by autograd.
    """
    for param in params:
        param.data -= lr * param.grad / batch_size


# ---- Train the model ----
learning_rate = 0.03
num_epochs = 5000
# torch.cuda.set_device(0)
net = linreg
loss = squared_loss

from datetime import datetime

start_time = datetime.now()  # wall-clock start of training
for epoch in range(num_epochs):
    # Each epoch sweeps once over the whole data set in mini-batches.
    # X holds the features and y the labels of one batch.
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()  # scalar loss of this mini-batch
        l.backward()                     # compute parameter gradients
        sgd([w, b], learning_rate, batch_size)
        # Reset the gradients so they do not accumulate across batches.
        w.grad.data.zero_()
        b.grad.data.zero_()
    train_l = loss(net(features, w, b), labels)
    # print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

# BUG FIX: the original wrote `b = datetime.now()` here, which clobbered the
# trained bias tensor `b`; the timestamp now uses a distinct name.
end_time = datetime.now()
durn = (end_time - start_time).seconds  # elapsed training time in whole seconds
print(durn)
Because the code comments are fairly detailed, no further explanation is given here. This is my first blog post, and I plan to update frequently. If there are any mistakes, please bear with me — I am still a beginner.