1. Image preprocessing
```python
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage

def load_dataset():
    # Read the data from the corresponding paths
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels

    classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Read the dimensions of the training and test sets for preprocessing
m_train = train_set_x_orig.shape[0]  # number of training samples
m_test = test_set_x_orig.shape[0]    # number of test samples
num_px = train_set_x_orig.shape[1]   # the images are square, so axis 1 (rows) and axis 2 (columns) give the same value

print("Number of training examples: m_train = " + str(m_train))
print("Number of testing examples: m_test = " + str(m_test))
print("Height/Width of each image: num_px = " + str(num_px))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print("train_set_x shape: " + str(train_set_x_orig.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x shape: " + str(test_set_x_orig.shape))
print("test_set_y shape: " + str(test_set_y.shape))

# Reshape each image into one column of the data matrix, so that X = [x1, x2, x3, ..., xm]
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
print(train_set_x_orig.shape)     # the original four-dimensional array (m, num_px, num_px, 3)
print(train_set_x_flatten.shape)  # successfully flattened to a two-dimensional matrix (num_px*num_px*3, m)

# Divide all pixel values by 255 to standardize the color intensity
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255
```
train_set_x_orig: the raw pixel values of the unprocessed training samples
train_set_y_orig: the ground-truth label (0 or 1) for each training sample
test_set_x_orig: the samples used for testing once the values of w and b have been trained
test_set_y_orig: the corresponding ground-truth labels (the predicted labels are compared against them to evaluate the algorithm)
Reshaping here turns each image into a single column of pixel values, so the number of columns equals the number of samples (for the details, see Andrew Ng's course videos).
After preprocessing we obtain a sample matrix of shape (12288, 209), where 12288 is the total number of pixel values across the three RGB channels (64 × 64 × 3) and 209 is the number of samples, with every value normalized into [0, 1].
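To make the reshape concrete, here is a minimal sketch on a dummy array (the shapes are arbitrary, purely for illustration):

```python
import numpy as np

dummy = np.arange(2 * 4 * 4 * 3).reshape(2, 4, 4, 3)  # pretend: 2 images of 4x4 pixels, 3 channels
flat = dummy.reshape(dummy.shape[0], -1).T            # each image becomes one column
print(dummy.shape)  # (2, 4, 4, 3)
print(flat.shape)   # (48, 2)
```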
2. General architecture of the algorithm
For a single sample $x^{(i)}$, compute a predicted probability with the sigmoid activation:

$$z^{(i)} = w^T x^{(i)} + b, \qquad \hat{y}^{(i)} = a^{(i)} = \sigma(z^{(i)}) = \frac{1}{1 + e^{-z^{(i)}}}$$

and measure its loss against the true label $y^{(i)}$:

$$\mathcal{L}(a^{(i)}, y^{(i)}) = -\left(y^{(i)} \log a^{(i)} + (1 - y^{(i)}) \log(1 - a^{(i)})\right)$$

Then the cost function is calculated by running all training samples through the model and averaging their losses:

$$J(w, b) = \frac{1}{m} \sum_{i=1}^{m} \mathcal{L}\left(a^{(i)}, y^{(i)}\right)$$
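To make the formulas concrete, here is a tiny NumPy sketch that evaluates them on made-up numbers (the values of w, b, X, Y below are purely illustrative):

```python
import numpy as np

w = np.array([[0.5], [-0.5]])   # weights, shape (2, 1)
b = 0.1                         # bias
X = np.array([[1.0, 2.0],       # two samples stored as columns, shape (2, 2)
              [0.5, 1.5]])
Y = np.array([[1, 0]])          # their true labels

Z = np.dot(w.T, X) + b          # z = w^T x + b for every sample at once
A = 1 / (1 + np.exp(-Z))        # a = sigma(z)
cost = -np.mean(Y * np.log(A) + (1 - Y) * np.log(1 - A))  # J(w, b)
print(cost)
```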
With the algorithm understood, it's time for code practice.
3. Write code
Writing the code is divided into three steps:
1. Define the structure of the model
2. Initialize the model parameters
3. Iteratively update the parameters in a loop
1. Define model structure
(i) Define the sigmoid function:
```python
def sigmoid(z):
    # Map any real value (or array of values) into the interval (0, 1)
    s = 1 / (1 + np.exp(-z))
    return s
```
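A quick sanity check (assuming NumPy is imported as np, as in step 1):

```python
print(sigmoid(0))                       # 0.5
print(sigmoid(np.array([-10, 0, 10])))  # approximately [0, 0.5, 1]
```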
The previous article discussed the advantages of NumPy, which can be used for reference: Deep Learning Notes (2): Understanding Logistic Regression, https://blog.csdn.net/fyjyyds/article/details/118935150?spm=1001.2014.3001.5501. The details will not be repeated here.
(ii) Define the parameter initialization function:
```python
def initialize_with_zeros(dim):
    w = np.zeros((dim, 1))  # a zero matrix of shape (dim, 1)
    b = 0
    assert (w.shape == (dim, 1))                         # the assert statements ensure the dimensions and types are correct
    assert (isinstance(b, float) or isinstance(b, int))
    return w, b
```
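For example, a trivial check:

```python
w, b = initialize_with_zeros(2)
print(w)  # [[0.] [0.]]
print(b)  # 0
```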
(iii) Define the forward and backward propagation function:
The derivation of this function was written up in Note (2): Deep Learning Notes (II): Understanding Logistic Regression, https://blog.csdn.net/fyjyyds/article/details/118935150?spm=1001.2014.3001.5501
Only the key gradient computations are given here; the parameters have the same meaning as above.
```python
def propagate(w, b, X, Y):
    # w and b are the parameters of the sigmoid model
    # X and Y are the training sample set and the corresponding label set
    m = X.shape[1]

    # Forward propagation: compute the cost function
    A = sigmoid(np.dot(w.T, X) + b)
    cost = -1 / m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    # Backward propagation: compute the derivatives
    dw = np.dot(X, (A - Y).T) / m
    db = np.sum(A - Y) / m

    assert (dw.shape == w.shape)
    assert (db.dtype == float)
    cost = np.squeeze(cost)

    grads = {"dw": dw, "db": db}
    return grads, cost
```
The returned grads dictionary stores the dw matrix (whose elements are the derivatives of the cost function with respect to w1, w2, ...) and db (the derivative of the cost with respect to b).
With dw and db we can refine w and b over and over in a loop, driving the cost function down to its minimum.
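As a quick sanity check on the gradients (the toy inputs below are purely illustrative, not from the dataset), we can compare the analytic dw from propagate against a finite-difference estimate of the cost's slope:

```python
w, b = np.array([[1.0], [2.0]]), 2.0
X = np.array([[1.0, 2.0, -1.0],
              [3.0, 4.0, -3.2]])
Y = np.array([[1, 0, 1]])

grads, cost = propagate(w, b, X, Y)

# Finite-difference estimate of dJ/dw[0]: nudge w[0] by a tiny eps and re-evaluate the cost
eps = 1e-7
w_nudged = w.copy()
w_nudged[0, 0] += eps
_, cost_nudged = propagate(w_nudged, b, X, Y)
print(grads["dw"][0, 0], (cost_nudged - cost) / eps)  # the two numbers should nearly match
```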
(iv) Define the parameter optimization function:
num_iterations is the number of gradient-descent iterations
learning_rate is the step size applied to dw and db at each update
print_cost controls whether the value of the cost is printed during training
```python
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    costs = []
    for i in range(num_iterations):
        # Compute the cost function and obtain dw, db
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Perform one gradient-descent update of the parameters
        b = b - learning_rate * db
        w = w - learning_rate * dw

        # Record the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    # Return the final values of w and b
    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return params, grads, costs
```
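A minimal usage sketch, reusing the toy inputs from the gradient check above (the hyperparameter values here are arbitrary examples):

```python
w, b = np.array([[1.0], [2.0]]), 2.0
X = np.array([[1.0, 2.0, -1.0],
              [3.0, 4.0, -3.2]])
Y = np.array([[1, 0, 1]])

params, grads, costs = optimize(w, b, X, Y, num_iterations=100, learning_rate=0.009, print_cost=False)
print(params["w"])
print(params["b"])
```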
(v) Define the prediction function (guess what the image shows):
```python
def predict(w, b, X):
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    # Use the optimized w and b to compute the predicted probabilities
    A = sigmoid(np.dot(w.T, X) + b)

    # Threshold the probabilities into the binary labels 0 and 1
    for i in range(A.shape[1]):
        if A[0, i] >= 0.5:
            Y_prediction[0, i] = 1
        else:
            Y_prediction[0, i] = 0

    assert (Y_prediction.shape == (1, m))
    return Y_prediction
```
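For instance, with hand-picked toy parameters (illustrative only), two sample columns are thresholded like this:

```python
w = np.array([[1.0], [2.0]])
b = -1.0
X = np.array([[1.0, -2.0],
              [2.0, 1.0]])
print(predict(w, b, X))  # [[1. 0.]]: probabilities of about 0.98 and 0.27, thresholded at 0.5
```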
Finally, integrating these modules gives a complete model: we can train the network on the images and then use it to guess what each image shows.
```python
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    w, b = initialize_with_zeros(X_train.shape[0])

    # Optimize the parameters
    params, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

    # Retrieve the trained parameters
    w = params["w"]
    b = params["b"]

    # Predict on the training and test sets
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    return d
```
Now bring in your own data and hyperparameters.
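A typical call on the preprocessed data from step 1 (the hyperparameter values here are just a sensible starting point, not the only valid choice):

```python
d = model(train_set_x, train_set_y, test_set_x, test_set_y,
          num_iterations=2000, learning_rate=0.005, print_cost=True)
```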
P.S.: we can also tune the chosen value of learning_rate by plotting the cost curve.
```python
costs = np.squeeze(d['costs'])
plt.plot(costs)
plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
plt.title("Learning rate = " + str(d["learning_rate"]))
plt.show()
```
Try a few values and you will arrive at an ideal learning_rate.
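One way to compare several candidates side by side is to train once per rate and overlay the cost curves (a sketch; the candidate values below are just examples):

```python
learning_rates = [0.01, 0.005, 0.001]  # example candidates
for lr in learning_rates:
    d = model(train_set_x, train_set_y, test_set_x, test_set_y,
              num_iterations=1500, learning_rate=lr, print_cost=False)
    plt.plot(np.squeeze(d["costs"]), label=str(lr))

plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
plt.legend(loc='upper right')
plt.show()
```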