# 1. Image preprocessing

```python
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage


def load_dataset():
    # Read the data from the corresponding paths
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels

    classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Read the dimensions of the training and test sets for preprocessing
m_train = train_set_x_orig.shape[0]  # number of training samples
m_test = test_set_x_orig.shape[0]    # number of test samples
num_px = train_set_x_orig.shape[1]   # the images are square, so reading either axis 1 or 2 works

print("Number of training examples: m_train = " + str(m_train))
print("Number of testing examples: m_test = " + str(m_test))
print("Height/Width of each image: num_px = " + str(num_px))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print("train_set_x shape: " + str(train_set_x_orig.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x shape: " + str(test_set_x_orig.shape))
print("test_set_y shape: " + str(test_set_y.shape))

# Reshape the matrix so that each image becomes one column, i.e. X = [x1, x2, x3, ..., xm]
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

print(train_set_x_orig.shape)     # original 4-D array: (m_train, num_px, num_px, 3)
print(train_set_x_flatten.shape)  # flattened 2-D matrix: (num_px * num_px * 3, m_train)

# Divide all pixel values by 255 to standardize the color intensity
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255
```

train_set_x_orig: the raw, unprocessed pixel values of the training samples

train_set_y_orig: the true labels of the training samples (0 or 1)

test_set_x_orig: the samples used for testing after the values of w and b have been trained

test_set_y_orig: the corresponding true labels (the predictions are compared against these to evaluate the algorithm)

Here, the reshaping turns each image into a single column of pixel values, so the number of columns equals the number of samples (for the details of this method, see Andrew Ng's video).

After preprocessing, we get a sample matrix of shape (12288, 209): 12288 = 64 × 64 × 3 is the total number of pixel values across the three RGB channels, 209 is the number of training samples, and every value is scaled into [0, 1].
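As a minimal sketch of what this reshape does (the tiny batch below is made up purely for illustration), each (num_px, num_px, 3) image ends up as one column:

```python
import numpy as np

# A made-up batch of 2 "images" of size 2x2 with 3 channels: shape (2, 2, 2, 3)
batch = np.arange(2 * 2 * 2 * 3).reshape(2, 2, 2, 3)

# Flatten each image into one column: shape (2*2*3, 2) = (12, 2)
flat = batch.reshape(batch.shape[0], -1).T
print(flat.shape)   # (12, 2)
print(flat[:, 0])   # all 12 pixel values of the first image, in one column
```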

# 2. General architecture of the algorithm

For a single sample $x^{(i)}$, the prediction and the loss are:

$$\hat{y}^{(i)} = a^{(i)} = \sigma(w^T x^{(i)} + b), \qquad \mathcal{L}(a^{(i)}, y^{(i)}) = -\left[ y^{(i)} \log a^{(i)} + (1 - y^{(i)}) \log(1 - a^{(i)}) \right]$$

The cost function is then calculated by summing this loss over all $m$ training samples and averaging:

$$J = \frac{1}{m} \sum_{i=1}^{m} \mathcal{L}(a^{(i)}, y^{(i)})$$

With the algorithm understood, it's time for code practice.

# 3. Write code

Writing the code is divided into three steps:

1. Define the structure of the model
2. Initialize the model parameters
3. Loop: iteratively correct the parameters

1. Define the model structure

(i) Define the sigmoid function:

```python
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s
```
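A quick sanity check, assuming numpy is imported as np as above (the values are the well-known ones, e.g. $\sigma(0) = 0.5$):

```python
print(sigmoid(np.array([0, 2])))  # approximately [0.5, 0.88]
```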

The previous article discussed the advantages of NumPy, which you can refer back to: Deep learning notes (2): understanding of logistic regression - fyjyyds blog - CSDN blog, https://blog.csdn.net/fyjyyds/article/details/118935150?spm=1001.2014.3001.5501. The details will not be repeated here.

(ii) Define the parameter initialization function:

```python
def initialize_with_zeros(dim):
    w = np.zeros((dim, 1))  # a zero matrix of shape (dim, 1)
    b = 0
    assert (w.shape == (dim, 1))  # the assert ensures the matrix dimensions are correct
    assert (isinstance(b, float) or isinstance(b, int))
    return w, b
```

(iii) Define the forward and backward propagation function:

The derivation of this function was written up in note (2): Deep learning notes (II): understanding of logistic regression - fyjyyds blog - CSDN blog, https://blog.csdn.net/fyjyyds/article/details/118935150?spm=1001.2014.3001.5501

Only the key parts of the algorithm are given here.

The parameters here are the same as above

```python
def propagate(w, b, X, Y):
    # w and b are the sigmoid function parameters
    # X and Y are the training sample set and the corresponding label set
    m = X.shape[1]

    # Forward propagation: compute the cost function
    A = sigmoid(np.dot(w.T, X) + b)
    cost = -1 / m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    # Backward propagation: compute the derivatives
    dw = np.dot(X, (A - Y).T) / m
    db = np.sum(A - Y) / m

    assert (dw.shape == w.shape)
    assert (db.dtype == float)
    cost = np.squeeze(cost)

    grads = {"dw": dw, "db": db}
    return grads, cost
```
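A minimal sketch of calling propagate; the toy arrays here are made up purely for illustration:

```python
w = np.array([[1.], [2.]])
b = 2.
X = np.array([[1., 2., -1.], [3., 4., -3.2]])  # 2 features, 3 made-up samples
Y = np.array([[1, 0, 1]])

grads, cost = propagate(w, b, X, Y)
print(grads["dw"].shape)  # (2, 1), same shape as w
print(grads["db"], cost)  # both scalars
```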

The returned grads dictionary stores the dw matrix (whose elements are the derivatives of the cost function with respect to w1, w2, ...) and db (the derivative of the cost with respect to the parameter b).
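In formula form, these are the standard logistic regression gradients, matching the two lines of code above:

$$\frac{\partial J}{\partial w} = \frac{1}{m} X (A - Y)^T, \qquad \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} \left( a^{(i)} - y^{(i)} \right)$$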

With dw and db, we can optimize w and b over and over in a loop to drive the cost function to its minimum.
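Each pass applies the standard gradient descent update, where $\alpha$ is the learning rate; this is exactly what the optimize function below does:

$$w := w - \alpha \, dw, \qquad b := b - \alpha \, db$$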

(iv) Define the parameter optimization function:

num_iterations is the number of iterations

learning_rate is the step size applied to dw and db in each update

print_cost controls whether to print the value of cost during training

```python
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    costs = []
    for i in range(num_iterations):
        # Run propagation to get the cost and the gradients
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Perform one iterative update of the parameters
        b = b - learning_rate * db
        w = w - dw * learning_rate

        # Record the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    # Return the final values of w and b
    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return params, grads, costs
```

(v) Define the prediction function (guessing what the image shows):

```python
def predict(w, b, X):
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    # Use the optimized w and b to compute the predicted probabilities
    A = sigmoid(np.dot(w.T, X) + b)

    # Binarize the computed probabilities into 0 and 1
    for i in range(A.shape[1]):
        if A[0, i] >= 0.5:
            Y_prediction[0, i] = 1
        else:
            Y_prediction[0, i] = 0

    assert (Y_prediction.shape == (1, m))
    return Y_prediction
```
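As a side note on the design, the element-by-element loop inside predict could also be replaced by a vectorized comparison that does the same 0.5 thresholding in one line (the probabilities below are made up for illustration):

```python
A = np.array([[0.2, 0.8, 0.5]])          # made-up predicted probabilities
Y_prediction = (A >= 0.5).astype(float)  # vectorized 0.5 thresholding
print(Y_prediction)                      # [[0. 1. 1.]]
```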

Finally, once these modules are integrated, the neural network can traverse an image and guess what it shows:

```python
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    w, b = initialize_with_zeros(X_train.shape[0])

    # Optimize the parameters
    params, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

    # Retrieve the trained parameters
    w = params["w"]
    b = params["b"]

    # Guess the labels
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    return d
```

Now bring in our own data as the parameters, as in the sketch below.
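A minimal example call, assuming the preprocessed matrices from section 1; the learning_rate of 0.005 here is just one reasonable choice, not the only one:

```python
d = model(train_set_x, train_set_y, test_set_x, test_set_y,
          num_iterations=2000, learning_rate=0.005, print_cost=True)
```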

P.S.: we can also tune the chosen value of learning_rate by plotting the cost curve:

```python
costs = np.squeeze(d['costs'])
plt.plot(costs)
plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
plt.title("Learning rate = " + str(d["learning_rate"]))
plt.show()
```

Try a few more values and you'll find an ideal learning_rate, as in the sketch below.
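A minimal sketch of that trial-and-error, looping over a few candidate rates (the specific values are just illustrative) and plotting their cost curves together:

```python
learning_rates = [0.01, 0.001, 0.0001]  # illustrative candidate values
models = {}
for lr in learning_rates:
    print("learning rate: " + str(lr))
    models[str(lr)] = model(train_set_x, train_set_y, test_set_x, test_set_y,
                            num_iterations=1500, learning_rate=lr, print_cost=False)

for lr in learning_rates:
    plt.plot(np.squeeze(models[str(lr)]["costs"]), label=str(lr))

plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
plt.legend(loc='upper right')
plt.show()
```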