In TensorFlow, similarly, there is no need to manually download, parse and load the CIFAR10 dataset: the `datasets.cifar10.load_data()` function directly loads the already-split training set and test set. For example:
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Sequential, losses, optimizers, datasets

from Chapter10.CIFAR10 import load_data

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


def preprocess(x, y):
    """Convert one (image, label) pair to training dtypes.

    The image is cast to float32 and divided by 255; the label is cast
    to int32. Returns the converted ``(x, y)`` pair.
    """
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x, y


# Load the CIFAR10 dataset from a local copy through the custom load_data helper
(x, y), (x_test, y_test) = load_data('/Users/xuruihang/.keras/datasets/cifar-10-batches-py')
# Delete the trailing dimension of y: [b, 1] => [b]
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
# Print the shapes of the training and test sets
print(x.shape, y.shape, x_test.shape, y_test.shape)

# Build the training set object: shuffle, preprocess, batch
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)

# Build the test set object: preprocess, batch.
# The pipeline must be assigned to test_db; assigning it to train_db would
# preprocess and batch the training data a second time and leave the test
# set unprocessed.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(128)

# Sample one batch from the training set and inspect it
sample = next(iter(train_db))
print('sample: ', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))
The operation results are shown in the figure below:
Note: the `load_data()` function used here is a piece of code written by the author, because downloading the dataset directly reports an error:
import os
import pickle

import numpy as np


def load_batch(file):
    """Load a single CIFAR10 batch file.

    # Arguments
        file: path to a pickled CIFAR10 batch file.

    # Returns
        A tuple `(data, labels)`: `data` is the stored array reshaped to
        `(num_samples, 3, 32, 32)`, `labels` is the object stored under
        the `labels` key.
    """
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only use this on dataset files obtained from a trusted source.
    with open(file, 'rb') as fo:
        d = pickle.load(fo, encoding='bytes')
    # With encoding='bytes' the dictionary keys are bytes; decode them to str.
    d = {k.decode('utf8'): v for k, v in d.items()}
    data = d['data']
    labels = d['labels']
    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels


def load_data(path ='data/cifar-10-batches-py'):
    """Loads CIFAR10 dataset.

    # Arguments
        path: directory containing the files `data_batch_1` ... `data_batch_5`
            and `test_batch`.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`. Images are transposed
        from `(N, 3, 32, 32)` to `(N, 32, 32, 3)` when the Keras image data
        format is `channels_last`.
    """
    # Use the public Keras backend module; `tensorflow.python.*` is a private
    # namespace that is not part of the supported TensorFlow API.
    from tensorflow.keras import backend as K

    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples,), dtype='uint8')

    # Each of the five training batch files fills one slice of 10000 samples.
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        (x_train[(i - 1) * 10000: i * 10000, :, :, :],
         y_train[(i - 1) * 10000: i * 10000]) = load_batch(fpath)

    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)


(x_train, y_train), (x_test, y_test) = load_data('/Users/xuruihang/.keras/datasets/cifar-10-batches-py')
For details, see "Keras CIFAR10 offline loading".
It can be seen that, after the above code is run, the training set $\boldsymbol X$ and $\boldsymbol y$ have shapes $(50000,32,32,3)$ and $(50000)$, and the test set $\boldsymbol X$ and $\boldsymbol y$ have shapes $(10000,32,32,3)$ and $(10000)$, respectively, i.e. the pictures are $32\times 32$ color pictures. The number of samples in the training set is 50000 and the number of samples in the test set is 10000.
The CIFAR10 image recognition task is not simple, mainly because the image content of CIFAR10 needs a lot of detail to be presented, while the resolution of the saved images is only $32\times 32$, which makes the subject of the image blurry and sometimes difficult to distinguish even for the human eye. The expressive power of a shallow neural network is limited, so it is difficult to train and optimize it to good performance. In this section, based on the more expressive VGG13 network, we modify part of the network structure according to the characteristics of our dataset to complete CIFAR10 image recognition. The modifications are as follows:
- Adjust the network input to $32\times 32$. The original network input is $224\times 224$, which makes the input feature dimension of the fully connected layers too large and the number of network parameters too large.
- The dimensions of the three fully connected layers are adjusted to $[256,64,10]$ to suit the 10-class classification task (note that the code listings below use 128 units for the second layer).
The following figure shows the adjusted VGG13 network structure, which we will still refer to as the VGG13 network model.
We implement the network as two sub-networks: a convolutional sub-network and a fully connected sub-network. The convolutional sub-network is composed of five sub-modules, each containing a Conv-Conv-MaxPooling unit structure. The code is as follows:
# Output channel count of each of the five Conv-Conv-MaxPooling units, in order
unit_channels = [64, 128, 256, 512, 512]

# Assemble the layer list one unit at a time
conv_layers = []
for filters in unit_channels:
    conv_layers.extend([
        # Two 3x3 convolutions; padding="same" keeps height and width unchanged
        layers.Conv2D(filters, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(filters, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        # 2x2 max pooling with stride 2 halves height and width
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    ])

# Wrap the layer list created above in a network container
conv_net = Sequential(conv_layers)
The fully connected sub-network consists of three fully connected layers; each layer except the last is followed by a ReLU nonlinear activation function. The code is as follows:
# Fully connected sub-network: two ReLU layers followed by a 10-unit output
# layer without activation (it outputs raw logits)
fc_net = Sequential()
fc_net.add(layers.Dense(256, activation=tf.nn.relu))
fc_net.add(layers.Dense(128, activation=tf.nn.relu))
fc_net.add(layers.Dense(10, activation=None))
After the sub-networks are created, check the number of parameters of the network with the following code:
# Build both sub-networks with their input shapes, then print their summaries
for net, in_shape in ((conv_net, [None, 32, 32, 3]), (fc_net, [None, 512])):
    net.build(input_shape=in_shape)
for net in (conv_net, fc_net):
    net.summary()
The total parameters of the convolution network are about 9.4 million, the total parameters of the fully connected network are about 177000, and the total parameters of the network are about 9.5 million, which is much less than that of the original version of VGG13.
Since we implement the network as two sub-networks, we need to merge the lists of parameters to be optimized of the two sub-networks when updating the gradients. The code is as follows:
# Concatenate the trainable parameters of the two sub-networks into one list
variables = [*conv_net.trainable_variables, *fc_net.trainable_variables]
# Differentiate the loss with respect to every parameter
grads = tape.gradient(loss, variables)
# Pair each gradient with its parameter and let the optimizer apply the update
grads_and_vars = zip(grads, variables)
optimizer.apply_gradients(grads_and_vars)
Run the code to start training the model. After training 50 epochs, the test accuracy of the network has reached 77.5%.
Full code:
import os

import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential

from Chapter10.CIFAR10 import load_data

os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
tf.random.set_seed(2345)

conv_layers = [  # 5 units of conv + max pooling
    # unit 1
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 2
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 3
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 4
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 5
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same')
]


def preprocess(x, y):
    """Convert one (image, label) pair to training dtypes.

    The image is cast to float32 and mapped with ``2 * x / 255 - 1``
    (so an input range of [0, 255] becomes [-1, 1]); the label is cast
    to int32. Returns the converted ``(x, y)`` pair.
    """
    # [-1~1]
    x = 2*tf.cast(x, dtype=tf.float32) / 255.-1
    y = tf.cast(y, dtype=tf.int32)
    return x,y


# Load the CIFAR10 dataset from a local copy through the custom load_data helper
(x, y), (x_test, y_test) = load_data('/Users/xuruihang/.keras/datasets/cifar-10-batches-py')
# [b, 1] => [b]
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)

train_db = tf.data.Dataset.from_tensor_slices((x,y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)

test_db = tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_db = test_db.map(preprocess).batch(64)

sample = next(iter(train_db))
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))


def main():
    """Train the two-part VGG13 model on ``train_db`` and evaluate on ``test_db``.

    Builds the convolutional and fully connected sub-networks, then runs
    50 epochs of training with Adam, printing the loss every 100 steps and
    the test accuracy after every epoch.
    """
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)

    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(10, activation=None),
    ])

    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 512])
    conv_net.summary()
    fc_net.summary()
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras optimizers.
    optimizer = optimizers.Adam(learning_rate=1e-4)

    # [1, 2] + [3, 4] => [1, 2, 3, 4]
    # List merging: combine the parameters of the 2 sub-networks
    variables = conv_net.trainable_variables + fc_net.trainable_variables

    for epoch in range(50):

        for step, (x,y) in enumerate(train_db):

            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 512])
                # [b, 512] => [b, 10]
                logits = fc_net(out)
                # [b] => [b, 10]
                y_onehot = tf.one_hot(y, depth=10)
                # compute loss on raw logits
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if step %100 == 0:
                print(epoch, step, 'loss:', float(loss))

        # Evaluate on the whole test set after each epoch
        total_num = 0
        total_correct = 0
        for x,y in test_db:

            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            # argmax returns int64; cast so it can be compared with int32 labels
            pred = tf.cast(pred, dtype=tf.int32)

            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)

            total_num += x.shape[0]
            total_correct += int(correct)

        acc = total_correct / total_num
        print(epoch, 'acc:', acc)


if __name__ == '__main__':
    main()
The operation results are shown in the figure below:
It can be seen that the accuracy has reached 77.41%. (the broken program ran all night, and the computer took off directly)