import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist


def make_model(n_classes):
    return tf.keras.Sequential(
        [
            tf.keras.layers.Conv2D(
                32, (5, 5), activation=tf.nn.relu, input_shape=(28, 28, 1)
            ),
            tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation=tf.nn.relu),
            tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(1024, activation=tf.nn.relu),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(n_classes),
        ]
    )


def load_data():
    (train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()
    # Add the channel dimension and scale the input into the [-1, 1] range
    train_x = tf.expand_dims(train_x, -1)
    train_x = (tf.image.convert_image_dtype(train_x, tf.float32) - 0.5) * 2
    train_y = tf.expand_dims(train_y, -1)
    test_x = tf.expand_dims(test_x, -1)
    test_x = (tf.image.convert_image_dtype(test_x, tf.float32) - 0.5) * 2
    test_y = tf.expand_dims(test_y, -1)
    return (train_x, train_y), (test_x, test_y)


def train():
    # Define the model
    n_classes = 10
    model = make_model(n_classes)

    # Input data
    (train_x, train_y), (test_x, test_y) = load_data()

    # Training parameters
    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    step = tf.Variable(1, name="global_step")
    optimizer = tf.optimizers.Adam(1e-3)

    ckpt = tf.train.Checkpoint(step=step, optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt, "./tf_ckpts", max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print(f"Restored from {manager.latest_checkpoint}")
    else:
        print("Initializing from scratch.")

    accuracy = tf.metrics.Accuracy()
    mean_loss = tf.metrics.Mean(name="loss")

    # Train step function
    @tf.function
    def train_step(inputs, labels):
        with tf.GradientTape() as tape:
            # training=True so the Dropout layer is active during training
            logits = model(inputs, training=True)
            loss_value = loss(labels, logits)

        gradients = tape.gradient(loss_value, model.trainable_variables)
        # TODO: apply gradient clipping here
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        step.assign_add(1)

        accuracy.update_state(labels, tf.argmax(logits, -1))
        return loss_value, accuracy.result()

    epochs = 10
    batch_size = 32
    nr_batches_train = int(train_x.shape[0] / batch_size)
    print(f"Batch size: {batch_size}")
    print(f"Number of batches per epoch: {nr_batches_train}")

    train_summary_writer = tf.summary.create_file_writer("./log/train")
    with train_summary_writer.as_default():
        for epoch in range(epochs):
            for t in range(nr_batches_train):
                start_from = t * batch_size
                to = (t + 1) * batch_size
                features, labels = train_x[start_from:to], train_y[start_from:to]
                loss_value, accuracy_value = train_step(features, labels)
                mean_loss.update_state(loss_value)
                if t % 10 == 0:
                    print(f"{step.numpy()}: {loss_value} - accuracy: {accuracy_value}")
                    save_path = manager.save()
                    print(f"Checkpoint saved: {save_path}")
                    tf.summary.image(
                        "train_set", features, max_outputs=3, step=step.numpy()
                    )
                    tf.summary.scalar("accuracy", accuracy_value, step=step.numpy())
                    tf.summary.scalar("loss", mean_loss.result(), step=step.numpy())
                    accuracy.reset_states()
                    mean_loss.reset_states()

            print(f"Epoch {epoch} terminated")
            # Measuring accuracy on the whole training set at the end of epoch
            for t in range(nr_batches_train):
                start_from = t * batch_size
                to = (t + 1) * batch_size
                features, labels = train_x[start_from:to], train_y[start_from:to]
                logits = model(features)
                accuracy.update_state(labels, tf.argmax(logits, -1))
            print(f"Training accuracy: {accuracy.result()}")
            accuracy.reset_states()


if __name__ == "__main__":
    train()
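The script loads the test split but never uses it. As a rough sketch (not part of the original), the same batching pattern and a fresh accuracy metric could be appended at the end of train() to evaluate the trained model on test_x and test_y:

# Sketch: evaluate on the test split, appended inside train() after the training loop
test_accuracy = tf.metrics.Accuracy()
nr_batches_test = int(test_x.shape[0] / batch_size)
for t in range(nr_batches_test):
    start_from = t * batch_size
    to = (t + 1) * batch_size
    features, labels = test_x[start_from:to], test_y[start_from:to]
    # training=False keeps Dropout disabled during evaluation
    logits = model(features, training=False)
    test_accuracy.update_state(labels, tf.argmax(logits, -1))
print(f"Test accuracy: {test_accuracy.result()}")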
tf.GradientTape() creates a context (a "tape") that records every operation executed inside it, so that gradients can later be computed by automatic differentiation. This replaces the TensorFlow 1.x approach of building a static graph just to obtain gradients.
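For instance, a minimal sketch of computing a single gradient with a tape (the variable x and the function are made up for illustration):

import tensorflow as tf

x = tf.Variable(2.0)
with tf.GradientTape() as tape:
    y = x ** 2 + 3.0 * x  # y = x^2 + 3x

# dy/dx = 2x + 3 = 7 at x = 2
print(tape.gradient(y, x))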
Normally the tape releases its internal resources as soon as tape.gradient() is called, so it can only be queried once. If you need to compute several gradients from the same recorded computation, create it with with tf.GradientTape(persistent=True) as tape:
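A minimal sketch of the persistent variant (x, y and z are illustrative):

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    y = x * x   # y = x^2
    z = y * y   # z = x^4

# A persistent tape can be queried more than once
dz_dx = tape.gradient(z, x)  # 4 * x^3 = 108
dy_dx = tape.gradient(y, x)  # 2 * x   = 6
del tape  # release the tape's resources explicitly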
with tf.GradientTape() as tape:
    logits = model(inputs)
    loss_value = loss(labels, logits)

gradients = tape.gradient(loss_value, model.trainable_variables)
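The TODO in train_step mentions gradient clipping. A hedged sketch of how it could be inserted between computing and applying the gradients (the clip norm of 1.0 is an arbitrary choice, not from the original):

gradients = tape.gradient(loss_value, model.trainable_variables)
# Clip the global norm of all gradients before applying them
gradients, global_norm = tf.clip_by_global_norm(gradients, clip_norm=1.0)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))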
Save and restore model state
ckpt = tf.train.Checkpoint(step=step, optimizer=optimizer, model=model)
manager = tf.train.CheckpointManager(ckpt, "./tf_ckpts", max_to_keep=3)
ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    print(f"Restored from {manager.latest_checkpoint}")
else:
    print("Initializing from scratch.")
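As a sketch, the same tracked objects can be rebuilt in a separate script to restore the latest checkpoint for inference (assuming make_model and the ./tf_ckpts directory from the training script above):

# Rebuild the objects tracked by the checkpoint, then restore their values
model = make_model(n_classes=10)
step = tf.Variable(1, name="global_step")
optimizer = tf.optimizers.Adam(1e-3)

ckpt = tf.train.Checkpoint(step=step, optimizer=optimizer, model=model)
manager = tf.train.CheckpointManager(ckpt, "./tf_ckpts", max_to_keep=3)
# expect_partial() silences warnings about objects we do not use here
ckpt.restore(manager.latest_checkpoint).expect_partial()
print(f"Restored step: {step.numpy()}")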
Use TensorBoard to visualize the data: first create a file writer, then enter its context manager, and finally call the tf.summary ops to write the data. To visualize the training process, open a terminal in the code directory and run tensorboard --logdir=./log, then open the address printed by the command-line program in a browser (typically http://name-of-your-computer:6006).
train_summary_writer = tf.summary.create_file_writer("./log/train")
with train_summary_writer.as_default():
    ......
    tf.summary.image(
        "train_set", features, max_outputs=3, step=step.numpy()
    )
    tf.summary.scalar("accuracy", accuracy_value, step=step.numpy())
    tf.summary.scalar("loss", mean_loss.result(), step=step.numpy())
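Other summary types follow the same pattern. As an illustrative sketch (the writer name, log directory, and placeholder value are assumptions, not from the original), a second writer lets validation curves appear next to the training curves in TensorBoard:

import tensorflow as tf

# A second writer so validation metrics are logged under their own run
val_summary_writer = tf.summary.create_file_writer("./log/validation")

val_accuracy_value = 0.87  # placeholder; in practice this comes from a metric
with val_summary_writer.as_default():
    tf.summary.scalar("accuracy", val_accuracy_value, step=1)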
The following is the compact Keras Sequential workflow (compile/fit), which is what gets used most of the time rather than the hand-written loop above. A custom training loop lets you manipulate gradients and add whatever logic you need, but it is also more work. Both versions build the model by stacking layers with the Sequential API; architectures with skip connections, such as the residual blocks of ResNet covered later, require the functional API instead.
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist

n_classes = 10
model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(
            32, (5, 5), activation=tf.nn.relu, input_shape=(28, 28, 1)
        ),
        tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation=tf.nn.relu),
        tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1024, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(n_classes),
    ]
)
model.summary()

(train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()
# Scale input in [-1, 1] range
train_x = train_x / 255.0 * 2 - 1
test_x = test_x / 255.0 * 2 - 1
train_x = tf.expand_dims(train_x, -1).numpy()
test_x = tf.expand_dims(test_x, -1).numpy()

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    # from_logits=True because the last Dense layer has no softmax
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
model.fit(train_x, train_y, epochs=10)
model.evaluate(test_x, test_y)
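The checkpointing and TensorBoard logging that the custom loop handled by hand can be attached to model.fit through callbacks. A hedged sketch; the file paths and options are illustrative:

callbacks = [
    # Save weights during fit(), roughly mirroring tf.train.CheckpointManager above
    tf.keras.callbacks.ModelCheckpoint(
        filepath="./keras_ckpts/model_{epoch}", save_weights_only=True
    ),
    # Write scalar summaries that TensorBoard can display
    tf.keras.callbacks.TensorBoard(log_dir="./log/keras"),
]
model.fit(train_x, train_y, epochs=10, callbacks=callbacks)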
eager execution mode:
Eager execution evaluates operations immediately, without first building a computational graph. TensorFlow 2.0 enables it by default, which makes model debugging easier and reduces the amount of code. When the model is deeper and more complex and we care about performance, or when we want to deploy the model, we still want the Graph Execution mode that was the default in TensorFlow 1.x. TensorFlow 2 provides the tf.function module for this: combined with the AutoGraph mechanism, adding a simple @tf.function decorator converts the function into an efficient TensorFlow graph and runs it in Graph Execution mode.
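A minimal sketch comparing an eager function with its tf.function-compiled version (the shapes and the timing harness are illustrative):

import timeit

import tensorflow as tf

def dense_layer(x, w, b):
    return tf.nn.relu(tf.matmul(x, w) + b)

# The same Python function, compiled into a graph by tf.function
dense_layer_graph = tf.function(dense_layer)

x = tf.random.uniform([100, 1000])
w = tf.random.uniform([1000, 1000])
b = tf.random.uniform([1000])

print("eager:", timeit.timeit(lambda: dense_layer(x, w, b), number=100))
print("graph:", timeit.timeit(lambda: dense_layer_graph(x, w, b), number=100))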
In graph mode constants work fine, but stateful objects such as tf.Variable() created inside the decorated function trigger an exception, because a tf.function is not allowed to create new variables every time it runs.
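A minimal sketch of the failing case: the decorated function tries to create a new tf.Variable whenever it is traced, so calling it raises a ValueError:

import tensorflow as tf

@tf.function
def f():
    A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    x = tf.constant([[0, 10], [0, 0.5]])
    b = tf.Variable(12.0)  # variable created inside the traced function
    return tf.add(tf.matmul(A, x), b, name="result")

f()  # ValueError: the tf.function tried to create variables on a non-first call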
If the function does need variables and you still want to run it in graph mode, there are a few solutions.
The first is to change the function definition and pass the variable in through an input parameter:
import tensorflow as tf

@tf.function
def f(b):
    A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    x = tf.constant([[0, 10], [0, 0.5]])
    # b = tf.constant([[1, -1]], dtype=tf.float32)
    y = tf.add(tf.matmul(A, x), b, name="result")
    return y

var = tf.Variable(12.0)
f(var)
f(15)
f(tf.constant(1.0))  # float32 so it matches the dtype of the matmul result
Now f accepts variables, tensors, and Python scalars alike, which satisfies the stricter type requirements of the graph-accelerated version.
The second is to move the variable out of the function's scope. Plain global variables are not recommended; instead, wrap the state in an object, such as a class (or a Keras/tf.Module object), as shown below.
import tensorflow as tf

class F:
    def __init__(self):
        self._b = None

    @tf.function
    def __call__(self):
        A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        x = tf.constant([[0, 10], [0, 0.5]])
        if self._b is None:
            # The variable is created only once, on the first call
            self._b = tf.Variable(12.0)
        y = tf.add(tf.matmul(A, x), self._b, name="result")
        return y

f = F()
f()
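Since the text mentions Keras objects, an equivalent sketch using tf.Module (the class name AffineResult is made up) achieves the same effect: the module owns the variable, so the decorated method never creates state inside the traced body:

import tensorflow as tf

class AffineResult(tf.Module):
    def __init__(self):
        super().__init__()
        # The variable lives on the module, outside the traced function body
        self.b = tf.Variable(12.0)

    @tf.function
    def __call__(self):
        A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        x = tf.constant([[0, 10], [0, 0.5]])
        return tf.add(tf.matmul(A, x), self.b, name="result")

m = AffineResult()
m()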