Neural networks for deep learning: a TensorFlow 2.0 / Keras API training template

import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist


def make_model(n_classes):
    """Build the convolutional classifier for 28x28 single-channel images.

    The network stacks two convolution + max-pooling stages, flattens the
    feature maps, and finishes with a 1024-unit ReLU layer, dropout with
    rate 0.5, and a final dense layer without activation.

    Args:
        n_classes: Number of units in the final ``Dense`` layer, i.e. the
            number of scores produced per input image.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    relu = tf.nn.relu

    net = tf.keras.Sequential()

    # First feature-extraction stage.
    net.add(
        layers.Conv2D(32, (5, 5), activation=relu, input_shape=(28, 28, 1))
    )
    net.add(layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

    # Second feature-extraction stage.
    net.add(layers.Conv2D(64, (3, 3), activation=relu))
    net.add(layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

    # Classification head; the last layer has no activation function.
    net.add(layers.Flatten())
    net.add(layers.Dense(1024, activation=relu))
    net.add(layers.Dropout(0.5))
    net.add(layers.Dense(n_classes))

    return net


def load_data():
    """Load Fashion-MNIST and prepare both splits identically.

    Every image tensor gets a trailing channel axis and is converted to
    float32 in the [-1, 1] range; every label tensor gets a trailing axis
    of size one.

    Returns:
        ``((train_x, train_y), (test_x, test_y))`` with the preprocessing
        described above applied to both splits.
    """
    (train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()

    def _prepare_images(images):
        # Add the channel axis, then let convert_image_dtype rescale the
        # integer pixels to [0, 1] before mapping them to [-1, 1].
        images = tf.expand_dims(images, -1)
        return (tf.image.convert_image_dtype(images, tf.float32) - 0.5) * 2

    # Scale input in [-1, 1] range. The test split goes through exactly the
    # same path as the training split: it was previously rescaled twice and
    # never received the channel axis.
    train_x = _prepare_images(train_x)
    test_x = _prepare_images(test_x)

    train_y = tf.expand_dims(train_y, -1)
    test_y = tf.expand_dims(test_y, -1)

    return (train_x, train_y), (test_x, test_y)


def train():
    """Train the classifier with a hand-written training loop.

    Builds the model, restores the latest checkpoint from ``./tf_ckpts``
    when one exists, then runs 10 epochs of Adam updates over batches of 32
    samples. Every 10 batches it prints progress, saves a checkpoint and
    writes image, accuracy and loss summaries to ``./log/train``. At the end
    of each epoch it prints the accuracy measured over the training batches.
    """
    # Define the model
    n_classes = 10
    model = make_model(n_classes)

    # Input data (the test split is not used by this loop)
    (train_x, train_y), _ = load_data()

    # Training parameters
    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    step = tf.Variable(1, name="global_step")
    optimizer = tf.optimizers.Adam(1e-3)

    # Track step, optimizer and model together so training can resume.
    ckpt = tf.train.Checkpoint(step=step, optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt, "./tf_ckpts", max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print(f"Restored from {manager.latest_checkpoint}")
    else:
        print("Initializing from scratch.")

    accuracy = tf.metrics.Accuracy()
    mean_loss = tf.metrics.Mean(name="loss")

    # Train step function
    @tf.function
    def train_step(inputs, labels):
        with tf.GradientTape() as tape:
            # Pass training=True explicitly so the Dropout layer is applied
            # during the optimisation step.
            logits = model(inputs, training=True)
            loss_value = loss(labels, logits)

        gradients = tape.gradient(loss_value, model.trainable_variables)
        # TODO: apply gradient clipping here
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        step.assign_add(1)

        accuracy.update_state(labels, tf.argmax(logits, -1))
        return loss_value, accuracy.result()

    epochs = 10
    batch_size = 32
    nr_batches_train = train_x.shape[0] // batch_size
    print(f"Batch size: {batch_size}")
    print(f"Number of batches per epoch: {nr_batches_train}")

    train_summary_writer = tf.summary.create_file_writer("./log/train")

    with train_summary_writer.as_default():
        for epoch in range(epochs):
            for t in range(nr_batches_train):
                start_from = t * batch_size
                to = (t + 1) * batch_size

                features, labels = train_x[start_from:to], train_y[start_from:to]

                loss_value, accuracy_value = train_step(features, labels)
                mean_loss.update_state(loss_value)

                if t % 10 == 0:
                    print(f"{step.numpy()}: {loss_value} - accuracy: {accuracy_value}")
                    save_path = manager.save()
                    print(f"Checkpoint saved: {save_path}")
                    tf.summary.image(
                        "train_set", features, max_outputs=3, step=step.numpy()
                    )
                    tf.summary.scalar("accuracy", accuracy_value, step=step.numpy())
                    tf.summary.scalar("loss", mean_loss.result(), step=step.numpy())
                    # Start a fresh window for the next 10 batches.
                    accuracy.reset_states()
                    mean_loss.reset_states()
            print(f"Epoch {epoch} terminated")
            # Measuring accuracy on the whole training set at the end of epoch.
            # Reset first: the metric still holds the batches seen since the
            # last periodic reset, which would otherwise leak into the result.
            accuracy.reset_states()
            for t in range(nr_batches_train):
                start_from = t * batch_size
                to = (t + 1) * batch_size
                features, labels = train_x[start_from:to], train_y[start_from:to]
                # Inference pass: keep Dropout disabled.
                logits = model(features, training=False)
                accuracy.update_state(labels, tf.argmax(logits, -1))
            print(f"Training accuracy: {accuracy.result()}")
            accuracy.reset_states()


# Run the training loop only when this file is executed as a script.
if __name__ == "__main__":
    train()

tf.GradientTape() creates a context (the "tape") that records the operations executed inside it so that they can be differentiated automatically. This replaces the TensorFlow 1.x approach, in which gradients were computed by building a static graph.
By default, the resources held by the tape are released as soon as tape.gradient() is called, so it can be called only once. If you need to call it multiple times, create the tape with `with tf.GradientTape(persistent=True) as tape:`.

# Excerpt from the train step: the forward pass runs inside the tape context
# so that its operations are recorded.
with tf.GradientTape() as tape:
    logits = model(inputs)
    loss_value = loss(labels, logits)

# Gradient of the loss with respect to every trainable variable of the model.
gradients = tape.gradient(loss_value, model.trainable_variables)

Save and restore model state

# Excerpt: group the objects to save, keep at most 3 checkpoints on disk,
# and restore the most recent one when it exists.
ckpt = tf.train.Checkpoint(step=step, optimizer=optimizer, model=model)
manager = tf.train.CheckpointManager(ckpt, "./tf_ckpts", max_to_keep=3)
ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    print(f"Restored from {manager.latest_checkpoint}")
else:
    print("Initializing from scratch.")

Use TensorBoard to visualize the data. First create a file writer, then enter its context manager, and finally call the summary functions to write the data. To visualize the training process, open a terminal in the code directory and run tensorboard --logdir=./log (the parent of the ./log/train directory used by the writer below), and then use a browser to open the web address printed by the command-line program (generally http://name-of-your-computer:6006 )

# Excerpt: create the writer once, then emit summaries inside its context.
train_summary_writer = tf.summary.create_file_writer("./log/train")

with train_summary_writer.as_default():
    # ... training loop elided ...
    tf.summary.image("train_set", features, max_outputs=3, step=step.numpy())
    tf.summary.scalar("accuracy", accuracy_value, step=step.numpy())
    tf.summary.scalar("loss", mean_loss.result(), step=step.numpy())

The following is the simple Sequential API workflow based on compile() and fit(). It is the approach that is called most often, but it is not as flexible as the custom loop above: a hand-written training loop lets you adjust the gradients and add features according to your own requirements, at the cost of being more laborious to write. Both examples build the model by stacking layers with the Sequential API. Architectures with residual modules, such as ResNet (covered later), need the functional API.

import tensorflow as tf

from tensorflow.keras.datasets import fashion_mnist

n_classes = 10
# The final Dense layer has no activation, so the model outputs raw logits.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(
            32, (5, 5), activation=tf.nn.relu, input_shape=(28, 28, 1)
        ),
        tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation=tf.nn.relu),
        tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1024, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(n_classes),
    ]
)

model.summary()

(train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()
# Scale input in [-1, 1] range
train_x = train_x / 255.0 * 2 - 1
test_x = test_x / 255.0 * 2 - 1
# Add the trailing channel axis required by input_shape=(28, 28, 1).
train_x = tf.expand_dims(train_x, -1).numpy()
test_x = tf.expand_dims(test_x, -1).numpy()

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    # The string alias "sparse_categorical_crossentropy" assumes the model
    # outputs probabilities; this model outputs logits, so say so explicitly.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

model.fit(train_x, train_y, epochs=10)
model.evaluate(test_x, test_y)

eager execution mode:
Eager execution evaluates operations immediately, without first building a graph. TensorFlow 2.0 uses eager execution by default, which makes model debugging easier and reduces the amount of code. When the model is deeper and more complex, or when we pursue high performance or deploy the model, we still want the graph execution mode that was the default in TensorFlow 1.x, so that the model is transformed into an efficient TensorFlow graph. For this purpose TensorFlow 2 provides tf.function: combined with the AutoGraph mechanism, we can run the model in graph execution mode simply by adding the @tf.function decorator.
Graph mode works well with constants, but a function that creates state such as tf.Variable() every time it is called raises an exception.
If the program contains variables and you still want to use graph mode, there are some solutions.
The first is to change the definition of the function and pass the variable in through an input parameter.

import tensorflow as tf
@tf.function
def f(b):
    """Return ``A @ x + b`` for two fixed 2x2 float32 matrices.

    Args:
        b: Value added to the matrix product. It is supplied by the caller
            instead of being created inside the function.

    Returns:
        The tensor produced by the ``tf.add`` operation named ``"result"``.
    """
    A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    x = tf.constant([[0, 10], [0, 0.5]])
    # b used to be a constant defined here; it is now an input parameter.
    y = tf.add(tf.matmul(A, x), b, name="result")
    return y

# The same function accepts a variable, a Python number and a tensor.
var = tf.Variable(12.)
f(var)
f(15)
# Use a float constant: an int32 tensor cannot be added to the float32 product.
f(tf.constant(1.0))

Now f accepts both variables and tensors, which satisfies the strict type restrictions of the graph-accelerated version.
The second is to move the variable out of the function's scope. Global variables are not recommended here; instead, keep the variable as an attribute of an object (a Keras object, for example), as shown below.

class F():
    """Callable whose variable lives on the instance, outside the traced function."""

    def __init__(self):
        # Placeholder; the variable is created on the first call.
        self._b = None

    @tf.function
    def __call__(self):
        """Return ``A @ x + b``, creating ``b`` only if it does not exist yet."""
        A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        x = tf.constant([[0, 10], [0, 0.5]])
        # Guarded creation: later calls reuse the variable stored on self.
        if self._b is None:
            self._b = tf.Variable(12.)
        y = tf.add(tf.matmul(A, x), self._b, name="result")
        return y
# Create the callable object and invoke it once.
f=F()
f()

Tags: neural networks TensorFlow Deep Learning

Posted on Fri, 29 Oct 2021 22:49:11 -0400 by mrphobos