Neural network experiment, mapping integers to one-hot vectors: the Adam optimizer is not universally best, and in some cases RMSprop converges much faster

Experiment: a two-layer MLP maps each of the 16 integers 0-15 to a one-hot vector of length 16, and training runs until the accuracy reaches 100%.
To increase the difficulty, the task is then extended to 100 integers with one-hot vectors of length 100.
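
For clarity, a one-hot vector of length N has a single entry equal to 1 and all others 0. The snippet below is only a standalone illustration of that format, separate from the experiment scripts further down:

import numpy as np

N = 16            # length of the one-hot vector (one position per integer)
target_class = 5  # example target position
one_hot = np.zeros(N)
one_hot[target_class] = 1.0
print(one_hot)    # exactly one entry is 1.0, the rest are 0.0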

With 16 integers and no preprocessing of the input data:
Using the Adam optimizer, it generally takes more than 10000 iterations to reach 100% accuracy.
Using the RMSprop optimizer, 100% accuracy is reached within 2000 iterations in most cases.

With 100 integers, using the RMSprop optimizer:
Without preprocessing, the accuracy is only 0.45 after 40000 iterations.
With min-max normalization, the accuracy reaches 1.0 after about 5100 iterations.
With zero-mean normalization (min-max scaled to [-1, 1]), the accuracy reaches 1.0 after about 3600 iterations.
With z-score standardization, the accuracy reaches 1.0 after about 2300 iterations.
With the Adam optimizer, training failed to converge no matter how the data was preprocessed.
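
For reference, the three preprocessing variants compared above correspond to the following numpy transforms, shown here as a standalone sketch of the same expressions used in the scripts below:

import numpy as np

x = np.arange(100, dtype=np.float64)

# Min-max normalization: rescale to [0, 1]
x_minmax = (x - x.min()) / (x.max() - x.min())

# Zero-mean normalization: rescale to [-1, 1]
x_zero_mean = (x - x.min()) / (x.max() - x.min()) * 2 - 1

# Z-score standardization: zero mean, unit standard deviation
x_zscore = (x - x.mean()) / x.std()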

Below is the script for the 16-integer experiment; the 100-integer version follows after it.

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class SelectNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=4*4, inter_dim=32, n_layer=2, act=nn.LeakyReLU(0.1)):
        super().__init__()
        self.mod_list = nn.ModuleList()
        for i in range(n_layer):
            d_in = in_dim if i == 0 else inter_dim
            d_out = out_dim if i == n_layer - 1 else inter_dim
            self.mod_list.append(nn.Linear(d_in, d_out))
            self.mod_list.append(act)

    def forward(self, x):
        y = x
        for m in self.mod_list:
            y = m(y)
        return y #F.sigmoid(y)


torch.set_grad_enabled(False)

in_dim = 1
out_dim = 4*4
inter_dim = int(out_dim*1.3)
n_layer = 2

net = SelectNet(in_dim, out_dim, inter_dim, n_layer, nn.LeakyReLU(0.1))

x_train = np.arange(out_dim)

norm_type = 'none'

print('Input preprocessing method:', norm_type)

if norm_type == 'min-max normalization':
    # Min-max normalization to [0, 1]: with length 100, accuracy reaches 1.0 after about 5100 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min())
elif norm_type == 'zero-mean normalization':
    # Min-max normalization to [-1, 1]: with length 100, accuracy reaches 1.0 after about 3600 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min()) * 2 - 1
elif norm_type == 'z-score standardization':
    # Z-score standardization (zero mean, unit std): with length 100, accuracy reaches 1.0 after about 2300 iterations
    x_train = (x_train - x_train.mean()) / np.std(x_train)
elif norm_type == 'none':
    # No preprocessing: the slowest case... with length 100 the accuracy is still only 0.45 after 50000 iterations, at which point training was stopped
    pass
else:
    print('Unknown norm_type, keeping the data as-is')

y_train = []

mask = np.ones(len(x_train))  # 1 = output position still free, 0 = already taken
# Build the targets greedily: feed each input through the randomly initialized net and
# assign it the highest-scoring output position that is still free (first come, first served)
for i in x_train:
    x = torch.Tensor([i])[None, ...]
    a = net(x).cpu().detach().numpy()[0]
    a -= (a.min()-1)
    a *= mask
    pos = np.argmax(a)
    y_train.append(pos)
    mask[pos] = 0

# y_train = x_train.copy()
print(y_train)

def mini_train(net, x_train, y_train, batch_size=None, target_accuracy=None, max_count=None):
    # Full-batch training, stop once accuracy reaches 1.0
    optim = torch.optim.RMSprop(net.parameters(), lr=0.001)
    # optim = torch.optim.Adam(net.parameters(), lr=0.001)
    x_train = torch.tensor(x_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)

    def check_accuracy(xs, ys):
        with torch.no_grad():
            out_ys = net(xs).numpy()
        return np.sum(np.argmax(out_ys, 1) == ys.numpy()) / len(ys)

    with torch.enable_grad():
        for b in range(99999999999999999):
            out = net(x_train)
            loss = F.cross_entropy(out, y_train)
            optim.zero_grad()  # clear gradients left over from the previous step
            loss.backward()
            optim.step()

            acc = check_accuracy(x_train, y_train)
            print(b, acc, loss.item())
            if acc == 1:
                break


mini_train(net, x_train[:, None], y_train)

print('over')

The script for the 100-integer experiment (identical to the one above except that out_dim = 10*10, the default norm_type changes, and the optimizer is selectable via optim_type):

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class SelectNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=4*4, inter_dim=32, n_layer=2, act=nn.LeakyReLU(0.1)):
        super().__init__()
        self.mod_list = nn.ModuleList()
        for i in range(n_layer):
            d_in = in_dim if i == 0 else inter_dim
            d_out = out_dim if i == n_layer - 1 else inter_dim
            self.mod_list.append(nn.Linear(d_in, d_out))
            self.mod_list.append(act)

    def forward(self, x):
        y = x
        for m in self.mod_list:
            y = m(y)
        return y #F.sigmoid(y)


torch.set_grad_enabled(False)

in_dim = 1
out_dim = 10*10
inter_dim = int(out_dim*1.3)
n_layer = 2

net = SelectNet(in_dim, out_dim, inter_dim, n_layer, nn.LeakyReLU(0.1))

x_train = np.arange(out_dim)

norm_type = 'z-score standardization'
optim_type = torch.optim.RMSprop
# optim_type = torch.optim.Adam


print('Input preprocessing method:', norm_type)

if norm_type == 'min-max normalization':
    # Min-max normalization to [0, 1]: with length 100, accuracy reaches 1.0 after about 5100 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min())
elif norm_type == 'zero-mean normalization':
    # Min-max normalization to [-1, 1]: with length 100, accuracy reaches 1.0 after about 3600 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min()) * 2 - 1
elif norm_type == 'z-score standardization':
    # Z-score standardization (zero mean, unit std): with length 100, accuracy reaches 1.0 after about 2300 iterations
    x_train = (x_train - x_train.mean()) / np.std(x_train)
elif norm_type == 'none':
    # No preprocessing: the slowest case... with length 100 the accuracy is still only 0.45 after 50000 iterations, at which point training was stopped
    pass
else:
    print('Unknown norm_type, keeping the data as-is')

y_train = []

mask = np.ones(len(x_train))  # 1 = output position still free, 0 = already taken
# Build the targets greedily: feed each input through the randomly initialized net and
# assign it the highest-scoring output position that is still free (first come, first served)
for i in x_train:
    x = torch.Tensor([i])[None, ...]
    a = net(x).cpu().detach().numpy()[0]
    a -= (a.min()-1)
    a *= mask
    pos = np.argmax(a)
    y_train.append(pos)
    mask[pos] = 0

# y_train = x_train.copy()
print(y_train)

def mini_train(net, x_train, y_train, batch_size=None, target_accuracy=None, max_count=None):
    # Full-batch training, stop once accuracy reaches 1.0
    optim = optim_type(net.parameters(), lr=0.001)
    x_train = torch.tensor(x_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)

    def check_accuracy(xs, ys):
        with torch.no_grad():
            out_ys = net(xs).numpy()
        return np.sum(np.argmax(out_ys, 1) == ys.numpy()) / len(ys)

    with torch.enable_grad():
        for b in range(99999999999999999):
            out = net(x_train)
            loss = F.cross_entropy(out, y_train)
            optim.zero_grad()  # clear gradients left over from the previous step
            loss.backward()
            optim.step()

            acc = check_accuracy(x_train, y_train)
            print(b, acc, loss.item())
            if acc == 1:
                break


mini_train(net, x_train[:, None], y_train)

print('over')
