This project takes an in-depth, beginner-friendly look at machine learning techniques using PyTorch. The data set of choice is the well-known Fashion-MNIST data set, which is composed of many images of fashion items (shirts, shoes, etc.). The data is labeled, meaning that for every image we know exactly what it depicts (an image of a shirt carries the shirt label). We start from the very beginning, as if PyTorch has never been used before and you are just getting started.
We will be building a supervised learning machine learning model. Supervised learning is the machine learning task of learning a function that maps an input to an output based on example input-output pairs. We will be training the model with labeled data and testing the model by how well it predicts the correct label.
Some things that you need to install in your python environment if you do not have them already:
pytorch
numpy
matplotlib
I am running this on python 3.9. Using anaconda, I created a virtual environment (named py39-pytorch, you can name it whatever you'd like) and I installed all dependencies as such:
conda create -n py39-pytorch python=3.9
conda activate py39-pytorch
conda install pytorch torchvision -c pytorch
conda install numpy
conda install matplotlib
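If you want to double-check the environment before moving on, a quick sanity check like the following (just importing the libraries and printing their versions) should run without errors:
# quick sanity check that the main dependencies import correctly
import torch
import torchvision
import numpy as np
import matplotlib
print('torch:', torch.__version__)
print('torchvision:', torchvision.__version__)
print('numpy:', np.__version__)
print('matplotlib:', matplotlib.__version__)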
Now that the dependencies are installed, let's import everything we need:
# standard useful libraries
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
from IPython.display import clear_output
torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True) # On by default, leave it here for clarity
<torch.autograd.grad_mode.set_grad_enabled at 0x7fce924779a0>
Download the dataset from torchvision, a library that provides the FashionMNIST dataset among many other popular data sets. It provides a training set and a testing set that we can grab. We specify where to store the data: a directory named 'data' inside the current working directory.
We are ultimately going to have a training set, a validation set, and a testing set. The training set is the data we use to train our model, and the validation set is used to validate the model as we go. As we train/validate, there is the potential to over-fit the model to the data used for training and validation. That is where the testing set comes in handy. Of course, in machine learning this is always a battle: depending on the size of your dataset, you may not have the luxury of enough samples to train/validate/test. In any case, the training and validation sets will come from the 'train' split provided by torchvision (which we will split ourselves), and the test set will come from the 'test' split.
train_and_valid_set = torchvision.datasets.FashionMNIST("./data", download=True, train=True,
                                                        transform=transforms.ToTensor())
print(f'The training set has {len(train_and_valid_set)} datapoints')
The training set has 60000 datapoints
The first step is to split the data into a train set and a validation set. The validation set will be used to assess accuracy during development, and the test set will be used to test the accuracy of the final results. PyTorch is able to split data using the function:
torch.utils.data.dataset.random_split(dataset, [num_train, num_valid]).
We will use an 80/20 split here. 80% of the data from the train_and_valid_set
will be used for training, and 20% will be used for validation.
print('Number of data points in training set:\t', len(train_and_valid_set))
print('Number in split training set:\t\t', int(len(train_and_valid_set)*.8))
print('Number in validation set:\t\t', int(len(train_and_valid_set)*.2))
Number of data points in training set: 60000 Number in split training set: 48000 Number in validation set: 12000
train_set, valid_set = torch.utils.data.dataset.random_split(train_and_valid_set, [48000, 12000])
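Note that random_split shuffles the data before splitting, so the exact split changes from run to run. If you want the split to be reproducible across kernel restarts, you can optionally pass a seeded generator, for example (the seed value 42 is arbitrary):
# optional: seed the split so train_set/valid_set are the same every run
train_set, valid_set = torch.utils.data.random_split(
    train_and_valid_set, [48000, 12000],
    generator=torch.Generator().manual_seed(42))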
The test set, as mentioned previously, is also available through torchvision. We can download it as well by setting train=False.
test_set = torchvision.datasets.FashionMNIST("./data", download=True, train=False,
                                             transform=transforms.ToTensor())
print(f'The test set has {len(test_set)} datapoints')
The test set has 10000 datapoints
Each sample is a pair: an image (a matrix of values representing the grey-scale pixels) and an integer label that identifies which item of clothing the image shows. For clarity, we can map each integer label to the description (or clothing item) it represents.
def output_label(label: 'torch.Tensor or int') -> str:
    '''
    Takes an integer label, either a plain int or a torch.Tensor holding
    one, and returns the string that describes the label.
    '''
    # output mapping dictionary, where the integer label is the key
    # and the clothing description is the value
    output_mapping = {
        0: "T-shirt/Top",
        1: "Trouser",
        2: "Pullover",
        3: "Dress",
        4: "Coat",
        5: "Sandal",
        6: "Shirt",
        7: "Sneaker",
        8: "Bag",
        9: "Ankle Boot"
    }
    # sometimes the label is a torch.Tensor, in which case we want the
    # integer stored inside it; otherwise the key is the integer itself
    key = (label.item() if type(label) == torch.Tensor else label)
    return output_mapping[key]
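As a quick sanity check of this helper, calling it with a plain integer (or a tensor holding one) returns the matching description:
# quick check of the label helper
print(output_label(9))                 # Ankle Boot
print(output_label(torch.tensor(1)))   # Trouser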
Let's take a quick look at one data point. We'll just grab a single sample from the training set (I am choosing the third entry here, rather than a truly random one, so the following cells always refer to the same sample).
random_image, random_image_label = train_set[3]
Let's look at the size of the image data:
random_image.shape
torch.Size([1, 28, 28])
The shape attribute tells us we have 1 matrix of size 28 x 28 (with image data we sometimes have 3 matrices representing the 3 channels of an RGB image, but here we are in gray-scale, which is only one channel, hence one matrix). This means there are 784 pixels in the image, or 784 elements in the matrix storing the pixel data. Let's look at what the data for an image looks like:
random_image
tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1765, 0.8078, 0.5961, 0.5961, 0.3765, 0.4824, 0.3216, 0.5529, 0.3294, 0.4627, 0.6118, 0.6196, 0.1098, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.3098, 0.8706, 0.7725, 0.8902, 0.6275, 1.0000, 0.6118, 0.5843, 0.7490, 0.8471, 0.8745, 1.0000, 0.3020, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], ...]])
(the remaining rows of the 1 x 28 x 28 tensor are omitted here for brevity)
As expected, the image array is a matrix of values. The values are between 0 and 1, where 0 is black and 1 is white, and together they make up the gray-scale image. This data set is sometimes distributed with raw 0-255 pixel values, but the transforms.ToTensor() transform normalized it to [0, 1] for us. Let's actually show the image. We can use the imshow() function to do so. As noted previously, there are only 784 pixels, so the image quality will be relatively poor.
plt.imshow(random_image.squeeze(), cmap="gray")
<matplotlib.image.AxesImage at 0x7f9add61b8e0>
Although blurry, we can clearly see the image is a trouser. Let's look at the integer label for this data point.
print('The integer label is:\t\t', random_image_label)
print('The description label is:\t', output_label(random_image_label))
The integer label is: 1 The description label is: Trouser
Let's take a quick look at more of the data by plotting all the images in one batch (the total number of images shown equals the batch size). The torchvision.utils.make_grid function is great for showing many images at once. We will also use torch.utils.data.DataLoader to iterate over the data. Here we split the data into groups (or batches) of 50; batching will come into play later when we begin training the model, but for now we just use it to show 50 images.
# split the data set into batches of 50 so we can iterate over
# the images -- this will come into play again later
sets_of_50_images = torch.utils.data.DataLoader(train_set, 50)
random_50_images = iter(sets_of_50_images)
# the images variable will contain the matrix data for the
# gray scale images and the labels variable will hold an array
# of the integer labels for each image
images, labels = next(random_50_images)
# make the grid of images; this appends and arranges all the
# matrix data for each individual image into one larger matrix
grid_of_images = torchvision.utils.make_grid(images)
# convert to a numpy array and show it using imshow
npimg = grid_of_images.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
plt.axis('off')
(-0.5, 241.5, 211.5, -0.5)
We can now see all the images in one of the batches: 50 different fashion images. This means the variable holding the labels for the batch should have length 50; let's confirm this:
print('Number of labels for this batch:', labels.shape)
Number of labels for this batch: torch.Size([50])
# collect the index of the first image seen for each distinct label
set_with_labels = []
has_labels = []
for i, label in enumerate(labels):
    if label not in has_labels:
        has_labels.append(label)
        set_with_labels.append((i, label))
# sort by label so the classes are displayed in order 0-9
set_with_labels.sort(key=lambda x: x[1])
fig, ax = plt.subplots(nrows=1, ncols=10, figsize=(13, 5), constrained_layout=True)
for i, (img_index, label) in enumerate(set_with_labels):
    img = images[img_index]
    ax[i].imshow(img.squeeze(), cmap="gray")
    ax[i].set_title(output_label(label), fontsize=21)
    ax[i].axis('off')
The ultimate goal of this project is to investigate different machine learning techniques and determine which gives the best accuracy, while also investigating the hyperparameters that contribute to the success of each algorithm.
Some important notes:
The batch size is a hyperparameter of gradient descent that controls the number of training samples to work through before the model's internal parameters are updated.
The number of epochs is a hyperparameter of gradient descent that controls the number of complete passes through the training dataset.
The learning rate controls how large a step the optimizer takes when updating the model parameters at each batch. Smaller values mean the model learns relatively slowly but more stably; larger values mean bigger updates that can overshoot. It is an essential hyperparameter because it dictates how strongly the model reacts to each new batch of observations.
Keep these definitions in mind as we go through this. These are the hyperparameters we will investigate and alter to control the learning and optimization process of our algorithm.
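To see how these definitions fit together, here is the plain gradient descent update as a minimal sketch (the Adam optimizer we use later builds on this same idea, adding momentum and per-parameter scaling):

$$\theta \leftarrow \theta - \eta \, \nabla_\theta L_{\text{batch}}(\theta)$$

where $\theta$ are the model parameters, $\eta$ is the learning rate, and $L_{\text{batch}}$ is the loss averaged over one batch. This update is applied once per batch, and one epoch means it has been applied across every batch in the training set.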
Let's begin by defining a neural network. There are some important concepts to understand before we can create this model, so let's take it slow. With 2D (matrix) data, we first have to flatten the matrix into a 1D vector of input features, since each input of a fully connected layer is a single number. We can do this using the nn.Flatten() module. To see it in action, let's see how it modifies the matrix of one image.
flatten = nn.Flatten()
flat_image = flatten(random_image)
print('Before flattening:\t', random_image.shape)
print('After flattening:\t', flat_image.shape)
Before flattening: torch.Size([1, 28, 28]) After flattening: torch.Size([1, 784])
We can see that flattening the matrix leaves us with an array of size 784, which is 28*28. Now that we have flattened data, we can create layers for the neural network. We begin with a linear layer, which in PyTorch is nn.Linear, a module that applies a linear transformation to the input features. You also specify the number of output features, which is essentially the number of nodes within that layer. This choice is something you can of course play with. For this example, we are using 20 nodes in this hidden layer.
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.shape)
torch.Size([1, 20])
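Under the hood, nn.Linear stores a weight matrix and a bias vector and computes input @ weight.T + bias; we can peek at their shapes to see where the 20 outputs come from:
# the layer stores a 20 x 784 weight matrix and a bias vector of length 20
print(layer1.weight.shape)   # torch.Size([20, 784])
print(layer1.bias.shape)     # torch.Size([20])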
We can see that when we pass layer1 the input features (here, the flattened image), we get back an array whose size is the number of nodes we chose, so in this case the hidden layer output has size 20. When developing the neural network, we also want nonlinear activation functions between the layers. We are going to use the nn.ReLU activation function, but you can play with others such as nn.PReLU or nn.ELU. These activation functions are what introduce nonlinearity into the model.
print(f"Before ReLU: {hidden1}")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")
Before ReLU: tensor([[ 1.1308e-01, 7.8541e-02, 3.2105e-02, -3.1373e-01, 2.1415e-01, 4.7796e-02, -7.4124e-02, 2.5886e-02, -6.9871e-02, -4.5069e-01, 4.1861e-04, 1.8736e-01, 6.0131e-01, 1.6730e-01, -6.1446e-02, 2.4272e-01, 1.6963e-01, -6.6513e-02, -5.3369e-02, 1.2783e-01]], grad_fn=<AddmmBackward0>) After ReLU: tensor([[1.1308e-01, 7.8541e-02, 3.2105e-02, 0.0000e+00, 2.1415e-01, 4.7796e-02, 0.0000e+00, 2.5886e-02, 0.0000e+00, 0.0000e+00, 4.1861e-04, 1.8736e-01, 6.0131e-01, 1.6730e-01, 0.0000e+00, 2.4272e-01, 1.6963e-01, 0.0000e+00, 0.0000e+00, 1.2783e-01]], grad_fn=<ReluBackward0>)
Now we want to order all the layers we will ultimately have in our network: the first hidden layer, its activation function, and then the output layer. If you want more than one hidden layer, you can add more layer/activation pairs to the stack. In general, the input to the stack is the flattened matrix (an array) of your feature data and the output is an array whose size is the number of classes. Here is an example of a small network with only one hidden layer and an output layer.
small_network = nn.Sequential(
    flatten,           # nn.Flatten()
    layer1,            # nn.Linear(in_features=28*28, out_features=20)
    nn.ReLU(),         # activation function
    nn.Linear(20, 10)  # output layer
)
Now that we have a miniature network, we can run it on one image just to see what we get.
small_network(random_image)
tensor([[ 0.3686, -0.0562, 0.1976, -0.0653, 0.2150, -0.0391, -0.0037, -0.2843, -0.0311, -0.1537]], grad_fn=<AddmmBackward0>)
The output from the small network is an array of size 10, as expected. These values are what we call logits: raw values in (-infinity, infinity) that are not yet probabilities. We pass them through the nn.Softmax module to get actual class probabilities. nn.Softmax takes a dimension argument that specifies the axis along which the values must sum to 1. Since our output is a row-oriented array (one row per image), the dimension is 1.
logits = small_network(random_image)
softmax = nn.Softmax(dim=1)
pred_probabilities = softmax(logits)
pred_probabilities
tensor([[0.1400, 0.0916, 0.1180, 0.0907, 0.1201, 0.0932, 0.0965, 0.0729, 0.0939, 0.0831]], grad_fn=<SoftmaxBackward0>)
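Since softmax produces a probability distribution over the 10 classes, each row should sum to 1. A quick check:
# each row of predicted probabilities should sum to 1 (up to floating point error)
print(pred_probabilities.sum(dim=1))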
After calling the softmax function on the raw logits, we have an array of predicted probabilities for each class, scaled to values in [0, 1]. We then say the prediction for the image is the class with the greatest probability. The torch.max function takes in the softmax probabilities and the dimension, and returns the maximum probability along with its index; that index corresponds to the predicted class. Of course the prediction is essentially random, since the model has not been trained, but for the sake of example let's see what it is.
prob, pred_class = torch.max(pred_probabilities, dim=1)
print(f'The maximum probability is {prob[0]}, the predicted class is {pred_class[0]}, and the label is {output_label(pred_class)}')
The maximum probability is 0.14003434777259827, the predicted class is 0, and the label is T-shirt/Top
The index with the greatest probability is 0, which is the output label 'T-shirt/Top', although we know, since this is the sample image we have been using, that it is actually a Trouser.
Let's go ahead and build a neural network using what we have learned!
class NeuralNetwork(nn.Module):
    '''
    A simple neural network model for the fashionMNIST data set
    - the input features should be a matrix of size 28x28
    FULLY CONNECTED -- 2 HIDDEN LAYERS
    '''
    def __init__(self):
        # call the nn.Module constructor so our NeuralNetwork class
        # is properly initialized as a pytorch module
        super(NeuralNetwork, self).__init__()
        # flatten the matrix
        self.flatten = nn.Flatten()
        # the input is the number of features, which in this
        # case is the total number of pixels
        input_features = 28*28
        # we are going to use two hidden layers here, each with
        # 256 nodes
        num_nodes = 256
        # finally we have our output, which will be the classes,
        # and remember we have 10 items of clothing
        num_classes = 10
        # create a sequential stack that holds all the layers in order
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_features, num_nodes),  # layer 1
            nn.ReLU(),                             # activation function
            nn.Linear(num_nodes, num_nodes),       # layer 2
            nn.ReLU(),                             # activation function
            nn.Linear(num_nodes, num_classes)      # output layer
        )

    def forward(self, x):
        '''
        Execute the sequential stack for a flattened matrix and
        return the raw logits
        '''
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
model = NeuralNetwork()
model
NeuralNetwork( (flatten): Flatten(start_dim=1, end_dim=-1) (linear_relu_stack): Sequential( (0): Linear(in_features=784, out_features=256, bias=True) (1): ReLU() (2): Linear(in_features=256, out_features=256, bias=True) (3): ReLU() (4): Linear(in_features=256, out_features=10, bias=True) ) )
Alright! We have a neural network. Now what? Well, we have to train the network, and we want to be able to assess its performance. We are going to choose a loss function; in this case we will choose nn.CrossEntropyLoss(). This loss function is commonly used for classification because it is smooth and well suited to gradient-based optimization. The goal, of course, is to minimize the error between the model's predictions and the true labels, and being able to minimize (optimize) that error efficiently is essential for model performance. A lot of the work in machine learning models happens in gradient descent, where the loss is optimized. The nn.CrossEntropyLoss() function applies nn.Softmax() (more precisely, a log-softmax) internally, so we need to pass it the raw logit values. This is why the neural network designed above does not apply nn.Softmax itself.
For further clarification, there are a variety of loss functions out there and you should choose one based on your data and the task you are attempting. nn.MSELoss, the mean squared error, is commonly used for regression. You could also use nn.NLLLoss, the negative log likelihood, which is commonly used for classification. As stated previously, nn.CrossEntropyLoss is often used for classification with neural networks, and it combines nn.LogSoftmax and nn.NLLLoss.
An example to interpret nn.CrossEntropyLoss()
: a loss of 0.22 means that, on average, your model is assigning the correct class a probability of around 80% (remember the cross entropy loss for a single sample is $-\log(\hat{y}_y)$, the negative log of the probability assigned to the true class).
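That 80% figure just comes from inverting the loss: if $-\log(\hat{y}_y) = 0.22$, then $\hat{y}_y = e^{-0.22} \approx 0.80$. A one-line check:
import math
print(math.exp(-0.22))   # ~0.8025, i.e. roughly 80% probability on the correct class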
We can play around by creating the loss function and computing the loss for the random_image we have been working with. We will pass the cross entropy loss function the logits computed previously and a torch.LongTensor version of random_image_label.
loss_function = nn.CrossEntropyLoss()
true_class = torch.LongTensor([random_image_label])
loss_function(logits, true_class)
tensor(2.1194, grad_fn=<NllLossBackward0>)
Now that we have an idea of what the loss function is doing, we need to optimize the loss. There are a variety of optimizers that can be used with PyTorch; check out the optim package to see all the optimizers available. This choice is sometimes hard. There is the simple stochastic gradient descent optimizer optim.SGD, as well as optim.Adam (very popular for the fashionMNIST dataset) and optim.RMSprop.
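For reference, switching optimizers is a one-line change, since each one is constructed from the model parameters plus its own hyperparameters. A small sketch (the learning rate and momentum values here are only illustrative, not tuned):
# examples of constructing alternative optimizers (not used below)
sgd_optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
rmsprop_optimizer = optim.RMSprop(model.parameters(), lr=0.001)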
When you begin to optimize the loss function, you need to give the optimizer two things: the model parameters it will update, and its own hyperparameters such as the learning rate. For this initial example, we will be using a learning rate of 0.001. The model, which is of type NeuralNetwork (the class we created previously), has a method inherited from nn.Module named parameters() that returns these parameters for us to send to the optimization algorithm.
learning_rate = 1e-3
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
def update_data(BATCH_SIZE=128):
    train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(dataset=valid_set, batch_size=BATCH_SIZE, shuffle=False)
    test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)
    return train_loader, valid_loader, test_loader
The data downloaded from torchvision is now stored in the data directory within the current working directory, and we can load it within the notebook using a DataLoader. The data is a large set of images, which is important to keep in mind, so I load it in batches; the batch size is a hyperparameter of the machine learning process and can be changed if desired. It will be discussed more later when we begin to develop the model. I am showing the data being loaded this way for the sake of example, and the batch size will change when we train the model and search for the optimal hyperparameters.
Remember the training set has 48000 data points; this is what gets split into batches.
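As a quick check on the batching, the length of a DataLoader is the number of batches it will yield, so with a batch size of 128 we expect 48000/128 = 375 training batches:
# len() of a DataLoader is the number of batches it yields per epoch
train_loader, valid_loader, test_loader = update_data(128)
print(len(train_loader))   # 48000 / 128 = 375 batches
print(len(valid_loader))   # ceil(12000 / 128) = 94 batches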
We can now train and test how our network is performing. We are going to have a training loop and a validation loop.
def train_loop(dataloader: torch.utils.data.dataloader.DataLoader,
               model: NeuralNetwork,
               loss_function: torch.nn.modules.loss,
               optimizer: torch.optim):
    '''
    This function executes the training step for the model. The inputs are:
    - dataloader: the data from the DataLoader in torch
    - model: in this case we are using the NeuralNetwork type created previously
    - loss_function: the loss function you will ultimately choose, in this case
                     the type is torch.nn.modules.loss.CrossEntropyLoss as we are
                     using the CrossEntropyLoss loss function
    - optimizer: the optimizer from the optim package
    '''
    size = len(dataloader.dataset)
    for batch, (image, image_label) in enumerate(dataloader):
        # Compute prediction and loss
        prediction = model(image)
        loss = loss_function(prediction, image_label)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # only print some stats every 100 batches
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(image)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def validation_loop(dataloader: torch.utils.data.dataloader.DataLoader,
                    model: NeuralNetwork,
                    loss_function: torch.nn.modules.loss):
    '''
    This function is used on the validation data to test the accuracy
    of the model on the validation set.
    - dataloader: the validation data from the DataLoader in torch
    - model: use the same you use in train_loop
    - loss_function: use the same you use in train_loop
    '''
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    # we use no_grad() here since this is validation: we are not
    # updating the model, so there is no need to track gradients,
    # which saves memory and computation
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_function(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
Let's go ahead and try it out!
train_loader, valid_loader, test_loader = update_data(128)
model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, model, loss_function, optimizer)
    validation_loop(valid_loader, model, loss_function)
print("Done!")
Epoch 1 ------------------------------- loss: 2.297202 [ 0/48000] loss: 0.602654 [12800/48000] loss: 0.505886 [25600/48000] loss: 0.603907 [38400/48000] Test Error: Accuracy: 83.8%, Avg loss: 0.470459 Epoch 2 ------------------------------- loss: 0.496968 [ 0/48000] loss: 0.418039 [12800/48000] loss: 0.348570 [25600/48000] loss: 0.468451 [38400/48000] Test Error: Accuracy: 87.0%, Avg loss: 0.360674 Epoch 3 ------------------------------- loss: 0.277681 [ 0/48000] loss: 0.290925 [12800/48000] loss: 0.416162 [25600/48000] loss: 0.235787 [38400/48000] Test Error: Accuracy: 87.9%, Avg loss: 0.335625 Epoch 4 ------------------------------- loss: 0.408194 [ 0/48000] loss: 0.415229 [12800/48000] loss: 0.325675 [25600/48000] loss: 0.433964 [38400/48000] Test Error: Accuracy: 88.1%, Avg loss: 0.332448 Epoch 5 ------------------------------- loss: 0.301518 [ 0/48000] loss: 0.395055 [12800/48000] loss: 0.282579 [25600/48000] loss: 0.349042 [38400/48000] Test Error: Accuracy: 88.4%, Avg loss: 0.325903 Epoch 6 ------------------------------- loss: 0.194457 [ 0/48000] loss: 0.283951 [12800/48000] loss: 0.329823 [25600/48000] loss: 0.326164 [38400/48000] Test Error: Accuracy: 88.0%, Avg loss: 0.327924 Epoch 7 ------------------------------- loss: 0.240443 [ 0/48000] loss: 0.244386 [12800/48000] loss: 0.193949 [25600/48000] loss: 0.270937 [38400/48000] Test Error: Accuracy: 88.8%, Avg loss: 0.309602 Epoch 8 ------------------------------- loss: 0.297721 [ 0/48000] loss: 0.188814 [12800/48000] loss: 0.305038 [25600/48000] loss: 0.228069 [38400/48000] Test Error: Accuracy: 88.9%, Avg loss: 0.309673 Epoch 9 ------------------------------- loss: 0.286372 [ 0/48000] loss: 0.294312 [12800/48000] loss: 0.291262 [25600/48000] loss: 0.274535 [38400/48000] Test Error: Accuracy: 88.2%, Avg loss: 0.324369 Epoch 10 ------------------------------- loss: 0.398201 [ 0/48000] loss: 0.379567 [12800/48000] loss: 0.278448 [25600/48000] loss: 0.190554 [38400/48000] Test Error: Accuracy: 88.8%, Avg loss: 0.313809 Done!
The accuracy of the trained model can now be tested on the testing dataset:
validation_loop(test_loader, model, loss_function)
Test Error: Accuracy: 87.8%, Avg loss: 0.338173
We just trained our model! We trained for 10 epochs, meaning the model made 10 complete passes over the training data to improve its predictions. With 10 epochs, we achieved 87.8% accuracy on the test set.
At this point, we know how to build and train a feed-forward, fully connected neural network. The question now is: was this particular network the best one? We can investigate this using model exploration, a common technique in machine learning. The idea is to find the hyperparameters that maximize the validation accuracy. I go through some of this exploration throughout the rest of this notebook.
Now that we have an idea of what we are trying to achieve, let's modify some of our functions, try different networks, and visualize the results. I am going to modify the train and validation loops to return values of interest for visualization.
def train_loop_modified(dataloader: torch.utils.data.dataloader.DataLoader,
                        model: NeuralNetwork,
                        loss_function: torch.nn.modules.loss,
                        optimizer: torch.optim) -> tuple:
    # returns (accuracy in percent, average loss per batch) for this epoch
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    correct = 0
    loss_acc = 0
    for batch, (image, image_label) in enumerate(dataloader):
        prediction = model(image)
        loss = loss_function(prediction, image_label)
        loss_acc += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        correct += (prediction.argmax(1) == image_label).type(torch.float).sum().item()
    correct /= size
    loss_acc /= num_batches
    return 100 * correct, loss_acc
def validation_loop_modified(dataloader: torch.utils.data.dataloader.DataLoader,
                             model: NeuralNetwork,
                             loss_function: torch.nn.modules.loss):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for image, image_class in dataloader:
            pred = model(image)
            test_loss += loss_function(pred, image_class).item()
            correct += (pred.argmax(1) == image_class).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return 100 * correct, test_loss
Let's start by testing how many epochs are optimal for training. The following code trains the model for 30 epochs and keeps track of the prediction accuracy at each epoch.
# this takes 5 min on my machine
train_loader, valid_loader, test_loader = update_data(128)
model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = range(30)
train_accuracies = []
valid_accuracies = []
for t in epochs:
    clear_output(wait=True)
    print(f"Currently on Epoch {t+1}....")
    train_accuracy,_ = train_loop_modified(train_loader, model, loss_function, optimizer)
    train_accuracies.append(train_accuracy)
    valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
    valid_accuracies.append(valid_accuracy)
print("Done!")
Currently on Epoch 30.... Done!
Training this model for 30 epochs took about 5 minutes on my local machine (with a batch size of 128). That adds up quickly when you want to try new models over and over again. Let's plot the accuracy of the network as a function of epochs to determine how many epochs we should use.
# epoch data results so you don't have to run the test again
with open('exploration_data.txt') as f:
    lines = f.readlines()
for i, line in enumerate(lines):
    lines[i] = line.strip('\n').strip()
epochs = list(map(int, lines[1][7:].split(' ')))
train_accuracies = list(map(float, lines[2][6:].split(' ')))
valid_accuracies = list(map(float, lines[3][11:].split(' ')))
lines = None
plt.rcParams.update({'font.size': 18})
plt.plot(epochs, train_accuracies, 'ko-', label = 'Train')
plt.plot(epochs, valid_accuracies, 'bo--', label = 'Validation')
plt.xlabel('Epochs'); plt.ylabel('% Accuracy')
plt.legend(); plt.show()
The accuracy on the validation set doesn't really improve after about 10 epochs, so we will stick with that while testing other aspects of the network. Let's now look at how many hidden layers a fully connected network (here with 256 nodes per layer) should have. The new network is defined below, where the number of layers can be chosen. Note that the base model above has 2 hidden layers.
def linear_relu(dim_in, dim_out, num_layers):
    layers = []
    for i in range(num_layers-1):
        layers.extend([nn.Linear(dim_in, dim_out),
                       nn.ReLU()])
    return layers
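As a quick illustration of what this helper returns (the variable name extra_layers below is just for the example): for num_layers = 3 it produces two (Linear, ReLU) pairs, which the class below places after its explicit first hidden layer, giving three hidden layers in total.
# for 3 hidden layers the helper supplies the 2 extra (Linear, ReLU) pairs
extra_layers = linear_relu(256, 256, 3)
print(len(extra_layers))   # 4 modules: Linear, ReLU, Linear, ReLU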
class NeuralNetwork_num_layers(nn.Module):
    ''' Number of hidden layers is variable '''
    def __init__(self, num_layers):
        super(NeuralNetwork_num_layers, self).__init__()
        self.flatten = nn.Flatten()
        input_features = 28*28
        num_nodes = 256
        num_classes = 10
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_features, num_nodes),           # layer 1
            nn.ReLU(),                                      # activation function
            *linear_relu(num_nodes, num_nodes, num_layers),
            nn.Linear(num_nodes, num_classes)               # output layer
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# this took 10 minutes on my machine
train_loader, valid_loader, test_loader = update_data(128)
train_accuracies = []
valid_accuracies = []
layer_choices = range(2, 7)
for num_layers in layer_choices:
    model = NeuralNetwork_num_layers(num_layers)
    loss_function = nn.CrossEntropyLoss()
    learning_rate = 1e-3
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    epochs = 10
    clear_output(wait=True)
    print(f"Currently testing {num_layers} Layers")
    for t in range(epochs):
        print(f"Currently on Epoch {t+1}....")
        train_accuracy,_ = train_loop_modified(train_loader, model, loss_function, optimizer)
        valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)
print("Done!")
Currently testing 6 Layers Currently on Epoch 1.... Currently on Epoch 2.... Currently on Epoch 3.... Currently on Epoch 4.... Currently on Epoch 5.... Currently on Epoch 6.... Currently on Epoch 7.... Currently on Epoch 8.... Currently on Epoch 9.... Currently on Epoch 10.... Done!
# num layers data results so you don't have to run the test again
with open('exploration_data.txt') as f:
    lines = f.readlines()
for i, line in enumerate(lines):
    lines[i] = line.strip('\n').strip()
num_layers = list(map(int, lines[5][7:].split(' ')))
train_accuracies = list(map(float, lines[6][6:].split(' ')))
valid_accuracies = list(map(float, lines[7][11:].split(' ')))
lines = None
plt.rcParams.update({'font.size': 18})
plt.plot(num_layers, train_accuracies, 'ko-', label = 'Train')
plt.plot(num_layers, valid_accuracies, 'bo--', label = 'Validation')
plt.xlabel('Num Layers'); plt.ylabel('% Accuracy')
plt.legend(); plt.show()
Interestingly, the accuracy drops noticeably as the number of layers increases. This makes some sense, as we are not adding any new information by stacking many identical layers, and the deeper models become harder to train within the same 10 epochs.
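One way to see what each extra layer costs is to count trainable parameters: every additional 256-node hidden layer adds 256*256 weights plus 256 biases (65,792 parameters) without providing any new input information. A quick count over the depths we tested:
# count trainable parameters for each depth tested above
for num_layers in range(2, 7):
    m = NeuralNetwork_num_layers(num_layers)
    n_params = sum(p.numel() for p in m.parameters())
    print(num_layers, 'hidden layers ->', n_params, 'parameters')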
Let's try to find the optimal number of nodes. We will take our base architecture with 2 hidden layers, as this performed the best in the test above.
class NeuralNetwork_num_nodes(nn.Module):
    '''
    A simple neural network model for the fashionMNIST data set
    - the input features should be a matrix of size 28x28
    FULLY CONNECTED -- 2 HIDDEN LAYERS -- variable num_nodes
    '''
    def __init__(self, num_nodes):
        super(NeuralNetwork_num_nodes, self).__init__()
        self.flatten = nn.Flatten()
        input_features = 28*28
        num_classes = 10
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_features, num_nodes),  # layer 1
            nn.ReLU(),                             # activation function
            nn.Linear(num_nodes, num_nodes),       # layer 2
            nn.ReLU(),                             # activation function
            nn.Linear(num_nodes, num_classes)      # output layer
        )

    def forward(self, x):
        '''
        Execute the sequential stack for a flattened matrix and
        return the raw logits
        '''
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
train_loader, valid_loader, test_loader = update_data(128)
train_accuracies = []
valid_accuracies = []
node_choices = [2**x for x in range(6, 12)]
for num_nodes in node_choices:
    model = NeuralNetwork_num_nodes(num_nodes)
    loss_function = nn.CrossEntropyLoss()
    learning_rate = 1e-3
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    epochs = 10
    clear_output(wait=True)
    print(f"Currently testing {num_nodes} Nodes")
    for t in range(epochs):
        print(f"Currently on Epoch {t+1}....")
        train_accuracy,_ = train_loop_modified(train_loader, model, loss_function, optimizer)
        valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)
print("Done!")
Currently testing 2048 Nodes Currently on Epoch 1.... Currently on Epoch 2.... Currently on Epoch 3.... Currently on Epoch 4.... Currently on Epoch 5.... Currently on Epoch 6.... Currently on Epoch 7.... Currently on Epoch 8.... Currently on Epoch 9.... Currently on Epoch 10.... Done!
# num nodes data results so you don't have to run the test again
with open('exploration_data.txt') as f:
    lines = f.readlines()
for i, line in enumerate(lines):
    lines[i] = line.strip('\n').strip()
num_nodes = list(map(int, lines[9][:].split(' ')[1:]))
train_accuracies = list(map(float, lines[10][:].split(' ')[1:]))
valid_accuracies = list(map(float, lines[11][:].split(' ')[1:]))
lines = None
plt.rcParams.update({'font.size': 18})
plt.plot(node_choices, train_accuracies, 'ko-', label = 'Train')
plt.plot(node_choices, valid_accuracies, 'bo--', label = 'Validation')
plt.xlabel('Num Nodes'); plt.ylabel('% Accuracy')
plt.legend(); plt.show()
We can see that 2048 nodes does the best on the validation set, but the improvement over 256, 512, and 1024 nodes is marginal. The 2048-node network also took much longer to train, so we will stick with 256 for now.
So far the best choices for the model are: 10 training epochs, 2 hidden layers, and 256 nodes per hidden layer.
Let's look at optimization on this structure.
The batch size for the optimizer can be modified. Remember that the model parameters are updated after each batch.
# this takes 50 minutes on my machine
batch_sizes = [2**x for x in range(0, 12)]
train_accuracies = []
valid_accuracies = []
time_to_completion = []
for batch_size in batch_sizes:
    train_loader, valid_loader, test_loader = update_data(batch_size)
    model = NeuralNetwork()
    loss_function = nn.CrossEntropyLoss()
    learning_rate = 1e-3
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    epochs = 10
    clear_output(wait=True)
    print(f"Testing for a batch size of {batch_size}")
    start = time.process_time()
    for t in range(epochs):
        print(f"Currently on Epoch {t+1}....")
        train_accuracy,_ = train_loop_modified(train_loader, model, loss_function, optimizer)
        valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
    end = time.process_time()
    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)
    time_to_completion.append(end-start)
print("Done!")
Testing for a batch size of 2048 Currently on Epoch 1.... Currently on Epoch 2.... Currently on Epoch 3.... Currently on Epoch 4.... Currently on Epoch 5.... Currently on Epoch 6.... Currently on Epoch 7.... Currently on Epoch 8.... Currently on Epoch 9.... Currently on Epoch 10.... Done!
ls = batch_sizes
for num in ls:
    print(num, end=' ')
1 2 4 8 16 32 64 128 256 512 1024 2048
ls = train_accuracies
for num in ls:
    print(num, end=' ')
86.74374999999999 88.14166666666667 89.24166666666666 90.28541666666666 90.94791666666666 91.18125 91.25833333333333 90.99166666666667 90.41666666666667 89.78333333333333 88.59166666666667 87.2625
ls = valid_accuracies
for num in ls:
    print(num, end=' ')
86.14166666666667 87.875 88.56666666666668 88.79166666666667 88.8 88.06666666666668 89.625 89.11666666666666 88.60833333333333 87.39166666666667 87.425 86.93333333333332
ls = time_to_completion
for num in ls:
    print(num, end=' ')
3164.811485 1613.865718000001 751.7283559999996 341.006942 192.4119129999999 124.4057789999988 87.17478899999878 73.35671199999888 68.19115899999997 66.18156699999963 64.97983099999874 64.511899000001
fig, ax1 = plt.subplots()
color = 'k'
ax1.set_xlabel('Batch Size')
ax1.set_ylabel('% Accuracy', color=color)
ax1.semilogx(batch_sizes, train_accuracies, 'ko-', label = 'Train')
ax1.semilogx(batch_sizes, valid_accuracies, 'bo--', label = 'Validation')
ax1.tick_params(axis='y', labelcolor=color)
ax1.legend()
x = batch_sizes[7]
y = valid_accuracies[7]
label = "BS = {}".format(x)
plt.annotate(label, (x,y), textcoords="offset points", xytext=(-19,25), ha='center')
# ax2 = ax1.twinx()
# times_min = [(lambda n: n/60)(time) for time in time_to_completion]
# color = 'tab:blue'
# ax2.semilogx(batch_sizes, times_min, 'o--', color = color)
# ax2.set_ylabel('Execution Time (min)', color=color)
# ax2.tick_params(axis='y', labelcolor=color)
Text(-19, 25, 'BS = 128')
From this, we can see that a batch size of 128 gives the best trade-off between validation accuracy and computation time. A batch size of 1 took nearly an hour, which makes sense: the parameters are updated after every batch, so a batch size of 1 means an update for every single image.
def run_test(model = NeuralNetwork(),
             loss_function = nn.CrossEntropyLoss(),
             optimizer = torch.optim.Adam,
             learning = 1e-3,
             epochs=10,
             batch_size=128):
    train_loader, valid_loader, test_loader = update_data(batch_size)
    optimizer = optimizer(model.parameters(), lr=learning)
    start = time.process_time()
    loss_acc = 0
    for t in range(epochs):
        print(f"Currently on Epoch {t+1}....")
        train_accuracy, loss = train_loop_modified(train_loader, model, loss_function, optimizer)
        valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
        loss_acc += loss
    loss_acc /= epochs
    end = time.process_time()
    return train_accuracy, valid_accuracy, end-start, loss_acc
# this took 22 minutes on my machine
learning_rates = [0.0001, 0.0003, 0.0005, 0.0007, 0.001, 0.003, 0.005, 0.007, 0.01, 0.013, 0.015, 0.017, 0.02, 0.025, 0.03, 0.07, 0.1, 0.2, 0.4]
train_accuracies = [None] * len(learning_rates)
valid_accuracies = [None] * len(learning_rates)
time_to_completion = [None] * len(learning_rates)
avg_loss_per_test = [None] * len(learning_rates)
for i, learning_rate in enumerate(learning_rates):
    clear_output(wait=True)
    print(f"Testing for a learning rate of {learning_rate}")
    #----
    model = NeuralNetwork()
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    epochs = 10
    batch_size = 128
    #----
    train_loader, valid_loader, test_loader = update_data(batch_size)
    loss_acc = 0
    start = time.process_time()
    for t in range(epochs):
        print(f"Currently on Epoch {t+1}....")
        train_accuracy, loss = train_loop_modified(train_loader, model, loss_function, optimizer)
        valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
        loss_acc += loss
    end = time.process_time()
    loss_acc /= epochs
    train_accuracies[i] = train_accuracy
    valid_accuracies[i] = valid_accuracy
    time_to_completion[i] = end - start
    avg_loss_per_test[i] = loss_acc
print("Done!")
Testing for a learning rate of 0.4 Currently on Epoch 1.... Currently on Epoch 2.... Currently on Epoch 3.... Currently on Epoch 4.... Currently on Epoch 5.... Currently on Epoch 6.... Currently on Epoch 7.... Currently on Epoch 8.... Currently on Epoch 9.... Currently on Epoch 10.... Done!
# learning rate data results so you don't have to run the test again
with open('exploration_data.txt') as f:
    lines = f.readlines()
for i, line in enumerate(lines):
    lines[i] = line.strip('\n').strip()
learning_rates = list(map(float, lines[18][:].split(' ')[1:]))
train_accuracies = list(map(float, lines[19][:].split(' ')[1:]))
valid_accuracies = list(map(float, lines[20][:].split(' ')[1:]))
lines = None
plt.rcParams.update({'font.size': 18})
plt.semilogx(learning_rates[:-4], train_accuracies[:-4], 'ko-', label = 'Train')
plt.semilogx(learning_rates[:-4], valid_accuracies[:-4], 'bo--', label = 'Validation')
plt.xlabel('Learning rate'); plt.ylabel('% Accuracy')
plt.legend(); plt.show()
From the plot, the best learning rate is around 0.0007, so we will use lr = 0.0007 going forward.
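The last knob we will look at is dropout. nn.Dropout randomly zeroes each element of its input with probability p during training (rescaling the surviving elements by 1/(1-p)), which acts as a regularizer; in evaluation mode it passes the input through unchanged. A tiny demonstration on an illustrative input:
# dropout zeroes elements with probability p during training and
# rescales the survivors by 1/(1-p); in eval mode it does nothing
drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)
drop.train()    # training mode: dropout is active
print(drop(x))  # roughly half the entries become 0, the survivors become 2.0
drop.eval()     # evaluation mode: dropout is a no-op
print(drop(x))  # all ones, unchanged
Note that modules are in training mode by default; in a more polished pipeline you would call model.train() before training and model.eval() before validation so that dropout is switched off when measuring accuracy.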
class NeuralNetwork_drop(nn.Module):
    '''
    - the input features should be a matrix of size 28x28
    FULLY CONNECTED -- 2 HIDDEN LAYERS -- dropout with probability drop_p
    '''
    def __init__(self, drop_p):
        super(NeuralNetwork_drop, self).__init__()
        self.flatten = nn.Flatten()
        input_features = 28*28
        num_nodes = 256
        num_classes = 10
        # drop_p = 0.25
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_features, num_nodes),  # layer 1
            nn.Dropout(p=drop_p),
            nn.ReLU(),                             # activation function
            nn.Linear(num_nodes, num_nodes),       # layer 2
            nn.Dropout(p=drop_p),
            nn.ReLU(),                             # activation function
            nn.Linear(num_nodes, num_classes)      # output layer
        )

    def forward(self, x):
        '''
        Execute the sequential stack for a flattened matrix and
        return the raw logits
        '''
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
np.linspace(0,0.2, 20)
array([0. , 0.01052632, 0.02105263, 0.03157895, 0.04210526, 0.05263158, 0.06315789, 0.07368421, 0.08421053, 0.09473684, 0.10526316, 0.11578947, 0.12631579, 0.13684211, 0.14736842, 0.15789474, 0.16842105, 0.17894737, 0.18947368, 0.2 ])
# this took 22 minutes on my machine
drop_rates = np.linspace(0,0.2, 20)
train_accuracies = [None] * len(drop_rates)
valid_accuracies = [None] * len(drop_rates)
time_to_completion = [None] * len(drop_rates)
avg_loss_per_test = [None] * len(drop_rates)
for i, drop in enumerate(drop_rates):
    clear_output(wait=True)
    print(f"Testing for a Drop rate of {drop}")
    #----
    model = NeuralNetwork_drop(drop)
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0007)
    epochs = 10
    batch_size = 128
    #----
    train_loader, valid_loader, test_loader = update_data(batch_size)
    loss_acc = 0
    start = time.process_time()
    for t in range(epochs):
        print(f"Currently on Epoch {t+1}....")
        train_accuracy, loss = train_loop_modified(train_loader, model, loss_function, optimizer)
        valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
        loss_acc += loss
    end = time.process_time()
    loss_acc /= epochs
    train_accuracies[i] = train_accuracy
    valid_accuracies[i] = valid_accuracy
    time_to_completion[i] = end - start
    avg_loss_per_test[i] = loss_acc
Testing for a Drop rate of 0.2 Currently on Epoch 1.... Currently on Epoch 2.... Currently on Epoch 3.... Currently on Epoch 4.... Currently on Epoch 5.... Currently on Epoch 6.... Currently on Epoch 7.... Currently on Epoch 8.... Currently on Epoch 9.... Currently on Epoch 10....
ls = valid_accuracies
for num in ls:
    print(num, end=' ')
88.75833333333333 88.71666666666667 88.94166666666666 88.675 88.41666666666667 88.575 88.9 88.925 88.38333333333334 89.28333333333333 87.68333333333334 89.09166666666667 88.24166666666666 88.88333333333334 88.425 88.575 88.63333333333333 88.40833333333333 88.33333333333333 88.69166666666666
plt.rcParams.update({'font.size': 18})
plt.plot(drop_rates, train_accuracies, 'ko-', label = 'Train')
plt.plot(drop_rates, valid_accuracies, 'bo--', label = 'Validation')
plt.xlabel('Dropout Rate'); plt.ylabel('% Accuracy')
plt.legend(); plt.show()
best_dr = 0.09473684210526316
Using the best hyperparameters found above, we can see how well we do on the held-out test set!
train_accuracies = [None] * len(drop_rates)
valid_accuracies = [None] * len(drop_rates)
#----
model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0007)
epochs=10
batch_size=128
#----
train_loader, valid_loader, test_loader = update_data(batch_size)
for t in range(epochs):
    clear_output(wait=True)
    print(f"Currently on Epoch {t+1}....")
    train_accuracy, loss = train_loop_modified(train_loader, model, loss_function, optimizer)
    valid_accuracy,_ = validation_loop_modified(valid_loader, model, loss_function)
test_accuracy,_ = validation_loop_modified(test_loader, model, loss_function)
print(f"Training Accuracy is:\t {train_accuracy}")
print(f"Validation Accuracy is:\t {valid_accuracy}")
print(f"Test Accuracy is:\t {test_accuracy}")
Currently on Epoch 10.... Training Accuracy is: 90.81666666666666 Validation Accuracy is: 89.35 Test Accuracy is: 88.42999999999999
This project was built using tutorials in the PyTorch documentation:
https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html
https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html