The fast gradient sign method (FGSM) is a technique to generate adversarial ("evil") data items that are designed to trick a trained neural network classifier. I implemented a demo using PyTorch and the UCI Digits dataset.
Each UCI Digits data item is a crude 8 by 8 grayscale image of a handwritten digit from ‘0’ to ‘9’. You can find the dataset at archive.ics.uci.edu/ml/machine-learning-databases/optdigits/. There are 3823 training images and 1797 test images.
In the screenshot below, the demo begins by training a CNN network on the UCI Digits data. Then the demo uses FGSM on the 1797 test items to create 1797 evil items. The evil items are designed to look very much like the test items, but be misclassified by the model.
The trained network model scores 96.22% accuracy on the 1797 test items but only 24.76% accuracy on the evil items that were generated from the test items.
The demo displays test item [33], which is a ‘5’ digit, and the corresponding evil item [33] in visual format. The two images appear similar but the model classifies the evil image as a ‘9’ digit.
My demo didn’t take too long to put together because I used a previous example as a template. The previous example used the MNIST digits dataset. See https://jamesmccaffreyblog.com/2022/07/25/fast-gradient-sign-method-fgsm-example-for-mnist-using-pytorch/.
The FGSM technique is one of those ideas that seems very complex until after you figure it out, then it seems easy. But regardless, the demo program has many tricky details. So the complexity of the FGSM demo program depends on how you look at it.

I’m not a big fan of the steampunk subculture but here are three photos that feature clever glasses. Probably not very practical for looking at things, but interesting.
Demo code. Replace “lt”, “gt”, “lte”, “gte” with the corresponding comparison operator symbols. The UCI Digits data can be found at:
https://archive.ics.uci.edu/ml/datasets/optical+recognition+of+handwritten+digits.
# uci_digits_fgsm.py
# generate adversarial data using the fast gradient
# sign method (FGSM)
# PyTorch 1.12.1-CPU Anaconda3-2020.02 Python 3.7.6
# Windows 10/11
import numpy as np
import matplotlib.pyplot as plt
import torch as T
device = T.device('cpu')
# -----------------------------------------------------------
class UCI_Digits_Dataset(T.utils.data.Dataset):
  """UCI Digits data. Each line of the source file holds 64
  comma-separated pixel values in [0, 16] followed by the class
  label (a digit 0-9), e.g. 8,12,0,16, . . 15,7."""

  def __init__(self, src_file):
    raw = np.loadtxt(src_file, usecols=range(0,65),
      delimiter=",", comments="#", dtype=np.float32)
    pixels = raw[:, 0:64]
    pixels /= 16.0                        # normalize pixels to [0.0, 1.0]
    pixels = pixels.reshape(-1, 1, 8, 8)  # (n_items, channels, 8, 8)
    labels = raw[:, 64]                   # float32 here; int64 below
    self.x_data = T.tensor(pixels, dtype=T.float32).to(device)
    self.y_data = T.tensor(labels, dtype=T.int64).to(device)

  def __len__(self):
    return self.x_data.shape[0]

  def __getitem__(self, idx):
    # return one (pixels, label) tuple
    return (self.x_data[idx], self.y_data[idx])
# -----------------------------------------------------------
class CNN_Net(T.nn.Module):
  """CNN classifier for 8x8 UCI Digits images: two convolutions,
  one max-pool, two dropout layers, two fully-connected layers.
  Output is log-softmax over the 10 digit classes (for NLLLoss)."""

  def __init__(self):
    super().__init__()
    # NOTE: the order of layer definitions matters because the
    # default weight/bias initialization consumes the RNG stream.
    self.conv1 = T.nn.Conv2d(1, 16, 2)   # chnls-in, chnls-out, kernel
    self.conv2 = T.nn.Conv2d(16, 24, 2)
    self.fc1 = T.nn.Linear(96, 64)       # 96 = 24 * 2 * 2
    self.fc2 = T.nn.Linear(64, 10)       # 10 output classes
    self.pool1 = T.nn.MaxPool2d(2, 2)    # kernel, stride
    self.drop1 = T.nn.Dropout(0.10)      # between the conv layers
    self.drop2 = T.nn.Dropout(0.15)      # between the fc layers

  def forward(self, x):
    # x is Size([bs, 1, 8, 8])
    z = T.relu(self.conv1(x))   # Size([bs, 16, 7, 7])
    z = self.pool1(z)           # Size([bs, 16, 3, 3])
    z = self.drop1(z)
    z = T.relu(self.conv2(z))   # Size([bs, 24, 2, 2])
    z = z.reshape(-1, 96)       # flatten to Size([bs, 96])
    z = T.relu(self.fc1(z))
    z = self.drop2(z)
    return T.log_softmax(self.fc2(z), dim=1)  # pairs with NLLLoss()
# -----------------------------------------------------------
def accuracy(model, ds):
  """Return the fraction of items in Dataset ds that model
  classifies correctly, evaluated in a single full-size batch."""
  loader = T.utils.data.DataLoader(ds,
    batch_size=len(ds), shuffle=False)
  num_right = 0
  for (pixels, labels) in loader:
    with T.no_grad():
      outputs = model(pixels)
    preds = T.max(outputs, 1)[1]  # index of largest output = class
    num_right += (preds == labels).sum().item()
  return (num_right * 1.0) / len(ds)
# -----------------------------------------------------------
def main():
  """Train a CNN on the UCI Digits data, then use FGSM to generate
  adversarial ("evil") versions of the test images and measure how
  much they degrade the trained model's accuracy.

  Fixes vs. original: removed a duplicate net.eval() call and a
  redundant re-creation of the identical NLLLoss object."""

  # 0. setup
  print("\nBegin UCI Digits FGSM with PyTorch demo ")
  np.random.seed(1)   # for reproducible init and batching
  T.manual_seed(1)

  # 1. create Dataset objects
  print("\nLoading UCI digits train and test data ")
  # train_data = ".\\Data\\uci_digits_train_100.txt"
  train_data = ".\\Data\\optdigits_train_3823.txt"
  train_ds = UCI_Digits_Dataset(train_data)

  bat_size = 4
  train_ldr = T.utils.data.DataLoader(train_ds,
    batch_size=bat_size, shuffle=True)

  test_file = ".\\Data\\digits_uci_test_1797.txt"
  test_ds = UCI_Digits_Dataset(test_file)

  # 2. create network
  print("\nCreating CNN classifier ")
  net = CNN_Net().to(device)
  net.train()  # training mode: dropout active

  # 3. train model
  loss_func = T.nn.NLLLoss()  # net emits log_softmax output
  lrn_rate = 0.01
  opt = T.optim.SGD(net.parameters(), lr=lrn_rate)
  max_epochs = 50
  log_every = 10

  print("\nStarting training ")
  for epoch in range(max_epochs):
    epoch_loss = 0.0  # accumulated only for progress display
    for bix, batch in enumerate(train_ldr):
      X = batch[0]  # normalized pixels, [bs, 1, 8, 8]
      Y = batch[1]  # class labels, [bs]
      opt.zero_grad()
      oupt = net(X)
      loss_val = loss_func(oupt, Y)
      epoch_loss += loss_val.item()
      loss_val.backward()  # compute gradients
      opt.step()           # update weights
    if epoch % log_every == 0:
      print("epoch = %4d loss = %0.4f" % (epoch, epoch_loss))
  print("Done ")

  # 4. evaluate model accuracy
  print("\nComputing model accuracy")
  net.eval()  # eval mode for all remaining work (dropout off)
  acc_train = accuracy(net, train_ds)  # all items at once
  print("Accuracy on training data = %0.4f" % acc_train)
  acc_test = accuracy(net, test_ds)  # all items at once
  print("Accuracy on test data = %0.4f" % acc_test)

  # 5. use model to make prediction: N/A

  # 6. save model
  # print("\nSaving trained model state")
  # fn = ".\\Models\\uci_digits_model.pt"
  # T.save(net.state_dict(), fn)

  # 7. create inputs designed to trick the model (FGSM):
  #    evil = clamp(x + epsilon * sign(d loss / d x), 0, 1)
  epsilon = 0.20
  print("\nCreating evil images from test w epsilon = %0.2f "\
    % epsilon)
  evil_images_lst = []
  n_correct = 0; n_wrong = 0
  test_ldr = T.utils.data.DataLoader(test_ds,
    batch_size=1, shuffle=False)  # one item at a time
  for (batch_idx, batch) in enumerate(test_ldr):
    (X, y) = batch  # X = pixels, y = target label
    X.requires_grad = True  # need gradient w.r.t. the INPUT pixels
    oupt = net(X)
    loss_val = loss_func(oupt, y)
    net.zero_grad()         # zap stale gradients
    loss_val.backward()     # compute gradients, incl. X.grad
    sgn = X.grad.data.sign()
    mutated = X + epsilon * sgn           # nudge pixels to raise loss
    mutated = T.clamp(mutated, 0.0, 1.0)  # keep legal pixel range
    with T.no_grad():
      pred = net(mutated)  # 10 log-softmax values
    pred_class = T.argmax(pred[0])
    if pred_class.item() == y.item():
      n_correct += 1
    else:
      n_wrong += 1
    mutated = mutated.detach().numpy()
    evil_images_lst.append(mutated)

  adver_acc = (n_correct * 1.0) / (n_correct + n_wrong)
  print("\nModel acc on evil images = %0.4f " % adver_acc)

  # show a test image and corresponding mutation
  idx = 33  # index of test item / evil item
  print("\nExamining test item idx = " + str(idx))
  pixels = test_ds[idx][0].reshape(8,8)
  plt.imshow(pixels, cmap=plt.get_cmap('gray_r'))
  plt.show()
  pixels = evil_images_lst[idx].reshape(8,8)
  plt.imshow(pixels, cmap=plt.get_cmap('gray_r'))
  plt.show()

  x = test_ds[idx][0].reshape(1, 1, 8, 8)  # make it a batch of one
  act_class = test_ds[idx][1].item()
  with T.no_grad():
    oupt = net(x)
  pred_class = T.argmax(oupt).item()
  print("\nActual class test item [idx] = " \
    + str(act_class))
  print("Pred class test item [idx] = " \
    + str(pred_class))

  x = evil_images_lst[idx]  # stored as numpy; convert back to tensor
  x = T.tensor(x, dtype=T.float32).to(device)
  x = x.reshape(1, 1, 8, 8)
  with T.no_grad():
    oupt = net(x)
  pred_class = T.argmax(oupt).item()
  print("Predicted class evil item [idx] = " \
    + str(pred_class))

  print("\nEnd UCI Digits FGSM PyTorch demo ")

if __name__ == "__main__":
  main()

.NET Test Automation Recipes
Software Testing
SciPy Programming Succinctly
Keras Succinctly
R Programming
2026 Visual Studio Live
2025 Summer MLADS Conference
2026 DevIntersection Conference
2025 Machine Learning Week
2025 Ai4 Conference
2026 G2E Conference
2026 iSC West Conference
You must be logged in to post a comment.