The fast gradient sign method (FGSM) is a technique to generate adversarial ("evil") data items that are designed to trick a trained neural network classifier. I implemented a demo using PyTorch and the UCI Digits dataset.
Each UCI Digits data item is a crude 8 by 8 grayscale image of a handwritten digit from ‘0’ to ‘9’. You can find the dataset at archive.ics.uci.edu/ml/machine-learning-databases/optdigits/. There are 3823 training images and 1797 test images.
In the screenshot below, the demo begins by training a CNN network on the UCI Digits data. Then the demo uses FGSM on the 1797 test items to create 1797 evil items. The evil items are designed to look very much like the test items, but be misclassified by the model.
The trained network model scores 96.22% accuracy on the 1797 test items but only 24.76% accuracy on the evil items that were generated from the test items.
The demo displays test item [33], which is a ‘5’ digit, and the corresponding evil item [33] in visual format. The two images appear similar but the model classifies the evil image as a ‘9’ digit.
My demo didn’t take too long to put together because I used a previous example as a template. The previous example used the MNIST digits dataset. See https://jamesmccaffreyblog.com/2022/07/25/fast-gradient-sign-method-fgsm-example-for-mnist-using-pytorch/.
The FGSM technique is one of those ideas that seems very complex until after you figure it out, then it seems easy. But regardless, the demo program has many tricky details. So the complexity of the FGSM demo program depends on how you look at it.

I’m not a big fan of the steampunk subculture but here are three photos that feature clever glasses. Probably not very practical for looking at things, but interesting.
Demo code. Replace “lt”, “gt”, “lte”, “gte” with the corresponding comparison operator symbols. The UCI Digits data can be found at:
https://archive.ics.uci.edu/ml/datasets/optical+recognition+of+handwritten+digits.
# uci_digits_fgsm.py
# generate adversarial data using the fast gradient
# sign method (FGSM)
# PyTorch 1.12.1-CPU Anaconda3-2020.02 Python 3.7.6
# Windows 10/11
import numpy as np
import matplotlib.pyplot as plt
import torch as T
device = T.device('cpu')
# -----------------------------------------------------------
class UCI_Digits_Dataset(T.utils.data.Dataset):
  """UCI Digits data. Each line of the source file holds 64
  comma-separated pixel values in [0, 16] followed by the class
  label (a digit 0-9), e.g. 8,12,0,16, . . 15,7."""

  def __init__(self, src_file):
    raw = np.loadtxt(src_file, usecols=range(0,65),
      delimiter=",", comments="#", dtype=np.float32)
    pixels = raw[:, 0:64]
    pixels /= 16.0                        # normalize pixels to [0.0, 1.0]
    pixels = pixels.reshape(-1, 1, 8, 8)  # (n_items, channels, 8, 8)
    labels = raw[:, 64]                   # float32 here; int64 below
    self.x_data = T.tensor(pixels, dtype=T.float32).to(device)
    self.y_data = T.tensor(labels, dtype=T.int64).to(device)

  def __len__(self):
    return self.x_data.shape[0]

  def __getitem__(self, idx):
    # return one (pixels, label) tuple
    return (self.x_data[idx], self.y_data[idx])
# -----------------------------------------------------------
class CNN_Net(T.nn.Module):
  """CNN classifier for 8x8 UCI Digits images: two convolutions,
  one max-pool, two dropout layers, two fully-connected layers.
  Output is log-softmax over the 10 digit classes (for NLLLoss)."""

  def __init__(self):
    super().__init__()
    # NOTE: the order of layer definitions matters because the
    # default weight/bias initialization consumes the RNG stream.
    self.conv1 = T.nn.Conv2d(1, 16, 2)   # chnls-in, chnls-out, kernel
    self.conv2 = T.nn.Conv2d(16, 24, 2)
    self.fc1 = T.nn.Linear(96, 64)       # 96 = 24 * 2 * 2
    self.fc2 = T.nn.Linear(64, 10)       # 10 output classes
    self.pool1 = T.nn.MaxPool2d(2, 2)    # kernel, stride
    self.drop1 = T.nn.Dropout(0.10)      # between the conv layers
    self.drop2 = T.nn.Dropout(0.15)      # between the fc layers

  def forward(self, x):
    # x is Size([bs, 1, 8, 8])
    z = T.relu(self.conv1(x))   # Size([bs, 16, 7, 7])
    z = self.pool1(z)           # Size([bs, 16, 3, 3])
    z = self.drop1(z)
    z = T.relu(self.conv2(z))   # Size([bs, 24, 2, 2])
    z = z.reshape(-1, 96)       # flatten to Size([bs, 96])
    z = T.relu(self.fc1(z))
    z = self.drop2(z)
    return T.log_softmax(self.fc2(z), dim=1)  # pairs with NLLLoss()
# -----------------------------------------------------------
def accuracy(model, ds):
  """Return the fraction of items in Dataset ds that model
  classifies correctly, evaluated in a single full-size batch."""
  loader = T.utils.data.DataLoader(ds,
    batch_size=len(ds), shuffle=False)
  num_right = 0
  for (pixels, labels) in loader:
    with T.no_grad():
      outputs = model(pixels)
    preds = T.max(outputs, 1)[1]  # index of largest output = class
    num_right += (preds == labels).sum().item()
  return (num_right * 1.0) / len(ds)
# -----------------------------------------------------------
def main():
  """Train a CNN on the UCI Digits data, then use FGSM to generate
  adversarial ("evil") versions of the test images and measure how
  much they degrade the trained model's accuracy.

  Fixes vs. original: removed a duplicate net.eval() call and a
  redundant re-creation of the identical NLLLoss object."""

  # 0. setup
  print("\nBegin UCI Digits FGSM with PyTorch demo ")
  np.random.seed(1)   # for reproducible init and batching
  T.manual_seed(1)

  # 1. create Dataset objects
  print("\nLoading UCI digits train and test data ")
  # train_data = ".\\Data\\uci_digits_train_100.txt"
  train_data = ".\\Data\\optdigits_train_3823.txt"
  train_ds = UCI_Digits_Dataset(train_data)

  bat_size = 4
  train_ldr = T.utils.data.DataLoader(train_ds,
    batch_size=bat_size, shuffle=True)

  test_file = ".\\Data\\digits_uci_test_1797.txt"
  test_ds = UCI_Digits_Dataset(test_file)

  # 2. create network
  print("\nCreating CNN classifier ")
  net = CNN_Net().to(device)
  net.train()  # training mode: dropout active

  # 3. train model
  loss_func = T.nn.NLLLoss()  # net emits log_softmax output
  lrn_rate = 0.01
  opt = T.optim.SGD(net.parameters(), lr=lrn_rate)
  max_epochs = 50
  log_every = 10

  print("\nStarting training ")
  for epoch in range(max_epochs):
    epoch_loss = 0.0  # accumulated only for progress display
    for bix, batch in enumerate(train_ldr):
      X = batch[0]  # normalized pixels, [bs, 1, 8, 8]
      Y = batch[1]  # class labels, [bs]
      opt.zero_grad()
      oupt = net(X)
      loss_val = loss_func(oupt, Y)
      epoch_loss += loss_val.item()
      loss_val.backward()  # compute gradients
      opt.step()           # update weights
    if epoch % log_every == 0:
      print("epoch = %4d loss = %0.4f" % (epoch, epoch_loss))
  print("Done ")

  # 4. evaluate model accuracy
  print("\nComputing model accuracy")
  net.eval()  # eval mode for all remaining work (dropout off)
  acc_train = accuracy(net, train_ds)  # all items at once
  print("Accuracy on training data = %0.4f" % acc_train)
  acc_test = accuracy(net, test_ds)  # all items at once
  print("Accuracy on test data = %0.4f" % acc_test)

  # 5. use model to make prediction: N/A

  # 6. save model
  # print("\nSaving trained model state")
  # fn = ".\\Models\\uci_digits_model.pt"
  # T.save(net.state_dict(), fn)

  # 7. create inputs designed to trick the model (FGSM):
  #    evil = clamp(x + epsilon * sign(d loss / d x), 0, 1)
  epsilon = 0.20
  print("\nCreating evil images from test w epsilon = %0.2f "\
    % epsilon)
  evil_images_lst = []
  n_correct = 0; n_wrong = 0
  test_ldr = T.utils.data.DataLoader(test_ds,
    batch_size=1, shuffle=False)  # one item at a time
  for (batch_idx, batch) in enumerate(test_ldr):
    (X, y) = batch  # X = pixels, y = target label
    X.requires_grad = True  # need gradient w.r.t. the INPUT pixels
    oupt = net(X)
    loss_val = loss_func(oupt, y)
    net.zero_grad()         # zap stale gradients
    loss_val.backward()     # compute gradients, incl. X.grad
    sgn = X.grad.data.sign()
    mutated = X + epsilon * sgn           # nudge pixels to raise loss
    mutated = T.clamp(mutated, 0.0, 1.0)  # keep legal pixel range
    with T.no_grad():
      pred = net(mutated)  # 10 log-softmax values
    pred_class = T.argmax(pred[0])
    if pred_class.item() == y.item():
      n_correct += 1
    else:
      n_wrong += 1
    mutated = mutated.detach().numpy()
    evil_images_lst.append(mutated)

  adver_acc = (n_correct * 1.0) / (n_correct + n_wrong)
  print("\nModel acc on evil images = %0.4f " % adver_acc)

  # show a test image and corresponding mutation
  idx = 33  # index of test item / evil item
  print("\nExamining test item idx = " + str(idx))
  pixels = test_ds[idx][0].reshape(8,8)
  plt.imshow(pixels, cmap=plt.get_cmap('gray_r'))
  plt.show()
  pixels = evil_images_lst[idx].reshape(8,8)
  plt.imshow(pixels, cmap=plt.get_cmap('gray_r'))
  plt.show()

  x = test_ds[idx][0].reshape(1, 1, 8, 8)  # make it a batch of one
  act_class = test_ds[idx][1].item()
  with T.no_grad():
    oupt = net(x)
  pred_class = T.argmax(oupt).item()
  print("\nActual class test item [idx] = " \
    + str(act_class))
  print("Pred class test item [idx] = " \
    + str(pred_class))

  x = evil_images_lst[idx]  # stored as numpy; convert back to tensor
  x = T.tensor(x, dtype=T.float32).to(device)
  x = x.reshape(1, 1, 8, 8)
  with T.no_grad():
    oupt = net(x)
  pred_class = T.argmax(oupt).item()
  print("Predicted class evil item [idx] = " \
    + str(pred_class))

  print("\nEnd UCI Digits FGSM PyTorch demo ")

if __name__ == "__main__":
  main()

.NET Test Automation Recipes
Software Testing
SciPy Programming Succinctly
Keras Succinctly
R Programming
2026 Visual Studio Live
2025 Summer MLADS Conference
2026 DevIntersection Conference
2025 Machine Learning Week
2025 Ai4 Conference
2026 G2E Conference
2026 iSC West Conference
You must be logged in to post a comment.