Virtually all the PyTorch CIFAR-10 image recognition examples I’ve seen use the built-in TorchVision CIFAR-10 dataset. Using a built-in dataset is fine for experiments but I wanted to code up a CIFAR-10 demo from raw data so that I’d be sure I knew how everything works.
CIFAR-10 is a dataset of 50,000 training images and 10,000 test images. For my demo, I pruned the raw data down to 5,000 training images and 1,000 test images. Each CIFAR-10 image is color (i.e., 3 channels), and just 32×32 pixels in size. Each pixel channel is an integer between 0 and 255. The images are one of 10 classes: plane, car, bird, cat, deer, dog, frog, horse, ship, truck.
After a couple of hours of work (while sitting on an airplane), I got a demo up and running. The demo reached nearly 100% accuracy on the training data (which is expected — a model with this much capacity can essentially memorize such a small training set), and 47.7% accuracy on the test data (which is pretty decent for only using 1/10 of the available training images).
Converting the raw CIFAR-10 data to text files was an interesting challenge. See https://jamesmccaffreyblog.com/2022/03/10/fetching-cifar-10-data-and-saving-as-a-text-file/. For my prediction model, I used a relatively simple architecture that I borrowed from the PyTorch documentation example, and that I used before with the TorchVision built-in data. See https://jamesmccaffreyblog.com/2020/10/29/yet-another-cifar-10-example-using-pytorch/.
A good way to use time on an airplane flight.

Image classification isn’t easy. Here are three portraits, classified by Internet searches for “Italian”, “Asian”, and “Russian”. Left: By Pier Toffoletti. Center: By Dodi Ballada. Right: By Galya Bukova. If I were given the images and the labels, I don’t think I could match them.
Demo code:
# cifar_raw_data_cnn.py
# PyTorch 1.10.0-CPU Anaconda3-2020.02 Python 3.7.6
# Windows 10/11
# NumPy parses the comma-delimited text data; PyTorch provides the model.
import numpy as np
import torch as T
# All tensors and the model live on CPU (script targets a CPU-only PyTorch build).
device = T.device('cpu')
# -----------------------------------------------------------
class CIFAR10_Dataset(T.utils.data.Dataset):
  """CIFAR-10 images loaded from a text file.

  Each data line holds 3072 comma-delimited pixel values in [0, 255]
  (3 channels x 32 x 32) followed by a class label in [0, 9].
  Lines beginning with '#' are treated as comments and skipped.
  Pixels are scaled to [0, 1] and stored as float32 tensors; labels
  are stored as int64 tensors (required by CrossEntropyLoss).
  """

  def __init__(self, src_file):
    raw = np.loadtxt(src_file, usecols=range(0,3073),
      delimiter=",", comments="#", dtype=np.float32)
    pixels = raw[:, 0:3072]                 # every row, cols [0, 3071]
    pixels /= 255.0                         # normalize to [0, 1]
    pixels = pixels.reshape(-1, 3, 32, 32)  # (n_items, chnls, H, W)
    labels = raw[:, 3072]                   # last col, kept 1-D
    self.x_data = T.tensor(pixels, dtype=T.float32).to(device)
    self.y_data = T.tensor(labels, dtype=T.int64).to(device)

  def __len__(self):
    return len(self.x_data)

  def __getitem__(self, idx):
    # return (predictors, target) pair for item idx
    return (self.x_data[idx], self.y_data[idx])
# -----------------------------------------------------------
class Net(T.nn.Module):
  """Small CNN for 3x32x32 inputs: two conv+pool stages followed by
  a 400-120-84-10 fully connected classifier. Emits raw logits."""

  def __init__(self):
    super(Net, self).__init__()
    # NOTE: layer creation order is unchanged so seeded weight init
    # stays reproducible.
    self.conv1 = T.nn.Conv2d(3, 6, 5)       # (in chnls, out chnls, kernel)
    self.conv2 = T.nn.Conv2d(6, 16, 5)
    self.pool = T.nn.MaxPool2d(2, 2)        # (kernel, stride)
    self.fc1 = T.nn.Linear(16 * 5 * 5, 120)
    self.fc2 = T.nn.Linear(120, 84)
    self.fc3 = T.nn.Linear(84, 10)

  def forward(self, x):
    relu = T.nn.functional.relu
    x = self.pool(relu(self.conv1(x)))  # 32x32 -> conv 28x28 -> pool 14x14
    x = self.pool(relu(self.conv2(x)))  # 14x14 -> conv 10x10 -> pool 5x5
    x = x.view(-1, 16 * 5 * 5)          # flatten to (bs, 400)
    x = relu(self.fc1(x))
    x = relu(self.fc2(x))
    return self.fc3(x)  # no activation here: CrossEntropyLoss wants logits
# -----------------------------------------------------------
def accuracy(model, ds):
  """Return the fraction of items in Dataset ds that model classifies
  correctly. The whole dataset is evaluated as one big batch; the
  predicted class is the index of the largest output logit."""
  loader = T.utils.data.DataLoader(ds,
    batch_size=len(ds), shuffle=False)
  num_correct = 0
  with T.no_grad():  # inference only -- no gradient bookkeeping
    for (pixels, labels) in loader:
      logits = model(pixels)
      preds = T.argmax(logits, dim=1)
      num_correct += (preds == labels).sum().item()
  return num_correct / len(ds)
# -----------------------------------------------------------
def main():
  """Run the demo end-to-end: load raw-text CIFAR-10 data, build the
  CNN, train with SGD, then report accuracy on the test data."""
  # 0. setup -- seed both RNGs before any model/loader is created so
  # weight init and batch shuffling are reproducible
  print("\nBegin CIFAR-10 with raw data CNN demo ")
  np.random.seed(1)
  T.manual_seed(1)

  # 1. create Dataset objects and a shuffling DataLoader for training
  print("\nCreating 5000 train and 1000 test datasets ")
  train_ds = CIFAR10_Dataset(".\\Data\\cifar10_train_5000.txt")
  test_ds = CIFAR10_Dataset(".\\Data\\cifar10_test_1000.txt")
  bat_size = 10
  train_ldr = T.utils.data.DataLoader(train_ds,
    batch_size=bat_size, shuffle=True)

  # 2. create network
  print("\nCreating CNN with 2 conv and 400-120-84-10 ")
  net = Net().to(device)

  # 3. train model
  max_epochs = 100
  ep_log_interval = 10  # report loss/accuracy every 10 epochs
  lrn_rate = 0.005
  loss_func = T.nn.CrossEntropyLoss()  # applies log-softmax internally
  optimizer = T.optim.SGD(net.parameters(), lr=lrn_rate)

  print("\nbat_size = %3d " % bat_size)
  print("loss = " + str(loss_func))
  print("optimizer = SGD")
  print("max_epochs = %3d " % max_epochs)
  print("lrn_rate = %0.3f " % lrn_rate)

  print("\nStarting training")
  net = net.train()
  for epoch in range(0, max_epochs):
    epoch_loss = 0  # summed batch losses for this epoch
    for batch in train_ldr:
      (imgs, tgts) = batch  # imgs is Size([bat_size, 3, 32, 32])
      optimizer.zero_grad()
      logits = net(imgs)
      loss_obj = loss_func(logits, tgts)  # a tensor
      epoch_loss += loss_obj.item()
      loss_obj.backward()
      optimizer.step()
    if epoch % ep_log_interval == 0:
      print("epoch = %4d | loss = %10.4f | " % \
        (epoch, epoch_loss), end="")
      net.eval()  # switch modes just for the accuracy pass
      acc = accuracy(net, train_ds)
      net.train()
      print(" acc = %6.4f " % acc)
  print("Done ")

  # 4. evaluate model accuracy on held-out data
  print("\nComputing model accuracy")
  net.eval()
  acc_test = accuracy(net, test_ds)  # whole test set at once
  print("Accuracy on test data = %0.4f" % acc_test)

  # 5. TODO: save trained model
  # 6. TODO: use model to make a prediction
  print("\nEnd CIFAR-10 CNN demo ")

if __name__ == "__main__":
  main()

.NET Test Automation Recipes
Software Testing
SciPy Programming Succinctly
Keras Succinctly
R Programming
2026 Visual Studio Live
2025 Summer MLADS Conference
2026 DevIntersection Conference
2025 Machine Learning Week
2025 Ai4 Conference
2026 G2E Conference
2026 iSC West Conference
You must be logged in to post a comment.