When I work with PyTorch I almost always use a traditional local installation. Some of my colleagues like to use the Google colab online environment because you can run a PyTorch program in a browser on a machine that has absolutely no prerequisites. I hadn’t used colab in a long time so one morning after walking my dogs, I figured I’d run one of my standard PyTorch demo programs on colab.
I grabbed a machine that didn’t have Python or PyTorch installed. I launched a Chrome browser and logged in using my Google gmail account. I navigated to colab.research.google.com to launch a new colab project.
I wanted to use a GPU instead of the default CPU so I clicked on Runtime | Change runtime type | T4 GPU | Save.
Next, I uploaded a file of training data and a file of test data. The tab-delimited data looks like:
 1  0.24  1 0 0  0.2950  2
-1  0.39  0 0 1  0.5120  1
 1  0.63  0 1 0  0.7580  0
-1  0.36  1 0 0  0.4450  1
. . .
Each line represents a person. The fields are sex (male = -1, female = +1), age (divided by 100), state (Michigan = 100, Nebraska = 010, Oklahoma = 001), income (divided by $100,000), and political leaning (0 = conservative, 1 = moderate, 2 = liberal). There are 200 training items and 40 test items.
Compared to my local program, I made the following three modifications.
1.) At the top of the program, to switch from local CPU to Cloud GPU, I changed
import torch as T
device = T.device('cpu')
to
import torch as T
device = T.device('cuda:0')
2.) Because my data files were in colab session storage instead of on my local machine, I changed file paths:
train_file = ".\\Data\\people_train.txt"
train_ds = PeopleDataset(train_file)  # 200 rows

test_file = ".\\Data\\people_test.txt"
test_ds = PeopleDataset(test_file)  # 40 rows

fn = ".\\Models\\people_model.pt"
T.save(net.state_dict(), fn)
to
train_file = "./people_train.txt"
train_ds = PeopleDataset(train_file)  # 200 rows

test_file = "./people_test.txt"
test_ds = PeopleDataset(test_file)  # 40 rows

fn = "./people_model.pt"
T.save(net.state_dict(), fn)
3.) To deal with a PyTorch tensor to numpy array issue, I changed
probs = T.exp(logits)  # sum to 1.0
probs = probs.numpy()  # numpy vector prints better
to
probs = T.exp(logits)        # sum to 1.0
probs = probs.cpu().numpy()  # from GPU to CPU to numpy
After a bit of tweaking, my standard demo ran on colab successfully. Although colab is quite nice and is very useful for some scenarios, I prefer running PyTorch programs on a local installation.

One of the characteristics of using colab is that it has multiple layers — browser to colab to Jupyter to Google cloud to PyTorch to Python to runtime. Here are three examples of traditional Kazakhstan dress, which is characterized, in part, by multiple layers of clothing. Kazakhstan is in central Asia and is bordered by Russia, China, Kyrgyzstan, Uzbekistan, and Turkmenistan. Astana, the capital of Kazakhstan, was the site of the 2023 world chess championship match between Ian Nepomniachtchi (Russia) and Ding Liren (China).
Demo code. Replace “lt” (less than), “gt”, “lte”, “gte” with Boolean operator symbols. The training and test data can be found at https://jamesmccaffreyblog.com/2023/12/15/pytorch-multi-class-classification-using-a-transformer-with-custom-accuracy-and-interpretability/.
# people_politics.py
# predict politics type from sex, age, state, income
# running on colab-GPU
import numpy as np
import torch as T
device = T.device('cuda:0') # apply to Tensor or Module
# -----------------------------------------------------------
class PeopleDataset(T.utils.data.Dataset):
    """People data: sex, age, one-hot state, income -> politics class.

    Each file row looks like:
      -1  0.27  0 1 0  0.7610  2
      +1  0.19  0 0 1  0.6550  0
    sex: -1 = male, +1 = female
    state one-hot order: michigan, nebraska, oklahoma
    politics: 0 = conservative, 1 = moderate, 2 = liberal
    """

    def __init__(self, src_file):
        # Load all 7 tab-separated columns at once, then split into
        # predictors (cols 0-5) and class labels (col 6).
        raw = np.loadtxt(src_file, usecols=range(0, 7),
            delimiter="\t", comments="#", dtype=np.float32)
        self.x_data = T.tensor(raw[:, 0:6],
            dtype=T.float32).to(device)   # predictors, 2-D
        self.y_data = T.tensor(raw[:, 6],
            dtype=T.int64).to(device)     # labels, 1-D

    def __len__(self):
        return self.x_data.shape[0]

    def __getitem__(self, idx):
        # return a (predictors, target) tuple for item(s) idx
        return (self.x_data[idx], self.y_data[idx])
# -----------------------------------------------------------
class Net(T.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.hid1 = T.nn.Linear(6, 10) # 6-(10-10)-3
self.hid2 = T.nn.Linear(10, 10)
self.oupt = T.nn.Linear(10, 3)
T.nn.init.xavier_uniform_(self.hid1.weight)
T.nn.init.zeros_(self.hid1.bias)
T.nn.init.xavier_uniform_(self.hid2.weight)
T.nn.init.zeros_(self.hid2.bias)
T.nn.init.xavier_uniform_(self.oupt.weight)
T.nn.init.zeros_(self.oupt.bias)
def forward(self, x):
z = T.tanh(self.hid1(x))
z = T.tanh(self.hid2(z))
z = T.log_softmax(self.oupt(z), dim=1) # NLLLoss()
return z
# -----------------------------------------------------------
def accuracy(model, ds):
    """Return the fraction of items in ds that model classifies
    correctly, scoring one item at a time.
    Assumes model is already in eval() mode."""
    n_correct = 0
    for i in range(len(ds)):
        inpt = ds[i][0].reshape(1, -1)   # make a 1-item batch
        trgt = ds[i][1].reshape(1)       # class 0, 1 or 2, 1-D
        with T.no_grad():
            logits = model(inpt)         # log-softmax outputs
        if T.argmax(logits) == trgt:
            n_correct += 1
    # denominator == n_correct + n_wrong in the item loop above
    return n_correct / len(ds)
# -----------------------------------------------------------
def accuracy_quick(model, dataset):
    """Return classification accuracy over the whole dataset using a
    single forward pass (all items as one batch).
    Assumes model is already in eval() mode."""
    n = len(dataset)
    all_x = dataset[0:n][0]   # all predictors, stacked
    all_y = dataset[0:n][1]   # all targets, 1-D
    with T.no_grad():
        logits = model(all_x)
    preds = T.argmax(logits, dim=1)
    n_correct = T.sum(all_y == preds)
    return (n_correct * 1.0 / n).item()
# -----------------------------------------------------------
def confusion_matrix_multi(model, ds, n_classes):
    """Compute an n_classes x n_classes confusion matrix for model on ds.

    Rows index the actual class, columns the predicted class.
    Assumes model is already in eval() mode.
    Returns the matrix (np.int64), or None when n_classes < 3.
    """
    # BUG FIX: the published listing had the placeholder "lte" here
    # (invalid Python); the intended operator is <=.
    if n_classes <= 2:
        print("ERROR: n_classes must be 3 or greater ")
        return None
    cm = np.zeros((n_classes, n_classes), dtype=np.int64)
    for i in range(len(ds)):
        X = ds[i][0].reshape(1, -1)   # make a 1-item batch
        Y = ds[i][1].reshape(1)       # actual class 0 1 or 2, 1-D
        with T.no_grad():
            oupt = model(X)           # log-softmax outputs
        pred_class = T.argmax(oupt)   # predicted class index
        # index with plain ints rather than 1-element tensors
        cm[int(Y), int(pred_class)] += 1
    return cm
# -----------------------------------------------------------
def show_confusion(cm):
    """Pretty-print a square confusion matrix with right-aligned,
    equal-width count columns."""
    dim = len(cm)
    wid = len(str(np.max(cm))) + 1   # width from the largest count
    fmt = "%" + str(wid) + "d"       # e.g. "%3d"
    for r in range(dim):
        print("actual ", end="")
        print("%3d:" % r, end="")
        print("".join(fmt % cm[r][c] for c in range(dim)))
    print("------------")
    print("predicted ", end="")
    print("".join(fmt % c for c in range(dim)))
# -----------------------------------------------------------
def main():
    """Train, evaluate, exercise and save the People politics model."""
    # 0. get started
    print("\nBegin People predict politics type ")
    T.manual_seed(1)
    np.random.seed(1)

    # 1. create Dataset and DataLoader objects
    print("\nCreating People Datasets ")
    train_ds = PeopleDataset("./people_train.txt")   # 200 rows
    test_ds = PeopleDataset("./people_test.txt")     # 40 rows
    bat_size = 10
    train_ldr = T.utils.data.DataLoader(train_ds,
        batch_size=bat_size, shuffle=True)

    # 2. create network on the module-level device
    print("\nCreating 6-(10-10)-3 neural network ")
    net = Net().to(device)
    net.train()

    # 3. train model
    max_epochs = 1000
    ep_log_interval = 200
    lrn_rate = 0.01
    loss_func = T.nn.NLLLoss()   # net emits log_softmax
    optimizer = T.optim.SGD(net.parameters(), lr=lrn_rate)

    print("\nbat_size = %3d " % bat_size)
    print("loss = " + str(loss_func))
    print("optimizer = SGD")
    print("max_epochs = %3d " % max_epochs)
    print("lrn_rate = %0.3f " % lrn_rate)

    print("\nStarting training ")
    for epoch in range(max_epochs):
        epoch_loss = 0   # running sum of batch losses for this epoch
        for xb, yb in train_ldr:   # inputs, correct class labels
            optimizer.zero_grad()
            loss_val = loss_func(net(xb), yb)
            epoch_loss += loss_val.item()
            loss_val.backward()
            optimizer.step()
        if epoch % ep_log_interval == 0:
            print("epoch = %5d | loss = %10.4f" % (epoch, epoch_loss))
    print("Training done ")

    # 4. evaluate model accuracy (item-by-item version)
    print("\nComputing model accuracy")
    net.eval()
    print("Accuracy on training data = %0.4f" % accuracy(net, train_ds))
    print("Accuracy on test data = %0.4f" % accuracy(net, test_ds))

    # 5. make a prediction for one previously unseen person
    print("\nPredicting politics for M 30 oklahoma $50,000: ")
    x = np.array([[-1, 0.30, 0, 0, 1, 0.5000]], dtype=np.float32)
    x = T.tensor(x, dtype=T.float32).to(device)
    with T.no_grad():
        logits = net(x)                   # do not sum to 1.0
    probs = T.exp(logits).cpu().numpy()   # GPU -> CPU -> numpy
    np.set_printoptions(precision=4, suppress=True)
    print(probs)

    # 6. save model (state_dict approach)
    print("\nSaving trained model state ")
    T.save(net.state_dict(), "./people_model.pt")

    print("\nEnd People predict politics demo ")


if __name__ == "__main__":
    main()


.NET Test Automation Recipes
Software Testing
SciPy Programming Succinctly
Keras Succinctly
R Programming
2026 Visual Studio Live
2025 Summer MLADS Conference
2025 DevIntersection Conference
2025 Machine Learning Week
2025 Ai4 Conference
2025 G2E Conference
2025 iSC West Conference
You must be logged in to post a comment.