Running My Standard PyTorch Demo on Colab

When I work with PyTorch I almost always use a traditional local installation. Some of my colleagues like to use the Google colab online environment because you can run a PyTorch program in a browser on a machine that has absolutely no prerequisites. I hadn’t used colab in a long time so one morning after walking my dogs, I figured I’d run one of my standard PyTorch demo programs on colab.

I grabbed a machine that didn’t have Python or PyTorch installed. I launched a Chrome browser, logged in with my Google account, and navigated to colab.research.google.com to create a new colab notebook.

I wanted to use a GPU instead of the default CPU so I clicked on Runtime | Change runtime type | T4 GPU | Save.
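
To verify that the session actually picked up a GPU, a quick sanity check can be run in a notebook cell. This is my own minimal sketch, not part of the demo program:

import torch as T
print(T.__version__)                # PyTorch comes preinstalled on colab
print(T.cuda.is_available())        # should print True after selecting the T4 GPU
if T.cuda.is_available():
  print(T.cuda.get_device_name(0))  # prints something like "Tesla T4"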

Next, I uploaded a file of training data and a file of test data. The tab-delimited data looks like:

 1  0.24  1  0  0  0.2950  2
-1  0.39  0  0  1  0.5120  1
 1  0.63  0  1  0  0.7580  0
-1  0.36  1  0  0  0.4450  1
. . .

Each line represents a person. The fields are sex (male = -1, female = +1), age (divided by 100), state (one-hot encoded: Michigan = 1 0 0, Nebraska = 0 1 0, Oklahoma = 0 0 1), income (divided by $100,000), and political leaning (0 = conservative, 1 = moderate, 2 = liberal). There are 200 training items and 40 test items.
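
One way to get the two files into colab session storage is programmatically, from a notebook cell, using the google.colab files module. A minimal sketch (my addition, not part of the demo program):

from google.colab import files
uploaded = files.upload()  # opens a file-picker dialog in the browser
print(uploaded.keys())     # should show people_train.txt and people_test.txt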

Compared to my local program, I made the following three modifications.

1.) At the top of the program, to switch from the local CPU to the colab GPU (a device-agnostic alternative is sketched just after these three changes), I changed

import torch as T
device = T.device('cpu')

to

import torch as T
device = T.device('cuda:0')

2.) Because my data files were in colab session storage instead of on my local machine, I changed file paths:

train_file = ".\\Data\\people_train.txt"
train_ds = PeopleDataset(train_file)  # 200 rows

test_file = ".\\Data\\people_test.txt"
test_ds = PeopleDataset(test_file)    # 40 rows

fn = ".\\Models\\people_model.pt"
T.save(net.state_dict(), fn)

to

train_file = "./people_train.txt"
train_ds = PeopleDataset(train_file)  # 200 rows

test_file = "./people_test.txt"
test_ds = PeopleDataset(test_file)    # 40 rows

fn = "./people_model.pt"
T.save(net.state_dict(), fn)

3.) Because a PyTorch tensor that lives in GPU memory can’t be converted directly to a NumPy array, I changed

probs = T.exp(logits)  # sum to 1.0
probs = probs.numpy()  # numpy vector prints better

to

probs = T.exp(logits)  # sum to 1.0
probs = probs.cpu().numpy()  # from GPU to CPU to numpy
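
As a side note, instead of hard-coding 'cuda:0', a common device-agnostic pattern falls back to the CPU when no GPU is present, so the same script runs unchanged both locally and on colab. A sketch (a variation, not what the demo above uses):

import torch as T
device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
print(device)  # cuda:0 on a colab GPU runtime, cpu otherwise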

After a bit of tweaking, my standard demo ran on colab successfully. Although colab is quite nice and is very useful for some scenarios, I prefer running PyTorch programs on a local installation.
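
One practical caveat: colab session storage is temporary, so the saved people_model.pt file disappears when the session is recycled. If the trained model needs to be kept, it can be downloaded to the local machine with the google.colab files module (or Google Drive can be mounted). Again, a minimal sketch of my own, not part of the demo:

from google.colab import files
files.download("./people_model.pt")  # triggers a browser download of the saved state_dict file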



One of the characteristics of using colab is that it has multiple layers — browser to colab to Jupyter to Google cloud to PyTorch to Python to runtime. Here are three examples of traditional Kazakh dress, which is characterized, in part, by multiple layers of clothing. Kazakhstan is in Central Asia and is bordered by Russia, China, Kyrgyzstan, Uzbekistan, and Turkmenistan. Astana, the capital of Kazakhstan, was the site of the 2023 world chess championship match between Ian Nepomniachtchi (Russia) and Ding Liren (China).


Demo code. The training and test data can be found at https://jamesmccaffreyblog.com/2023/12/15/pytorch-multi-class-classification-using-a-transformer-with-custom-accuracy-and-interpretability/.

# people_politics.py
# predict politics type from sex, age, state, income
# running on colab-GPU 

import numpy as np
import torch as T
device = T.device('cuda:0')  # apply to Tensor or Module

# -----------------------------------------------------------

class PeopleDataset(T.utils.data.Dataset):
  # sex  age    state    income   politics
  # -1   0.27   0  1  0   0.7610   2
  # +1   0.19   0  0  1   0.6550   0
  # sex: -1 = male, +1 = female
  # state: michigan, nebraska, oklahoma
  # politics: conservative, moderate, liberal

  def __init__(self, src_file):
    all_xy = np.loadtxt(src_file, usecols=range(0,7),
      delimiter="\t", comments="#", dtype=np.float32)
    tmp_x = all_xy[:,0:6]   # cols [0,6) = [0,5]
    tmp_y = all_xy[:,6]     # 1-D

    self.x_data = T.tensor(tmp_x, 
      dtype=T.float32).to(device)
    self.y_data = T.tensor(tmp_y,
      dtype=T.int64).to(device)  # 1-D

  def __len__(self):
    return len(self.x_data)

  def __getitem__(self, idx):
    preds = self.x_data[idx]
    trgts = self.y_data[idx] 
    return preds, trgts  # as a Tuple

# -----------------------------------------------------------

class Net(T.nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.hid1 = T.nn.Linear(6, 10)  # 6-(10-10)-3
    self.hid2 = T.nn.Linear(10, 10)
    self.oupt = T.nn.Linear(10, 3)

    T.nn.init.xavier_uniform_(self.hid1.weight)
    T.nn.init.zeros_(self.hid1.bias)
    T.nn.init.xavier_uniform_(self.hid2.weight)
    T.nn.init.zeros_(self.hid2.bias)
    T.nn.init.xavier_uniform_(self.oupt.weight)
    T.nn.init.zeros_(self.oupt.bias)

  def forward(self, x):
    z = T.tanh(self.hid1(x))
    z = T.tanh(self.hid2(z))
    z = T.log_softmax(self.oupt(z), dim=1)  # NLLLoss() 
    return z

# -----------------------------------------------------------

def accuracy(model, ds):
  # assumes model.eval()
  # item-by-item version
  n_correct = 0; n_wrong = 0
  for i in range(len(ds)):
    X = ds[i][0].reshape(1,-1)  # make it a batch
    Y = ds[i][1].reshape(1)  # 0 1 or 2, 1D
    with T.no_grad():
      oupt = model(X)  # logits form

    big_idx = T.argmax(oupt)  # 0 or 1 or 2
    if big_idx == Y:
      n_correct += 1
    else:
      n_wrong += 1

  acc = (n_correct * 1.0) / (n_correct + n_wrong)
  return acc

# -----------------------------------------------------------

def accuracy_quick(model, dataset):
  # assumes model.eval()
  X = dataset[0:len(dataset)][0]
  # Y = T.flatten(dataset[0:len(dataset)][1])
  Y = dataset[0:len(dataset)][1]
  with T.no_grad():
    oupt = model(X)
  # (_, arg_maxs) = T.max(oupt, dim=1)
  arg_maxs = T.argmax(oupt, dim=1)  # argmax() is new
  num_correct = T.sum(Y==arg_maxs)
  acc = (num_correct * 1.0 / len(dataset))
  return acc.item()

# -----------------------------------------------------------

def confusion_matrix_multi(model, ds, n_classes):
  if n_classes <= 2:
    print("ERROR: n_classes must be 3 or greater ")
    return None

  cm = np.zeros((n_classes,n_classes), dtype=np.int64)
  for i in range(len(ds)):
    X = ds[i][0].reshape(1,-1)  # make it a batch
    Y = ds[i][1].reshape(1)  # actual class 0 1 or 2, 1D
    with T.no_grad():
      oupt = model(X)  # logits form
    pred_class = T.argmax(oupt)  # 0,1,2
    cm[Y][pred_class] += 1
  return cm

# -----------------------------------------------------------

def show_confusion(cm):
  dim = len(cm)
  mx = np.max(cm)             # largest count in cm
  wid = len(str(mx)) + 1      # width to print
  fmt = "%" + str(wid) + "d"  # like "%3d"
  for i in range(dim):
    print("actual   ", end="")
    print("%3d:" % i, end="")
    for j in range(dim):
      print(fmt % cm[i][j], end="")
    print("")
  print("------------")
  print("predicted    ", end="")
  for j in range(dim):
    print(fmt % j, end="")
  print("")

# -----------------------------------------------------------  

def main():
  # 0. get started
  print("\nBegin People predict politics type ")
  T.manual_seed(1)
  np.random.seed(1)
  
  # 1. create DataLoader objects
  print("\nCreating People Datasets ")

  train_file = "./people_train.txt"
  train_ds = PeopleDataset(train_file)  # 200 rows

  test_file = "./people_test.txt"
  test_ds = PeopleDataset(test_file)    # 40 rows

  bat_size = 10
  train_ldr = T.utils.data.DataLoader(train_ds,
    batch_size=bat_size, shuffle=True)

# -----------------------------------------------------------

  # 2. create network
  print("\nCreating 6-(10-10)-3 neural network ")
  net = Net().to(device)
  net.train()

# -----------------------------------------------------------

  # 3. train model
  max_epochs = 1000
  ep_log_interval = 200
  lrn_rate = 0.01

  loss_func = T.nn.NLLLoss()  # assumes log_softmax()
  optimizer = T.optim.SGD(net.parameters(), lr=lrn_rate)

  print("\nbat_size = %3d " % bat_size)
  print("loss = " + str(loss_func))
  print("optimizer = SGD")
  print("max_epochs = %3d " % max_epochs)
  print("lrn_rate = %0.3f " % lrn_rate)

  print("\nStarting training ")
  for epoch in range(0, max_epochs):
    # T.manual_seed(epoch+1)  # checkpoint reproducibility
    epoch_loss = 0  # for one full epoch

    for (batch_idx, batch) in enumerate(train_ldr):
      X = batch[0]  # inputs
      Y = batch[1]  # correct class/label/politics

      optimizer.zero_grad()
      oupt = net(X)
      loss_val = loss_func(oupt, Y)  # a tensor
      epoch_loss += loss_val.item()  # accumulate
      loss_val.backward()
      optimizer.step()

    if epoch % ep_log_interval == 0:
      print("epoch = %5d  |  loss = %10.4f" % \
        (epoch, epoch_loss))

  print("Training done ")

# -----------------------------------------------------------

  # 4. evaluate model accuracy
  print("\nComputing model accuracy")
  net.eval()
  acc_train = accuracy(net, train_ds)  # item-by-item
  print("Accuracy on training data = %0.4f" % acc_train)
  acc_test = accuracy(net, test_ds) 
  print("Accuracy on test data = %0.4f" % acc_test)

  # 5. make a prediction
  print("\nPredicting politics for M  30  oklahoma  $50,000: ")
  X = np.array([[-1, 0.30,  0,0,1,  0.5000]], dtype=np.float32)
  X = T.tensor(X, dtype=T.float32).to(device) 

  with T.no_grad():
    logits = net(X)  # do not sum to 1.0
  probs = T.exp(logits)  # sum to 1.0
  probs = probs.cpu().numpy()  # numpy vector prints better
  np.set_printoptions(precision=4, suppress=True)
  print(probs)

  # 6. save model (state_dict approach)
  print("\nSaving trained model state ")
  fn = "./people_model.pt"
  T.save(net.state_dict(), fn)

  print("\nEnd People predict politics demo ")

if __name__ == "__main__":
  main()