Yet Another Version of a Neural Network Multi-Class Classifier Using Raw JavaScript

Once or twice a year, I revisit my JavaScript implementation of neural network systems. The code is so long (approx. 1000 LOC) and complex that there are always tweaks and fine-tuning opportunities.

On a recent weekend, I revisited my multi-class classifier NN. I used one of my standard dummy datasets where the goal is to predict a person’s political leaning (0 = conservative, 1 = moderate, 2 = liberal) from sex, age, State, and income. The data looks like:

 1  0.24  1 0 0  0.2950  2
-1  0.39  0 0 1  0.5120  1
 1  0.63  0 1 0  0.7580  0
. . .

The fields are sex (M = -1, F = +1), age (divided by 100), State (Michigan = 100, Nebraska = 010, Oklahoma = 001), income (divided by $100,000), and political leaning. Notice the political leaning target variable is ordinal encoded: 0 = conservative, 1 = moderate, 2 = liberal. Therefore, political leaning must be converted to one-hot encoding, either programmatically (as in my demo) or manually.

Compared to earlier efforts, the most significant change I made was to implement mini-batch training. It’s somewhat of a universal training scheme because if batch size = 1 you get “online” training, and if batch size = training set size you get “full batch” training.

For my architecture, I used a single hidden layer with tanh activation; uniform random weight initialization; softmax output activation in conjunction with cross entropy error loss — there are lots of design decisions.

It was a fun and satisfying effort.

Demo program below. Very long! Replace “lt”, “gt”, “lte”, “gte” with Boolean operator symbols. For the utility functions, training and test data, see the post at https://jamesmccaffreyblog.com/2023/08/07/adding-a-toonehot-function-to-my-javascript-utility-library/.



Anyone who programs creates different versions of their code. Most of my favorite novels have several different versions of cover art. “The Chessmen of Mars” was written by Edgar Rice Burroughs, serialized in magazine form in 1921–22, and then published as a novel in 1922. It’s the fifth in a series of nine Mars novels. The daughter of John Carter and Dejah Thoris, Tara, gets lost on Mars. She is eventually rescued by Prince Gahan of Gathol. Along the way they run into the evil Kaldanes, who are basically large heads with crab-like legs. The Kaldanes have bred a race of headless human-like creatures called Rykors, to which they can attach themselves. Creepy!

Left: Cover art by Gino D’Achille (1973). Center: By Roy Krenkel (1962). Right: By Roy Carnon (1962).


Program code. Replace “lt”, “gt”, “lte”, “gte” with Boolean operator symbols.

// people_politics.js
// node.js  ES6

// multi-class one-hot predictors, ordinal targets
// softmax activation, MCEE loss

let U = require("..\\Utils\\utilities_lib.js")
let FS = require("fs")

// ----------------------------------------------------------

// Fully-connected neural network multi-class classifier:
// input -> tanh hidden layer -> softmax output layer,
// trained with mini-batch SGD against cross entropy error.
class NeuralNet
{
  // numInput/numHidden/numOutput: layer sizes; seed: for the
  // U.Erratic pseudo-random generator (reproducible init/shuffle).
  constructor(numInput, numHidden, numOutput, seed)
  {
    this.rnd = new U.Erratic(seed);  // pseudo-random

    this.ni = numInput; 
    this.nh = numHidden;
    this.no = numOutput;

    // node activation scratch vectors, reused every forward pass
    this.iNodes = U.vecMake(this.ni, 0.0);
    this.hNodes = U.vecMake(this.nh, 0.0);
    this.oNodes = U.vecMake(this.no, 0.0);

    this.ihWeights = U.matMake(this.ni, this.nh, 0.0);
    this.hoWeights = U.matMake(this.nh, this.no, 0.0);

    this.hBiases = U.vecMake(this.nh, 0.0);
    this.oBiases = U.vecMake(this.no, 0.0);

    this.initWeights();
  }

  // Uniform random weight initialization in [-0.10, +0.10].
  // Biases are left at 0.0.
  initWeights()
  {
    let lo = -0.10;
    let hi = 0.10;
    for (let i = 0; i < this.ni; ++i) {
      for (let j = 0; j < this.nh; ++j) {
        this.ihWeights[i][j] = (hi - lo) * this.rnd.next() + lo;
      }
    }

    for (let j = 0; j < this.nh; ++j) {
      for (let k = 0; k < this.no; ++k) {
        this.hoWeights[j][k] = (hi - lo) * this.rnd.next() + lo;
      }
    }
  } 

  // --------------------------------------------------------

  // Forward pass for one input vector X (length ni).
  // Side effect: stores activations in this.iNodes / this.hNodes /
  // this.oNodes. Returns a copy of the softmax output vector.
  computeOutputs(X)
  {
    let hSums = U.vecMake(this.nh, 0.0);
    let oSums = U.vecMake(this.no, 0.0);
    
    this.iNodes = X;

    for (let j = 0; j < this.nh; ++j) {
      for (let i = 0; i < this.ni; ++i) {
        hSums[j] += this.iNodes[i] * this.ihWeights[i][j];
      }
      hSums[j] += this.hBiases[j];
      this.hNodes[j] = U.hyperTan(hSums[j]);  // tanh hidden activation
    }

    for (let k = 0; k < this.no; ++k) {
      for (let j = 0; j < this.nh; ++j) {
        oSums[k] += this.hNodes[j] * this.hoWeights[j][k];
      }
      oSums[k] += this.oBiases[k];
    }

    this.oNodes = U.softmax(oSums);  // pseudo-probabilities

    // return a copy so callers can't mutate internal state
    let result = [];
    for (let k = 0; k < this.no; ++k) {
      result[k] = this.oNodes[k];
    }
    return result;
  } // computeOutputs()

  // --------------------------------------------------------

  // Load all weights and biases from a flat array.
  // Serialization order: ihWts, hBiases, hoWts, oBiases.
  setWeights(wts)
  {
    let p = 0;

    for (let i = 0; i < this.ni; ++i) {
      for (let j = 0; j < this.nh; ++j) {
        this.ihWeights[i][j] = wts[p++];
      }
    }

    for (let j = 0; j < this.nh; ++j) {
      this.hBiases[j] = wts[p++];
    }

    for (let j = 0; j < this.nh; ++j) {
      for (let k = 0; k < this.no; ++k) {
        this.hoWeights[j][k] = wts[p++];
      }
    }

    for (let k = 0; k < this.no; ++k) {
      this.oBiases[k] = wts[p++];
    }
  } // setWeights()

  // Return all weights and biases as a flat array.
  // Serialization order: ihWts, hBiases, hoWts, oBiases
  // (must match setWeights()).
  getWeights()
  {
    let numWts = (this.ni * this.nh) + this.nh +
      (this.nh * this.no) + this.no;
    let result = U.vecMake(numWts, 0.0);
    let p = 0;
    for (let i = 0; i < this.ni; ++i) {
      for (let j = 0; j < this.nh; ++j) {
        result[p++] = this.ihWeights[i][j];
      }
    }

    for (let j = 0; j < this.nh; ++j) {
      result[p++] = this.hBiases[j];
    }

    for (let j = 0; j < this.nh; ++j) {
      for (let k = 0; k < this.no; ++k) {
        result[p++] = this.hoWeights[j][k];
      }
    }

    for (let k = 0; k < this.no; ++k) {
      result[p++] = this.oBiases[k];
    }
    return result;
  } // getWeights()

  // In-place Fisher-Yates shuffle using this.rnd (reproducible).
  shuffle(v)
  {
    let n = v.length;
    for (let i = 0; i < n; ++i) {
      let r = this.rnd.nextInt(i, n);  // r in [i, n)
      let tmp = v[r];
      v[r] = v[i];
      v[i] = tmp;
    }
  }

  // Mini-batch SGD training. batSize = 1 gives online training,
  // batSize = trainX.length gives full-batch training.
  // Items beyond numBatches * batSize in a shuffled epoch are skipped.
  train(trainX, trainY, lrnRate, batSize, maxEpochs)
  {
    // 0. create accumulated gradients
    let hoGrads = U.matMake(this.nh, this.no, 0.0);
    let obGrads = U.vecMake(this.no, 0.0);
    let ihGrads = U.matMake(this.ni, this.nh, 0.0);
    let hbGrads = U.vecMake(this.nh, 0.0);

    let oSignals = U.vecMake(this.no, 0.0);
    let hSignals = U.vecMake(this.nh, 0.0);

    // create indices for shuffled visit order
    let n = trainX.length;
    let indices = U.arange(n);  // [0, 1, .., n-1]
    let freq = Math.trunc(maxEpochs / 10);  // progress display interval
    let numBatches = Math.trunc(n / batSize);
    
    for (let epoch = 0; epoch < maxEpochs; ++epoch) {
      this.shuffle(indices); 

      for (let bix = 0; bix < numBatches; ++bix)  {
        // zero out all grads from previous batch
        for (let i = 0; i < this.ni; ++i)
          for (let j = 0; j < this.nh; ++j)
            ihGrads[i][j] = 0.0;

        for (let j = 0; j < this.nh; ++j)
          hbGrads[j] = 0.0;

        for (let j = 0; j < this.nh; ++j)
          for (let k = 0; k < this.no; ++k)
            hoGrads[j][k] = 0.0;

        for (let k = 0; k < this.no; ++k)
          obGrads[k] = 0.0;

        // accumulate grads for each item in current batch
        for (let ii = bix * batSize;
          ii < bix * batSize + batSize; ++ii) {
          let idx = indices[ii];
          let X = trainX[idx];
          let Y = trainY[idx];
          this.computeOutputs(X);  // results in this.oNodes

  // --------------------------------------------------------

          // 1. compute output node signals
          for (let k = 0; k < this.no; ++k) {
            // softmax + cross entropy error: the (softmax)
            // derivative term cancels to 1.0
            let derivative = 1.0;
            oSignals[k] = derivative *
              (this.oNodes[k] - Y[k]);  // output - target
          }      

          // 2. accum hidden-to-output grads
          for (let j = 0; j < this.nh; ++j) {
            for (let k = 0; k < this.no; ++k) {
              hoGrads[j][k] += oSignals[k] *
                this.hNodes[j];
            }
          }

          // 3. accum output node bias grads (dummy 1.0 input)
          for (let k = 0; k < this.no; ++k) {
            obGrads[k] += oSignals[k] * 1.0;
          }

          // 4. compute hidden node signals
          for (let j = 0; j < this.nh; ++j) {
            let sum = 0.0;
            for (let k = 0; k < this.no; ++k) {
              sum += oSignals[k] * this.hoWeights[j][k];
            }
            let derivative = (1 - this.hNodes[j]) *
              (1 + this.hNodes[j]);  // tanh derivative
            hSignals[j] = derivative * sum;
          }

          // 5. accum input-to-hidden grads
          for (let i = 0; i < this.ni; ++i) {
            for (let j = 0; j < this.nh; ++j) {
              ihGrads[i][j] += hSignals[j] * this.iNodes[i];
            }
          }

          // 6. accum hidden node bias grads (dummy 1.0 input)
          for (let j = 0; j < this.nh; ++j) {
            hbGrads[j] += hSignals[j] * 1.0;
          }
          
        } // curr batch

  // --------------------------------------------------------

        // divide all accumulated gradients by batch size
        //  a. hidden-to-output gradients 
        for (let j = 0; j < this.nh; ++j)
          for (let k = 0; k < this.no; ++k)
            hoGrads[j][k] /= batSize;

        // b. output node bias gradients
        for (let k = 0; k < this.no; ++k)
          obGrads[k] /= batSize;

        // c. input-to-hidden gradients
        for (let i = 0; i < this.ni; ++i)
          for (let j = 0; j < this.nh; ++j)
            ihGrads[i][j] /= batSize;

        // d. hidden node bias gradients
        for (let j = 0; j < this.nh; ++j)
          hbGrads[j] /= batSize;

        // update phase: step each parameter downhill

        // 7. update input-to-hidden weights
        for (let i = 0; i < this.ni; ++i) {
          for (let j = 0; j < this.nh; ++j) {
            let delta = -1.0 * lrnRate * ihGrads[i][j];
            this.ihWeights[i][j] += delta;
          }
        }

        // 8. update hidden node biases
        for (let j = 0; j < this.nh; ++j) {
          let delta = -1.0 * lrnRate * hbGrads[j];
          this.hBiases[j] += delta;
        } 

        // 9. update hidden-to-output weights
        for (let j = 0; j < this.nh; ++j) {
          for (let k = 0; k < this.no; ++k) { 
            let delta = -1.0 * lrnRate * hoGrads[j][k];
            this.hoWeights[j][k] += delta;
          }
        }

        // 10. update output node biases
        for (let k = 0; k < this.no; ++k) {
          let delta = -1.0 * lrnRate * obGrads[k];
          this.oBiases[k] += delta;
        }
      } // bix

      // display progress every freq epochs
      if (epoch % freq == 0) {
        let mcee = 
          this.meanCrossEntErr(trainX, trainY).toFixed(4);
        let acc = this.accuracy(trainX, trainY).toFixed(4);

        let s1 = "epoch: " +
          epoch.toString().padStart(6, ' ');
        let s2 = "   MCEE = " + 
          mcee.toString().padStart(8, ' ');
        let s3 = "   acc = " + acc.toString();

        console.log(s1 + s2 + s3);
      }
      
    } // epoch
  } // train()

  // --------------------------------------------------------

  // Mean cross entropy error over a dataset. Because targets are
  // one-hot, only the log-output at the target's 1 position counts.
  meanCrossEntErr(dataX, dataY)
  {
    let sumCEE = 0.0;  // sum of cross entropy errors
    for (let i = 0; i < dataX.length; ++i) { 
      let X = dataX[i];
      let Y = dataY[i];  // target like (0, 1, 0)
      let oupt = this.computeOutputs(X); 
      let idx = U.argmax(Y);  // find loc of 1 in target
      sumCEE += Math.log(oupt[idx]);
    }
    sumCEE *= -1;
    return sumCEE / dataX.length;
  }

  // Mean squared error over a dataset (alternative metric).
  meanSqErr(dataX, dataY)
  {
    let sumSE = 0.0;
    for (let i = 0; i < dataX.length; ++i) {
      let X = dataX[i];
      let Y = dataY[i];  // target output like (0, 1, 0)
      let oupt = this.computeOutputs(X);  // e.g. (0.23, 0.66, 0.11)
      for (let k = 0; k < this.no; ++k) {
        let err = Y[k] - oupt[k];  // target - computed
        sumSE += err * err;
      }
    }
    return sumSE / dataX.length;  // consider Root MSE
  } 

  // Classification accuracy: predicted class is argmax of the
  // output probabilities; compared against argmax of the target.
  accuracy(dataX, dataY)
  {
    let nc = 0; let nw = 0;  // num correct, num wrong
    for (let i = 0; i < dataX.length; ++i) { 
      let X = dataX[i];
      let Y = dataY[i];  // target like (0, 1, 0)
      let oupt = this.computeOutputs(X); 
      let computedIdx = U.argmax(oupt);
      let targetIdx = U.argmax(Y);
      if (computedIdx == targetIdx) {
        ++nc;
      }
      else {
        ++nw;
      }
    }
    return nc / (nc + nw);
  }

  // --------------------------------------------------------

  // Build a no x no confusion matrix: rows = actual class,
  // columns = predicted class.
  confusionMatrix(dataX, dataY)
  {
    let n = this.no;
    let result = U.matMake(n, n, 0.0);  // e.g. 3x3
    
    for (let i = 0; i < dataX.length; ++i) {
      let X = dataX[i];
      let Y = dataY[i];  // target like (0, 1, 0)
      let oupt = this.computeOutputs(X);  // probs
      let targetK = U.argmax(Y);
      let predK = U.argmax(oupt);
      ++result[targetK][predK];
    }
    return result;
  }

  // Pretty-print a confusion matrix produced by confusionMatrix().
  showConfusion(cm)
  {
    let n = cm.length;
    for (let i = 0; i < n; ++i) {
      process.stdout.write("actual " + 
        i.toString() + ": ");
      for (let j = 0; j < n; ++j) {
        process.stdout.write(cm[i][j].toString().
          padStart(4, " "));
      }
      console.log("");
    }
  }

  // --------------------------------------------------------

  // Save all weights and biases to file fn as one comma-
  // separated line, in getWeights() order.
  saveWeights(fn)
  {
    let wts = this.getWeights();
    let n = wts.length;
    let s = "";
    for (let i = 0; i < n-1; ++i) {
      s += wts[i].toString() + ",";
    }
    s += wts[n-1];  // no trailing comma

    FS.writeFileSync(fn, s);
  }

  // Load weights and biases saved by saveWeights(). Throws if the
  // file's value count doesn't match this network's architecture.
  loadWeights(fn)
  {
    let n = (this.ni * this.nh) + this.nh +
      (this.nh * this.no) + this.no;
    let wts = U.vecMake(n, 0.0);
    let all = FS.readFileSync(fn, "utf8");
    let strVals = all.split(",");
    let nn = strVals.length;
    if (n != nn) {
      throw new Error("Size error in NeuralNet.loadWeights()");
    }
    for (let i = 0; i < n; ++i) {
      wts[i] = parseFloat(strVals[i]);
    }
    this.setWeights(wts);
  }

} // NeuralNet

// ----------------------------------------------------------

// Demo entry point: load the people data, create and train a
// 6-100-3 classifier, evaluate it, save the trained weights,
// and predict the politics of one new person.
function main()
{
  console.log("\nBegin JavaScript NN demo ");
  console.log("Politics from sex, age, State, income ");
  console.log("con = 0, mod = 1, lib = 2 ");

  // 1. load data; each line looks like:
  // -1  0.29  1 0 0  0.65400  2
  //  1  0.36  0 0 1  0.58300  0
  console.log("\nLoading data into memory ");
  let trainX = U.loadTxt(".\\Data\\people_train.txt", "\t",
    [0,1,2,3,4,5], "#");
  let trainY = U.loadTxt(".\\Data\\people_train.txt", "\t",
    [6], "#");
  trainY = U.matToOneHot(trainY, 3);  // ordinal 0/1/2 -> one-hot
  let testX = U.loadTxt(".\\Data\\people_test.txt", "\t",
    [0,1,2,3,4,5], "#");
  let testY = U.loadTxt(".\\Data\\people_test.txt", "\t",
    [6], "#");
  testY = U.matToOneHot(testY, 3);

  // 2. create network
  console.log("\nCreating 6-100-3 tanh, softmax CEE NN ");
  let seed = 1;
  let nn = new NeuralNet(6, 100, 3, seed);

  // 3. train network with mini-batch SGD
  let lrnRate = 0.01;
  let batSize = 10;
  let maxEpochs = 10000;
  console.log("\nLearn rate = 0.01 bat size = 10 ");
  nn.train(trainX, trainY, lrnRate, batSize, maxEpochs);
  console.log("Training complete ");

  // 4. evaluate model
  let trainAcc = nn.accuracy(trainX, trainY);
  let testAcc = nn.accuracy(testX, testY);
  console.log("\nAccuracy on training data = " +
    trainAcc.toFixed(4).toString()); 
  console.log("Accuracy on test data     = " +
    testAcc.toFixed(4).toString());

  // 4b. confusion matrix on test data
  console.log("\nComputing confusion matrix ");
  let cm = nn.confusionMatrix(testX, testY);
  nn.showConfusion(cm);

  // 5. save trained model
  let fn = ".\\Models\\people_wts.txt";  // let: avoid implicit global
  console.log("\nSaving model weights and biases to: ");
  console.log(fn);
  nn.saveWeights(fn);

  // 6. use trained model on a previously unseen person
  console.log("\npredict M 46 Oklahoma $66,400 ");
  let x = [-1, 0.46, 0, 0, 1, 0.6640];
  let predicted = nn.computeOutputs(x);
  console.log("\nPredicted pseudo-probabilities: ");
  U.vecShow(predicted, 4, 10); 

  console.log("\nEnd demo");
}

main()
This entry was posted in JavaScript. Bookmark the permalink.