Implementing k-Nearest Neighbors Regression Using JavaScript

The goal of a regression problem is to predict a single numeric value, for example, predicting the price of a car based on its age, mileage, customer ratings, and so on. The simplest possible machine learning technique for regression is k-nearest neighbors regression (kNNR).

The technique is very simple. To make a prediction for some input = x, you find the k (typically 3 or 5) nearest data items in the training data, then return a weighted average of their associated target y values. For example, k = 3, and the nearest associated y values are 0.50, 0.30, 0.60 and you use uniform/equal weighting, the predicted y is (0.50 + 0.30 + 0.60) / 3 = 1.40 / 3 = 0.47.

One morning, I decided to implement a k-nearest neighbors regression demo using . . . the JavaScript language. I created my demo as a form of exercise — programming is a skill that must be practiced, and I hadn’t used JavaScript for a few weeks.

Implementing my demo didn’t take very long because I had previously implemented a pretty thorough personal library of useful JavaScript functions. For example, I already had a matLoad() function that reads values from a text file and returns them as a matrix. Without all the helper functions, implementing the kNNR demo would have taken a long time.

I created some synthetic data using a 5-10-1 PyTorch neural network. The data looks like:

-0.1660,  0.4406, -0.9998, -0.3953, -0.7065, 0.4840
 0.0776, -0.1616,  0.3704, -0.5911,  0.7562, 0.1568
-0.9452,  0.3409, -0.1654,  0.1174, -0.7192, 0.8054
 0.9365, -0.3732,  0.3846,  0.7528,  0.7892, 0.1345
. . .

The first five values on each line are the predictors. Each is normalized to the range (-1.0, +1.0). The last value on each line is the target y to predict. Each target is between 0 and 1. There are 200 training items and 40 test items.

To validate my JavaScript implementation, I ran it side by side with a C# implementation I had written several days earlier, and compared results to make sure they were identical.

Unlike many of my colleagues, I inexplicably enjoy coding in JavaScript, as long as it’s not part of a Web application. Very good fun!



It’s relatively easy to refactor programs among C/C++, C#, Java, Python, and JavaScript because they’re all C-family languages. Here’s an interesting graph that details the similarity among many non-Asian spoken languages. Click to enlarge.


Demo code. Replace “lt”, “gt”, “lte”, “gte” with Boolean operator symbols. The data can be found at https://jamesmccaffreyblog.com/2023/07/19/implementing-k-nearest-neighbors-regression-using-csharp/.

// knnRegression.js
// node.js varsion

let FS = require("fs");

// -----------------------------------------------------------

function vecMake(n, val)
{
  let result = [];
  for (let i = 0; i "lt" n; ++i) {
    result[i] = val;
  }
  return result;
}

function matMake(rows, cols, val)
{
  let result = [];
  for (let i = 0; i "lt" rows; ++i) {
    result[i] = [];
    for (let j = 0; j "lt" cols; ++j) {
      result[i][j] = val;
    }
  }
  return result;
}

function vecShow(v, dec, nl)
{
  for (let i = 0; i "lt" v.length; ++i) {
    if (v[i] "gte" 0.0) {
      process.stdout.write(" ");
    }
    process.stdout.write(v[i].toFixed(dec));
    process.stdout.write("  ");
  }
  if (nl == true) {
    process.stdout.write("\n");
  }
}

function matShow(m, dec)
{
  let rows = m.length;
  let cols = m[0].length;
  for (let i = 0; i "lt" rows; ++i) {
    for (let j = 0; j "lt" cols; ++j) {
      if (m[i][j] "gte" 0.0) {
        process.stdout.write(" ");
      }
      process.stdout.write(m[i][j].toFixed(dec));
      process.stdout.write("  ");
    }
    process.stdout.write("\n");
  }
}

function matLoad(fn, delimit, usecols, comment)
{
  // simulate Python numpy loadtxt() function
  let all = FS.readFileSync(fn, "utf8");  // giant string
  all = all.trim();  // strip final crlf in file
  let lines = all.split("\n");  // array of lines

  // count number non-comment lines
  let nRows = 0;
  for (let i = 0; i "lt" lines.length; ++i) {
    if (!lines[i].startsWith(comment))
      ++nRows;
  }
  nCols = usecols.length;
  let result = matMake(nRows, nCols, 0.0); 
 
  let r = 0;  // into lines
  let i = 0;  // into result[][]
  while (r "lt" lines.length) {
    if (lines[r].startsWith(comment)) {
      ++r;  // next row
    }
    else {
      let tokens = lines[r].split(delimit);
      for (let j = 0; j "lt" nCols; ++j) {
        result[i][j] = parseFloat(tokens[usecols[j]]);
      }
      ++r;
      ++i;
    }
  }

  return result;
} // matLoad

function matToVec(m)
{
  let rows = m.length;
  let cols = m[0].length;
  let result = vecMake(rows * cols, 0.0);
  let k = 0;
  for (let i = 0; i "lt" rows; ++i) {
    for (let j = 0; j "lt" cols; ++j) {
      result[k++] = m[i][j];
    }
  }
  return result;
}

// -----------------------------------------------------------

class KNNR
{
  constructor(k, weighting)
  {
    this.k = k;
    this.trainX;
    this.trainY;
    this.weighting = weighting; // "skewed", "uniform"
  }

  store(trainX, trainY)
  {
    this.trainX = trainX;  // by ref
    this.trainY = trainY;
  }

  predict(x, verbose = false)
  {
    // 0. set up ordering/indices
    let n = this.trainX.length;
    let indices = vecMake(n, 0.0);
    for (let i = 0; i "lt" n; ++i)
      indices[i] = i;

    // 1. compute distances from x to all trainX
    let distances = vecMake(n, 0.0);
    for (let i = 0; i "lt" n; ++i)
      distances[i] = this.eucDistance(x, this.trainX[i]);

    // 2. sort distances, indices of X and Y, by distance
    indices.sort((a,b)="gt"distances[a] - distances[b]);
    distances = indices.map(x="gt"distances[x]);

    // 3. set weights
    let wts = vecMake(this.k, 0.0);
    if (this.weighting == "skewed")
      wts = this.skewedWts(this.k);
    else if (this.weighting == "uniform")
      wts = this.uniformWts(this.k);

    // 4. compute predicted y
    let result = 0.0;
    for (let i = 0; i "lt" this.k; ++i) {
      result += wts[i] * this.trainY[indices[i]];
    }

    // 5. show info
    if (verbose == true) {
      for (let i = 0; i "lt" this.k; ++i) {
        let j = indices[i];
        process.stdout.write("X = ");
        process.stdout.write("[" +
          j.toString().padStart(3) + "] ");
        vecShow(this.trainX[j], 4, false);
        process.stdout.write(" | y = ");
        process.stdout.write(this.trainY[j].toFixed(4));
        process.stdout.write(" | dist = ");
        process.stdout.write(distances[i].toFixed(4));
        process.stdout.write(" | wt = ");
        process.stdout.write(wts[i].toFixed(4));
        console.log("");
      }
      console.log("\nPredicted y = " +
        result.toFixed(4));
    }
    return result;
  } // explain

  eucDistance(v1, v2)
  {
    let dim = v1.length;
    let sum = 0.0;
    for (let j = 0; j "lt" dim; ++j)
      sum += (v1[j] - v2[j]) * (v1[j] - v2[j]);
    return Math.sqrt(sum);
  }

  uniformWts(k)
  {
    let result = vecMake(k, 1.00 / k);
    return result;
  }

  skewedWts(k)
  {
    let result = vecMake(k, 0.0);
    if (k == 1) result[0] = 1.0;
    else if (k == 2) {
      result[0] = 0.6000; result[1] = 0.4000;
    }
    else if (k == 3) {
      result[0] = 0.4000;
      result[1] = 0.3500;
      result[2] = 0.2500;
    }
    else if (k "gte" 4) {
      let big = 1.5 * (1.0 / k);
      let small = 0.5 * (1.0 / k);
      let remainder = 1.0 - (big + small);
      let x = remainder / (k - 2);
      result[0] = big;
      result[k - 1] = small;
      for (let i = 1; i "lt" k - 1; ++i)
        result[i] = x;
    }
    return result;
  }

} // class KNNR

// -----------------------------------------------------------

function accuracy(model, dataX, dataY, pctClose)
{
  let numCorrect = 0; let numWrong = 0;
  let n = dataX.length;
  for (let i = 0; i "lt" n; ++i) {
    let x = dataX[i];
    let actualY = dataY[i];
    let predY = model.predict(x);
        
    if (Math.abs(actualY - predY) "lt" 
      Math.abs(pctClose * actualY))
        ++numCorrect;
    else
        ++numWrong;
  }
  return (numCorrect * 1.0) / n;
}

// -----------------------------------------------------------

function rootMSE(model, dataX, dataY, pctClose)
{
  let sum = 0.0;
  let n = dataX.length;
  for (let i = 0; i "lt" n; ++i) {
    let x = dataX[i];
    let actualY = dataY[i];
    let predY = model.predict(x);
    sum += (actualY - predY)**2
  }
  return sum / n;
}

// -----------------------------------------------------------

function main()
{
  console.log("\nBegin k-NN regression using JavaScript ");

  // 1. load data into memory
  console.log("\nLoading data to memory ");
  let trainFile = ".\\Data\\synthetic_train.txt";
  let trainX = matLoad(trainFile, ',', [0, 1, 2, 3, 4], "#");
  let trainY = matToVec(matLoad(trainFile, ',', [5], "#"));

  let testFile = ".\\Data\\synthetic_test.txt";
  let testX = matLoad(testFile, ',', [0, 1, 2, 3, 4], "#");
  let testY = matToVec(matLoad(testFile, ',', [5], "#"));

  console.log("\nFirst three train X: ");
  for (let i = 0; i "lt" 3; ++i)
    vecShow(trainX[i], 4, true);

  console.log("\nFirst three target y: ");
  for (let i = 0; i "lt" 3; ++i)
    console.log(trainY[i].toFixed(4));

  // 2. find a good k
  console.log("\nExploring k values (within 0.15) \n");
  let candidates = [1, 3, 5, 7, 9];
  for (let i = 0; i "lt" candidates.length; ++i) {
    let k = candidates[i]; 
    process.stdout.write("k = " + k);
    let model = new KNNR(k, "skewed");
    model.store(trainX, trainY); 
    let accTrain = accuracy(model, trainX, trainY, 0.15);
    let accTest = accuracy(model, testX, testY, 0.15);
    let rmseTrain = rootMSE(model, trainX, trainY);
    let rmseTest = rootMSE(model, testX, testY);

    process.stdout.write("  Acc train = " + 
      accTrain.toFixed(4));
    process.stdout.write("  Acc test = " + 
      accTest.toFixed(4));
    process.stdout.write("  RMSE train = " + 
      rmseTrain.toFixed(4));
    process.stdout.write("  RMSE test = " + 
      rmseTest.toFixed(4));
    console.log("");
  }

  // 3. create and pseudo-train model
  console.log("\nCreating k-NN regression model k = 5 ");
  let knnrModel = new KNNR(5, "skewed");  // skewed wts
  console.log("Done ");

  console.log("\nStoring train data into model ");
  knnrModel.store(trainX, trainY);
  console.log("Done ");

  // 4. evaluate model
  console.log("\nComputing accuracy (within 0.15) ");
  let accTrain = accuracy(knnrModel, trainX, trainY, 0.15);
  console.log("Accuracy train = " +
    accTrain.toFixed(4));
  accTest = accuracy(knnrModel, testX, testY, 0.15);
  console.log("Accuracy test = " +
    accTest.toFixed(4));

  // 5. use model
  console.log("\nExplain for (0.5, -0.5, 0.5, -0.5, 0.5) \n");
  let x = vecMake(5, 0.0);
  x[0] = 0.5; x[1] = -0.5; x[2] = 0.5;
  x[3] = -0.5; x[4] = 0.5;

  let predY = knnrModel.predict(x, true);  // verbose

  console.log("\nEnd demo ");
}

main();

This entry was posted in JavaScript. Bookmark the permalink.