Implementing the ANOVA Statistics Test Using JavaScript

The ANOVA (“analysis of variance”) test is a classical statistics technique that is used to infer if the means of three or more different groups are all the same, or not, based on samples from the groups. For example, you might want to infer if the average math ability of students from three different high schools is the same or not, based on giving a test to 20 randomly selected students from each school.

There are two steps in ANOVA. The first step is to compute the “F-statistic” of the data, based on the sample means of each of the groups and the number of data points in each group. This step is relatively easy.

The second step is to programmatically compute a p-value from the F-statistic using the mathematical F-distribution. The p-value is the probability that the means of all groups are the same. This step is extremely difficult.

A low p-value, typically 0.05 or smaller, indicates the population means are not all the same. A larger p-value indicates that there’s not enough evidence to conclude that the population means are not all the same.

Here’s the output of my JavaScript demo:

C:\JavaScript\AnovaJavaScript: node anova.js

Begin ANOVA using JavaScript
The goal is infer if 3 or more population
means are all equal based on samples.

The sample data is:

Group1:     3.0    4.0    6.0    5.0
Group2:     8.0   12.0    9.0   11.0   10.0    8.0
Group3:    13.0    9.0   11.0    8.0   12.0

Calculating F-statistic (verbosely)
Group [0] mean = 4.50
Group [1] mean = 9.67
Group [2] mean = 10.60
Overall mean = 8.60

Calculated SSb = 94.0667
Calculated MSb = 47.0333
Calculated SSw = 35.5333
Calculated MSw = 2.9611

F stat = MSb / MSw
F stat = 15.884
The degrees of freedom are
2, 12

Calculating p-value from F stat, df1, df2
p-value = 0.00042480

The p-value is probability that all
group means are equal.
Interpreting the p-value is a bit subtle.

End demo

C:\JavaScript\AnovaJavaScript:

Because the p-value is so small, the conclusion is that the means of the three populations from which the samples were taken are not all the same. It looks like the mean of Group 1 is much smaller than the means of Groups 2 and 3.

It would take many pages to explain in detail how to compute the p-value using the F-statistic and the F-distribution. Briefly, the top-level FDist() function calls a RegularizedIncomepleteBeta() function, which calls helpers ContinuedFraction() and LogBeta(), and the LogBeta() helper calls a LogGamma() sub-helper. All of these helpers are very tricky to implement. I had to rely on many references and spend many hours figuring everything out.

But it was good fun.



Artist Edwin Georgi (1896-1964) used color variability to good effect. He did illustrations for many of the leading magazines of the 1950s, such as Fortune, Redbook, Esquire, and Saturday Evening Post. Here are three examples of his work. Notice that almost all of the colors are unnatural, such as the orange and green skin tones.


// anova.js
// classical three-groups 
// node.js

let FS = require("fs")  // for loadTxt() -- not used

// ----------------------------------------------------------

function vecMake(n, val)
{
  let result = [];
  for (let i = 0; i "lt" n; ++i) {
    result[i] = val;
  }
  return result;
}

// ----------------------------------------------------------

function vecShow(vec, dec, wid, nl)
{
  let small = 1.0 / Math.pow(10, dec);
  for (let i = 0; i "lt" vec.length; ++i) {
    let x = vec[i];
    if (Math.abs(x) "lt" small) x = 0.0  // avoid -0.00
    let xx = x.toFixed(dec);
    let s = xx.toString().padStart(wid, ' ');
    process.stdout.write(s);
    process.stdout.write(" ");
  }

  if (nl == true)
    process.stdout.write("\n");
}

// ----------------------------------------------------------

function showData(data, colNames)
{
  for (let i = 0; i "lt" data.length; ++i) {
    process.stdout.write(colNames[i].toString() + ": ");
    for (let j = 0; j "lt" data[i].length; ++j) {
      let s = 
        data[i][j].toFixed(1).toString().padStart(7);
      process.stdout.write(s);
    }
    console.log("");
  }
}

// ----------------------------------------------------------

function Fstat(data)
{
  // calculate F statistic
  // assumes data has specific structure:
  // data[0] : 3, 4, etc (group 1)
  // data[1] : 8, 12, 7, etc. (group 2)
  // data[2] : 6, 5, etc. (group 3)

  let K = data.length; // number groups
  let n = vecMake(K, 0);  // number items each group
  let N = 0; // total number data points
  for (let i = 0; i "lt" K; ++i) {
    n[i] = data[i].length;
    N += data[i].length;
  }

  // 1. group means and grand mean
  let means = vecMake(K, 0.0);
  let gMean = 0.0;
  for (let i = 0; i "lt" K; ++i) {
    for (let j = 0; j "lt" data[i].length; ++j) {
      means[i] += data[i][j];
      gMean += data[i][j];
    }
    means[i] /= n[i];

    console.log("Group [" + i + "] mean = " +
      means[i].toFixed(2).toString());
  }
  gMean /= N;
  console.log("Overall mean = " + 
    gMean.toFixed(2).toString());

  // 2. SSb and MSb
  let SSb = 0.0;
  for (let i = 0; i "lt" K; ++i)
    SSb += n[i] * (means[i] - gMean) * (means[i] - gMean);
  let MSb = SSb / (K - 1);

  // 3. SSw and MSw
  let SSw = 0.0;
  for (let i = 0; i "lt" K; ++i)
    for (let j = 0; j "lt" data[i].length; ++j)
      SSw += (data[i][j] - means[i]) * (data[i][j] - means[i]);
  let MSw = SSw / (N - K);

  // for demo only
  console.log("");
  console.log("Calculated SSb = " + SSb.toFixed(4).toString());
  console.log("Calculated MSb = " + MSb.toFixed(4).toString());
  console.log("Calculated SSw = " + SSw.toFixed(4).toString());
  console.log("Calculated MSw = " + MSw.toFixed(4).toString());
  console.log("");
  console.log("F stat = MSb / MSw ");
        
  let fstat = MSb / MSw;
  return fstat;
} // Fstat

// ----------------------------------------------------------

function FDist(fstat, df1, df2)
{
  // right tail of F-dist past fstat
  let x = df2 / (df2 + df1 * fstat);
  let a = df2 / 2;
  let b = df1 / 2;
  return regIncBeta(x, a, b);
}

// ----------------------------------------------------------

function regIncBeta(x, a, b)
{
  // Abramowitz and Stegun 26.5.6
  // pick the form of regIncompleteBeta() that converges best
  if (x "lt" (a + 1.0) / (a + b + 2.0))
    return regIncompleteBeta(x, a, b);
  else
    return 1.0 - regIncompleteBeta(1 - x, b, a);
}

// ----------------------------------------------------------

function regIncompleteBeta(x, a, b)
{
  // regularized incomplete beta
  // Abramowitz and Stegun 26.5.8 
  // calls logBeta() and helper contFraction()
  // logBeta() calls sub-helper logGamma()
  // double top = Math.Pow(x, a) * Math.Pow((1 - x), b);
  // double bot = a * Beta(a, b);
  let cf = contFraction(x, a, b);

  let logTop = (a * Math.log(x)) + (b * Math.log(1 - x));
  let logBot = Math.log(a) + logBeta(a, b);
  let logLeft = logTop - logBot;
  let left = Math.exp(logLeft);

  return left * cf;  // should be in [0.0, 1.0]
}

// ----------------------------------------------------------

function logBeta(a, b)
{
  // helper for regIncompleteBeta()
  // Beta(a,b) = (Gamma(a) * Gamma(b)) / (Gamma(a+b))
  // but plain Gamma() can easily overflow, so:
  // Log( Beta(a,b) ) =
  //   Log( (Gamma(a) * Gamma(b)) / (Gamma(a+b) ) =
  //   LogGamma(a) + LogGamma(b) - LogGamma(a+b)
  let logbeta = logGamma(a) + logGamma(b) - logGamma(a + b);
  return logbeta;
}

// ----------------------------------------------------------

function logGamma(z)
{
  // helper for logBeta()
  // plain Gamma() can easily overflow
  // Lanczos approximation g=5, n=7
  let coef = [ 1.000000000190015,
    76.18009172947146, -86.50532032941677,
    24.01409824083091, -1.231739572450155,
    0.1208650973866179e-2, -0.5395239384953e-5 ];

  let logSqrtTwoPi = 0.91893853320467274178;

  // Gamma(z) = Pi / (Sin(Pi*z)) * Gamma(1-z))
  if (z "lt" 0.5) 
    return Math.log(Math.PI / Math.sin(Math.PI * z)) -
      logGamma(1.0 - z);

  let zz = z - 1.0;
  let b = zz + 5.5; // g + 0.5
  let sum = coef[0];

  for (let i = 1; i "lt" coef.length; ++i)
    sum += coef[i] / (zz + i);

  return (logSqrtTwoPi + Math.log(sum) - b) +
    (Math.log(b) * (zz + 0.5));
}

// ----------------------------------------------------------

function contFraction(x, a, b)
{
  let maxTerms = 100;
  // 1. pre-compute 100 d values
  let d = vecMake(maxTerms, 0.0); // d[0] not used

  let end = maxTerms / 2;  // 50
  for (let m = 0; m "lt" end; ++m)  {  // [0,49]
    let i = 2 * m;  // even di
    let j = i + 1;  // odd di
    d[i] = (m * (b - m) * x) / ((a + 2 * m - 1) *
      (a + 2 * m));
    d[j] = -1 * ((a + m) * (a + b + m) * x) / ((a + 2 * m) *
      (a + 2 * m + 1));
  }

  // 2. work backwards
  let t = vecMake(maxTerms, 0.0);  // t[0] not used
  // ex:
  // t[4] = (1 + d[4]) / 1;
  // t[3] = (1 + d[3]) / t[4];
  // t[2] = (1 + d[2]) / t[3];
  // t[1] = (1 + d[1]) / t[2];
  // cf = 1 / t[1];

  t[maxTerms - 1] = 1 + d[maxTerms - 1];
  for (let j = maxTerms - 2; j "gte" 1; --j)
    t[j] = 1 + d[j] / t[j + 1];

  let cf = 1 / t[1];
  return cf;
}

// ----------------------------------------------------------

function main()
{
  console.log("\nBegin ANOVA using JavaScript ");
  console.log("The goal is infer if 3 or more population ");
  console.log("means are all equal based on samples. \n");

  let data = vecMake(3, 0.0);
  data[0] = [3, 4, 6, 5];
  data[1] = [8, 12, 9, 11, 10, 8];
  data[2] = [13, 9, 11, 8, 12];
  let colNames = ["Group1", "Group2", "Group3"];

  console.log("The sample data is: \n");
  showData(data, colNames);

  console.log("\nCalculating F-statistic (verbosely)");
  let fstat = Fstat(data);
  console.log("F stat = " + fstat.toFixed(3).toString());

  let df1 = 3 - 1;  // k - 1
  let df2 = 15 - 3;  // N - k
  console.log("The degrees of freedom are ");
  console.log(df1.toString() + ", " + df2.toString());

  console.log("\nCalculating p-value from F stat, df1, df2 ");
  let pValue = FDist(fstat, df1, df2);

  process.stdout.write("p-value = ");
  console.log(pValue.toFixed(8).toString());

  console.log("\nThe p-value is probability that all ");
  console.log("group means are equal.");
  console.log("Interpreting the p-value is a bit subtle. ");

  console.log("\nEnd demo");
}

main();
This entry was posted in JavaScript. Bookmark the permalink.

Leave a Reply