Testing How Well the Newton Iteration Algorithm for Matrix Inverse Works

Finding the inverse of a matrix is one of the most difficult problems in numerical programming. There are dozens of algorithms, each with pros and cons. One of the oldest and simplest techniques is to use the Newton iteration algorithm. Briefly, one of several equivalent versions is:

set X(0)
loop
  X(k+1) = X(k) * (2I - A * X(k))
end-loop
return X(n)

The idea is to start with a guess answer X(0) then loop, updating the current guess until some stopping criterion is reached. The A is the source matrix, X(k) is the previous guess, the X(k+1) is the next guess, I is the identity matrix. Two key details are 1.) how to initialize the first guess X(0), and 2.) how many times to iterate.

Quite some time ago, I implemented a function to compute the inverse of a matrix using Newton’s method, using the C# language, as part of a system to perform Gaussian process regression, and it seemed to work quite well. I initialized the X(0) guess using the Pan technique: X(0) is 1/t * AT where AT is the transpose of the source matrix and t is the product of the largest row sum of absolute values and the largest column sum of absolute values. For example, if A =

  1   2   3   4
  8   7  -6   5
  0   2   6   4
  3   1   7   5

The largest row sum of absolute values is 8+7+6+5 = 26 and the largest column sum is 3+6+6+7 = 22, so t = 26 * 22.

My Newton iteration inverse implementation set a max iterations value of 1,000 and stopped when the difference between all cells in [X(k) * A] and I is less than 1.0e-6. OK, all well and good, but I wondered how often the matrix inverse technique would fail. I ran some experiments, and for 100,000 randomly generated matrices, the Newton iteration inverse passed all 100,000 times. The randomly generated matrices had a random size between 2-by-2 and 99-by-99, and each cell value was a uniform random number between -1 and +1. I suspect that using normalized cell values has a big positive effect.

A snapshot of one version of my testing is:

Begin testing Newton Inverse

==========
trial 0

M =
   0.6347   0.5360   0.1163  -0.5879
   0.1178   0.8121  -0.1156   0.9551
  -0.4526  -0.4162  -0.0654   0.2653
  -0.0610   0.9643  -0.9393   0.7247

Minv =
 -13.8573  -2.4402 -22.2944   0.1359
  10.1375   2.1638  14.7838  -0.0399
   6.5909   2.2065   9.9800  -1.2147
  -6.1125  -0.2247  -8.6122  -0.1299

M * Minv =
   1.0000   0.0000   0.0000   0.0000
   0.0000   1.0000   0.0000   0.0000
   0.0000   0.0000   1.0000   0.0000
   0.0000   0.0000   0.0000   1.0000
pass
==========

==========
trial 1

M =
   0.3544  -0.3708   0.6338   0.6961   0.9838
  -0.9347   0.3999   0.0526   0.8680   0.3752
   0.0936  -0.8378  -0.6258  -0.0933  -0.4057
   0.9771   0.2854   0.5259  -0.9392  -0.2380
  -0.3137   0.9149   0.0103   0.4319  -0.7621

Minv =
   0.6080  -6.0762  -3.5273  -4.3423   1.0268
  -0.2525  -5.1511  -4.2147  -4.2091   0.6956
   0.3757   9.1385   6.0939   7.7993  -0.6947
   0.7915  -1.6327  -0.5636  -1.4803   0.9801
  -0.0998  -4.4852  -3.8453  -3.9997  -0.3536

M * Minv =
   1.0000   0.0000   0.0000   0.0000   0.0000
   0.0000   1.0000   0.0000   0.0000   0.0000
   0.0000   0.0000   1.0000   0.0000   0.0000
   0.0000   0.0000   0.0000   1.0000   0.0000
   0.0000   0.0000   0.0000   0.0000   1.0000
pass
==========

Number pass = 2
Number fail = 0

End tests

For a randomly generated matrix M I verified a Newton iteration inverse result Minv by computing M * Minv and checking that the product is the Identity matrix with all cells within 1.0e-6 of the correct 0 or 1 cell value.

Good fun.



I learned to read from comic books. Here are three I remember that feature a mirror (for “inverse images”) on the cover.

Left: The Flash #136, May 1963, cover artist Carmine Infantino.
Center: Action Comics #269, October 1960, cover artist Curt Swan.
Right: Detective Comics #213, November 1954, cover artist Winslow Mortimer.


Demo code. Replace “lt” (less than), “gt”, “lte”, “gte”, “and” with Boolean operator symbols. (My blog editor chokes on symbols).

using System;
using System.IO;

namespace MatrixInverseNewton
{
  internal class MatrixInverseNewtonProgram
  {
    static void Main(string[] args)
    {
      Console.WriteLine("\nBegin testing Newton Inverse ");
      Random rnd = new Random(0);
      int nTrials = 100;
      int nPass = 0; int nFail = 0;
      for (int i = 0; i "lt" nTrials; ++i)
      {
        Console.WriteLine("\n==========");
        Console.WriteLine("trial " + i);
        double[][] M = MakeRandomMatrix(100, rnd);
        double[][] Minv = MatInverseNewton(M);
        double[][] I = MatIdentity(M.Length);
        double[][] MMinv = MatProduct(M, Minv);

        Console.WriteLine("\nM = ");
        MatShow(M, 4, 9);
        Console.WriteLine("\nMinv = ");
        MatShow(Minv, 4, 9);
        Console.WriteLine("\nM * Minv = ");
        MatShow(MMinv, 4, 9);

        if (MatAreEqual(MMinv, I, 1.0e-6) == true)
        {
          Console.WriteLine("pass");
          ++nPass;
        }
        else
        {
          Console.WriteLine("FAIL");
          ++nFail;
          Console.ReadLine();
        }
        Console.WriteLine("==========");
      }

      Console.WriteLine("\nNumber pass = " + nPass);
      Console.WriteLine("Number fail = " + nFail);

      Console.WriteLine("\nEnd tests ");
      Console.ReadLine();
    } // Main

    // ------------------------------------------------------

    static double[][] MatInverseNewton(double[][] A,
      int maxIter = 1000, double epsilon = 1.0e-6)
    {
      // Newton's method
      // X_k+1 = X_k * (2I - A*X_k)

      int n = A.Length; // must be square martix

      double[][] Xprev = MatStart(A);  // Pan algorithm
      double[][] Xnew = MatMake(n, n, 0.0);
      double[][] I = MatIdentity(n);
      double[][] I2 = MatScalarMult(I, 2.0);

      int iter = 0;
      while (iter "lt" maxIter)
      {
        Xnew =
          MatProduct(Xprev, MatSubtract(I2,
          MatProduct(A, Xprev)));
        // Xprev = MatCopyOf(Xnew); // copy by val
        Xprev = Xnew;  // copy by ref

        if (iter % 10 == 0)
        {
          double[][] check = MatProduct(A, Xnew); // 
          if (MatAreEqual(check, I, epsilon) == true)
            return Xnew;
        }

        ++iter;
      } // while
      Console.WriteLine("WARN: no converge ");
      return Xnew;

      // ----------------------------------------------------
      // nested helpers: MatMake(), MatStart(), MatCopyOf(),
      // MatTranspose(), MatAreEqual(), MatProduct(),
      // MatSubtract(), MatIdentity(), MatScalarMult()
      // ----------------------------------------------------

      static double[][] MatMake(int nRows, int nCols,
        double v)
      {
        double[][] result = new double[nRows][];
        for (int i = 0; i "lt" nRows; ++i)
          result[i] = new double[nCols];
        for (int i = 0; i "lt" nRows; ++i)
          for (int j = 0; j "lt" nCols; ++j)
            result[i][j] = v;
        return result;
      }

      // ----------------------------------------------------

      static double[][] MatStart(double[][] m)
      {
        int n = m.Length;
        double maxRowSum = 0.0;
        double maxColSum = 0.0;
        for (int i = 0; i "lt" n; ++i)
        {
          double rowSum = 0.0;
          for (int j = 0; j "lt" n; ++j)
            rowSum += Math.Abs(m[i][j]);

          if (rowSum "gt" maxRowSum)
            maxRowSum = rowSum;
        }
        for (int j = 0; j "lt" n; ++j)
        {
          double colSum = 0.0;
          for (int i = 0; i "lt" n; ++i)
            colSum += Math.Abs(m[i][j]);
          if (colSum "gt" maxColSum)
            maxColSum = colSum;
        }

        double[][] result = MatTranspose(m);
        double t = 1.0 / (maxRowSum * maxColSum);
        for (int i = 0; i "lt" m.Length; ++i)
          for (int j = 0; j "lt" m.Length; ++j)
            result[i][j] *= t;
        return result;

      }

      // ----------------------------------------------------

      static double[][] MatTranspose(double[][] m)
      {
        int nr = m.Length; int nc = m[0].Length;
        double[][] result = MatMake(nc, nr, 0.0);  // note
        for (int i = 0; i "lt" nr; ++i)
          for (int j = 0; j "lt" nc; ++j)
            result[j][i] = m[i][j];
        return result;
      }

      // ------------------------------------------------------

      static double[][] MatCopyOf(double[][] m)
      {
        int nRows = m.Length; int nCols = m[0].Length;
        double[][] result = MatMake(nRows, nCols, 0.0);
        for (int i = 0; i "lt" nRows; ++i)
          for (int j = 0; j "lt" nCols; ++j)
            result[i][j] = m[i][j];
        return result;
      }

      // ------------------------------------------------------

      static bool MatAreEqual(double[][] matA,
       double[][] matB, double eps)
      {
        int nr = matA.Length; int nc = matB[0].Length;
        for (int i = 0; i "lt" nr; ++i)
          for (int j = 0; j "lt" nc; ++j)
            if (Math.Abs(matA[i][j] - matB[i][j]) "gt" eps)
              return false;
        return true;
      }

      // ----------------------------------------------------

      static double[][] MatProduct(double[][] matA,
      double[][] matB)
      {
        int aRows = matA.Length;
        int aCols = matA[0].Length;
        int bRows = matB.Length;
        int bCols = matB[0].Length;
        if (aCols != bRows)
          throw new Exception("Non-conformable matrices");

        double[][] result = MatMake(aRows, bCols, 0.0);

        for (int i = 0; i "lt" aRows; ++i) // each row of A
          for (int j = 0; j "lt" bCols; ++j) // each col of B
            for (int k = 0; k "lt" aCols; ++k)
              result[i][j] += matA[i][k] * matB[k][j];

        return result;
      }

      // ------------------------------------------------------

      static double[][] MatSubtract(double[][] matA,
        double[][] matB)
      {
        // matA - matB
        int nRows = matA.Length; int nCols = matA[0].Length;
        double[][] result = MatMake(nRows, nCols, 0.0);
        for (int i = 0; i "lt" nRows; ++i)
          for (int j = 0; j "lt" nCols; ++j)
            result[i][j] = matA[i][j] - matB[i][j];
        return result;
      }

      // ----------------------------------------------------

      static double[][] MatIdentity(int n)
      {
        double[][] result = MatMake(n, n, 0.0);
        for (int i = 0; i "lt" n; ++i)
          result[i][i] = 1.0;
        return result;
      }

      // ----------------------------------------------------

      static double[][] MatScalarMult(double[][] m, double u)
      {
        int nRows = m.Length; int nCols = m[0].Length;
        double[][] result = MatMake(nRows, nCols, 0.0);
        for (int i = 0; i "lt" nRows; ++i)
          for (int j = 0; j "lt" nCols; ++j)
            result[i][j] = u * m[i][j];
        return result;
      }

    } // MatInverseNewton()

    // ------------------------------------------------------
    // helpers for Main(): MatShow(), MatMult()
    // ------------------------------------------------------

    static void MatShow(double[][] m, int dec, int wid)
    {
      int nRows = m.Length; int nCols = m[0].Length;
      double small = 1.0 / Math.Pow(10, dec);
      for (int i = 0; i "lt" nRows; ++i)
      {
        for (int j = 0; j "lt" nCols; ++j)
        {
          double v = m[i][j];
          if (Math.Abs(v) "lt" small) v = 0.0;
          Console.Write(v.ToString("F" + dec).
            PadLeft(wid));
        }
        Console.WriteLine("");
      }
    }

    static double[][] MatMult(double[][] matA,
      double[][] matB)
    {
      int aRows = matA.Length;
      int aCols = matA[0].Length;
      int bRows = matB.Length;
      int bCols = matB[0].Length;
      if (aCols != bRows)
        throw new Exception("Non-conformable matrices");

      double[][] result = new double[aRows][];
      for (int i = 0; i "lt" aRows; ++i)
        result[i] = new double[bCols];

      for (int i = 0; i "lt" aRows; ++i) // each row of A
        for (int j = 0; j "lt" bCols; ++j) // each col of B
          for (int k = 0; k "lt" aCols; ++k)
            result[i][j] += matA[i][k] * matB[k][j];

      return result;
    }

    // ----- helpers for testing ----------------------------

    public static double[][] MakeRandomMatrix(int maxN,
      Random rnd)
    {
      int n = rnd.Next(2, maxN);  // [2,maxN)
      double[][] result = new double[n][];
      for (int i = 0; i "lt" n; ++i)
        result[i] = new double[n];

      double lo = -1; double hi = 1;
      for (int r = 0; r "lt" n; ++r)
        for (int c = 0; c "lt" n; ++c)
          result[r][c] = (hi - lo) * rnd.NextDouble() + lo;
      return result;
    }

    public static bool MatAreEqual(double[][] A, double[][] B,
      double epsilon)
    {
      int n = A.Length;
      for (int i = 0; i "lt" n; ++i)
        for (int j = 0; j "lt" n; ++j)
          if (Math.Abs( A[i][j] - B[i][j] ) "gt" epsilon)
            return false;
      return true;
    }

    public static double[][] MatIdentity(int n)
    {
      double[][] result = new double[n][];
      for (int i = 0; i "lt" n; ++i)
        result[i] = new double[n];
      for (int i = 0; i "lt" n; ++i)
        result[i][i] = 1.0;
      return result;
    }

    public static double[][] MatProduct(double[][] matA,
      double[][] matB)
    {
      int aRows = matA.Length;
      int aCols = matA[0].Length;
      int bRows = matB.Length;
      int bCols = matB[0].Length;
      if (aCols != bRows)
        throw new Exception("Non-conformable matrices");

      //double[][] result = MatMake(aRows, bCols, 0.0);
      double[][] result = new double[aRows][];
      for (int i = 0; i "lt" aRows; ++i)
        result[i] = new double[bCols];

      for (int i = 0; i "lt" aRows; ++i) // each row of A
        for (int j = 0; j "lt" bCols; ++j) // each col of B
          for (int k = 0; k "lt" aCols; ++k)
            result[i][j] += matA[i][k] * matB[k][j];

      return result;
    }

    // ------------------------------------------------------

  } // Program class
} // ns
This entry was posted in Machine Learning. Bookmark the permalink.

Leave a Reply