Matrix Inverse Using Cholesky Decomposition: Two Approaches Using C#

Computing the inverse of a matrix using Cholesky decomposition illustrates many factors in low level software design. I put together two versions of a matrix inverse function, using C#.

The first version uses seven helper functions and has a total of about 170 lines of code. The second version computes the inverse directly, without using any helper functions. It has about 60 lines of code. The first version is easier to understand, easier to test, and easier to modify. The second version is more efficient and has significantly fewer lines of code.

The output of the demo is:

Begin matrix inverse using Cholesky decmposition

Generating square symmetic positive definite matrix M

Matrix M:
   120.0000    46.0000    82.0000
    46.0000    75.0000   -14.0000
    82.0000   -14.0000   127.0000

Cholesky inverse M (direct):
     0.0387    -0.0290    -0.0282
    -0.0290     0.0354     0.0226
    -0.0282     0.0226     0.0286

Cholesky inverse (helpers):
     0.0387    -0.0290    -0.0282
    -0.0290     0.0354     0.0226
    -0.0282     0.0226     0.0286

The check matrix (M * Minv) should be I:
     1.0000     0.0000    -0.0000
     0.0000     1.0000    -0.0000
     0.0000     0.0000     1.0000

End demo

Computing the inverse of a matrix using Cholesky inverse applies only to matrices that are square, symmetric, positive definite. In machine learning, this occurs mostly in two scenarios: 1.) solving for model weights using left pseudo-inverse via normal equations (linear regression, quadratic regression, etc.), 2.) solving for model weights using a kernel covariance matrix (kernel ridge regression, Gaussian process regression, etc.)

Here’s the short version that uses helpers:

static double[][] MatInvCholesky2(double[][] A)
{
  // inv(A) = inv(L * Lt) = inv(Lt) * inv(L)
  double[][] L = MatDecompCholesky(A);
  double[][] Lt = MatTranspose(L);
  double[][] invL = MatInvLowerTri(L);
  double[][] invLt = MatInvUpperTri(Lt);
  double[][] result = MatProd(invLt, invL);
  return result;
}

The long version that uses no helpers looks like:

static double[][] MatInvCholesky1(double[][] A)
{
  // 1. decompose to L
  int m = A.Length; int n = A[0].Length;  // m == n
  double[][] L = new double[n][];
  . . . .
  // about 170  lines of tricky code here
  return result;
}

There’s no big moral to the story. But understanding design decisions like this are an important part of software engineering, even in a world where AI generates the code.



There’s an element of subjectivity when it comes to software design. In animated films, character design is important. Three of my favorite stop-motion animated films have (in my opinion) excellent character design.

Left: “Coraline” (2009) tells the story ofa young girl who discovers a gateway to a strange world, which at first appears prefect, but hides a dark truth. I give this movie my personal solid A grade.

Center: “Isle of Dogs” (2018) tells the story of Atari Kobayashi, a young boy who is looking for his dog, in a world where all dogs have been exiled to an island. Wildly creative story. I give this movie my personal A- grade.

Right: “Rango” (2011) tells the story of a pet chameleon who has adventures in the Old West. Very entertaining movie. I always thought this was a stop-motion film, but it’s actually 100% digital. I give the movie my personal B+ grade.


Demo program. Replace “lt” (less than), “gt”, “lte”, “gte” with Boolean operator symbols (my blog editor chokes on symbols).

using System;
using System.IO;
using System.Collections.Generic;

namespace MatrixInverseCholesky
{
  internal class Program
  {
    static void Main(string[] args)
    {
      Console.WriteLine("\nBegin matrix inverse" +
        " using Cholesky decmposition ");

      double[][] A = new double[4][];
      A[0] = new double[] { 4.0, 7.0, 1.0 };
      A[1] = new double[] { 6.0, 0.0, 3.0 };
      A[2] = new double[] { 8.0, 1.0, 9.0 };
      A[3] = new double[] { 2.0, 5.0, -6.0 };

      Console.WriteLine("\nGenerating square symmetic " +
        "positive definite matrix M ");

      double[][] M = MatProd(MatTranspose(A), A);
      for (int i = 0; i "lt" M.Length; ++i)
        M[i][i] += 1.0e-8;
      Console.WriteLine("\nMatrix M: ");
      MatShow(M, 4, 11);

      double[][] Minv1 = MatInvCholesky1(M);
      Console.WriteLine("\nCholesky inverse M (direct): ");
      MatShow(Minv1, 4, 11);

      double[][] Minv2 = MatInvCholesky2(M);
      Console.WriteLine("\nCholesky inverse (helpers): ");
      MatShow(Minv2, 4, 11);

      double[][] C = MatProd(M, Minv1);
      Console.WriteLine("\nThe check matrix " +
        "(M * Minv) should be I: ");
      MatShow(C, 4, 11);

      Console.WriteLine("\nEnd demo ");
      Console.ReadLine();

    } // Main()

    // ------------------------------------------------------

    static double[][] MatInvCholesky1(double[][] A)
    {
      // no helper functions version
      // A must be square, symmetric, positive definite
      // 1. decompose to L
      int m = A.Length; int n = A[0].Length;  // m == n
      double[][] L = new double[n][];
      for (int i = 0; i "lt" n; ++i)
        L[i] = new double[n];

      for (int i = 0; i "lt" n; ++i)
      {
        for (int j = 0; j "lte" i; ++j)
        {
          double sum = 0.0;
          for (int k = 0; k "lt" j; ++k)
            sum += L[i][k] * L[j][k];
          if (i == j)
          {
            double tmp = A[i][i] - sum;
            if (tmp "lt" 0.0)
              throw new
                Exception("decomp Cholesky fatal");
            L[i][j] = Math.Sqrt(tmp);
          }
          else
          {
            if (L[j][j] == 0.0)
              throw new
                Exception("decomp Cholesky fatal ");
            L[i][j] = (A[i][j] - sum) / L[j][j];
          }
        } // j
      } // i

      // 2. compute inverse from L
      double[][] result = new double[n][];  // make Identity
      for (int i = 0; i "lt" n; ++i)
        result[i] = new double[n];
      for (int i = 0; i "lt" n; ++i)
        result[i][i] = 1.0;

      for (int k = 0; k "lt" n; ++k)
      {
        for (int j = 0; j "lt" n; j++)
        {
          for (int i = 0; i "lt" k; i++)
          {
            result[k][j] -= result[i][j] * L[k][i];
          }
          result[k][j] /= L[k][k];
        }
      }

      for (int k = n - 1; k "gte" 0; --k)
      {
        for (int j = 0; j "lt" n; j++)
        {
          for (int i = k + 1; i "lt" n; i++)
          {
            result[k][j] -= result[i][j] * L[i][k];
          }
          result[k][j] /= L[k][k];
        }
      }
      return result;
    } // MatInvCholesky()

    // ------------------------------------------------------

    static double[][] MatInvCholesky2(double[][] A)
    {
      // version with helper functions
      // A must be square symmetric positive definite
      // inv(A) = inv(L * Lt) = inv(Lt) * inv(L)
      // calls MatDecompCholesky, MatTranspose,
      // MatInvLowerTri, MatInvUpperTri, MatProd
      // MatIdentity, MatMake

      double[][] L = MatDecompCholesky(A);
      double[][] Lt = MatTranspose(L);
      double[][] invL = MatInvLowerTri(L);
      double[][] invLt = MatInvUpperTri(Lt);
      double[][] result = MatProd(invLt, invL);
      return result;
    }

    // ------------------------------------------------------

    static double[][] MatInvLowerTri(double[][] lower)
      {
        // inverse of lower triangular non-fancy version
        int n = lower.Length;  // must be square matrix
        double[][] result = MatIdentity(n);

        for (int k = 0; k "lt" n; ++k)
        {
          for (int j = 0; j "lt" n; ++j)
          {
            for (int i = 0; i "lt" k; ++i)
            {
              result[k][j] -= result[i][j] * lower[k][i];
            }
            result[k][j] /= lower[k][k];
          }
        }
        return result;
      }

    // ------------------------------------------------------

    static double[][] MatInvUpperTri(double[][] U)
      {
        int n = U.Length;  // must be square matrix
        double[][] result = MatIdentity(n);

        for (int k = 0; k "lt" n; ++k)
        {
          for (int j = 0; j "lt" n; ++j)
          {
            for (int i = 0; i "lt" k; ++i)
            {
              result[j][k] -= result[j][i] * U[i][k];
            }
            result[j][k] /= U[k][k];
          }
        }
        return result;
      }

    // ------------------------------------------------------

    static double[][] MatDecompCholesky(double[][] M)
    {
      // Cholesky decomposition (Banachiewicz algorithm)
      // M is square, symmetric, positive definite
      // (conditioned too)
      int n = M.Length;
      double[][] result = MatMake(n, n);  // all 0.0
      for (int i = 0; i "lt" n; ++i)
      {
        for (int j = 0; j "lte" i; ++j)
        {
          double sum = 0.0;
          for (int k = 0; k "lt" j; ++k)
            sum += result[i][k] * result[j][k];
          if (i == j)
          {
            double tmp = M[i][i] - sum;
            if (tmp "lt" 0.0)
              throw new
                Exception("MatDecompCholesky fatal");
            result[i][j] = Math.Sqrt(tmp);
          }
          else
          {
            if (result[j][j] == 0.0)
              throw new
                Exception("MatDecompCholesky fatal ");
            result[i][j] =
              (1.0 / result[j][j] * (M[i][j] - sum));
          }
        } // j
      } // i
      return result;
    } // MatDecompCholesky

    // ------------------------------------------------------

    static double[][] MatIdentity(int n)
    {
      double[][] result = MatMake(n, n);
      for (int i = 0; i "lt" n; ++i)
        result[i][i] = 1.0;
      return result;
    }

    // ------------------------------------------------------

    // helpers for Main to set up problem, show result

    // ------------------------------------------------------

    static double[][] MatMake(int nRows, int nCols)
    {
      double[][] result = new double[nRows][];
      for (int i = 0; i "lt" nRows; ++i)
        result[i] = new double[nCols];
      return result;
    }

    // ------------------------------------------------------

    static double[][] MatTranspose(double[][] m)
    {
      int nr = m.Length;
      int nc = m[0].Length;
      double[][] result = MatMake(nc, nr);  // note
      for (int i = 0; i "lt" nr; ++i)
        for (int j = 0; j "lt" nc; ++j)
          result[j][i] = m[i][j];
      return result;
    }

    // ------------------------------------------------------

    static double[][] MatProd(double[][] A,
        double[][] B)
    {
      int aRows = A.Length;
      int aCols = A[0].Length;
      int bRows = B.Length;
      int bCols = B[0].Length;
      if (aCols != bRows)
        throw new Exception("Non-conformable matrices");

      double[][] result = MatMake(aRows, bCols);
      for (int i = 0; i "lt" aRows; ++i) // each row of A
        for (int j = 0; j "lt" bCols; ++j) // each col of B
          for (int k = 0; k "lt" aCols; ++k)
            result[i][j] += A[i][k] * B[k][j];

      return result;
    }

    // ------------------------------------------------------

    static void MatShow(double[][] M, int dec, int wid)
    {
      for (int i = 0; i "lt" M.Length; ++i)
      {
        for (int j = 0; j "lt" M[0].Length; ++j)
        {
          double v = M[i][j];
          Console.Write(v.ToString("F" + dec).
            PadLeft(wid));
        }
        Console.WriteLine("");
      }
    }

    // ------------------------------------------------------

    // function to load from file -- not used this demo

    static double[][] MatLoad(string fn, int[] usecols,
      char sep, string comment)
    {
      List"lt"double[]"gt" result =
        new List"lt"double[]"gt"();
      string line = "";
      FileStream ifs = new FileStream(fn, FileMode.Open);
      StreamReader sr = new StreamReader(ifs);
      while ((line = sr.ReadLine()) != null)
      {
        if (line.StartsWith(comment) == true)
          continue;
        string[] tokens = line.Split(sep);
        List"lt"double"gt" lst = new List"lt"double"gt"();
        for (int j = 0; j "lt" usecols.Length; ++j)
          lst.Add(double.Parse(tokens[usecols[j]]));
        double[] row = lst.ToArray();
        result.Add(row);
      }
      sr.Close(); ifs.Close();
      return result.ToArray();
    }

    // ------------------------------------------------------

  } // Program

} // ns
This entry was posted in Machine Learning. Bookmark the permalink.

Leave a Reply