Singular Value Decomposition (SVD) From Scratch Using Python

If you have a matrix A and apply singular value decomposition (SVD), you get a matrix U, a vector s of singular values, and a matrix Vh such that U * S * Vh = A, where S is the diagonal matrix with the elements of s on its diagonal.
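As a quick sanity check of the factorization, here is a minimal sketch using NumPy's built-in SVD (the small 3x2 matrix is just an arbitrary example):

```python
import numpy as np

# verify that U * S * Vh reconstructs A, using NumPy's built-in SVD
A = np.array([[1.0, 2.0],
              [3.0, 4.0],
              [5.0, 6.0]])
U, s, Vh = np.linalg.svd(A, full_matrices=False)  # "economy" SVD
S = np.diag(s)  # place the singular values on the diagonal
print(np.allclose(U @ S @ Vh, A))  # True
```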

SVD is mathematically complex and there are several algorithms to compute it. Just for fun, I decided to refactor one of the GNU Scientific Library (GSL) implementations of SVD into Python / NumPy. The version I refactored is the one-sided Jacobi algorithm. It's at github.com/ampl/gsl/blob/master/linalg/svd.c on GitHub.
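The core step of the Jacobi algorithm is a plane rotation that makes two columns of the working matrix orthogonal. Here is an illustrative sketch of a single rotation, using the same angle formulas as the GSL code (the 3x2 matrix is an arbitrary example where column j dominates column k):

```python
import numpy as np

# one Jacobi plane rotation: make columns j and k of A orthogonal
A = np.array([[2.0, 3.0],
              [4.0, 1.0],
              [1.0, 2.0]])
j, k = 0, 1
p = 2 * np.dot(A[:, j], A[:, k])                       # 2 * (cj . ck)
q = np.dot(A[:, j], A[:, j]) - np.dot(A[:, k], A[:, k])  # ||cj||^2 - ||ck||^2
v = np.hypot(p, q)
cosine = np.sqrt((v + q) / (2.0 * v))
sine = p / (2.0 * v * cosine)
Aj = A[:, j].copy(); Ak = A[:, k].copy()
A[:, j] = Aj * cosine + Ak * sine
A[:, k] = -Aj * sine + Ak * cosine
print(np.dot(A[:, j], A[:, k]))  # approximately 0.0
```

Sweeping this rotation over all column pairs, repeatedly, drives the whole matrix toward orthogonal columns; the column norms then become the singular values.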


I used the np.linalg.svd() function to validate my from-scratch Python SVD function. The results are identical except for the signs of some columns of U (and the corresponding rows of Vh), which is OK because an SVD is unique only up to those signs.
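To compare two SVD results programmatically, you have to allow for that sign ambiguity. A minimal sketch of a sign-insensitive comparison (the helper name is my own, not part of the demo):

```python
import numpy as np

def same_up_to_sign(M1, M2, atol=1.0e-8):
  # compare matrices column by column, allowing each column
  # of one matrix to be the negation of the other's
  for j in range(M1.shape[1]):
    same = np.allclose(M1[:, j], M2[:, j], atol=atol)
    flipped = np.allclose(M1[:, j], -M2[:, j], atol=atol)
    if not (same or flipped):
      return False
  return True

U1 = np.array([[0.6, 0.8], [0.8, -0.6]])
U2 = np.array([[0.6, -0.8], [0.8, 0.6]])  # second column negated
print(same_up_to_sign(U1, U2))  # True
```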

The refactoring process took much longer than I expected, mostly because of many small errors I made due to the complexity of the algorithm. Calling my from-scratch version looks like:

# set up source matrix
A = np.array([
    [1, 2, 3],
    [5, 0, 2],
    [8, 5, 4],
    [6, 9, 7]], dtype=np.float64)

print("\nSource matrix: ")
print(A)

# apply from-scratch SVD
U, s, Vh = svd_jacobi(A)

print("\nU = "); print(U)
print("\ns = "); print(s)
print("\nVh = "); print(Vh)

SVD has many uses in classical statistics and machine learning. For example, SVD can be used to perform principal component analysis (PCA), to compute the inverse of a square matrix, and to compute the pseudo-inverse of a non-square matrix.
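For instance, the pseudo-inverse falls out of the SVD almost for free: invert the nonzero singular values and transpose the factors. A minimal sketch (the function name and tolerance are my own choices):

```python
import numpy as np

def pinv_via_svd(A, tol=1.0e-10):
  # pseudo-inverse of A as V * diag(1/s) * U^T,
  # treating singular values below tol as zero
  U, s, Vh = np.linalg.svd(A, full_matrices=False)
  s_inv = np.where(s > tol, 1.0 / s, 0.0)
  return Vh.T @ np.diag(s_inv) @ U.T

A = np.array([[1.0, 2.0, 3.0],
              [5.0, 0.0, 2.0],
              [8.0, 5.0, 4.0],
              [6.0, 9.0, 7.0]])
P = pinv_via_svd(A)
print(np.allclose(P, np.linalg.pinv(A)))  # True
```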

Good fun!



SVD from scratch is beautiful to my eye. Here are two nice images that combine math and a kind of artistic decomposition.


Complete demo code:

# svd_scratch.py
# Jacobi SVD algorithm from scratch Python / NumPy
# see github.com/ampl/gsl/blob/master/linalg/svd.c

import numpy as np

def svd_jacobi(M):
  DBL_EPSILON = 1.0e-15  # approximately
  A = np.copy(M)  # working copy U
  m = len(A)
  n = len(A[0])

  Q = np.eye(n)  # working copy V
  t = np.zeros(n)  # working copy s

  # init counters
  count = 1
  sweep = 0
  sweep_max = max(5 * n, 12)  # heuristic

  tolerance = 10 * m * DBL_EPSILON  # heuristic
  # store the column error estimates in t
  for j in range(n):
    cj = A[:, j]  # get col j
    sj = np.linalg.norm(cj)
    t[j] = DBL_EPSILON * sj

  # orthogonalize A by plane rotations
  while count > 0 and sweep <= sweep_max:
    # initialize rotation counter
    count = n * (n - 1) // 2  # integer division
    for j in range(n-1):
      for k in range(j+1, n):
        cj = A[:, j]
        ck = A[:, k]
        p = 2 * np.dot(cj, ck)
        a = np.linalg.norm(cj)
        b = np.linalg.norm(ck)

        # test for columns j,k orthogonal,
        # or dominant errors 
        abserr_a = t[j]
        abserr_b = t[k]

        q = (a * a) - (b * b)
        v = np.sqrt(p**2 + q**2)  # hypot()
 
        is_sorted = (a >= b)  # avoid shadowing built-in sorted()
        orthog = (abs(p) <= tolerance * (a * b))
        noisya = (a < abserr_a)
        noisyb = (b < abserr_b)

        if is_sorted and (orthog or noisya or noisyb):
          count -= 1
          continue

        # calculate rotation angles
        if v == 0 or not is_sorted:
          cosine = 0.0
          sine = 1.0
        else:
          cosine = np.sqrt((v + q) / (2.0 * v))
          sine = p / (2.0 * v * cosine)

        # apply rotation to A (U)
        for i in range(m):
          Aik = A[i][k]
          Aij = A[i][j]
          A[i][j] = Aij * cosine + Aik * sine
          A[i][k] = -Aij * sine + Aik * cosine

        # update column error estimates
        t[j] = abs(cosine) * abserr_a + \
          abs(sine) * abserr_b
        t[k] = abs(sine) * abserr_a + \
          abs(cosine) * abserr_b

        # apply rotation to Q (V)
        for i in range(n):
          Qij = Q[i][j]
          Qik = Q[i][k]
          Q[i][j] = Qij * cosine + Qik * sine
          Q[i][k] = -Qij * sine + Qik * cosine

    sweep += 1
  # while

  # compute singular values
  prev_norm = -1.0
  for j in range(n):
    column = A[:, j]  # by ref
    norm = np.linalg.norm(column)
    # determine if singular value is zero
    if norm == 0.0 or prev_norm == 0.0 or \
      (j > 0 and norm <= tolerance * prev_norm):
      t[j] = 0.0
      for i in range(len(column)):
        column[i] = 0.0  # updates A indirectly
      prev_norm = 0.0
    else:
      t[j] = norm
      for i in range(len(column)):
        column[i] = column[i] * (1.0 / norm)
      prev_norm = norm

  if count > 0:
    print("Warning: Jacobi iterations did not converge")

  U = A  # mxn
  s = t
  Vh = np.transpose(Q)

  if m < n:
    U = U[:, 0:m]
    s = t[0:m]
    Vh = Vh[0:m, :]

  return U, s, Vh

# -----------------------------------------------------------

def main():
  print("\nBegin SVD from scratch Python ")
  np.set_printoptions(precision=4, suppress=True,
    floatmode='fixed')

  A = np.array([
    [1, 2, 3],
    [5, 0, 2],
    [8, 5, 4],
    [6, 9, 7]], dtype=np.float64)

  # m < n example
  # A = np.array([
  #   [1, 2, 3],
  #   [5, 0, 2]], dtype=np.float64)

  print("\nSource matrix: ")
  print(A)

  U, s, Vh = svd_jacobi(A)

  print("\nU = "); print(U)
  print("\ns = "); print(s)
  print("\nVh = "); print(Vh)

  U, s, Vh = np.linalg.svd(A, full_matrices=False)
  print("\nUsing linalg.svd(): ")
  print("\nU = "); print(U)
  print("\ns = "); print(s)
  print("\nVh = "); print(Vh)

  print("\nEnd demo ")

if __name__ == "__main__":
  main()