Loading Data From a Local Text File into a Matrix Using Client-Side JavaScript

An interesting machine learning scenario is running a prediction system, such as kernel ridge regression, in a browser. All machine learning systems begin with the data. In a Web environment, you can use data stored on the Web server, or data stored externally such as a SQL database, or data stored locally on the user’s machine.

I set out to put together a demo of a Web page that allows users to point to a local text file of training data, reads the file, and parses its contents into a numeric matrix of predictor x values and a numeric vector of target y values. The training data could then be used by an ML prediction system, but that’s a problem for another day.

There are two main tasks. The first task is to read data from a text file into memory as one giant string with newline characters embedded. The second task is to parse the string into a 2D matrix of predictor values and a 1D vector of target y values.

The screenshot above shows the demo page I came up with. The “Choose File” button is associated with a function that uses the JavaScript FileReader object. When the button is clicked, a window to select the source training data pops up, the selected file is read into memory as one giant string (with embedded newline characters), and the string is displayed in a textarea field.

The “Create trainX matrix and trainY vector” button is associated with a function that parses the big source string and transfers the data into a source numeric matrix. Then the source matrix is processed to transfer the predictors in columns 0, 1, 2, 3, 4 into a trainX matrix, and the targets in column 5 into a trainY vector. The matrix and vector are displayed in two textarea fields.

Demo code. Note: My blog editor consistently chokes on the less-than and greater-than symbols. Here is the JavaScript. Replace “lt” with the less-than character and “gt” with the greater-than character.

    "lt"script"gt"
      // globals shared by readTrainFile() and extract()
      var trainDataAsString = "";  // raw text of the selected file
      var trainXAsMatrix = null;   // predictor matrix, set by extract()
      var trainYAsVector = null;   // target vector, set by extract()

      var useCols = [0,1,2,3,4,5];  // source columns to load
      var xCols = [0,1,2,3,4];      // columns holding the predictors
      var yCol = 5;                 // column holding the target

      // Reads the first file selected in the given file input as text.
      // On success the text is shown in textArea1 and stored in the
      // global trainDataAsString; on failure the error is alerted.
      //   input: a file input element (anything with a .files list)
      // Does nothing when no file is selected.
      function readTrainFile(input) {
        const file = input.files[0];
        if (!file) {
          // e.g. the file dialog was cancelled: keep the current data
          return;
        }

        const reader = new FileReader();

        reader.onload = function() {
          textArea1.value = reader.result;
          trainDataAsString = reader.result;
        };

        reader.onerror = function() {
          alert(reader.error);
        };

        // start the read only after both handlers are attached
        reader.readAsText(file);
      }

      // Parses the global trainDataAsString into numeric training data.
      // Lines that are blank or start with "#" (after trimming) are
      // skipped; every other line is split on "," and parsed as floats.
      // Results: trainXAsMatrix (columns xCols) and trainYAsVector
      // (column yCol) are stored globally and shown in textArea2 and
      // textArea3. Unparseable tokens come through as NaN.
      // Written without less-than / greater-than characters so the
      // listing needs no character substitution.
      function extract()
      {
        const comment = "#";
        const delimit = ",";

        // 1. store train data x and y into one matrix
        // useCols = [0,1,2,3,4,5];  // y in col [5]
        // Trim each line first so blank lines, indented comment lines
        // and "\r" line endings are not turned into rows of NaN.
        const dataLines = [];
        for (const rawLine of trainDataAsString.split("\n")) {
          const line = rawLine.trim();
          if (line.length !== 0 && !line.startsWith(comment)) {
            dataLines.push(line);
          }
        }

        const nRows = dataLines.length;
        const trainDataAsMatrix = matMake(nRows, useCols.length, 0.0);
        for (const [i, line] of dataLines.entries()) {
          const tokens = line.split(delimit);
          for (const [j, srcCol] of useCols.entries()) {
            trainDataAsMatrix[i][j] = parseFloat(tokens[srcCol]);
          }
        }

        // 2. extract the X predictors
        // xCols = [0,1,2,3,4];
        trainXAsMatrix = matMake(nRows, xCols.length, 0.0);
        let xDisplay = "";
        for (const [i, row] of trainDataAsMatrix.entries()) {
          for (const [j, jj] of xCols.entries()) {
            trainXAsMatrix[i][j] = row[jj];
          }
          xDisplay += trainXAsMatrix[i].join(",") + "\n";
        }
        // assign (not append) so a repeated click replaces the display
        textArea2.value = xDisplay;

        // 3. extract the y targets
        // yCol = 5;
        // stored globally, like trainXAsMatrix, for later use
        trainYAsVector = vecMake(nRows, 0.0);
        for (const [i, row] of trainDataAsMatrix.entries()) {
          trainYAsVector[i] = row[yCol];
        }

        textArea3.value = trainYAsVector.join(",");
      } // extract()

      // Creates an nRows-by-nCols matrix (array of row arrays) with
      // every cell set to val. Each row is a separate array, so
      // writing to one row does not affect the others.
      // Written without a less-than comparison so the listing needs
      // no character substitution.
      function matMake(nRows, nCols, val)
      {
        return Array.from({ length: nRows }, function () {
          return Array.from({ length: nCols }, function () {
            return val;
          });
        });
      }

      // Creates a vector (array) of n cells, each set to val.
      // Written without a less-than comparison so the listing needs
      // no character substitution.
      function vecMake(n, val)
      {
        return Array.from({ length: n }, function () {
          return val;
        });
      }

    "lt"/script"gt"

The screenshots below show the entire program, including the HTML.



Demo data:

# synthetic_train_40.txt
#
-0.1660,  0.4406, -0.9998, -0.3953, -0.7065,  0.4840
 0.0776, -0.1616,  0.3704, -0.5911,  0.7562,  0.1568
-0.9452,  0.3409, -0.1654,  0.1174, -0.7192,  0.8054
 0.9365, -0.3732,  0.3846,  0.7528,  0.7892,  0.1345
-0.8299, -0.9219, -0.6603,  0.7563, -0.8033,  0.7955
 0.0663,  0.3838, -0.3690,  0.3730,  0.6693,  0.3206
-0.9634,  0.5003,  0.9777,  0.4963, -0.4391,  0.7377
-0.1042,  0.8172, -0.4128, -0.4244, -0.7399,  0.4801
-0.9613,  0.3577, -0.5767, -0.4689, -0.0169,  0.6861
-0.7065,  0.1786,  0.3995, -0.7953, -0.1719,  0.5569
 0.3888, -0.1716, -0.9001,  0.0718,  0.3276,  0.2500
 0.1731,  0.8068, -0.7251, -0.7214,  0.6148,  0.3297
-0.2046, -0.6693,  0.8550, -0.3045,  0.5016,  0.2129
 0.2473,  0.5019, -0.3022, -0.4601,  0.7918,  0.2613
-0.1438,  0.9297,  0.3269,  0.2434, -0.7705,  0.5171
 0.1568, -0.1837, -0.5259,  0.8068,  0.1474,  0.3307
-0.9943,  0.2343, -0.3467,  0.0541,  0.7719,  0.5581
 0.2467, -0.9684,  0.8589,  0.3818,  0.9946,  0.1092
-0.6553, -0.7257,  0.8652,  0.3936, -0.8680,  0.7018
 0.8460,  0.4230, -0.7515, -0.9602, -0.9476,  0.1996
-0.9434, -0.5076,  0.7201,  0.0777,  0.1056,  0.5664
 0.9392,  0.1221, -0.9627,  0.6013, -0.5341,  0.1533
 0.6142, -0.2243,  0.7271,  0.4942,  0.1125,  0.1661
 0.4260,  0.1194, -0.9749, -0.8561,  0.9346,  0.2230
 0.1362, -0.5934, -0.4953,  0.4877, -0.6091,  0.3810
 0.6937, -0.5203, -0.0125,  0.2399,  0.6580,  0.1460
-0.6864, -0.9628, -0.8600, -0.0273,  0.2127,  0.5387
 0.9772,  0.1595, -0.2397,  0.1019,  0.4907,  0.1611
 0.3385, -0.4702, -0.8673, -0.2598,  0.2594,  0.2270
-0.8669, -0.4794,  0.6095, -0.6131,  0.2789,  0.4700
 0.0493,  0.8496, -0.4734, -0.8681,  0.4701,  0.3516
 0.8639, -0.9721, -0.5313,  0.2336,  0.8980,  0.1412
 0.9004,  0.1133,  0.8312,  0.2831, -0.2200,  0.1782
 0.0991,  0.8524,  0.8375, -0.2102,  0.9265,  0.2150
-0.6521, -0.7473, -0.7298,  0.0113, -0.9570,  0.7422
 0.6190, -0.3105,  0.8802,  0.1640,  0.7577,  0.1056
 0.6895,  0.8108, -0.0802,  0.0927,  0.5972,  0.2214
 0.1982, -0.9689,  0.1870, -0.1326,  0.6147,  0.1310
-0.3695,  0.7858,  0.1557, -0.6320,  0.5759,  0.3773
-0.1596,  0.3581,  0.8372, -0.9992,  0.9535,  0.2071
This entry was posted in JavaScript, Machine Learning. Bookmark the permalink.

Leave a Reply