Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3f5b537
recording onScoreUpdate
wjn Dec 8, 2020
501c42c
distance calc, KNN calc, predictionPoint and k vars
wjn Dec 9, 2020
4672eb9
added accuracy measurement
wjn Dec 9, 2020
2972c4c
test accuracy by feature
wjn Dec 12, 2020
a9997fb
array based vanilla JS solution. will refactor to a vectorized soluti…
wjn Dec 21, 2020
549feb4
array based vanilla JS solution. will refactor to a vectorized soluti…
wjn Dec 21, 2020
3031557
Merge branch 'master' of github.com:wjn/MLKits
wjn Dec 21, 2020
cd88b9b
vectorized gradient descent; all arrays converted to tensors
wjn Dec 22, 2020
7b67e7f
added coefficient of determination R^2
wjn Dec 22, 2020
63298f3
added test() method to calculate R^2 accuracy
wjn Dec 22, 2020
92b4a14
added standardization method
wjn Dec 22, 2020
9a7ca8d
* fixed issue with node standarizing col of 1s as -0.9999995. We stan…
wjn Dec 22, 2020
de9aead
weights tensor now supports dynamic number of features. updated learn…
wjn Dec 22, 2020
61f7be2
system now updates its learning rate based on guess quality
wjn Dec 22, 2020
aa04ec5
added plot-js for charts
wjn Dec 24, 2020
e01c562
multivariate linear regression complete
wjn Dec 25, 2020
00e238e
Merge pull request #1 from wjn/vectorized-tf
wjn Dec 25, 2020
65f7760
new folder structure and npm scripts
wjn Dec 25, 2020
41a874b
new folder structure
wjn Dec 25, 2020
9386a6b
logistic regression and multinomial regression with softmax
wjn Dec 28, 2020
802b1cb
replaced sigmoid with softmax for multinomial logistical regression
wjn Dec 29, 2020
e3d7bea
added npm scripts
wjn Dec 31, 2020
8529117
Added memory.js for heap exercise
wjn Dec 31, 2020
5176b61
added mnist images dir
wjn Dec 31, 2020
9d26955
finished Stephen's optimizations
wjn Dec 31, 2020
a2bc55d
added closures for initializing logisticRegression class and getting …
wjn Dec 31, 2020
3b69e9c
gitignore
wjn Dec 31, 2020
20b4ca2
92.8% accuracy, mnist completed
wjn Dec 31, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 105 additions & 2 deletions plinko/score.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,111 @@
// Collected observations: one [dropPosition, bounciness, size, bucketLabel]
// entry per ball drop, used later to correlate features with the bucket hit.
const outputs = [];

// Invoked by the Plinko UI each time a ball lands in a bucket; records the
// ball's features alongside the bucket it ended up in.
function onScoreUpdate(dropPosition, bounciness, size, bucketLabel) {
  outputs.push([dropPosition, bounciness, size, bucketLabel]);
}

// Evaluates each feature independently: trains a KNN classifier on a single
// feature column and logs how often it predicts the correct bucket.
function runAnalysis() {
  const testSetSize = 100;
  const k = 10;
  const colNames = ['Drop Position', 'Bounciness', 'Ball Size'];

  // One pass per feature column (0..2); vary k here to tune accuracy.
  for (const feature of _.range(0, 3)) {
    // Reduce every observation to [featureValue, bucketLabel].
    const data = outputs.map((row) => [row[feature], _.last(row)]);

    // Normalize the single feature column, then carve off a test set.
    const [testSet, trainingSet] = splitDataSet(minMax(data, 1), testSetSize);

    // Count the test points KNN classifies into the correct bucket.
    const correct = testSet.filter(
      (testPoint) => knn(trainingSet, _.initial(testPoint), k) === _.last(testPoint)
    ).length;
    const accuracy = correct / testSetSize;

    console.log(`k(${k}) Accuracy for ${colNames[feature]}: ${accuracy * 100}%`);
  }
}

// K-Nearest Neighbors: classify `point` by majority vote among the `k`
// training rows closest to it. Each row in `data` is [...features, label].
function knn(data, point, k) {
  // Tag every training row with its distance to the query point,
  // e.g. [[distanceToPoint, bucketLabel], [72, 4], [227, 5]], then sort
  // ascending so the nearest rows come first.
  const byDistance = _.sortBy(
    data.map((row) => [distance(_.initial(row), point), _.last(row)]),
    (row) => row[0]
  );

  // Tally how often each bucket label appears among the k nearest rows,
  // e.g. { "3": 1, "4": 2 }.
  const votes = _.countBy(byDistance.slice(0, k), (row) => row[1]);

  // _.toPairs gives [["3", 1], ["4", 2]]; sorting by count puts the most
  // frequent label last.
  const winner = _.last(_.sortBy(_.toPairs(votes), (pair) => pair[1]));

  // Pair keys are strings, so convert the winning label back to an int.
  return _.parseInt(winner[0]);
}

/**
 * Euclidean distance between two points of equal dimension, via the
 * Pythagorean theorem generalized to N dimensions: sqrt(sum((a_i - b_i)^2)).
 *
 * Bug fix: the previous version built the lodash chain and computed
 * `.value() ** 0.5` but never RETURNED it, so every caller (knn) received
 * `undefined` and the nearest-neighbor sort was effectively meaningless.
 *
 * @param {number[]} pointA - first point's coordinates
 * @param {number[]} pointB - second point's coordinates (same length)
 * @returns {number} the Euclidean distance between the two points
 */
function distance(pointA, pointB) {
  // Pair coordinates by index, accumulate the squared differences...
  const sumOfSquares = pointA.reduce(
    (sum, a, i) => sum + (a - pointB[i]) ** 2,
    0
  );
  // ...and take the square root of the sum.
  return sumOfSquares ** 0.5;
}

/**
 * Randomly partitions `data` into a test set of `testCount` rows and a
 * training set holding the remainder.
 *
 * @param {Array[]} data - rows to partition
 * @param {number} testCount - number of rows reserved for testing
 * @returns {[Array[], Array[]]} [testSet, trainingSet]
 */
function splitDataSet(data, testCount) {
  // Shuffle first so the split is unbiased with respect to input order.
  const shuffled = _.shuffle(data);
  return [shuffled.slice(0, testCount), shuffled.slice(testCount)];
}

/**
 * Returns a copy of `data` with the first `featureCount` columns rescaled to
 * [0, 1] via min-max normalization. Trailing columns (e.g. the bucket label)
 * are passed through untouched. The caller's `data` is never mutated.
 *
 * Improvements over the previous version:
 * - guards the zero-span (constant column) case BEFORE dividing, instead of
 *   computing NaN and patching it afterwards;
 * - copies rows with `slice()` rather than `_.cloneDeep` — rows hold only
 *   primitives (numbers and a label), so a per-row shallow copy is
 *   equivalent and avoids the lodash dependency and deep-clone cost.
 *
 * @param {Array[]} data - rows shaped like [feature0, ..., label]
 * @param {number} featureCount - how many leading columns to normalize
 * @returns {Array[]} normalized copy of data
 */
function minMax(data, featureCount) {
  const normalized = data.map((row) => row.slice());

  // Normalize each feature column independently.
  for (let col = 0; col < featureCount; col++) {
    const values = normalized.map((row) => row[col]);
    const min = Math.min(...values);
    const max = Math.max(...values);
    const span = max - min;

    for (const row of normalized) {
      // A constant column (span of 0) maps to 0 rather than NaN.
      row[col] = span === 0 ? 0 : (row[col] - min) / span;
    }
  }

  return normalized;
}
1 change: 1 addition & 0 deletions regressions/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.vscode/
5,077 changes: 5,077 additions & 0 deletions regressions/data/Numeric-cars-corgis.csv

Large diffs are not rendered by default.

5,077 changes: 5,077 additions & 0 deletions regressions/data/cars-corgis.csv

Large diffs are not rendered by default.

File renamed without changes.
67 changes: 67 additions & 0 deletions regressions/linear-regression/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
require('@tensorflow/tfjs-node');
const loadCSV = require('../load-csv');
const LinearRegression = require('./linear-regression');
const plot = require('node-remote-plot');

// Load the CORGIS cars dataset: shuffle rows and hold out 50 for testing.
// `features`/`labels` train the model; `testFeatures`/`testLabels` score R^2.
// Fix: `const` instead of `let` — none of these bindings are ever reassigned.
const { features, labels, testFeatures, testLabels } = loadCSV('../data/Numeric-cars-corgis.csv', {
  shuffle: true,
  splitTest: 50,
  dataColumns: [
    'Year',
    'Driveline',
    'Transmission',
    'Horsepower',
    'Torque',
    'Displacement',
    'Cylinder_Count',
    'Gears_Forward',
  ],
  labelColumns: ['MPG_CITY'],
});

// Initial learning rate, logged with the results below so runs can be
// compared. NOTE(review): per the commit history the model adjusts its
// learning rate during training based on guess quality — confirm in
// linear-regression.js.
const initLR = 0.1;
const regression = new LinearRegression(features, labels, {
  learningRate: initLR,
  iterations: 5,
  batchSize: 10,
});

regression.train();
/**
 * NOTE(review): an earlier comment here claimed the weights tensor has a
 * [2,1] shape; with 8 feature columns configured above that looks stale —
 * presumably [9,1] including an intercept term. Verify against the
 * LinearRegression class before relying on it.
 */
const r2 = regression.test(testFeatures, testLabels);
// reverse() mutates mseHistory in place so the oldest iteration plots first.
plot({
  x: regression.mseHistory.reverse(),
  xLabel: 'Iterations',
  yLabel: 'MSE',
});
console.log('R2 : ', r2, ' initLR: ', initLR, ' iterations: ', regression.options.iterations);

/**
 * Converts engine displacement in liters to cubic inches (CID).
 * There are ~61 cubic inches in a liter.
 *
 * Fix: declared with `const` — the previous bare assignment created an
 * implicit global (a ReferenceError under strict mode / ES modules).
 *
 * @param {number} liters - displacement in liters
 * @returns {number} displacement in cubic inches
 */
const litersToCID = (liters) => liters * 61;

/**
 * Sample vehicles to predict city MPG for. Column order matches dataColumns:
 *   Year,
 *   Driveline (FWD, RWD, AWD, 4WD — numerically encoded),
 *   Transmission (manual, automatic — numerically encoded),
 *   Horsepower, Torque, Displacement, Cylinder_Count, Gears_Forward.
 *
 * Fix: declared with `const` — previously a bare assignment that created an
 * implicit global.
 */
const vehicles = [
  [2010, 3, 2, 350, 325, 4.2, 8, 6], // 14 mpg Audi A8
  [2009, 3, 2, 265, 243, 3.2, 6, 6], // 18 mpg Audi A5
  [2011, 2, 1, 400, 450, 4.4, 8, 6], // 17 mpg BMW 550i
  [2011, 1, 2, 108, 105, 1.6, 4, 4], // 25 mpg Chevy Aveo5 2LT AT
  [2016, 1, 2, 275, 301, 1.8, 4, 6],
];
regression.predict(vehicles).print();
Loading