Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3f5b537
recording onScoreUpdate
wjn Dec 8, 2020
501c42c
distance calc, KNN calc, predictionPoint and k vars
wjn Dec 9, 2020
4672eb9
added accuracy measurement
wjn Dec 9, 2020
2972c4c
test accuracy by feature
wjn Dec 12, 2020
a9997fb
array based vanilla JS solution. will refactor to a vectorized soluti…
wjn Dec 21, 2020
549feb4
array based vanilla JS solution. will refactor to a vectorized soluti…
wjn Dec 21, 2020
3031557
Merge branch 'master' of github.com:wjn/MLKits
wjn Dec 21, 2020
cd88b9b
vectorized gradient descent; all arrays converted to tensors
wjn Dec 22, 2020
7b67e7f
added coefficient of determination R^2
wjn Dec 22, 2020
63298f3
added test() method to calculate R^2 accuracy
wjn Dec 22, 2020
92b4a14
added standardization method
wjn Dec 22, 2020
9a7ca8d
* fixed issue with node standarizing col of 1s as -0.9999995. We stan…
wjn Dec 22, 2020
de9aead
weights tensor now supports dynamic number of features. updated learn…
wjn Dec 22, 2020
61f7be2
system now updates its learning rate based on guess quality
wjn Dec 22, 2020
aa04ec5
added plot-js for charts
wjn Dec 24, 2020
e01c562
multivariate linear regression complete
wjn Dec 25, 2020
00e238e
Merge pull request #1 from wjn/vectorized-tf
wjn Dec 25, 2020
65f7760
new folder structure and npm scripts
wjn Dec 25, 2020
41a874b
new folder structure
wjn Dec 25, 2020
9386a6b
logistic regression and multinomial regression with softmax
wjn Dec 28, 2020
802b1cb
replaced sigmoid with softmax for multinomial logistical regression
wjn Dec 29, 2020
e3d7bea
added npm scripts
wjn Dec 31, 2020
8529117
Added memory.js for heap exercise
wjn Dec 31, 2020
5176b61
added mnist images dir
wjn Dec 31, 2020
9d26955
finished Stephen's optimizations
wjn Dec 31, 2020
a2bc55d
added closures for initializing logisticRegression class and getting …
wjn Dec 31, 2020
3b69e9c
gitignore
wjn Dec 31, 2020
20b4ca2
92.8% accuracy, mnist completed
wjn Dec 31, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 105 additions & 2 deletions plinko/score.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,111 @@
// Collected observations: one [dropPosition, bounciness, size, bucketLabel]
// entry per ball drop, used later to correlate features with the bucket hit.
const outputs = [];

// Invoked by the Plinko UI each time a ball lands in a bucket; records the
// ball's features alongside the bucket it ended up in.
function onScoreUpdate(dropPosition, bounciness, size, bucketLabel) {
  outputs.push([dropPosition, bounciness, size, bucketLabel]);
}

// Evaluates each feature independently: trains a KNN classifier on a single
// feature column and logs how often it predicts the correct bucket.
function runAnalysis() {
  const testSetSize = 100;
  const k = 10;
  const colNames = ['Drop Position', 'Bounciness', 'Ball Size'];

  // One pass per feature column (0..2); vary k here to tune accuracy.
  for (const feature of _.range(0, 3)) {
    // Reduce every observation to [featureValue, bucketLabel].
    const data = outputs.map((row) => [row[feature], _.last(row)]);

    // Normalize the single feature column, then carve off a test set.
    const [testSet, trainingSet] = splitDataSet(minMax(data, 1), testSetSize);

    // Count the test points KNN classifies into the correct bucket.
    const correct = testSet.filter(
      (testPoint) => knn(trainingSet, _.initial(testPoint), k) === _.last(testPoint)
    ).length;
    const accuracy = correct / testSetSize;

    console.log(`k(${k}) Accuracy for ${colNames[feature]}: ${accuracy * 100}%`);
  }
}

// K-Nearest Neighbors: classify `point` by majority vote among the `k`
// training rows closest to it. Each row in `data` is [...features, label].
function knn(data, point, k) {
  // Tag every training row with its distance to the query point,
  // e.g. [[distanceToPoint, bucketLabel], [72, 4], [227, 5]], then sort
  // ascending so the nearest rows come first.
  const byDistance = _.sortBy(
    data.map((row) => [distance(_.initial(row), point), _.last(row)]),
    (row) => row[0]
  );

  // Tally how often each bucket label appears among the k nearest rows,
  // e.g. { "3": 1, "4": 2 }.
  const votes = _.countBy(byDistance.slice(0, k), (row) => row[1]);

  // _.toPairs gives [["3", 1], ["4", 2]]; sorting by count puts the most
  // frequent label last.
  const winner = _.last(_.sortBy(_.toPairs(votes), (pair) => pair[1]));

  // Pair keys are strings, so convert the winning label back to an int.
  return _.parseInt(winner[0]);
}

/**
 * Euclidean distance between two points of equal dimension, via the
 * Pythagorean theorem generalized to N dimensions: sqrt(sum((a_i - b_i)^2)).
 *
 * Bug fix: the previous version built the lodash chain and computed
 * `.value() ** 0.5` but never RETURNED it, so every caller (knn) received
 * `undefined` and the nearest-neighbor sort was effectively meaningless.
 *
 * @param {number[]} pointA - first point's coordinates
 * @param {number[]} pointB - second point's coordinates (same length)
 * @returns {number} the Euclidean distance between the two points
 */
function distance(pointA, pointB) {
  // Pair coordinates by index, accumulate the squared differences...
  const sumOfSquares = pointA.reduce(
    (sum, a, i) => sum + (a - pointB[i]) ** 2,
    0
  );
  // ...and take the square root of the sum.
  return sumOfSquares ** 0.5;
}

/**
 * Randomly partitions `data` into a test set of `testCount` rows and a
 * training set holding the remainder.
 *
 * @param {Array[]} data - rows to partition
 * @param {number} testCount - number of rows reserved for testing
 * @returns {[Array[], Array[]]} [testSet, trainingSet]
 */
function splitDataSet(data, testCount) {
  // Shuffle first so the split is unbiased with respect to input order.
  const shuffled = _.shuffle(data);
  return [shuffled.slice(0, testCount), shuffled.slice(testCount)];
}

/**
 * Returns a copy of `data` with the first `featureCount` columns rescaled to
 * [0, 1] via min-max normalization. Trailing columns (e.g. the bucket label)
 * are passed through untouched. The caller's `data` is never mutated.
 *
 * Improvements over the previous version:
 * - guards the zero-span (constant column) case BEFORE dividing, instead of
 *   computing NaN and patching it afterwards;
 * - copies rows with `slice()` rather than `_.cloneDeep` — rows hold only
 *   primitives (numbers and a label), so a per-row shallow copy is
 *   equivalent and avoids the lodash dependency and deep-clone cost.
 *
 * @param {Array[]} data - rows shaped like [feature0, ..., label]
 * @param {number} featureCount - how many leading columns to normalize
 * @returns {Array[]} normalized copy of data
 */
function minMax(data, featureCount) {
  const normalized = data.map((row) => row.slice());

  // Normalize each feature column independently.
  for (let col = 0; col < featureCount; col++) {
    const values = normalized.map((row) => row[col]);
    const min = Math.min(...values);
    const max = Math.max(...values);
    const span = max - min;

    for (const row of normalized) {
      // A constant column (span of 0) maps to 0 rather than NaN.
      row[col] = span === 0 ? 0 : (row[col] - min) / span;
    }
  }

  return normalized;
}
1 change: 1 addition & 0 deletions regressions/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.vscode/
5,077 changes: 5,077 additions & 0 deletions regressions/data/Numeric-cars-corgis.csv

Large diffs are not rendered by default.

5,077 changes: 5,077 additions & 0 deletions regressions/data/cars-corgis.csv

Large diffs are not rendered by default.

File renamed without changes.
67 changes: 67 additions & 0 deletions regressions/linear-regression/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
require('@tensorflow/tfjs-node');
const loadCSV = require('../load-csv');
const LinearRegression = require('./linear-regression');
const plot = require('node-remote-plot');

// Load the CORGIS cars dataset: shuffle rows and hold out 50 for testing.
// `features`/`labels` train the model; `testFeatures`/`testLabels` score R^2.
// Fix: `const` instead of `let` — none of these bindings are ever reassigned.
const { features, labels, testFeatures, testLabels } = loadCSV('../data/Numeric-cars-corgis.csv', {
  shuffle: true,
  splitTest: 50,
  dataColumns: [
    'Year',
    'Driveline',
    'Transmission',
    'Horsepower',
    'Torque',
    'Displacement',
    'Cylinder_Count',
    'Gears_Forward',
  ],
  labelColumns: ['MPG_CITY'],
});

// Initial learning rate, logged with the results below so runs can be
// compared. NOTE(review): per the commit history the model adjusts its
// learning rate during training based on guess quality — confirm in
// linear-regression.js.
const initLR = 0.1;
const regression = new LinearRegression(features, labels, {
  learningRate: initLR,
  iterations: 5,
  batchSize: 10,
});

regression.train();
/**
 * NOTE(review): an earlier comment here claimed the weights tensor has a
 * [2,1] shape; with 8 feature columns configured above that looks stale —
 * presumably [9,1] including an intercept term. Verify against the
 * LinearRegression class before relying on it.
 */
const r2 = regression.test(testFeatures, testLabels);
// reverse() mutates mseHistory in place so the oldest iteration plots first.
plot({
  x: regression.mseHistory.reverse(),
  xLabel: 'Iterations',
  yLabel: 'MSE',
});
console.log('R2 : ', r2, ' initLR: ', initLR, ' iterations: ', regression.options.iterations);

/**
 * Converts engine displacement in liters to cubic inches (CID).
 * There are ~61 cubic inches in a liter.
 *
 * Fix: declared with `const` — the previous bare assignment created an
 * implicit global (a ReferenceError under strict mode / ES modules).
 *
 * @param {number} liters - displacement in liters
 * @returns {number} displacement in cubic inches
 */
const litersToCID = (liters) => liters * 61;

/**
 * Sample vehicles to predict city MPG for. Column order matches dataColumns:
 *   Year,
 *   Driveline (FWD, RWD, AWD, 4WD — numerically encoded),
 *   Transmission (manual, automatic — numerically encoded),
 *   Horsepower, Torque, Displacement, Cylinder_Count, Gears_Forward.
 *
 * Fix: declared with `const` — previously a bare assignment that created an
 * implicit global.
 */
const vehicles = [
  [2010, 3, 2, 350, 325, 4.2, 8, 6], // 14 mpg Audi A8
  [2009, 3, 2, 265, 243, 3.2, 6, 6], // 18 mpg Audi A5
  [2011, 2, 1, 400, 450, 4.4, 8, 6], // 17 mpg BMW 550i
  [2011, 1, 2, 108, 105, 1.6, 4, 4], // 25 mpg Chevy Aveo5 2LT AT
  [2016, 1, 2, 275, 301, 1.8, 4, 6],
];
regression.predict(vehicles).print();
Loading