From 828c67a7145433320fdc74c1e3a31db26ed9275a Mon Sep 17 00:00:00 2001 From: "Zack M. Davis" Date: Tue, 20 Sep 2016 17:12:58 -0700 Subject: [PATCH 1/3] =?UTF-8?q?example:=20classifying=20dogs=20with=20a=20?= =?UTF-8?q?na=C3=AFve=20Bayes=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This in the matter of #128. --- examples/README.md | 23 +++++++ examples/naive_bayes_dogs.rs | 129 +++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 examples/naive_bayes_dogs.rs diff --git a/examples/README.md b/examples/README.md index 0010cf55..a2d7fe2c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -9,6 +9,7 @@ This directory gathers fully-fledged programs, each using a piece of * [K-Means](#k-means) * [SVM](#svm) * [Neural Networks](#neural-networks) +* [Naïve Bayes](#naïve-bayes) ## The Examples @@ -123,3 +124,25 @@ Got Expected Hits: 4, Misses: 0 Accuracy: 100% ``` + +### Naïve Bayes + +#### Dog Classification + +Suppose we have a population composed of red dogs and white dogs, whose friendliness, furriness, and speed can be measured. The group of white dogs is friendlier, furrier, and slower than red dogs by one standard deviation (respectively), but given the color of a dog, friendliness, furriness, and speed are independent of each other. We can use a naïve Bayes model to try to predict dog color given friendliness, furriness, and speed as observations. + +Sample run (note the `--features stats` flag): + +``` +$ cargo run --example naive_bayes_dogs --features stats + Compiling rusty-machine v0.4.4 (file:///[...]/rusty-machine) + Running `target/debug/examples/naive_bayes_dogs` +Predicted: White; Actual: White; Accurate? true +Predicted: White; Actual: White; Accurate? true +[redacted for brevity ...] +Predicted: White; Actual: Red; Accurate? false +Predicted: White; Actual: White; Accurate? true +Predicted: Red; Actual: Red; Accurate? 
true +Predicted: Red; Actual: Red; Accurate? true +Accuracy: 805/1000 = 80.5% +``` diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs new file mode 100644 index 00000000..841bd8de --- /dev/null +++ b/examples/naive_bayes_dogs.rs @@ -0,0 +1,129 @@ +extern crate rusty_machine; +extern crate rand; + +use rand::Rand; +use rand::distributions::Sample; +use rusty_machine::learning::naive_bayes::{self, NaiveBayes}; +use rusty_machine::linalg::Matrix; +use rusty_machine::learning::SupModel; +use rusty_machine::stats::dist::gaussian::Gaussian; + + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum Color { + Red, + White, +} + +#[derive(Clone, Debug)] +struct Dog { + color: Color, + friendliness: f64, + furriness: f64, + speed: f64, +} + +impl Rand for Dog { + fn rand<R: rand::Rng>(rng: &mut R) -> Self { + let mut red_dog_friendliness = Gaussian::from_std_dev(0., 1.); + let mut red_dog_furriness = Gaussian::from_std_dev(0., 1.); + let mut red_dog_speed = Gaussian::from_std_dev(0., 1.); + + let mut white_dog_friendliness = Gaussian::from_std_dev(1., 1.); + let mut white_dog_furriness = Gaussian::from_std_dev(1., 1.); + let mut white_dog_speed = Gaussian::from_std_dev(-1., 1.); + + let coin: f64 = rng.gen(); + let color = if coin < 0.5 { Color::Red } else { Color::White }; + + match color { + Color::Red => { + Dog { + color: Color::Red, + friendliness: red_dog_friendliness.sample(rng), + furriness: red_dog_furriness.sample(rng), + speed: red_dog_speed.sample(rng), + } + }, + Color::White => { + Dog { + color: Color::White, + friendliness: white_dog_friendliness.sample(rng), + furriness: white_dog_furriness.sample(rng), + speed: white_dog_speed.sample(rng), + } + }, + } + } +} + +fn main() { + let mut randomness = rand::StdRng::new() + .expect("we should be able to get an RNG"); + let rng = &mut randomness; + + let training_set_size = 1000; + let test_set_size = 1000; + + let training_dogs = (0..training_set_size) + .map(|_| { Dog::rand(rng) }) + .collect::<Vec<_>>(); + + let 
test_dogs = (0..test_set_size) + .map(|_| { Dog::rand(rng) }) + .collect::<Vec<_>>(); + + let mut training_matrix = Matrix::new(0, 3, Vec::new()); + let mut target_matrix = Matrix::new(0, 2, Vec::new()); + for training_dog in &training_dogs { + let dog_row = Matrix::new( + 1, 3, + vec![training_dog.friendliness, + training_dog.furriness, + training_dog.speed] + ); + training_matrix = training_matrix.vcat(&dog_row); + let color_row = match training_dog.color { + Color::Red => Matrix::new(1, 2, vec![1., 0.]), + Color::White => Matrix::new(1, 2, vec![0., 1.]), + }; + target_matrix = target_matrix.vcat(&color_row); + } + + let mut model = NaiveBayes::<naive_bayes::Gaussian>::new(); + model.train(&training_matrix, &target_matrix) + .expect("failed to train model of dogs"); + + let mut test_matrix = Matrix::new(0, 3, Vec::new()); + for test_dog in &test_dogs { + let dog_row = Matrix::new( + 1, 3, + vec![test_dog.friendliness, + test_dog.furriness, + test_dog.speed] + ); + test_matrix = test_matrix.vcat(&dog_row); + } + + let predictions = model.predict(&test_matrix) + .expect("failed to predict dogs!?"); + + let mut hits = 0; + for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()) { + let predicted_color = dog.color; + let actual_color = if prediction[0] == 1. { + Color::Red + } else { + Color::White + }; + let accurate = predicted_color == actual_color; + if accurate { + hits += 1; + } + println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}", + predicted_color, actual_color, accurate); + } + + println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size, + (hits as f64)/(test_set_size as f64) * 100.); +} From 2bae9ac51ed9bdfb78f3cfbf9c13234f38caee81 Mon Sep 17 00:00:00 2001 From: "Zack M. Davis" Date: Sun, 25 Sep 2016 12:38:38 -0700 Subject: [PATCH 2/3] =?UTF-8?q?example:=20classifying=20dogs=20with=20a=20?= =?UTF-8?q?na=C3=AFve=20Bayes=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This in the matter of #128. 
--- examples/README.md | 25 ++++++ examples/naive_bayes_dogs.rs | 148 +++++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 examples/naive_bayes_dogs.rs diff --git a/examples/README.md b/examples/README.md index 0010cf55..f8fc9c50 100644 --- a/examples/README.md +++ b/examples/README.md @@ -9,6 +9,7 @@ This directory gathers fully-fledged programs, each using a piece of * [K-Means](#k-means) * [SVM](#svm) * [Neural Networks](#neural-networks) +* [Naïve Bayes](#naïve-bayes) ## The Examples @@ -123,3 +124,27 @@ Got Expected Hits: 4, Misses: 0 Accuracy: 100% ``` + +### Naïve Bayes + +#### Dog Classification + +Suppose we have a population composed of red dogs and white dogs, whose friendliness, furriness, and speed can be measured. The group of white dogs is friendlier, furrier, and slower than red dogs by one standard deviation (respectively), but given the color of a dog, friendliness, furriness, and speed are independent of each other. We can use a naïve Bayes model to try to predict dog color given friendliness, furriness, and speed as observations. + +Sample run: + +``` +$ cargo run --example naive_bayes_dogs +Predicted: White; Actual: White; Accurate? true +Predicted: White; Actual: White; Accurate? true +Predicted: White; Actual: White; Accurate? true +Predicted: White; Actual: White; Accurate? true +Predicted: White; Actual: Red; Accurate? false +Predicted: Red; Actual: Red; Accurate? true +[redacted for brevity ...] +Predicted: Red; Actual: Red; Accurate? true +Predicted: White; Actual: White; Accurate? true +Predicted: Red; Actual: Red; Accurate? true +Predicted: Red; Actual: White; Accurate? 
false +Accuracy: 815/1000 = 81.5% +``` diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs new file mode 100644 index 00000000..4fe44663 --- /dev/null +++ b/examples/naive_bayes_dogs.rs @@ -0,0 +1,148 @@ +extern crate rusty_machine; +extern crate rand; + +use rand::Rand; +use rand::distributions::Sample; +use rand::distributions::normal::Normal; +use rusty_machine::prelude::*; +use rusty_machine::learning::naive_bayes::{self, NaiveBayes}; + + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum Color { + Red, + White, +} + +#[derive(Clone, Debug)] +struct Dog { + color: Color, + friendliness: f64, + furriness: f64, + speed: f64, +} + +impl Rand for Dog { + /// Generate a random dog. + fn rand<R: rand::Rng>(rng: &mut R) -> Self { + // Friendliness, furriness, and speed are normally distributed and + // (given color:) independent. + let mut red_dog_friendliness = Normal::new(0., 1.); + let mut red_dog_furriness = Normal::new(0., 1.); + let mut red_dog_speed = Normal::new(0., 1.); + + let mut white_dog_friendliness = Normal::new(1., 1.); + let mut white_dog_furriness = Normal::new(1., 1.); + let mut white_dog_speed = Normal::new(-1., 1.); + + // Flip a coin to decide whether to generate a red or white dog. 
+ let coin: f64 = rng.gen(); + let color = if coin < 0.5 { Color::Red } else { Color::White }; + + match color { + Color::Red => { + Dog { + color: Color::Red, + // sample from our normal distributions for each trait + friendliness: red_dog_friendliness.sample(rng), + furriness: red_dog_furriness.sample(rng), + speed: red_dog_speed.sample(rng), + } + }, + Color::White => { + Dog { + color: Color::White, + friendliness: white_dog_friendliness.sample(rng), + furriness: white_dog_furriness.sample(rng), + speed: white_dog_speed.sample(rng), + } + }, + } + } +} + +fn main() { + let mut randomness = rand::StdRng::new() + .expect("we should be able to get an RNG"); + let rng = &mut randomness; + + let training_set_size = 1000; + let test_set_size = 1000; + + // We'll train the model on these dogs + let training_dogs = (0..training_set_size) + .map(|_| { Dog::rand(rng) }) + .collect::<Vec<_>>(); + + // ... and then use the model to make predictions about these dogs' color + // given only their trait measurements. + let test_dogs = (0..test_set_size) + .map(|_| { Dog::rand(rng) }) + .collect::<Vec<_>>(); + + // The model's `.train` method will take two matrices, each with a row for + // each dog in the training set: the rows in the first matrix contain the + // trait measurements; the rows in the second are either [1, 0] or [0, 1] + // to indicate color. 
+ let mut training_matrix = Matrix::new(0, 3, Vec::new()); + let mut target_matrix = Matrix::new(0, 2, Vec::new()); + + // For every dog in the training set, + for training_dog in &training_dogs { + // add a row to the input matrix for the trait measurements + let dog_row = Matrix::new( + 1, 3, + vec![training_dog.friendliness, + training_dog.furriness, + training_dog.speed] + ); + training_matrix = training_matrix.vcat(&dog_row); + // add a row to the target matrix indicating color + let color_row = match training_dog.color { + Color::Red => Matrix::new(1, 2, vec![1., 0.]), + Color::White => Matrix::new(1, 2, vec![0., 1.]), + }; + target_matrix = target_matrix.vcat(&color_row); + } + + // Train! + let mut model = NaiveBayes::<naive_bayes::Gaussian>::new(); + model.train(&training_matrix, &target_matrix) + .expect("failed to train model of dogs"); + + // Build another matrix for the test set of dogs to make predictions about. + let mut test_matrix = Matrix::new(0, 3, Vec::new()); + for test_dog in &test_dogs { + let dog_row = Matrix::new( + 1, 3, + vec![test_dog.friendliness, + test_dog.furriness, + test_dog.speed] + ); + test_matrix = test_matrix.vcat(&dog_row); + } + + // Predict! + let predictions = model.predict(&test_matrix) + .expect("failed to predict dogs!?"); + + // Score how well we did. + let mut hits = 0; + for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()) { + let predicted_color = dog.color; + let actual_color = if prediction[0] == 1. { + Color::Red + } else { + Color::White + }; + let accurate = predicted_color == actual_color; + if accurate { + hits += 1; + } + println!("Predicted: {:?}; Actual: {:?}; Accurate? 
{:?}", + predicted_color, actual_color, accurate); + } + + println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size, + (hits as f64)/(test_set_size as f64) * 100.); +} From 9694f2dee5eede8e93e20f58dd073c9eb6baf299 Mon Sep 17 00:00:00 2001 From: James Lucas Date: Mon, 14 Nov 2016 19:19:51 +0000 Subject: [PATCH 3/3] Limiting output to console --- examples/README.md | 14 ++++++------- examples/naive_bayes_dogs.rs | 38 ++++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/examples/README.md b/examples/README.md index 8113381c..cee33482 100644 --- a/examples/README.md +++ b/examples/README.md @@ -152,16 +152,16 @@ Sample run: ``` $ cargo run --example naive_bayes_dogs +... +Predicted: Red; Actual: Red; Accurate? true +Predicted: Red; Actual: Red; Accurate? true +Predicted: White; Actual: Red; Accurate? false +Predicted: Red; Actual: White; Accurate? false +Predicted: Red; Actual: Red; Accurate? true Predicted: White; Actual: White; Accurate? true Predicted: White; Actual: White; Accurate? true Predicted: White; Actual: White; Accurate? true Predicted: White; Actual: White; Accurate? true -Predicted: White; Actual: Red; Accurate? false -Predicted: Red; Actual: Red; Accurate? true -[redacted for brevity ...] -Predicted: Red; Actual: Red; Accurate? true -Predicted: White; Actual: White; Accurate? true Predicted: Red; Actual: Red; Accurate? true -Predicted: Red; Actual: White; Accurate? 
false
-Accuracy: 815/1000 = 81.5%
+Accuracy: 822/1000 = 82.2%
 ```
diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs
index 47ad66ba..2e57de54 100644
--- a/examples/naive_bayes_dogs.rs
+++ b/examples/naive_bayes_dogs.rs
@@ -104,6 +104,20 @@ fn generate_dog_data(training_set_size: u32, test_set_size: u32)
     (training_matrix, target_matrix, test_matrix, test_dogs)
 }
 
+/// Scores one test dog: bumps `hits` when the model's predicted color matches
+/// the dog's true color, and returns `(predicted_color, accurate)`.
+fn evaluate_prediction(hits: &mut u32, dog: &Dog, prediction: &[f64]) -> (Color, bool) {
+    // `dog.color` is the ground truth; the model's guess is decoded from its
+    // output row, where a leading 1.0 is read as Red and anything else as White.
+    let actual_color = dog.color;
+    let predicted_color = if prediction[0] == 1. { Color::Red } else { Color::White };
+    let accurate = predicted_color == actual_color;
+    if accurate {
+        *hits += 1;
+    }
+    (predicted_color, accurate)
+}
+
 fn main() {
     let (training_set_size, test_set_size) = (1000, 1000);
     // Generate all of our train and test data
@@ -120,19 +134,19 @@ fn main() {
 
     // Score how well we did.
     let mut hits = 0;
-    for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()) {
-        let predicted_color = dog.color;
-        let actual_color = if prediction[0] == 1. {
-            Color::Red
-        } else {
-            Color::White
-        };
-        let accurate = predicted_color == actual_color;
-        if accurate {
-            hits += 1;
-        }
+    let unprinted_total = test_set_size.saturating_sub(10) as usize;
+    for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).take(unprinted_total) {
+        evaluate_prediction(&mut hits, dog, prediction);
+    }
+
+    if unprinted_total > 0 {
+        println!("...");
+    }
+
+    for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).skip(unprinted_total) {
+        let (predicted_color, accurate) = evaluate_prediction(&mut hits, dog, prediction);
         println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}",
-                 predicted_color, actual_color, accurate);
+                 predicted_color, dog.color, accurate);
     }
 
     println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size,