AtheMathmo · AtheMathmo · Nov 14, 2016 · Sep 21, 2016 · Sep 25, 2016 · Nov 14, 2016
diff --git a/examples/README.md b/examples/README.md
@@ -1,5 +1,4 @@
-Sample Codes with rusty-machine
-===============================
+# Examples with rusty-machine
 
 This directory gathers fully-fledged programs, each using a piece of
 `rusty-machine`'s API.
@@ -9,6 +8,7 @@ This directory gathers fully-fledged programs, each using a piece of
 * [K-Means](#k-means)
 * [SVM](#svm)
 * [Neural Networks](#neural-networks)
+* [Naïve Bayes](#naïve-bayes)
 
 ## The Examples
 
@@ -123,3 +123,45 @@ Got  Expected
 Hits: 4, Misses: 0
 Accuracy: 100%
 ```
+
+### Naïve Bayes
+
+#### Dog Classification
+
+Suppose we have a population composed of red dogs and white dogs,
+whose friendliness, furriness, and speed can be measured. In this
+example we train a Naïve Bayes model to determine whether
+a dog is white or red.
+
+On average, white dogs are friendlier, furrier, and slower than
+red dogs. Given the color of a dog, friendliness, furriness,
+and speed are independent of each other (an assumption of the Naïve
+Bayes model).
+
+In the example code we will generate our own data and then train
+our model using it. This is a common technique used to validate
+a model. We generate the data by sampling each of the dog's features
+from Gaussian random variables. We will have a total of 6 Gaussian
+random variables representing three features for both colors of dog.
+As we are using Gaussian random variables we will use a Gaussian
+Naïve Bayes model. Once we have generated our data we will convert
+it into `Matrix` structures and train our model.
+
+
+Sample run:
+
+```
+$ cargo run --example naive_bayes_dogs
+...
+Predicted: Red; Actual: Red; Accurate? true
+Predicted: Red; Actual: Red; Accurate? true
+Predicted: White; Actual: Red; Accurate? false
+Predicted: Red; Actual: White; Accurate? false
+Predicted: Red; Actual: Red; Accurate? true
+Predicted: White; Actual: White; Accurate? true
+Predicted: White; Actual: White; Accurate? true
+Predicted: White; Actual: White; Accurate? true
+Predicted: White; Actual: White; Accurate? true
+Predicted: Red; Actual: Red; Accurate? true
+Accuracy: 822/1000 = 82.2%
+```
diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs
@@ -0,0 +1,154 @@
+extern crate rusty_machine;
+extern crate rand;
+
+use rand::Rand;
+use rand::distributions::Sample;
+use rand::distributions::normal::Normal;
+use rusty_machine::learning::naive_bayes::{self, NaiveBayes};
+use rusty_machine::linalg::{Matrix, BaseMatrix};
+use rusty_machine::learning::SupModel;
+
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum Color { // Coat color of a dog: the class label the model is trained to predict.
+    Red, // encoded as the target row [1, 0]
+    White, // encoded as the target row [0, 1]
+}
+
+#[derive(Clone, Debug)]
+struct Dog { // One dog: its color plus the three measured traits used as model features.
+    color: Color, // ground-truth class; never shown to the model at prediction time
+    friendliness: f64, // feature column 0
+    furriness: f64, // feature column 1
+    speed: f64, // feature column 2
+}
+
+impl Rand for Dog {
+    /// Draw a random dog.
+    ///
+    /// The color is chosen by a fair coin toss; the three traits are then
+    /// sampled from unit-variance normal distributions whose means depend
+    /// on that color.
+    fn rand<R: rand::Rng>(rng: &mut R) -> Self {
+        // The coin is tossed before any trait is sampled.
+        let toss: f64 = rng.gen();
+        let color = if toss < 0.5 { Color::Red } else { Color::White };
+
+        // Means of (friendliness, furriness, speed) for the chosen color.
+        // Given the color, the traits are sampled independently of each other.
+        let (friendliness_mean, furriness_mean, speed_mean) = match color {
+            Color::Red => (0., 0., 0.),
+            Color::White => (1., 1., -1.),
+        };
+
+        // Sample in a fixed order: friendliness, furriness, then speed.
+        let friendliness = Normal::new(friendliness_mean, 1.).sample(rng);
+        let furriness = Normal::new(furriness_mean, 1.).sample(rng);
+        let speed = Normal::new(speed_mean, 1.).sample(rng);
+
+        Dog {
+            color: color,
+            friendliness: friendliness,
+            furriness: furriness,
+            speed: speed,
+        }
+    }
+}
+
+/// Generate a random training set and test set of dogs.
+///
+/// Returns, in order:
+/// * the training feature matrix (one `[friendliness, furriness, speed]` row
+///   per training dog),
+/// * the training target matrix (one row per training dog: `[1, 0]` for red,
+///   `[0, 1]` for white),
+/// * the test feature matrix (same layout as the training features),
+/// * the test dogs themselves, so predictions can be checked against their
+///   true colors.
+///
+/// Panics if the standard RNG cannot be created.
+fn generate_dog_data(training_set_size: u32, test_set_size: u32)
+    -> (Matrix<f64>, Matrix<f64>, Matrix<f64>, Vec<Dog>) {
+    /// Draw `count` random dogs from `rng`, in order.
+    fn random_dogs(rng: &mut rand::StdRng, count: u32) -> Vec<Dog> {
+        let mut dogs = Vec::with_capacity(count as usize);
+        for _ in 0..count {
+            dogs.push(Dog::rand(rng));
+        }
+        dogs
+    }
+
+    /// Build a matrix with one `[friendliness, furriness, speed]` row per dog.
+    fn trait_matrix(dogs: &[Dog]) -> Matrix<f64> {
+        let mut flat: Vec<f64> = Vec::with_capacity(dogs.len() * 3);
+        for dog in dogs {
+            flat.push(dog.friendliness);
+            flat.push(dog.furriness);
+            flat.push(dog.speed);
+        }
+        flat.chunks(3).collect()
+    }
+
+    let mut rng = rand::StdRng::new()
+        .expect("we should be able to get an RNG");
+
+    // The training dogs are drawn first, then the test dogs whose color the
+    // model must predict from trait measurements alone.
+    let training_dogs = random_dogs(&mut rng, training_set_size);
+    let test_dogs = random_dogs(&mut rng, test_set_size);
+
+    // `.train` takes one feature row and one target row per training dog.
+    let training_matrix = trait_matrix(&training_dogs);
+
+    // One-hot encode the color: [1, 0] for red, [0, 1] for white.
+    let mut one_hot: Vec<f64> = Vec::with_capacity(training_dogs.len() * 2);
+    for dog in &training_dogs {
+        let (red, white) = match dog.color {
+            Color::Red => (1., 0.),
+            Color::White => (0., 1.),
+        };
+        one_hot.push(red);
+        one_hot.push(white);
+    }
+    let target_matrix: Matrix<f64> = one_hot.chunks(2).collect();
+
+    // Features of the dogs we will make predictions about.
+    let test_matrix = trait_matrix(&test_dogs);
+
+    (training_matrix, target_matrix, test_matrix, test_dogs)
+}
+
+/// Compare the model's prediction for one dog against the dog's true color.
+///
+/// `prediction` is the model's output row for `dog`: a leading `1.` means
+/// red, anything else means white. `hits` is incremented when the decoded
+/// color matches `dog.color`.
+///
+/// Returns the color decoded from `prediction` (i.e. what the model
+/// predicted) and whether it matched the dog's true color.
+///
+/// Panics if `prediction` is empty.
+fn evaluate_prediction(hits: &mut u32, dog: &Dog, prediction: &[f64]) -> (Color, bool) {
+    // Ground truth comes from the dog itself ...
+    let actual_color = dog.color;
+    // ... while the prediction is decoded from the model's output row.
+    let predicted_color = if prediction[0] == 1. {
+        Color::Red
+    } else {
+        Color::White
+    };
+    let accurate = predicted_color == actual_color;
+    if accurate {
+        *hits += 1;
+    }
+    (predicted_color, accurate)
+}
+
+/// Train a Gaussian Naive Bayes model on randomly generated dogs, predict the
+/// color of a second random batch, and report the last ten predictions plus
+/// the overall accuracy.
+fn main() {
+    let (training_set_size, test_set_size) = (1000, 1000);
+    // Generate all of our train and test data
+    let (training_matrix, target_matrix, test_matrix, test_dogs) =
+        generate_dog_data(training_set_size, test_set_size);
+
+    // Train!
+    let mut model = NaiveBayes::<naive_bayes::Gaussian>::new();
+    model.train(&training_matrix, &target_matrix)
+        .expect("failed to train model of dogs");
+
+    // Predict!
+    let predictions = model.predict(&test_matrix)
+        .expect("failed to predict dogs!?");
+
+    // Score how well we did. Every prediction is scored, but only the last
+    // ten (at most) are printed; an ellipsis stands in for the rest.
+    let mut hits = 0;
+    let unprinted_total = test_set_size.saturating_sub(10) as usize;
+    if unprinted_total > 0 {
+        println!("...");
+    }
+
+    for (index, (dog, prediction)) in
+        test_dogs.iter().zip(predictions.iter_rows()).enumerate() {
+        // `evaluate_prediction` hands back the color decoded from the model's
+        // output row, so that is the *predicted* color; the dog's own color
+        // is the *actual* one.
+        let (predicted_color, accurate) = evaluate_prediction(&mut hits, dog, prediction);
+        if index >= unprinted_total {
+            println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}",
+                     predicted_color, dog.color, accurate);
+        }
+    }
+
+    println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size,
+             (f64::from(hits))/(f64::from(test_set_size)) * 100.);
+}