diff --git a/examples/README.md b/examples/README.md index 0010cf55..cee33482 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,5 +1,4 @@ -Sample Codes with rusty-machine -=============================== +# Examples with rusty-machine This directory gathers fully-fledged programs, each using a piece of `rusty-machine`'s API. @@ -9,6 +8,7 @@ This directory gathers fully-fledged programs, each using a piece of * [K-Means](#k-means) * [SVM](#svm) * [Neural Networks](#neural-networks) +* [Naïve Bayes](#naïve-bayes) ## The Examples @@ -123,3 +123,45 @@ Got Expected Hits: 4, Misses: 0 Accuracy: 100% ``` + +### Naïve Bayes + +#### Dog Classification + +Suppose we have a population composed of red dogs and white dogs, +whose friendliness, furriness, and speed can be measured. In this +example we train a Naïve Bayes model to determine whether +a dog is white or red. + +The group of white dogs is friendlier, furrier, and slower than +the red dogs. Given the color of a dog, friendliness, furriness, +and speed are independent of each other (a requirement of the Naïve +Bayes model). + +In the example code we will generate our own data and then train +our model using it. This is a common technique used to validate +a model. We generate the data by sampling each of the dog's features +from Gaussian random variables. We will have a total of 6 Gaussian +random variables representing three features for both colors of dog. +As we are using Gaussian random variables we will use a Gaussian +Naïve Bayes model. Once we have generated our data we will convert +it into `Matrix` structures and train our model. + + +Sample run: + +``` +$ cargo run --example naive_bayes_dogs +... +Predicted: Red; Actual: Red; Accurate? true +Predicted: Red; Actual: Red; Accurate? true +Predicted: White; Actual: Red; Accurate? false +Predicted: Red; Actual: White; Accurate? false +Predicted: Red; Actual: Red; Accurate? true +Predicted: White; Actual: White; Accurate? 
true
+Predicted: White; Actual: White; Accurate? true
+Predicted: White; Actual: White; Accurate? true
+Predicted: White; Actual: White; Accurate? true
+Predicted: Red; Actual: Red; Accurate? true
+Accuracy: 822/1000 = 82.2%
+```
diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs
new file mode 100644
index 00000000..2e57de54
--- /dev/null
+++ b/examples/naive_bayes_dogs.rs
@@ -0,0 +1,182 @@
+extern crate rusty_machine;
+extern crate rand;
+
+use rand::Rand;
+use rand::distributions::Sample;
+use rand::distributions::normal::Normal;
+use rusty_machine::learning::naive_bayes::{self, NaiveBayes};
+use rusty_machine::linalg::{Matrix, BaseMatrix};
+use rusty_machine::learning::SupModel;
+
+
+/// The class the model predicts: a dog's coat color.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum Color {
+    Red,
+    White,
+}
+
+/// A dog's true color together with its three measured traits.
+#[derive(Clone, Debug)]
+struct Dog {
+    color: Color,
+    friendliness: f64,
+    furriness: f64,
+    speed: f64,
+}
+
+impl Rand for Dog {
+    /// Generate a random dog.
+    ///
+    /// The color is picked by a coin flip; each trait is then sampled from
+    /// the normal distribution belonging to that color.
+    fn rand<R: rand::Rng>(rng: &mut R) -> Self {
+        // Friendliness, furriness, and speed are normally distributed and
+        // (given color:) independent.
+        let mut red_dog_friendliness = Normal::new(0., 1.);
+        let mut red_dog_furriness = Normal::new(0., 1.);
+        let mut red_dog_speed = Normal::new(0., 1.);
+
+        let mut white_dog_friendliness = Normal::new(1., 1.);
+        let mut white_dog_furriness = Normal::new(1., 1.);
+        let mut white_dog_speed = Normal::new(-1., 1.);
+
+        // Flip a coin to decide whether to generate a red or white dog.
+        let coin: f64 = rng.gen();
+        let color = if coin < 0.5 { Color::Red } else { Color::White };
+
+        match color {
+            Color::Red => {
+                Dog {
+                    color: Color::Red,
+                    // sample from our normal distributions for each trait
+                    friendliness: red_dog_friendliness.sample(rng),
+                    furriness: red_dog_furriness.sample(rng),
+                    speed: red_dog_speed.sample(rng),
+                }
+            },
+            Color::White => {
+                Dog {
+                    color: Color::White,
+                    friendliness: white_dog_friendliness.sample(rng),
+                    furriness: white_dog_furriness.sample(rng),
+                    speed: white_dog_speed.sample(rng),
+                }
+            },
+        }
+    }
+}
+
+/// Build a matrix with one row of trait measurements per dog.
+fn trait_matrix(dogs: &[Dog]) -> Matrix<f64> {
+    let data: Vec<f64> = dogs.iter()
+        .flat_map(|dog| vec![dog.friendliness, dog.furriness, dog.speed])
+        .collect();
+    data.chunks(3).collect()
+}
+
+/// Generate a training set and a test set of random dogs.
+///
+/// Returns, in order: the training inputs (one row of trait measurements per
+/// training dog), the training targets (one row per training dog, `[1, 0]`
+/// for red and `[0, 1]` for white), the test inputs (one row of trait
+/// measurements per test dog), and the test dogs themselves so that
+/// predictions can be checked against their true colors.
+fn generate_dog_data(training_set_size: u32, test_set_size: u32)
+                     -> (Matrix<f64>, Matrix<f64>, Matrix<f64>, Vec<Dog>) {
+    let mut randomness = rand::StdRng::new()
+        .expect("we should be able to get an RNG");
+    let rng = &mut randomness;
+
+    // We'll train the model on these dogs
+    let training_dogs = (0..training_set_size)
+        .map(|_| { Dog::rand(rng) })
+        .collect::<Vec<_>>();
+
+    // ... and then use the model to make predictions about these dogs' color
+    // given only their trait measurements.
+    let test_dogs = (0..test_set_size)
+        .map(|_| { Dog::rand(rng) })
+        .collect::<Vec<_>>();
+
+    // The model's `.train` method will take two matrices, each with a row for
+    // each dog in the training set: the rows in the first matrix contain the
+    // trait measurements; the rows in the second are either [1, 0] or [0, 1]
+    // to indicate color.
+    let training_matrix = trait_matrix(&training_dogs);
+    let target_data: Vec<f64> = training_dogs.iter()
+        .flat_map(|dog| match dog.color {
+            Color::Red => vec![1., 0.],
+            Color::White => vec![0., 1.],
+        })
+        .collect();
+    let target_matrix: Matrix<f64> = target_data.chunks(2).collect();
+
+    // Build another matrix for the test set of dogs to make predictions about.
+    let test_matrix = trait_matrix(&test_dogs);
+
+    (training_matrix, target_matrix, test_matrix, test_dogs)
+}
+
+/// Score one prediction against the dog it was made for.
+///
+/// `prediction` is the model's output row for `dog`: a first entry of `1.` is
+/// read as red, anything else as white. `hits` is incremented when the
+/// predicted color matches the dog's true color.
+///
+/// Returns the predicted color and whether the prediction was accurate.
+///
+/// # Panics
+///
+/// Panics if `prediction` is empty.
+fn evaluate_prediction(hits: &mut u32, dog: &Dog, prediction: &[f64]) -> (Color, bool) {
+    let predicted_color = if prediction[0] == 1. {
+        Color::Red
+    } else {
+        Color::White
+    };
+    let actual_color = dog.color;
+    let accurate = predicted_color == actual_color;
+    if accurate {
+        *hits += 1;
+    }
+    (predicted_color, accurate)
+}
+
+/// Train a Gaussian Naïve Bayes model on generated dogs, then print the last
+/// ten test predictions and the overall accuracy on the test set.
+fn main() {
+    let (training_set_size, test_set_size) = (1000, 1000);
+    // Generate all of our train and test data
+    let (training_matrix, target_matrix, test_matrix, test_dogs) =
+        generate_dog_data(training_set_size, test_set_size);
+
+    // Train!
+    let mut model = NaiveBayes::<naive_bayes::Gaussian>::new();
+    model.train(&training_matrix, &target_matrix)
+        .expect("failed to train model of dogs");
+
+    // Predict!
+    let predictions = model.predict(&test_matrix)
+        .expect("failed to predict dogs!?");
+
+    // Score how well we did.
+    let mut hits = 0;
+    let unprinted_total = test_set_size.saturating_sub(10) as usize;
+    for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).take(unprinted_total) {
+        evaluate_prediction(&mut hits, dog, prediction);
+    }
+
+    if unprinted_total > 0 {
+        println!("...");
+    }
+
+    for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).skip(unprinted_total) {
+        let (predicted_color, accurate) = evaluate_prediction(&mut hits, dog, prediction);
+        println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}",
+                 predicted_color, dog.color, accurate);
+    }
+
+    println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size,
+             (f64::from(hits))/(f64::from(test_set_size)) * 100.);
+}