Skip to content
This repository was archived by the owner on Jul 16, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ script:
- cargo test --verbose
- cargo build --features stats
- cargo test --features stats
- cargo build --features datasets
- cargo test --features datasets
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ license = "MIT"

[features]
stats = []
datasets = []

[dependencies]
num = { version = "0.1.35", default-features = false }
Expand Down
191 changes: 191 additions & 0 deletions src/datasets/iris.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
use rulinalg::matrix::Matrix;
use rulinalg::vector::Vector;

use super::Dataset;

/// Loads the iris dataset.
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This description is great!

///
/// The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant.
///
/// ## Attribute Information
///
/// ### Data
///
/// ``Matrix<f64>`` contains the following columns.
///
/// - sepal length in cm
/// - sepal width in cm
/// - petal length in cm
/// - petal width in cm
///
/// ### Target
///
/// ``Vector<usize>`` contains numbers corresponding to iris species:
///
/// - ``0``: Iris Setosa
/// - ``1``: Iris Versicolour
/// - ``2``: Iris Virginica
///
/// Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml].
/// Irvine, CA: University of California, School of Information and Computer Science.
pub fn load() -> Dataset<Matrix<f64>, Vector<usize>> {
// Feature matrix written out inline: one `;`-separated row per sample,
// four values per row (the measurement columns listed in the doc comment).
// NOTE(review): rows are presumably paired positionally with `target` below
// (row i <-> target[i]) — confirm against callers.
let data: Matrix<f64> = matrix![5.1, 3.5, 1.4, 0.2;
4.9, 3.0, 1.4, 0.2;
4.7, 3.2, 1.3, 0.2;
4.6, 3.1, 1.5, 0.2;
5.0, 3.6, 1.4, 0.2;
5.4, 3.9, 1.7, 0.4;
4.6, 3.4, 1.4, 0.3;
5.0, 3.4, 1.5, 0.2;
4.4, 2.9, 1.4, 0.2;
4.9, 3.1, 1.5, 0.1;
5.4, 3.7, 1.5, 0.2;
4.8, 3.4, 1.6, 0.2;
4.8, 3.0, 1.4, 0.1;
4.3, 3.0, 1.1, 0.1;
5.8, 4.0, 1.2, 0.2;
5.7, 4.4, 1.5, 0.4;
5.4, 3.9, 1.3, 0.4;
5.1, 3.5, 1.4, 0.3;
5.7, 3.8, 1.7, 0.3;
5.1, 3.8, 1.5, 0.3;
5.4, 3.4, 1.7, 0.2;
5.1, 3.7, 1.5, 0.4;
4.6, 3.6, 1.0, 0.2;
5.1, 3.3, 1.7, 0.5;
4.8, 3.4, 1.9, 0.2;
5.0, 3.0, 1.6, 0.2;
5.0, 3.4, 1.6, 0.4;
5.2, 3.5, 1.5, 0.2;
5.2, 3.4, 1.4, 0.2;
4.7, 3.2, 1.6, 0.2;
4.8, 3.1, 1.6, 0.2;
5.4, 3.4, 1.5, 0.4;
5.2, 4.1, 1.5, 0.1;
5.5, 4.2, 1.4, 0.2;
4.9, 3.1, 1.5, 0.1;
5.0, 3.2, 1.2, 0.2;
5.5, 3.5, 1.3, 0.2;
4.9, 3.1, 1.5, 0.1;
4.4, 3.0, 1.3, 0.2;
5.1, 3.4, 1.5, 0.2;
5.0, 3.5, 1.3, 0.3;
4.5, 2.3, 1.3, 0.3;
4.4, 3.2, 1.3, 0.2;
5.0, 3.5, 1.6, 0.6;
5.1, 3.8, 1.9, 0.4;
4.8, 3.0, 1.4, 0.3;
5.1, 3.8, 1.6, 0.2;
4.6, 3.2, 1.4, 0.2;
5.3, 3.7, 1.5, 0.2;
5.0, 3.3, 1.4, 0.2;
7.0, 3.2, 4.7, 1.4;
6.4, 3.2, 4.5, 1.5;
6.9, 3.1, 4.9, 1.5;
5.5, 2.3, 4.0, 1.3;
6.5, 2.8, 4.6, 1.5;
5.7, 2.8, 4.5, 1.3;
6.3, 3.3, 4.7, 1.6;
4.9, 2.4, 3.3, 1.0;
6.6, 2.9, 4.6, 1.3;
5.2, 2.7, 3.9, 1.4;
5.0, 2.0, 3.5, 1.0;
5.9, 3.0, 4.2, 1.5;
6.0, 2.2, 4.0, 1.0;
6.1, 2.9, 4.7, 1.4;
5.6, 2.9, 3.6, 1.3;
6.7, 3.1, 4.4, 1.4;
5.6, 3.0, 4.5, 1.5;
5.8, 2.7, 4.1, 1.0;
6.2, 2.2, 4.5, 1.5;
5.6, 2.5, 3.9, 1.1;
5.9, 3.2, 4.8, 1.8;
6.1, 2.8, 4.0, 1.3;
6.3, 2.5, 4.9, 1.5;
6.1, 2.8, 4.7, 1.2;
6.4, 2.9, 4.3, 1.3;
6.6, 3.0, 4.4, 1.4;
6.8, 2.8, 4.8, 1.4;
6.7, 3.0, 5.0, 1.7;
6.0, 2.9, 4.5, 1.5;
5.7, 2.6, 3.5, 1.0;
5.5, 2.4, 3.8, 1.1;
5.5, 2.4, 3.7, 1.0;
5.8, 2.7, 3.9, 1.2;
6.0, 2.7, 5.1, 1.6;
5.4, 3.0, 4.5, 1.5;
6.0, 3.4, 4.5, 1.6;
6.7, 3.1, 4.7, 1.5;
6.3, 2.3, 4.4, 1.3;
5.6, 3.0, 4.1, 1.3;
5.5, 2.5, 4.0, 1.3;
5.5, 2.6, 4.4, 1.2;
6.1, 3.0, 4.6, 1.4;
5.8, 2.6, 4.0, 1.2;
5.0, 2.3, 3.3, 1.0;
5.6, 2.7, 4.2, 1.3;
5.7, 3.0, 4.2, 1.2;
5.7, 2.9, 4.2, 1.3;
6.2, 2.9, 4.3, 1.3;
5.1, 2.5, 3.0, 1.1;
5.7, 2.8, 4.1, 1.3;
6.3, 3.3, 6.0, 2.5;
5.8, 2.7, 5.1, 1.9;
7.1, 3.0, 5.9, 2.1;
6.3, 2.9, 5.6, 1.8;
6.5, 3.0, 5.8, 2.2;
7.6, 3.0, 6.6, 2.1;
4.9, 2.5, 4.5, 1.7;
7.3, 2.9, 6.3, 1.8;
6.7, 2.5, 5.8, 1.8;
7.2, 3.6, 6.1, 2.5;
6.5, 3.2, 5.1, 2.0;
6.4, 2.7, 5.3, 1.9;
6.8, 3.0, 5.5, 2.1;
5.7, 2.5, 5.0, 2.0;
5.8, 2.8, 5.1, 2.4;
6.4, 3.2, 5.3, 2.3;
6.5, 3.0, 5.5, 1.8;
7.7, 3.8, 6.7, 2.2;
7.7, 2.6, 6.9, 2.3;
6.0, 2.2, 5.0, 1.5;
6.9, 3.2, 5.7, 2.3;
5.6, 2.8, 4.9, 2.0;
7.7, 2.8, 6.7, 2.0;
6.3, 2.7, 4.9, 1.8;
6.7, 3.3, 5.7, 2.1;
7.2, 3.2, 6.0, 1.8;
6.2, 2.8, 4.8, 1.8;
6.1, 3.0, 4.9, 1.8;
6.4, 2.8, 5.6, 2.1;
7.2, 3.0, 5.8, 1.6;
7.4, 2.8, 6.1, 1.9;
7.9, 3.8, 6.4, 2.0;
6.4, 2.8, 5.6, 2.2;
6.3, 2.8, 5.1, 1.5;
6.1, 2.6, 5.6, 1.4;
7.7, 3.0, 6.1, 2.3;
6.3, 3.4, 5.6, 2.4;
6.4, 3.1, 5.5, 1.8;
6.0, 3.0, 4.8, 1.8;
6.9, 3.1, 5.4, 2.1;
6.7, 3.1, 5.6, 2.4;
6.9, 3.1, 5.1, 2.3;
5.8, 2.7, 5.1, 1.9;
6.8, 3.2, 5.9, 2.3;
6.7, 3.3, 5.7, 2.5;
6.7, 3.0, 5.2, 2.3;
6.3, 2.5, 5.0, 1.9;
6.5, 3.0, 5.2, 2.0;
6.2, 3.4, 5.4, 2.3;
5.9, 3.0, 5.1, 1.8];
// Class labels, grouped by class: all `0`s first, then all `1`s, then all `2`s.
let target: Vec<usize> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2];

// Bundle the feature matrix with the labels; the label `Vec` is wrapped in a
// `Vector` to match the declared return type.
Dataset{ data: data,
target: Vector::new(target) }
}
25 changes: 25 additions & 0 deletions src/datasets/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use std::fmt::Debug;

/// Module for the iris dataset (see `iris::load`).
pub mod iris;

/// Dataset container.
///
/// Pairs explanatory data (`data`, of type `D`) with its target values
/// (`target`, of type `T`). Both type parameters must implement `Clone` and
/// `Debug`. The fields are private; read them through `data()` and `target()`.
#[derive(Clone, Debug)]
pub struct Dataset<D, T> where D: Clone + Debug, T: Clone + Debug {
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this makes sense for now. We might want to be more strict in future if we want to be generic over DataSets. However, this is something that I don't think we will ever want to do.


// Explanatory variables (features); borrowed out by `data()`.
data: D,
// Objective variable (target); borrowed out by `target()`.
target: T
}

impl<D, T> Dataset<D, T> where D: Clone + Debug, T: Clone + Debug {

/// Returns explanatory variable (features)
pub fn data(&self) -> &D {
&self.data
}

/// Returns objective variable (target)
pub fn target(&self) -> &T {
&self.target
}
}
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,7 @@ pub mod analysis {
pub mod cross_validation;
pub mod score;
}

/// Bundled datasets; compiled only when the `datasets` feature is enabled.
#[cfg(feature = "datasets")]
pub mod datasets;
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should feature gate this. My thinking is that if we have a few datasets users will not want to download all of this data by default.

To do this:

18 changes: 18 additions & 0 deletions tests/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
extern crate rusty_machine as rm;


// Cargo exposes enabled features to rustc as `feature = "<name>"`; a bare
// `cfg(datasets)` is never set, which silently excluded this module from
// every test run.
#[cfg(feature = "datasets")]
mod test {

    use rm::datasets::iris;
    use rm::linalg::BaseMatrix;

    /// The iris loader must return a 150 x 4 feature matrix and 150 labels.
    #[test]
    fn test_iris() {
        // `iris::load` is the loader's actual name; `load_` does not exist.
        let dt = iris::load();
        assert_eq!(dt.data().rows(), 150);
        assert_eq!(dt.data().cols(), 4);

        assert_eq!(dt.target().size(), 150);
    }
}
5 changes: 4 additions & 1 deletion tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,7 @@ pub mod learning {
pub mod optim {
mod grad_desc;
}
}
}

// Gate on the Cargo feature: features reach rustc as `feature = "datasets"`,
// so a bare `cfg(datasets)` would never include this module.
#[cfg(feature = "datasets")]
pub mod datasets;