diff --git a/Cargo.toml b/Cargo.toml index 5bd52a23b..343cf6f64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,10 @@ rand_isaac = "0.2.0" ndarray-npy = { version = "0.5", default-features = false } [workspace] -members = ["linfa-clustering"] +members = [ + "linfa-clustering", + "linfa-reduction" +] [profile.release] opt-level = 3 diff --git a/linfa-clustering/Cargo.toml b/linfa-clustering/Cargo.toml index c9e903bf8..1e9bae844 100644 --- a/linfa-clustering/Cargo.toml +++ b/linfa-clustering/Cargo.toml @@ -16,7 +16,10 @@ categories = ["algorithms", "mathematics", "science"] ndarray = { version = "0.13" , features = ["rayon", "serde", "approx"]} ndarray-rand = "0.11" ndarray-stats = "0.3" +ndarray-linalg = { version = "0.12", features = ["openblas"] } +sprs = "0.7" serde = { version = "1", features = ["derive"] } +num-traits = "0.1.32" [dev-dependencies] rand_isaac = "0.2.0" diff --git a/linfa-clustering/examples/kmeans.rs b/linfa-clustering/examples/kmeans.rs index a9fb71e9c..dde643b64 100644 --- a/linfa-clustering/examples/kmeans.rs +++ b/linfa-clustering/examples/kmeans.rs @@ -37,3 +37,4 @@ fn main() { ) .expect("Failed to write .npy file"); } + diff --git a/linfa-clustering/src/lib.rs b/linfa-clustering/src/lib.rs index 9f262177f..d0ebc7dd7 100644 --- a/linfa-clustering/src/lib.rs +++ b/linfa-clustering/src/lib.rs @@ -18,6 +18,10 @@ //! Implementation choices, algorithmic details and a tutorial can be found [here](struct.KMeans.html). //! //! Check [here](https://github.com/LukeMathWalker/clustering-benchmarks) for extensive benchmarks against `scikit-learn`'s K-means implementation. 
+ +extern crate ndarray; +extern crate ndarray_linalg; + mod dbscan; #[allow(clippy::new_ret_no_self)] mod k_means; diff --git a/linfa-reduction/Cargo.toml b/linfa-reduction/Cargo.toml new file mode 100644 index 000000000..fcce021a9 --- /dev/null +++ b/linfa-reduction/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "linfa-reduction" +version = "0.1.0" +authors = ["Lorenz Schmidt "] +description = "A collection of dimensionality reduction techniques" +edition = "2018" +license = "MIT/Apache-2.0" + +repository = "https://github.com/LukeMathWalker/linfa" +readme = "README.md" + +keywords = ["dimensionality", "machine-learning", "linfa", "spectral", "unsupervised"] +categories = ["algorithms", "mathematics", "science"] + +[dependencies] +ndarray = { version = "0.13" , features = ["rayon", "serde", "approx"]} +ndarray-rand = "0.11" +ndarray-stats = "0.3" +ndarray-linalg = { version = "0.12", features = ["openblas"] } +sprs = "0.7" +serde = { version = "1", features = ["derive"] } +num-traits = "0.1.32" +hnsw = "0.6" +space = "0.10" + +[dev-dependencies] +rand_isaac = "0.2.0" +ndarray-npy = { version = "0.5", default-features = false } +criterion = "0.3" +serde_json = "1" +approx = "0.3" +linfa-clustering = { path = "../linfa-clustering" } diff --git a/linfa-reduction/examples/diffusion_map.rs b/linfa-reduction/examples/diffusion_map.rs new file mode 100644 index 000000000..5c474eb9b --- /dev/null +++ b/linfa-reduction/examples/diffusion_map.rs @@ -0,0 +1,34 @@ +use linfa_reduction::utils::generate_convoluted_rings;; +use linfa_reduction::kernel::{IntoKernel, SparsePolynomialKernel}; +use ndarray_npy::write_npy; +use ndarray_rand::rand::SeedableRng; +use rand_isaac::Isaac64Rng; + +// A routine diffusion map task: build a synthetic dataset, compute a low-dimensional embedding +// and save both training data and the embedding to disk.
+fn main() { + // Our random number generator, seeded for reproducibility + let mut rng = Isaac64Rng::seed_from_u64(42); + + // Total number of data points to sample across the rings + let n = 3000; + + // generate three convoluted rings + let dataset = generate_convoluted_rings(&[(0.0, 3.0), (10.0, 13.0), (20.0, 23.0)], n, &mut rng); + + // generate sparse polynomial kernel with k = 14, c = 5 and d = 2 + let diffusion_map = SparsePolynomialKernel::new(&dataset, 14, 5.0, 2.0) + .reduce_fixed(4); + + // get embedding + let embedding = diffusion_map.embedding(); + + // Save to disk our dataset and the learned embedding + // We use the `npy` format for compatibility with NumPy + write_npy("diffusion_map_dataset.npy", dataset).expect("Failed to write .npy file"); + write_npy( + "diffusion_map_embedding.npy", + embedding + ) + .expect("Failed to write .npy file"); +} diff --git a/linfa-reduction/examples/pca.rs b/linfa-reduction/examples/pca.rs new file mode 100644 index 000000000..834ed6cd8 --- /dev/null +++ b/linfa-reduction/examples/pca.rs @@ -0,0 +1,34 @@ +use linfa_clustering::generate_blobs; +use linfa_reduction::PrincipalComponentAnalysis; +use ndarray::array; +use ndarray_npy::write_npy; +use ndarray_rand::rand::SeedableRng; +use rand_isaac::Isaac64Rng; + +// A routine PCA task: build a synthetic dataset, fit the model on it +// and save both training data and the projected embedding to disk.
+fn main() { + // Our random number generator, seeded for reproducibility + let mut rng = Isaac64Rng::seed_from_u64(42); + + // For each our expected centroids, generate `n` data points around it (a "blob") + let expected_centroids = array![[10., 10.], [1., 12.], [20., 30.], [-20., 30.],]; + let n = 10; + let dataset = generate_blobs(n, &expected_centroids, &mut rng); + + dbg!(&dataset); + + let embedding = PrincipalComponentAnalysis::fit(dataset.clone(), 1) + .predict(&dataset); + + dbg!(&embedding); + + // Save to disk our dataset (and the cluster label assigned to each observation) + // We use the `npy` format for compatibility with NumPy + write_npy("pca_dataset.npy", dataset).expect("Failed to write .npy file"); + write_npy( + "pca_embedding.npy", + embedding + ) + .expect("Failed to write .npy file"); +} diff --git a/linfa-reduction/src/diffusion_map/algorithms.rs b/linfa-reduction/src/diffusion_map/algorithms.rs new file mode 100644 index 000000000..333c113f3 --- /dev/null +++ b/linfa-reduction/src/diffusion_map/algorithms.rs @@ -0,0 +1,113 @@ +use ndarray::{Array1, Array2}; +use ndarray_linalg::{TruncatedOrder, lobpcg::LobpcgResult, lobpcg, eigh::EighInto, lapack::UPLO, Scalar}; +use ndarray_rand::{rand_distr::Uniform, RandomExt}; +use num_traits::NumCast; + +use crate::Float; +use crate::kernel::{Kernel, IntoKernel}; +use super::hyperparameters::DiffusionMapHyperParams; + +pub struct DiffusionMap { + hyperparameters: DiffusionMapHyperParams, + embedding: Array2, + eigvals: Array1 +} + +impl DiffusionMap { + pub fn project( + hyperparameters: DiffusionMapHyperParams, + kernel: impl IntoKernel + ) -> Self { + // compute spectral embedding with diffusion map + let (embedding, eigvals) = compute_diffusion_map(kernel.into_kernel(), hyperparameters.steps(), 0.0, hyperparameters.embedding_size(), None); + + DiffusionMap { + hyperparameters, + embedding, + eigvals + } + } + + /// Return the hyperparameters used to train this spectral mode instance. 
+ pub fn hyperparameters(&self) -> &DiffusionMapHyperParams { + &self.hyperparameters + } + + /// Estimate the number of clusters in this embedding (very crude for now) + pub fn estimate_clusters(&self) -> usize { + let mean = self.eigvals.sum() / NumCast::from(self.eigvals.len()).unwrap(); + self.eigvals.iter().filter(|x| *x > &mean).count() + 1 + } + + /// Return a copy of the eigenvalues + pub fn eigvals(&self) -> Array1 { + self.eigvals.clone() + } + + pub fn embedding(&self) -> Array2 { + self.embedding.clone() + } +} + +fn compute_diffusion_map(kernel: impl Kernel, steps: usize, alpha: f32, embedding_size: usize, guess: Option>) -> (Array2, Array1) { + assert!(embedding_size < kernel.size()); + + let d = kernel.sum() + .mapv(|x| x.recip()); + + let d2 = d.mapv(|x| x.powf(NumCast::from(0.5 + alpha).unwrap())); + + // use full eigenvalue decomposition for small problem sizes + let (vals, mut vecs) = if kernel.size() < 5 * embedding_size + 1 { + let mut matrix = kernel.mul_similarity(&Array2::from_diag(&d).view()); + matrix.gencolumns_mut().into_iter().zip(d.iter()).for_each(|(mut a,b)| a *= *b); + + let (vals, vecs) = matrix.eigh_into(UPLO::Lower).unwrap(); + let (vals, vecs) = (vals.slice_move(s![..; -1]), vecs.slice_move(s![.., ..; -1])); + ( + vals.slice_move(s![1..embedding_size+1]).mapv(|x| Scalar::from_real(x)), + vecs.slice_move(s![..,1..embedding_size+1]) + ) + } else { + // calculate truncated eigenvalue decomposition + let x = guess.unwrap_or( + Array2::random((kernel.size(), embedding_size + 1), Uniform::new(0.0f64, 1.0)) + .mapv(|x| NumCast::from(x).unwrap()) + ); + + let result = lobpcg::lobpcg(|y| { + let mut y = y.to_owned(); + y.genrows_mut().into_iter().zip(d2.iter()).for_each(|(mut a,b)| a *= *b); + let mut y = kernel.mul_similarity(&y.view()); + y.genrows_mut().into_iter().zip(d2.iter()).for_each(|(mut a,b)| a *= *b); + + y + }, x, |_| {}, None, 1e-15, 200, TruncatedOrder::Largest); + + let (vals, vecs) = match result { + 
LobpcgResult::Ok(vals, vecs, _) | LobpcgResult::Err(vals, vecs, _, _) => { (vals, vecs)}, + _ => panic!("Eigendecomposition failed!") + }; + + + // cut away first eigenvalue/eigenvector + ( + vals.slice_move(s![1..]), + vecs.slice_move(s![..,1..]) + ) + }; + + let d = d.mapv(|x| x.sqrt()); + + for (mut col, val) in vecs.genrows_mut().into_iter().zip(d.iter()) { + col *= *val; + } + + let steps = NumCast::from(steps).unwrap(); + for (mut vec, val) in vecs.gencolumns_mut().into_iter().zip(vals.iter()) { + vec *= val.powf(steps); + } + + (vecs, vals) +} + diff --git a/linfa-reduction/src/diffusion_map/hyperparameters.rs b/linfa-reduction/src/diffusion_map/hyperparameters.rs new file mode 100644 index 000000000..c35dfad30 --- /dev/null +++ b/linfa-reduction/src/diffusion_map/hyperparameters.rs @@ -0,0 +1,48 @@ +pub struct DiffusionMapHyperParams { + steps: usize, + embedding_size: usize, +} + +pub struct DiffusionMapHyperParamsBuilder { + steps: usize, + embedding_size: usize, +} + +impl DiffusionMapHyperParamsBuilder { + pub fn steps(mut self, steps: usize) -> Self { + self.steps = steps; + + self + } + + pub fn build(self) -> DiffusionMapHyperParams { + DiffusionMapHyperParams::build( + self.steps, + self.embedding_size, + ) + } +} + +impl DiffusionMapHyperParams { + pub fn new(embedding_size: usize) -> DiffusionMapHyperParamsBuilder { + DiffusionMapHyperParamsBuilder { + steps: 10, + embedding_size + } + } + + pub fn steps(&self) -> usize { + self.steps + } + + pub fn embedding_size(&self) -> usize { + self.embedding_size + } + + pub fn build(steps: usize, embedding_size: usize) -> Self { + DiffusionMapHyperParams { + steps, + embedding_size, + } + } +} diff --git a/linfa-reduction/src/diffusion_map/mod.rs b/linfa-reduction/src/diffusion_map/mod.rs new file mode 100644 index 000000000..2d0728a5d --- /dev/null +++ b/linfa-reduction/src/diffusion_map/mod.rs @@ -0,0 +1,5 @@ +mod algorithms; +mod hyperparameters; + +pub use algorithms::*; +pub use hyperparameters::*; 
diff --git a/linfa-reduction/src/kernel/gaussian.rs b/linfa-reduction/src/kernel/gaussian.rs new file mode 100644 index 000000000..20de6f321 --- /dev/null +++ b/linfa-reduction/src/kernel/gaussian.rs @@ -0,0 +1,59 @@ +use num_traits::NumCast; +use ndarray::{Array2, ArrayView1}; +use sprs::CsMat; +use std::iter::Sum; + +use crate::Float; +use crate::kernel::{IntoKernel, dense_from_fn, sparse_from_fn}; + +fn kernel(a: ArrayView1, b: ArrayView1, eps: A) -> A { + let distance = a.iter().zip(b.iter()).map(|(x,y)| (*x-*y)*(*x-*y)) + .sum::(); + + (-distance / eps).exp() +} + +pub struct GaussianKernel { + pub data: Array2 +} + +impl> GaussianKernel { + pub fn new(dataset: &Array2, eps: f32) -> Self { + let eps = NumCast::from(eps).unwrap(); + + GaussianKernel { + data: dense_from_fn(dataset, |a,b| kernel(a,b,eps)) + } + } +} + +impl IntoKernel for GaussianKernel { + type IntoKer = Array2; + + fn into_kernel(self) -> Self::IntoKer { + self.data + } +} + +pub struct SparseGaussianKernel { + similarity: CsMat +} + +impl SparseGaussianKernel { + pub fn new(dataset: &Array2, k: usize, eps: f32) -> Self { + let eps = NumCast::from(eps).unwrap(); + + SparseGaussianKernel { + similarity: sparse_from_fn(dataset, k, |a,b| kernel(a,b,eps)) + } + } +} + +impl IntoKernel for SparseGaussianKernel { + type IntoKer = CsMat; + + fn into_kernel(self) -> Self::IntoKer { + self.similarity + } +} + diff --git a/linfa-reduction/src/kernel/mod.rs b/linfa-reduction/src/kernel/mod.rs new file mode 100644 index 000000000..808cbdd03 --- /dev/null +++ b/linfa-reduction/src/kernel/mod.rs @@ -0,0 +1,140 @@ +pub mod sparse; +pub mod gaussian; +pub mod polynomial; + +pub use gaussian::{GaussianKernel, SparseGaussianKernel}; +pub use polynomial::{PolynomialKernel, SparsePolynomialKernel}; + +use ndarray::prelude::*; +use ndarray::Data; +use sprs::CsMat; + +use crate::Float; +use crate::diffusion_map::{DiffusionMapHyperParams, DiffusionMap}; + +/// Symmetric kernel function, can be sparse or dense +/// 
+/// Required functions are: +/// * `mul_similarity`: performs a matrix multiplication with `rhs`, which is a +/// normalized (sum of columns and rows equal one) symmetric similarity matrix +/// * `size`: returns the number of data-points +pub trait Kernel { + fn mul_similarity(&self, rhs: &ArrayView2) -> Array2; + fn sum(&self) -> Array1; + fn size(&self) -> usize; + +} + +impl, A: Float> Kernel for ArrayBase { + fn mul_similarity(&self, rhs: &ArrayView2) -> Array2 { + self.dot(rhs) + } + + fn sum(&self) -> Array1 { + self.sum_axis(Axis(1)) + } + + fn size(&self) -> usize { + assert_eq!(self.ncols(), self.nrows()); + + self.ncols() + } +} + +impl Kernel for CsMat { + fn mul_similarity(&self, rhs: &ArrayView2) -> Array2 { + self * rhs + } + + fn sum(&self) -> Array1 { + let mut sum = Array1::zeros(self.cols()); + + for (val, i) in self.iter() { + let (_, col) = i; + sum[col] += *val; + } + + sum + } + + fn size(&self) -> usize { + assert_eq!(self.cols(), self.rows()); + + self.cols() + } +} + +/// Converts an object into a kernel +pub trait IntoKernel { + type IntoKer: Kernel; + + fn into_kernel(self) -> Self::IntoKer; + + fn reduce_fixed(self, embedding_size: usize) -> DiffusionMap + where Self: Sized + { + let params = DiffusionMapHyperParams::new(embedding_size) + .steps(1) + .build(); + + DiffusionMap::project(params, self) + } +} + +pub fn dense_from_fn, ArrayView1) -> A>(dataset: &Array2, fnc: T) -> Array2 { + let n_observations = dataset.len_of(Axis(0)); + let mut similarity = Array2::eye(n_observations); + + for i in 0..n_observations { + for j in 0..n_observations { + let a = dataset.row(i); + let b = dataset.row(j); + + similarity[(i, j)] = fnc(a, b); + } + } + + similarity +} + +pub fn sparse_from_fn, ArrayView1) -> A>(dataset: &Array2, k: usize, fnc: T) -> CsMat { + let mut data = sparse::adjacency_matrix(dataset, k); + + for (i, mut vec) in data.outer_iterator_mut().enumerate() { + for (j, val) in vec.iter_mut() { + let a = dataset.row(i); + 
let b = dataset.row(j); + + *val = fnc(a, b); + } + } + + data +} + +#[cfg(test)] +mod tests { + use super::Kernel; + use sprs::CsMatBase; + use ndarray::{Array1, Array2}; + use ndarray_rand::{rand_distr::Uniform, RandomExt}; + + #[test] + fn test_sprs() { + let a: Array2 = Array2::random((10, 10), Uniform::new(0., 1.)); + let a = CsMatBase::csr_from_dense(a.view(), 1e-5); + + assert_eq!(a.size(), 10); + assert_eq!(a.mul_similarity(&Array2::eye(10).view()), a.to_dense()); + } + + #[test] + fn test_dense() { + let id: Array2 = Array2::eye(10); + + assert_eq!(Kernel::sum(&id), Array1::ones(10)); + + assert_eq!(id.size(), 10); + } +} diff --git a/linfa-reduction/src/kernel/polynomial.rs b/linfa-reduction/src/kernel/polynomial.rs new file mode 100644 index 000000000..0052ef305 --- /dev/null +++ b/linfa-reduction/src/kernel/polynomial.rs @@ -0,0 +1,58 @@ +use num_traits::NumCast; +use ndarray::{Array2, ArrayView1}; +use sprs::CsMat; +use std::iter::Sum; + +use crate::Float; +use crate::kernel::{IntoKernel, dense_from_fn, sparse_from_fn}; + +fn kernel(a: ArrayView1, b: ArrayView1, c: A, d: A) -> A { + (a.dot(&b) + c).powf(d) +} + +pub struct PolynomialKernel { + pub data: Array2 +} + +impl> PolynomialKernel { + pub fn new(dataset: &Array2, c: f32, d: f32) -> Self { + let c = NumCast::from(c).unwrap(); + let d = NumCast::from(d).unwrap(); + + PolynomialKernel { + data: dense_from_fn(dataset, |a,b| kernel(a,b,c,d)) + } + } +} + +impl IntoKernel for PolynomialKernel { + type IntoKer = Array2; + + fn into_kernel(self) -> Self::IntoKer { + self.data + } +} + +pub struct SparsePolynomialKernel { + similarity: CsMat +} + +impl SparsePolynomialKernel { + pub fn new(dataset: &Array2, k: usize, c: A, d: A) -> Self { + let c = NumCast::from(c).unwrap(); + let d = NumCast::from(d).unwrap(); + + SparsePolynomialKernel { + similarity: sparse_from_fn(dataset, k, |a,b| kernel(a,b,c,d)) + } + } +} + +impl IntoKernel for SparsePolynomialKernel { + 
type IntoKer = CsMat; + + fn into_kernel(self) -> Self::IntoKer { + self.similarity + } +} + diff --git a/linfa-reduction/src/kernel/sparse.rs b/linfa-reduction/src/kernel/sparse.rs new file mode 100644 index 000000000..8a7dd2bf4 --- /dev/null +++ b/linfa-reduction/src/kernel/sparse.rs @@ -0,0 +1,95 @@ +use sprs::{CsMat, CsMatBase}; +use hnsw::{Searcher, HNSW, Params}; +use space::{Neighbor, MetricPoint}; +use ndarray::{Array2, Axis, ArrayView1}; + +use num_traits::NumCast; + +use crate::Float; + +/// Implementation of euclidean distance for ndarray +struct Euclidean<'a, A>(ArrayView1<'a, A>); + +impl MetricPoint for Euclidean<'_, A> { + fn distance(&self, rhs: &Self) -> u32 { + let val = self.0 + .iter() + .zip(rhs.0.iter()) + .map(|(&a, &b)| (a - b) * (a - b)) + .sum::() + .sqrt(); + + space::f32_metric(val.to_f32().unwrap()) + } +} + +/// Create sparse adjacency matrix from dense dataset +pub fn adjacency_matrix(dataset: &Array2, k: usize) -> CsMat { + let n_points = dataset.len_of(Axis(0)); + + // ensure that the number of neighbours is at least one and less than the total number of + // points + assert!(k < n_points); + assert!(k > 0); + + let params = Params::new() + .ef_construction(k); + + let mut searcher = Searcher::default(); + let mut hnsw: HNSW> = HNSW::new_params(params); + + // insert all rows as data points into HNSW graph + for feature in dataset.genrows().into_iter() { + hnsw.insert(Euclidean(feature), &mut searcher); + } + + // allocate buffer for k neighbours (plus the points itself) + let mut neighbours = vec![Neighbor::invalid(); k + 1]; + + // allocate buffer to initialize the sparse matrix later on + // * data: we have exact #points * k positive entries + // * indptr: has structure [0,k,2k,...,#points*k] + // * indices: filled with the nearest indices + let mut data = Vec::with_capacity(n_points * (k+1)); + let mut indptr = Vec::with_capacity(n_points + 1); + //let indptr = (0..n_points+1).map(|x| x * (k+1)).collect::>(); + let mut indices = 
Vec::with_capacity(n_points * (k+1)); + indptr.push(0); + + // find neighbours for each data point + let mut added = 0; + for (m, feature) in dataset.genrows().into_iter().enumerate() { + hnsw.nearest(&Euclidean(feature), 3 * k, &mut searcher, &mut neighbours); + + //dbg!(&neighbours); + + // sort by indices + neighbours.sort_unstable(); + + indices.push(m); + data.push(NumCast::from(1.0).unwrap()); + added += 1; + + // push each index into the indices array + for n in &neighbours { + if m != n.index { + indices.push(n.index); + data.push(NumCast::from(1.0).unwrap()); + added += 1; + } + } + + indptr.push(added); + } + + + // create CSR matrix from data, indptr and indices + let mat = CsMatBase::new((n_points, n_points), indptr, indices, data); + let mut mat = &mat + &mat.transpose_view(); + + // ensure that all values are one + let val: A = NumCast::from(1.0).unwrap(); + mat.map_inplace(|_| val); + + mat +} diff --git a/linfa-reduction/src/lib.rs b/linfa-reduction/src/lib.rs new file mode 100644 index 000000000..db5777375 --- /dev/null +++ b/linfa-reduction/src/lib.rs @@ -0,0 +1,26 @@ +#[macro_use] extern crate ndarray; + +pub mod kernel; +pub mod diffusion_map; +pub mod pca; +pub mod utils; + +pub use diffusion_map::{DiffusionMap, DiffusionMapHyperParams}; +pub use utils::to_gaussian_similarity; +pub use pca::PrincipalComponentAnalysis; + +use ndarray::NdFloat; +use ndarray_linalg::Lapack; +use num_traits::FromPrimitive; + +pub trait Float: + NdFloat + + Lapack + + Default + + Clone + + FromPrimitive +{ +} + +impl Float for f32 {} +impl Float for f64 {} diff --git a/linfa-reduction/src/pca/algorithms.rs b/linfa-reduction/src/pca/algorithms.rs new file mode 100644 index 000000000..c58948cb9 --- /dev/null +++ b/linfa-reduction/src/pca/algorithms.rs @@ -0,0 +1,58 @@ +///! Principal Component Analysis +/// +/// Reduce dimensionality with a linear projection using Singular Value Decomposition. The data is +/// centered before applying the SVD. 
This uses TruncatedSvd from ndarray-linalg package. + +use ndarray::{ArrayBase, Array1, Array2, Ix2, Axis, DataMut}; +use ndarray_linalg::{TruncatedSvd, TruncatedOrder}; + +/// Pincipal Component Analysis +pub struct PrincipalComponentAnalysis { + embedding: Array2, + explained_variance: Array1, + mean: Array1 +} + +impl PrincipalComponentAnalysis { + pub fn fit>( + mut dataset: ArrayBase, + embedding_size: usize + ) -> Self { + // calculate mean of data and subtract it + let mean = dataset.mean_axis(Axis(0)).unwrap(); + dataset -= &mean; + + // estimate Singular Value Decomposition + let result = TruncatedSvd::new(dataset.to_owned(), TruncatedOrder::Largest) + .decompose(embedding_size) + .unwrap(); + + // explained variance is the spectral distribution of the eigenvalues + let (_, sigma, v_t) = result.values_vectors(); + let explained_variance = sigma.mapv(|x| x*x / (dataset.len() as f64 - 1.0)); + + PrincipalComponentAnalysis { + embedding: v_t, + explained_variance, + mean + } + } + + /// Given a new data points project with fitted model + pub fn predict>( + &self, + dataset: &ArrayBase + ) -> Array2 { + (dataset - &self.mean).dot(&self.embedding.t()) + } + + /// Return the amount of explained variance per element + pub fn explained_variance(&self) -> Array1 { + self.explained_variance.clone() + } + + /// Return the normalized amount of explained variance per element + pub fn explained_variance_ratio(&self) -> Array1 { + &self.explained_variance / self.explained_variance.sum() + } +} diff --git a/linfa-reduction/src/pca/mod.rs b/linfa-reduction/src/pca/mod.rs new file mode 100644 index 000000000..60025f396 --- /dev/null +++ b/linfa-reduction/src/pca/mod.rs @@ -0,0 +1,3 @@ +mod algorithms; + +pub use algorithms::*; diff --git a/linfa-reduction/src/utils.rs b/linfa-reduction/src/utils.rs new file mode 100644 index 000000000..768f0d0f1 --- /dev/null +++ b/linfa-reduction/src/utils.rs @@ -0,0 +1,83 @@ +use ndarray::{Array2, Ix2, Data, Axis, ArrayBase}; +use 
ndarray_rand::rand::Rng; +use num_traits::float::FloatConst; + +/// Computes a similarity matrix with gaussian kernel and scaling parameter `eps` +/// +/// The generated matrix is a upper triangular matrix with dimension NxN (number of observations) and contains the similarity between all permutations of observations +/// similarity +pub fn to_gaussian_similarity( + observations: &ArrayBase, Ix2>, + eps: f64, +) -> Array2 { + let n_observations = observations.len_of(Axis(0)); + let mut similarity = Array2::eye(n_observations); + + for i in 0..n_observations { + for j in 0..n_observations { + let a = observations.row(i); + let b = observations.row(j); + + let distance = a.iter().zip(b.iter()).map(|(x,y)| (x-y).powf(2.0)) + .sum::(); + + similarity[(i,j)] = (-distance / eps).exp(); + } + } + + similarity +} +/// +/// Generates a three dimension swiss roll, centered at the origin with height `height` and +/// outwards speed `speed` +pub fn generate_swissroll( + height: f64, + speed: f64, + n_points: usize, + rng: &mut impl Rng, +) -> Array2 { + let mut roll: Array2 = Array2::zeros((n_points, 3)); + + for i in 0..n_points { + let z = rng.gen_range(0.0, height); + let phi: f64 = rng.gen_range(0.0, 10.0); + //let offset: f64 = rng.gen_range(-0.5, 0.5); + let offset = 0.0; + + let x = speed * phi * phi.cos() + offset; + let y = speed * phi * phi.sin() + offset; + + roll[(i, 0)] = x; + roll[(i, 1)] = y; + roll[(i, 2)] = z; + } + roll +} + +pub fn generate_convoluted_rings( + rings: &[(f64, f64)], + n_points: usize, + rng: &mut impl Rng +) -> Array2 { + let n_points = (n_points as f32 / rings.len() as f32).ceil() as usize; + let mut array = Array2::zeros((n_points * rings.len(), 3)); + + for (n, (start, end)) in rings.into_iter().enumerate() { + // inner circle + for i in 0..n_points { + let r: f64 = rng.gen_range(start, end); + let phi: f64 = rng.gen_range(0.0, f64::PI() * 2.0); + let theta: f64 = rng.gen_range(0.0, f64::PI() * 2.0); + + let x = theta.sin() * phi.cos() * 
r; + let y = theta.sin() * phi.sin() * r; + let z = theta.cos() * r; + + array[(n * n_points + i, 0)] = x; + array[(n * n_points + i, 1)] = y; + array[(n * n_points + i, 2)] = z; + } + } + + array +}