diff --git a/.github/ISSUE_TEMPLATE/issue.md b/.github/ISSUE_TEMPLATE/issue.md
new file mode 100644
index 00000000..925cfede
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/issue.md
@@ -0,0 +1,17 @@
+---
+about: A generic issue template
+assignees:
+    - FL03
+labels: []
+projects: ['@FL03/concision:features']
+name: Generic Issue
+title: ''
+---
+
+**Describe the proposal or feature that this issue is tracking.**
+
+## Issues
+
+- [ ]
+
+## Pull Requests
diff --git a/.github/ISSUE_TEMPLATE/proposal.md b/.github/ISSUE_TEMPLATE/proposal.md
index d7bacdf8..d8370f22 100644
--- a/.github/ISSUE_TEMPLATE/proposal.md
+++ b/.github/ISSUE_TEMPLATE/proposal.md
@@ -1,15 +1,14 @@
 ---
-name: Improvement Proposal
 about: A formal proposal discussing any new features, changes, or improvements to the project.
-title: 'CNC-0000:'
-labels: ['proposal']
-projects: ['@FL03/concision:features', '@FL03/concision:roadmap']
 assignees:
     - FL03
-
+labels: ['proposal']
+name: Improvement Proposal
+projects: ['@FL03/concision:features', '@FL03/concision:roadmap']
+title: 'CNC-0000:'
 ---
 
 
 ### Resources
 
-- [Google](https://google.com)
\ No newline at end of file
+- [company](https://github.com/scattered-systems)
diff --git a/.github/ISSUE_TEMPLATE/tracking.md b/.github/ISSUE_TEMPLATE/tracking.md
new file mode 100644
index 00000000..0139c486
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/tracking.md
@@ -0,0 +1,17 @@
+---
+about: Create a new tracking issue to track the progress of a proposal or feature.
+assignees:
+    - FL03
+labels: ['tracking']
+projects: ['@FL03/concision:features']
+name: Tracking Issue
+title: 'Tracking Issue:'
+---
+
+**Describe the proposal or feature that this issue is tracking.**
+
+## Issues
+
+- [ ]
+
+## Pull Requests
diff --git a/concision/Cargo.toml b/concision/Cargo.toml
index e8ddbdfb..1cd286ac 100644
--- a/concision/Cargo.toml
+++ b/concision/Cargo.toml
@@ -45,7 +45,7 @@ models = [
     "gnn",
     "kan",
     "linear",
-    "transformers",
+    "transformer",
 ]
 
 gnn = [
@@ -60,7 +60,7 @@ linear = [
     "dep:concision-linear",
 ]
 
-transformers = [
+transformer = [
     "dep:concision-transformers",
 ]
 
@@ -184,6 +184,10 @@ test = true
 name = "linear"
 required-features = ["linear", "rand", "serde", "tracing"]
 
+[[example]]
+name = "transformer"
+required-features = ["transformer", "rand", "serde", "tracing"]
+
 [build-dependencies]
 
 [dependencies.concision-core]
@@ -229,6 +233,7 @@ version = "0.1.14"
 
 [dev-dependencies]
 anyhow = "1"
+approx.workspace = true
 lazy_static.workspace = true
 ndarray.workspace = true
 num = { features = ["rand", "serde"], version = "0.4" }
diff --git a/concision/examples/transformer.rs b/concision/examples/transformer.rs
new file mode 100644
index 00000000..cc24879b
--- /dev/null
+++ b/concision/examples/transformer.rs
@@ -0,0 +1,44 @@
+/*
+    Appellation: transformer <example>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+//! Builds an [`AttentionHead`] via `ones` and checks that every attention score equals 1/3.
+extern crate concision as cnc;
+
+use approx::AbsDiffEq;
+use cnc::prelude::Result;
+use cnc::transformer::AttentionHead;
+use ndarray::Array2;
+
+/// Install the global `tracing` subscriber used by this example: compact output,
+/// ANSI colors, `DEBUG` as the maximum level, no targets, and uptime timestamps.
+fn tracing() {
+    use tracing::Level;
+    use tracing_subscriber::fmt::time::uptime;
+
+    let builder = tracing_subscriber::fmt()
+        .compact()
+        .with_ansi(true)
+        .with_max_level(Level::DEBUG)
+        .with_target(false)
+        .with_timer(uptime());
+    builder.init();
+}
+
+/// Compute the attention scores of the head and compare them against 1/3.
+fn main() -> Result<()> {
+    tracing();
+    tracing::info!("Starting up the transformer model example...");
+
+    // build the head through its `ones` constructor for a (3, 3) shape
+    let shape = (3, 3);
+    let head = AttentionHead::<f64>::ones(shape);
+    let score = head.attention();
+    // the result must match 1/3 element-wise, within an absolute tolerance of 1e-6
+    assert!(score
+        .attention()
+        .abs_diff_eq(&Array2::from_elem(shape, 1f64 / 3f64), 1e-6));
+    println!("{:?}", score);
+
+    Ok(())
+}
diff --git a/concision/src/lib.rs b/concision/src/lib.rs
index ced1df1d..ae868246 100644
--- a/concision/src/lib.rs
+++ b/concision/src/lib.rs
@@ -27,9 +27,9 @@ pub use concision_kan as kan;
 pub use concision_linear as linear;
 #[cfg(feature = "macros")]
 pub use concision_macros::*;
-#[cfg(feature = "transformers")]
+#[cfg(feature = "transformer")]
 #[doc(inline)]
-pub use concision_transformers as transformers;
+pub use concision_transformers as transformer;
 
 pub mod prelude {
     pub use concision_core::prelude::*;
@@ -45,4 +45,6 @@ pub mod prelude {
     pub use concision_linear::prelude::*;
     #[cfg(feature = "macros")]
     pub use concision_macros::*;
+    #[cfg(feature = "transformer")]
+    pub use concision_transformers::prelude::*;
 }
diff --git a/core/Cargo.toml b/core/Cargo.toml
index b0891b64..c8f85779 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -103,16 +103,24 @@ crate-type = ["lib"]
 doctest = false
 test = true
 
-[[test]]
-name = "random"
-required-features = ["rand"]
+
 
 [[test]]
 name = "fft"
 required-features = ["approx"]
 
+[[test]]
+name = "init"
+required-features = ["rand", "std"]
+
+[[test]]
+name = "nn"
+
 [build-dependencies]
 
+[dev-dependencies]
+lazy_static.workspace = true
+
 [dependencies]
 ndarray.workspace = true
 num.workspace = true
@@ -154,9 +162,6 @@ default-features = false
 features = ["v5", "v8"]
 version = "1"
 
-[dev-dependencies]
-lazy_static = "1"
-
 [package.metadata.docs.rs]
 all-features = true
 rustc-args = ["--cfg", "docsrs"]
diff --git a/core/src/func/activate/nl.rs b/core/src/func/activate/nl.rs
index 694145c7..d9e70fed 100644
--- a/core/src/func/activate/nl.rs
+++ b/core/src/func/activate/nl.rs
@@ -7,7 +7,7 @@ use ndarray::*;
 use num::complex::{Complex, ComplexFloat};
 use num::traits::Zero;
 
-pub fn relu<T>(args: T) -> T
+fn _relu<T>(args: T) -> T
 where
     T: PartialOrd + Zero,
 {
@@ -17,23 +17,33 @@ where
     T::zero()
 }
 
-pub fn sigmoid<T>(args: T) -> T
+fn _sigmoid<T>(args: T) -> T
 where
     T: ComplexFloat,
 {
     (T::one() + args.neg().exp()).recip()
 }
 
-pub fn softmax<A, S, D>(args: &ArrayBase<S, D>) -> Array<A, D>
+fn _softmax<A, S, D>(args: &ArrayBase<S, D>) -> Array<A, D>
 where
     A: ComplexFloat + ScalarOperand,
     D: Dimension,
     S: Data<Elem = A>,
 {
-    args.exp() / args.exp().sum()
+    let e = args.exp();
+    &e / e.sum()
 }
 
-pub fn tanh<T>(args: T) -> T
+// fn __softmax<T, I>(args: &I) -> I
+// where
+//     I: Clone + core::ops::Div<T, Output = I> + Exp<Output = I>, T: Exp<Output = T> + core::iter::Sum ,
+//     for<'a> I: IntoIterator<Item = &'a T>,
+// {
+//     let e = args.exp();
+//     e.clone() / e.into_iter().sum::<T>()
+// }
+
+fn _tanh<T>(args: T) -> T
 where
     T: ComplexFloat,
 {
@@ -64,22 +74,25 @@ macro_rules! nonlinear {
         nonlinear!(@arr $rho::$call);
     };
     (@impl $rho:ident::$call:ident<$T:ty>) => {
-        impl $rho for $T {
-            type Output = $T;
+        paste::paste! {
+            impl $rho for $T {
+                type Output = $T;
 
-            fn $call(self) -> Self::Output {
-                $call(self)
+                fn $call(self) -> Self::Output {
+                    [<_ $call>](self)
+                }
             }
-        }
 
-        impl<'a> $rho for &'a $T {
-            type Output = $T;
+            impl<'a> $rho for &'a $T {
+                type Output = $T;
 
-            fn $call(self) -> Self::Output {
-                $call(*self)
+                fn $call(self) -> Self::Output {
+                    [<_ $call>](*self)
+                }
             }
         }
 
+
     };
     (@arr $name:ident::$call:ident) => {
         impl<A, S, D> $name for ArrayBase<S, D>
@@ -150,7 +163,7 @@ where
     type Output = Array<A, D>;
 
     fn softmax(self) -> Self::Output {
-        softmax(&self)
+        _softmax(&self)
     }
 }
 
@@ -163,6 +176,6 @@ where
     type Output = Array<A, D>;
 
     fn softmax(self) -> Self::Output {
-        softmax(self)
+        _softmax(self)
     }
 }
diff --git a/core/src/func/mod.rs b/core/src/func/mod.rs
index bb99ccba..96513d96 100644
--- a/core/src/func/mod.rs
+++ b/core/src/func/mod.rs
@@ -7,12 +7,9 @@ pub use self::prelude::*;
 
 #[macro_use]
 pub mod activate;
-pub mod dropout;
 pub mod loss;
 
 pub(crate) mod prelude {
     pub use super::activate::prelude::*;
-    #[cfg(feature = "rand")]
-    pub use super::dropout::*;
     pub use super::loss::prelude::*;
 }
diff --git a/core/src/init/gen/lecun.rs b/core/src/init/distr/lecun.rs
similarity index 73%
rename from core/src/init/gen/lecun.rs
rename to core/src/init/distr/lecun.rs
index b8cae16c..0c4763c5 100644
--- a/core/src/init/gen/lecun.rs
+++ b/core/src/init/distr/lecun.rs
@@ -1,10 +1,11 @@
 /*
-    Appellation: lecun <module>
+    Appellation: lecun <distr>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::init::distr::TruncatedNormal;
 use num::Float;
 use rand::Rng;
-use rand_distr::{Distribution, Normal, NormalError, StandardNormal};
+use rand_distr::{Distribution, NormalError, StandardNormal};
 
 /// [LecunNormal] is a truncated [normal](rand_distr::Normal) distribution centered at 0
 /// with a standard deviation that is calculated as `σ = sqrt(1/n_in)`
@@ -18,14 +19,14 @@ impl LecunNormal {
     pub fn new(n: usize) -> Self {
         Self { n }
     }
-    /// Create a [normal](rand_distr::Normal) [distribution](Distribution) centered at 0;
+    /// Create a [truncated normal](TruncatedNormal) [distribution](Distribution) centered at 0;
     /// See [Self::std_dev] for the standard deviation calculations.
-    pub fn distr<F>(&self) -> Result<Normal<F>, NormalError>
+    pub fn distr<F>(&self) -> Result<TruncatedNormal<F>, NormalError>
     where
         F: Float,
         StandardNormal: Distribution<F>,
     {
-        Normal::new(F::zero(), self.std_dev())
+        TruncatedNormal::new(F::zero(), self.std_dev())
     }
     /// Calculate the standard deviation (`σ`) of the distribution.
     /// This is done by computing the root of the reciprocal of the number of inputs
@@ -48,6 +49,6 @@ where
     where
         R: Rng + ?Sized,
     {
-        self.distr().unwrap().sample(rng)
+        self.distr().expect("NormalError").sample(rng)
     }
 }
diff --git a/core/src/init/distr/trunc.rs b/core/src/init/distr/trunc.rs
new file mode 100644
index 00000000..fc94f0b9
--- /dev/null
+++ b/core/src/init/distr/trunc.rs
@@ -0,0 +1,101 @@
+/*
+    Appellation: trunc <distr>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use num::traits::Float;
+use rand::Rng;
+use rand_distr::{Distribution, Normal, NormalError, StandardNormal};
+
+/// A truncated normal distribution is similar to a [normal](rand_distr::Normal) [distribution](rand_distr::Distribution), however,
+/// any generated value more than two standard deviations away from the mean (in either direction) is discarded and re-generated.
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct TruncatedNormal<F>
+where
+    StandardNormal: Distribution<F>,
+{
+    mean: F,
+    std: F,
+}
+
+impl<F> TruncatedNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    /// Create a new truncated normal distribution with a given mean and standard deviation.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [NormalError] whenever [Normal::new] rejects the parameters; validating
+    /// here guarantees that [Self::distr] cannot panic on an instance built with `new`.
+    pub fn new(mean: F, std: F) -> Result<Self, NormalError> {
+        Normal::new(mean, std).map(|_| Self { mean, std })
+    }
+
+    /// The upper truncation bound, `mean + 2 * std_dev`.
+    pub(crate) fn boundary(&self) -> F {
+        self.mean() + self.std_dev() * F::from(2).unwrap()
+    }
+
+    /// Map a draw `x` from the standard normal distribution onto this distribution.
+    /// The standard normal is symmetric about zero, so subtracting the scaled draw
+    /// is distributed identically to adding it.
+    pub(crate) fn score(&self, x: F) -> F {
+        self.mean() - self.std_dev() * x
+    }
+
+    /// The un-truncated [Normal] distribution sharing this mean and standard deviation.
+    pub fn distr(&self) -> Normal<F> {
+        Normal::new(self.mean(), self.std_dev())
+            .expect("the mean and standard deviation are validated on construction")
+    }
+
+    /// The mean (`μ`) of the distribution.
+    pub fn mean(&self) -> F {
+        self.mean
+    }
+
+    /// The standard deviation (`σ`) of the distribution.
+    pub fn std_dev(&self) -> F {
+        self.std
+    }
+}
+
+impl<F> Distribution<F> for TruncatedNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    /// Draw values until one lands within `[mean - 2σ, mean + 2σ]` (rejection sampling).
+    fn sample<R>(&self, rng: &mut R) -> F
+    where
+        R: Rng + ?Sized,
+    {
+        // Half-width of the accepted interval. `abs` keeps the interval well-formed
+        // (and the loop below finite) should the standard deviation be negative.
+        let radius = (self.boundary() - self.mean()).abs();
+        // The interval is centered on the mean; mirroring the upper bound around
+        // zero would only be correct for a mean of zero.
+        let (lower, upper) = (self.mean() - radius, self.mean() + radius);
+        let mut x = self.score(rng.sample(StandardNormal));
+        // if x is outside of the boundary, re-sample
+        while x < lower || x > upper {
+            x = self.score(rng.sample(StandardNormal));
+        }
+        x
+    }
+}
+
+impl<F> From<Normal<F>> for TruncatedNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    /// Truncate an existing [Normal] distribution, reusing its mean and standard deviation.
+    fn from(normal: Normal<F>) -> Self {
+        Self {
+            mean: normal.mean(),
+            std: normal.std_dev(),
+        }
+    }
+}
diff --git a/core/src/init/distr/xavier.rs b/core/src/init/distr/xavier.rs
new file mode 100644
index 00000000..ecc1ee0c
--- /dev/null
+++ b/core/src/init/distr/xavier.rs
@@ -0,0 +1,141 @@
+/*
+    Appellation: xavier <distr>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+//! # Xavier
+//!
+//! Xavier initialization techniques were developed in 2010 by Xavier Glorot.
+//! These methods are designed to initialize the weights of a neural network in a way that
+//! prevents the vanishing and exploding gradient problems. The initialization technique
+//! manifests into two distributions: [XavierNormal] and [XavierUniform].
+// #76
+use num::Float;
+use rand::Rng;
+use rand_distr::uniform::{SampleUniform, Uniform};
+use rand_distr::{Distribution, Normal, NormalError, StandardNormal};
+
+/// Standard deviation used by [XavierNormal]: `σ = sqrt(2/(d_in + d_out))`
+pub(crate) fn std_dev<F>(inputs: usize, outputs: usize) -> F
+where
+    F: Float,
+{
+    (F::from(2).unwrap() / F::from(inputs + outputs).unwrap()).sqrt()
+}
+
+/// Bound used by [XavierUniform]: `a = sqrt(6/(d_in + d_out))`
+pub(crate) fn boundary<F>(inputs: usize, outputs: usize) -> F
+where
+    F: Float,
+{
+    (F::from(6).unwrap() / F::from(inputs + outputs).unwrap()).sqrt()
+}
+
+/// Normal Xavier initializers leverage a normal distribution with a mean of 0 and a standard deviation (`σ`)
+/// computed by the formula: `σ = sqrt(2/(d_in + d_out))`
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct XavierNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    std: F,
+}
+
+impl<F> XavierNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    /// Create an initializer for a layer with the given number of `inputs` and `outputs`.
+    pub fn new(inputs: usize, outputs: usize) -> Self {
+        Self {
+            std: std_dev(inputs, outputs),
+        }
+    }
+
+    /// The zero-mean [Normal] distribution with a standard deviation of [Self::std_dev].
+    ///
+    /// # Errors
+    ///
+    /// Returns a [NormalError] if [Normal::new] rejects the standard deviation; note that
+    /// `inputs + outputs == 0` produces an infinite standard deviation.
+    pub fn distr(&self) -> Result<Normal<F>, NormalError> {
+        Normal::new(F::zero(), self.std_dev())
+    }
+
+    /// The standard deviation (`σ`) of the distribution.
+    pub fn std_dev(&self) -> F {
+        self.std
+    }
+}
+
+impl<F> Distribution<F> for XavierNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    /// Draw a single value from [XavierNormal::distr].
+    ///
+    /// # Panics
+    ///
+    /// Panics if [XavierNormal::distr] returns an error.
+    fn sample<R>(&self, rng: &mut R) -> F
+    where
+        R: Rng + ?Sized,
+    {
+        self.distr()
+            .expect("XavierNormal: `Normal::new` rejected the standard deviation")
+            .sample(rng)
+    }
+}
+
+/// Uniform Xavier initializers use a uniform distribution to initialize the weights of a neural network
+/// within a given range.
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct XavierUniform<X>
+where
+    X: SampleUniform,
+{
+    boundary: X,
+}
+
+impl<X> XavierUniform<X>
+where
+    X: Float + SampleUniform,
+{
+    /// Create an initializer for a layer with the given number of `inputs` and `outputs`.
+    pub fn new(inputs: usize, outputs: usize) -> Self {
+        Self {
+            boundary: boundary(inputs, outputs),
+        }
+    }
+
+    /// The bound `a` of the sampled range, `a = sqrt(6/(d_in + d_out))`.
+    pub fn boundary(&self) -> X {
+        self.boundary
+    }
+
+    /// The [Uniform] distribution over `[-a, a)`, where `a` is [Self::boundary].
+    ///
+    /// # Panics
+    ///
+    /// [Uniform::new] panics when the range is empty or not finite; the latter
+    /// happens for `inputs + outputs == 0`, which makes the bound infinite.
+    pub fn distr(&self) -> Uniform<X> {
+        let bnd = self.boundary();
+        Uniform::new(-bnd, bnd)
+    }
+}
+
+impl<X> Distribution<X> for XavierUniform<X>
+where
+    X: Float + SampleUniform,
+{
+    /// Draw a single value from [XavierUniform::distr].
+    fn sample<R>(&self, rng: &mut R) -> X
+    where
+        R: Rng + ?Sized,
+    {
+        self.distr().sample(rng)
+    }
+}
diff --git a/core/src/init/initializer.rs b/core/src/init/initializer.rs
new file mode 100644
index 00000000..2de38df9
--- /dev/null
+++ b/core/src/init/initializer.rs
@@ -0,0 +1,43 @@
+/*
+    Appellation: initializer <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use super::Initialize;
+use core::marker::PhantomData;
+use nd::prelude::*;
+use nd::DataOwned;
+use rand_distr::{Distribution, StandardNormal};
+
+/// Pairs a dimension (`dim`) with a distribution (`distr`) so that an array with
+/// elements of type `A` can be generated later on through [InitializerBase::init].
+pub struct InitializerBase<A = f64, D = Ix2, Dst = StandardNormal>
+where
+    D: Dimension,
+    Dst: Clone + Distribution<A>,
+{
+    pub(crate) dim: D,
+    pub(crate) distr: Dst,
+    pub(crate) _dtype: PhantomData<A>,
+}
+
+impl<A, D, Dst> InitializerBase<A, D, Dst>
+where
+    D: Dimension,
+    Dst: Clone + Distribution<A>,
+{
+    /// Create an initializer from a dimension and the distribution to sample from.
+    pub fn new(dim: D, distr: Dst) -> Self {
+        let _dtype = PhantomData;
+        Self { dim, distr, _dtype }
+    }
+
+    /// Consume the initializer, passing its dimension and distribution on to
+    /// `ArrayBase::rand` to build the array.
+    pub fn init<S>(self) -> ArrayBase<S, D>
+    where
+        S: DataOwned<Elem = A>,
+    {
+        let Self { dim, distr, .. } = self;
+        ArrayBase::rand(dim, distr)
+    }
+}
diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs
index 22ee1bac..7f5bc4b4 100644
--- a/core/src/init/mod.rs
+++ b/core/src/init/mod.rs
@@ -11,18 +11,26 @@
 //! better suited for machine-learning workloads.
 #![cfg(feature = "rand")]
 
-pub use self::prelude::*;
+pub use self::distr::prelude::*;
+pub use self::traits::*;
+pub use self::utils::*;
 
-pub(crate) mod initialize;
+pub(crate) mod traits;
 pub(crate) mod utils;
 
-pub mod gen {
+pub mod initializer;
+
+pub mod distr {
     pub use self::prelude::*;
 
     pub mod lecun;
+    pub mod trunc;
+    pub mod xavier;
 
     pub(crate) mod prelude {
         pub use super::lecun::*;
+        pub use super::trunc::*;
+        pub use super::xavier::*;
     }
 }
 
@@ -34,7 +42,7 @@ pub use rand;
 pub use rand_distr;
 
 pub(crate) mod prelude {
-    pub use super::gen::prelude::*;
-    pub use super::initialize::{Initialize, InitializeExt};
+    pub use super::distr::prelude::*;
+    pub use super::traits::{Initialize, InitializeExt};
     pub use super::utils::*;
 }
diff --git a/core/src/init/initialize.rs b/core/src/init/traits.rs
similarity index 87%
rename from core/src/init/initialize.rs
rename to core/src/init/traits.rs
index 91b41b13..a01ca7d9 100644
--- a/core/src/init/initialize.rs
+++ b/core/src/init/traits.rs
@@ -2,16 +2,17 @@
     Appellation: initialize <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::init::distr::*;
+
 use core::ops::Neg;
 use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder};
 use ndrand::RandomExt;
 use num::complex::ComplexDistribution;
 use num::traits::Float;
-use rand::{rngs, Rng, SeedableRng};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
 use rand_distr::uniform::{SampleUniform, Uniform};
-use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, StandardNormal};
-
-use super::LecunNormal;
+use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, NormalError, StandardNormal};
 
 /// This trait provides the base methods required for initializing an [ndarray](ndarray::ArrayBase) with random values.
 /// [Initialize] is similar to [RandomExt](ndarray_rand::RandomExt), however, it focuses on flexibility while implementing additional
@@ -79,7 +80,7 @@ where
         Self::rand(shape, distr)
     }
     /// Given a shape, mean, and standard deviation generate a new object using the [Normal](rand_distr::Normal) distribution
-    fn normal<Sh>(shape: Sh, mean: A, std: A) -> Result<Self, rand_distr::NormalError>
+    fn normal<Sh>(shape: Sh, mean: A, std: A) -> Result<Self, NormalError>
     where
         A: Float,
         S: DataOwned,
@@ -115,11 +116,18 @@ where
         Sh: ShapeBuilder<Dim = D>,
         StandardNormal: Distribution<A>,
     {
-        Self::rand_with(
-            shape,
-            StandardNormal,
-            &mut rngs::StdRng::seed_from_u64(seed),
-        )
+        Self::rand_with(shape, StandardNormal, &mut StdRng::seed_from_u64(seed))
+    }
+    /// Initialize the object using the [TruncatedNormal](crate::init::distr::TruncatedNormal) distribution
+    fn truncnorm<Sh>(shape: Sh, mean: A, std: A) -> Result<Self, NormalError>
+    where
+        A: Float,
+        S: DataOwned,
+        Sh: ShapeBuilder<Dim = D>,
+        StandardNormal: Distribution<A>,
+    {
+        let distr = TruncatedNormal::new(mean, std)?;
+        Ok(Self::rand(shape, distr))
     }
     /// A [uniform](rand_distr::uniform::Uniform) generator with values between u(-dk, dk)
     fn uniform<Sh>(shape: Sh, dk: A) -> Self
@@ -131,6 +139,20 @@ where
     {
         Self::rand(shape, Uniform::new(dk.clone().neg(), dk))
     }
+    /// A [uniform](rand_distr::uniform::Uniform) generator over `start..stop`, driven by a [StdRng] seeded with `key`
+    fn uniform_from_seed<Sh>(shape: Sh, start: A, stop: A, key: u64) -> Self
+    where
+        A: SampleUniform,
+        S: DataOwned,
+        Sh: ShapeBuilder<Dim = D>,
+        <A as SampleUniform>::Sampler: Clone,
+    {
+        Self::rand_with(
+            shape,
+            Uniform::new(start, stop),
+            &mut StdRng::seed_from_u64(key),
+        )
+    }
     /// Generate a random array with values between u(-a, a) where a is the reciprocal of the value at the given axis
     fn uniform_along<Sh>(shape: Sh, axis: usize) -> Self
     where
diff --git a/core/src/init/utils.rs b/core/src/init/utils.rs
index 3994589c..dacb3df2 100644
--- a/core/src/init/utils.rs
+++ b/core/src/init/utils.rs
@@ -22,23 +22,7 @@ where
     let distr = ComplexDistribution::<A, A>::new(A::one(), A::one());
     ArrayBase::random(shape, distr)
 }
-/// Creates a random array from a uniform distribution using a given key
-pub fn seeded_uniform<T, D>(
-    key: u64,
-    start: T,
-    stop: T,
-    shape: impl IntoDimension<Dim = D>,
-) -> Array<T, D>
-where
-    D: Dimension,
-    T: SampleUniform,
-{
-    Array::random_using(
-        shape,
-        Uniform::new(start, stop),
-        &mut rngs::StdRng::seed_from_u64(key),
-    )
-}
+
 /// Given a shape, generate a random array using the StandardNormal distribution
 pub fn stdnorm<S, D, Sh>(shape: Sh) -> ArrayBase<S, D>
 where
@@ -59,3 +43,20 @@ where
 {
     ArrayBase::random_using(shape, StandardNormal, &mut StdRng::seed_from_u64(seed))
 }
+/// Creates a random array from a uniform distribution using a given key.
+///
+/// The `key` seeds a [StdRng](rngs::StdRng); values are drawn from `Uniform::new(start, stop)`.
+pub fn uniform_from_seed<T, D>(
+    key: u64,
+    start: T,
+    stop: T,
+    shape: impl IntoDimension<Dim = D>,
+) -> Array<T, D>
+where
+    D: Dimension,
+    T: SampleUniform,
+{
+    let distr = Uniform::new(start, stop);
+    let mut rng = rngs::StdRng::seed_from_u64(key);
+    Array::random_using(shape, distr, &mut rng)
+}
diff --git a/core/src/lib.rs b/core/src/lib.rs
index 5906aa6f..a09d48e1 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -34,6 +34,7 @@ pub mod types;
 pub mod utils;
 
 pub mod prelude {
+    #[allow(unused_imports)]
     pub(crate) use super::primitives::rust::*;
 
     pub use super::error::prelude::*;
diff --git a/core/src/macros/builder.rs b/core/src/macros/builder.rs
index 8fba06d2..6e545c9f 100644
--- a/core/src/macros/builder.rs
+++ b/core/src/macros/builder.rs
@@ -5,11 +5,8 @@
 
 #[macro_export]
 macro_rules! builder {
-    ($(#[derive($($d:ident),+)])?$name:ident::<$inner:ty> {$($k:ident: $v:ty),* $(,)?}) => {
-        builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
-    };
     ($(#[derive($($d:ident),+)])? $name:ident($inner:ty) {$($k:ident: $v:ty),* $(,)?}) => {
-        builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
+        $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
     };
     (@loop builder: $name:ident, derive: [$($d:ident),* $(,)?], inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => {
 
@@ -18,7 +15,7 @@ macro_rules! builder {
             inner: $inner,
         }
 
-        builder!(@impl builder: $name, inner: $inner {$($k: $v),*});
+        $crate::builder!(@impl builder: $name, inner: $inner {$($k: $v),*});
     };
     (@impl builder: $name:ident, inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => {
         impl $name {
diff --git a/core/src/math/arith.rs b/core/src/math/arith.rs
new file mode 100644
index 00000000..04c88c1a
--- /dev/null
+++ b/core/src/math/arith.rs
@@ -0,0 +1,97 @@
+/*
+    Appellation: arith <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use num::integer::Roots;
+use num::traits::FromPrimitive;
+
+/// Compute roots of a value: the general `n`-th root along with the square and cube roots.
+///
+/// Implemented for the primitive floats and integers; the integer implementations
+/// defer to [Roots], which truncates the result.
+pub trait Root {
+    type Output;
+
+    /// The principal `n`-th root of `self`.
+    fn nth_root(&self, n: u32) -> Self::Output;
+
+    /// The square root of `self`; by default `self.nth_root(2)`.
+    fn sqrt(&self) -> Self::Output {
+        self.nth_root(2)
+    }
+
+    /// The cube root of `self`; by default `self.nth_root(3)`.
+    fn cbrt(&self) -> Self::Output {
+        self.nth_root(3)
+    }
+}
+
+macro_rules! impl_root {
+    (float $($T:ty),* $(,)?) => {
+        $(
+            impl_root!(@float $T);
+        )*
+    };
+    ($($T:ty),* $(,)?) => {
+        $(
+            impl_root!(@impl $T);
+        )*
+    };
+
+    (@impl $T:ty) => {
+        impl Root for $T {
+            type Output = $T;
+
+            /// Defers to [Roots::nth_root], which panics when `n == 0` or when `n` is
+            /// even and `self` is negative.
+            fn nth_root(&self, n: u32) -> Self::Output {
+                Roots::nth_root(self, n)
+            }
+        }
+    };
+    (@float $T:ty) => {
+        impl Root for $T {
+            type Output = $T;
+
+            /// Computed as `self.powf(1 / n)`; an odd root of a negative value is the
+            /// negated root of its magnitude. `n == 0` makes the exponent infinite.
+            fn nth_root(&self, n: u32) -> Self::Output {
+                let x = *self;
+                let exp = <$T>::from_u32(n).unwrap().recip();
+                // `powf` yields NaN for every negative base with a fractional exponent,
+                // yet odd roots of negative numbers are real: take the root of the
+                // magnitude and restore the sign.
+                if x < 0.0 && n % 2 == 1 {
+                    -((-x).powf(exp))
+                } else {
+                    x.powf(exp)
+                }
+            }
+
+            // the native routines are more precise than `powf` with a reciprocal exponent
+            fn sqrt(&self) -> Self::Output {
+                <$T>::sqrt(*self)
+            }
+
+            fn cbrt(&self) -> Self::Output {
+                <$T>::cbrt(*self)
+            }
+        }
+    };
+}
+
+impl_root!(float f32, f64);
+impl_root! {
+    i8,
+    i16,
+    i32,
+    i64,
+    i128,
+    isize,
+    u8,
+    u16,
+    u32,
+    u64,
+    u128,
+    usize,
+}
diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs
index bc9dc16f..da193f09 100644
--- a/core/src/math/mod.rs
+++ b/core/src/math/mod.rs
@@ -4,12 +4,16 @@
 */
 //! # Mathematics
 //!
-//! This module focuses on implementing various mathematical objects and operations that are
-//! critical to the development of machine learning algorithms.
+//! This module focuses on providing the mathematical foundation for the library.
+//! Any defined operation is designed to extend the functionality of the basic primitives
+//! as well as the `ndarray` crate.
 pub use self::traits::*;
 
+pub mod arith;
+pub mod stats;
 pub mod traits;
 
 pub(crate) mod prelude {
+    pub use super::stats::prelude::*;
     pub use super::traits::*;
 }
diff --git a/core/src/math/stats/mod.rs b/core/src/math/stats/mod.rs
new file mode 100644
index 00000000..7a0a3892
--- /dev/null
+++ b/core/src/math/stats/mod.rs
@@ -0,0 +1,13 @@
+/*
+    Appellation: stats <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+//! # Statistics
+//! Summary statistics (sum, product, mean, variance, standard deviation) for iterators, slices, vectors, and arrays.
+pub use self::summary::*;
+
+mod summary;
+
+pub(crate) mod prelude {
+    pub use super::summary::*;
+}
diff --git a/core/src/math/stats/summary.rs b/core/src/math/stats/summary.rs
new file mode 100644
index 00000000..35b5821d
--- /dev/null
+++ b/core/src/math/stats/summary.rs
@@ -0,0 +1,163 @@
+/*
+    Appellation: summary <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use crate::math::arith::Root;
+use core::iter::{Product, Sum};
+use nd::{ArrayBase, Data, Dimension};
+use num::traits::{FromPrimitive, Num, NumOps, Pow};
+
+/// This trait describes the fundamental methods of summary statistics.
+/// These include the mean, standard deviation, variance, and more.
+pub trait SummaryStatistics
+where
+    Self::Item: FromPrimitive,
+    Self::Output: NumOps<Self::Item, Self::Output>,
+{
+    type Item;
+    type Output;
+
+    /// The number of elements, converted into [Self::Item].
+    fn elems(&self) -> Self::Item {
+        let count = self.len();
+        Self::Item::from_usize(count).unwrap()
+    }
+
+    /// The number of elements.
+    fn len(&self) -> usize;
+
+    /// The arithmetic mean: the sum divided by the number of elements.
+    fn mean(&self) -> Self::Output {
+        let total = self.sum();
+        total / self.elems()
+    }
+
+    /// The product of all elements.
+    fn product(&self) -> Self::Output;
+
+    /// The sum of all elements.
+    fn sum(&self) -> Self::Output;
+
+    /// The standard deviation: the square root of [Self::var].
+    fn std(&self) -> Self::Output;
+
+    /// The variance: the squared deviations from the mean, averaged over the number of elements.
+    fn var(&self) -> Self::Output;
+}
+
+/// Sum of the squared deviations of `values` from `mean`.
+fn squared_deviations<T, I>(values: I, mean: T) -> T
+where
+    I: Iterator<Item = T>,
+    T: Copy + Num + Pow<i32, Output = T> + Sum,
+{
+    values.map(|x| (x - mean).pow(2)).sum::<T>()
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<'a, T, I> SummaryStatistics for &'a I
+where
+    I: Clone + ExactSizeIterator<Item = T>,
+    T: Copy + FromPrimitive + Num + Pow<i32, Output = T> + Product + Root<Output = T> + Sum,
+{
+    type Item = T;
+    type Output = T;
+
+    fn len(&self) -> usize {
+        ExactSizeIterator::len(*self)
+    }
+
+    fn product(&self) -> Self::Output {
+        let items: I = (*self).clone();
+        items.product()
+    }
+
+    fn sum(&self) -> Self::Output {
+        let items: I = (*self).clone();
+        items.sum()
+    }
+
+    fn std(&self) -> Self::Output {
+        SummaryStatistics::var(self).sqrt()
+    }
+
+    fn var(&self) -> Self::Output {
+        let mean = self.mean();
+        squared_deviations((*self).clone(), mean) / self.elems()
+    }
+}
+
+macro_rules! impl_summary {
+    ($($T:ty),* $(,)?) => {
+        $(
+            impl_summary!(@impl $T);
+        )*
+    };
+    (@impl $T:ty) => {
+        impl<T> SummaryStatistics for $T
+        where
+            T: Copy + FromPrimitive + Num + Pow<i32, Output = T> + Product + Root<Output = T> + Sum,
+        {
+            type Item = T;
+            type Output = T;
+
+            fn len(&self) -> usize {
+                self.len()
+            }
+
+            fn product(&self) -> Self::Output {
+                self.iter().copied().product()
+            }
+
+            fn sum(&self) -> Self::Output {
+                self.iter().copied().sum()
+            }
+
+            fn std(&self) -> Self::Output {
+                SummaryStatistics::var(self).sqrt()
+            }
+
+            fn var(&self) -> Self::Output {
+                let mean = self.mean();
+                squared_deviations(self.iter().copied(), mean) / self.elems()
+            }
+        }
+    };
+}
+
+impl_summary!(Vec<T>, [T]);
+
+impl<A, S, D> SummaryStatistics for ArrayBase<S, D>
+where
+    A: Copy + FromPrimitive + Num + Pow<i32, Output = A> + Product + Root<Output = A> + Sum,
+    D: Dimension,
+    S: Data<Elem = A>,
+{
+    type Item = A;
+    type Output = A;
+
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    fn product(&self) -> Self::Output {
+        self.iter().copied().product()
+    }
+
+    fn sum(&self) -> Self::Output {
+        self.iter().copied().sum()
+    }
+
+    fn std(&self) -> Self::Output {
+        // the path form selects this trait's `var` rather than the array's own `var` method
+        SummaryStatistics::var(self).sqrt()
+    }
+
+    fn var(&self) -> Self::Output {
+        // the array's own `mean` returns an `Option`; fall back to zero when it is `None`
+        let mean = self.mean().unwrap_or_else(A::zero);
+        squared_deviations(self.iter().copied(), mean) / self.elems()
+    }
+}
diff --git a/core/src/func/dropout.rs b/core/src/nn/dropout.rs
similarity index 56%
rename from core/src/func/dropout.rs
rename to core/src/nn/dropout.rs
index 00b24b13..19acdbc0 100644
--- a/core/src/func/dropout.rs
+++ b/core/src/nn/dropout.rs
@@ -2,14 +2,15 @@
     Appellation: dropout <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-#![cfg(feature = "rand")]
+#![allow(unused_imports)]
 use crate::Forward;
 use nd::prelude::*;
-use nd::{DataOwned, RemoveAxis, ScalarOperand};
-use ndrand::rand_distr::Bernoulli;
-use ndrand::RandomExt;
+use nd::{DataOwned, ScalarOperand};
+#[cfg(feature = "rand")]
+use ndrand::{rand_distr::Bernoulli, RandomExt};
 use num::traits::Num;
 
+#[cfg(feature = "rand")]
 pub fn dropout<A, S, D>(array: &ArrayBase<S, D>, p: f64) -> Array<A, D>
 where
     A: Num + ScalarOperand,
@@ -27,44 +28,46 @@ where
     array * mask
 }
 
-pub fn dropout_axis<A, S, D>(array: &ArrayBase<S, D>, _axis: Axis, p: f64) -> Array<A, D>
-where
-    A: Num + ScalarOperand,
-    D: RemoveAxis,
-    S: DataOwned<Elem = A>,
-{
-    // Create a Bernoulli distribution for dropout
-    let distribution = Bernoulli::new(p).unwrap();
+/// [Dropout] randomly zeroizes elements with a given probability (`p`).
+pub trait Dropout {
+    type Output;
 
-    // Create a mask of the same shape as the input array
-    let _mask: Array<bool, D> = Array::random(array.dim(), distribution);
-
-    unimplemented!()
+    fn dropout(&self, p: f64) -> Self::Output;
 }
 
-/// The [Dropout] layer is randomly zeroizes inputs with a given probability (`p`).
+/// The [DropoutLayer] layer randomly zeroizes inputs with a given probability (`p`).
 /// This regularization technique is often used to prevent overfitting.
 ///
 ///
 /// ### Config
 ///
 /// - (p) Probability of dropping an element
-pub struct Dropout {
-    p: f64,
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct DropoutLayer {
+    pub(crate) p: f64,
 }
 
-impl Dropout {
-    pub fn new(p: f64) -> Self {
-        Self { p }
+/*
+ ************* Implementations *************
+*/
+#[cfg(feature = "rand")]
+impl<A, S, D> Dropout for ArrayBase<S, D>
+where
+    A: Num + ScalarOperand,
+    D: Dimension,
+    S: DataOwned<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn dropout(&self, p: f64) -> Self::Output {
+        dropout(self, p)
     }
+}
 
-    pub fn dropout<A, S, D>(&self, array: &ArrayBase<S, D>) -> Array<A, D>
-    where
-        A: Num + ScalarOperand,
-        D: Dimension,
-        S: DataOwned<Elem = A>,
-    {
-        dropout(array, self.p)
+impl DropoutLayer {
+    pub fn new(p: f64) -> Self {
+        Self { p }
     }
 
     pub fn scale(&self) -> f64 {
@@ -72,13 +75,14 @@ impl Dropout {
     }
 }
 
-impl Default for Dropout {
+impl Default for DropoutLayer {
     fn default() -> Self {
         Self::new(0.5)
     }
 }
 
-impl<A, S, D> Forward<ArrayBase<S, D>> for Dropout
+#[cfg(feature = "rand")]
+impl<A, S, D> Forward<ArrayBase<S, D>> for DropoutLayer
 where
     A: Num + ScalarOperand,
     D: Dimension,
@@ -87,6 +91,6 @@ where
     type Output = Array<A, D>;
 
     fn forward(&self, input: &ArrayBase<S, D>) -> Self::Output {
-        dropout(input, self.p)
+        input.dropout(self.p)
     }
 }
diff --git a/core/src/nn/mask/mask.rs b/core/src/nn/mask/mask.rs
new file mode 100644
index 00000000..94da711a
--- /dev/null
+++ b/core/src/nn/mask/mask.rs
@@ -0,0 +1,236 @@
+/*
+    Appellation: mask <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::iter::{Iter, IterMut};
+use nd::prelude::*;
+use nd::{Data, DataMut, OwnedRepr, RawData, RawDataClone};
+
+pub struct Mask<S = OwnedRepr<bool>, D = Ix2>(ArrayBase<S, D>)
+where
+    D: Dimension,
+    S: RawData<Elem = bool>;
+
+impl<S, D> Mask<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = bool>,
+{
+    pub fn from_arr(data: ArrayBase<S, D>) -> Self {
+        Self(data)
+    }
+
+    pub fn apply<A, T, F>(&mut self, data: &ArrayBase<T, D>, fill: A) -> ArrayBase<T, D>
+    where
+        A: Clone,
+        S: Data,
+        T: DataMut<Elem = A> + RawDataClone,
+    {
+        let mut res = data.clone();
+        res.zip_mut_with(self.as_mut(), |x, &m| {
+            if m {
+                *x = fill.clone();
+            }
+        });
+        res
+    }
+
+    pub fn mask_inplace<'a, A, T, F>(
+        &mut self,
+        data: &'a mut ArrayBase<T, D>,
+        fill: A,
+    ) -> &'a mut ArrayBase<T, D>
+    where
+        A: Clone,
+        S: Data,
+        T: DataMut<Elem = A>,
+    {
+        data.zip_mut_with(&mut self.0, |x, &m| {
+            if m {
+                *x = fill.clone();
+            }
+        });
+        data
+    }
+
+    pub fn as_slice(&self) -> &[bool]
+    where
+        S: Data,
+    {
+        self.get().as_slice().unwrap()
+    }
+
+    pub fn as_mut_slice(&mut self) -> &mut [bool]
+    where
+        S: DataMut,
+    {
+        self.get_mut().as_slice_mut().unwrap()
+    }
+
+    pub fn dim(&self) -> D::Pattern {
+        self.get().dim()
+    }
+
+    pub fn iter(&self) -> Iter<'_, bool, D>
+    where
+        S: Data,
+    {
+        self.get().iter()
+    }
+
+    pub fn iter_mut(&mut self) -> IterMut<'_, bool, D>
+    where
+        S: DataMut,
+    {
+        self.get_mut().iter_mut()
+    }
+
+    pub fn get(&self) -> &ArrayBase<S, D> {
+        &self.0
+    }
+
+    pub fn get_mut(&mut self) -> &mut ArrayBase<S, D> {
+        &mut self.0
+    }
+
+    pub fn into_inner(self) -> ArrayBase<S, D> {
+        self.0
+    }
+
+    pub fn ndim(&self) -> usize {
+        self.get().ndim()
+    }
+
+    pub fn raw_dim(&self) -> D {
+        self.get().raw_dim()
+    }
+
+    pub fn set(&mut self, data: ArrayBase<S, D>) {
+        self.0 = data;
+    }
+
+    pub fn shape(&self) -> D {
+        self.get().raw_dim()
+    }
+}
+
+/*
+ ************* Implementations *************
+*/
+mod impls {
+    use super::Mask;
+    use core::borrow::{Borrow, BorrowMut};
+    use core::ops::{Deref, DerefMut, Index, IndexMut};
+    use nd::{ArrayBase, Data, DataMut, Dimension, NdIndex, RawData};
+
+    impl<S, D> AsRef<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn as_ref(&self) -> &ArrayBase<S, D> {
+            &self.0
+        }
+    }
+
+    impl<S, D> AsMut<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn as_mut(&mut self) -> &mut ArrayBase<S, D> {
+            &mut self.0
+        }
+    }
+
+    impl<S, D> Borrow<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn borrow(&self) -> &ArrayBase<S, D> {
+            &self.0
+        }
+    }
+
+    impl<S, D> BorrowMut<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn borrow_mut(&mut self) -> &mut ArrayBase<S, D> {
+            &mut self.0
+        }
+    }
+
+    impl<S, D> Deref for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        type Target = ArrayBase<S, D>;
+
+        fn deref(&self) -> &Self::Target {
+            &self.0
+        }
+    }
+
+    impl<S, D> DerefMut for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn deref_mut(&mut self) -> &mut Self::Target {
+            &mut self.0
+        }
+    }
+
+    impl<S, D, I> Index<I> for Mask<S, D>
+    where
+        D: Dimension,
+        I: NdIndex<D>,
+        S: Data<Elem = bool>,
+    {
+        type Output = <ArrayBase<S, D> as Index<I>>::Output;
+
+        fn index(&self, index: I) -> &Self::Output {
+            &self.0[index]
+        }
+    }
+
+    impl<S, D, I> IndexMut<I> for Mask<S, D>
+    where
+        D: Dimension,
+        I: NdIndex<D>,
+        S: DataMut<Elem = bool>,
+    {
+        fn index_mut(&mut self, index: I) -> &mut Self::Output {
+            &mut self.0[index]
+        }
+    }
+}
+
+mod impl_from {
+    use super::Mask;
+    use nd::{ArrayBase, Dimension, RawData};
+
+    impl<S, D> From<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn from(mask: ArrayBase<S, D>) -> Self {
+            Mask(mask)
+        }
+    }
+
+    impl<S, D> From<Mask<S, D>> for ArrayBase<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn from(mask: Mask<S, D>) -> Self {
+            mask.0
+        }
+    }
+}
diff --git a/core/src/nn/mask/mod.rs b/core/src/nn/mask/mod.rs
new file mode 100644
index 00000000..5a3aaa2b
--- /dev/null
+++ b/core/src/nn/mask/mod.rs
@@ -0,0 +1,29 @@
+/*
+    Appellation: mask <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+pub use self::mask::*;
+
+pub(crate) mod mask;
+
+pub(crate) mod prelude {
+    pub use super::mask::Mask;
+    pub use super::NdMask;
+}
+
+use nd::{ArrayBase, Dimension, Ix2, RawData};
+
+pub trait NdMask<D = Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = bool>;
+}
+
+impl<S, D> NdMask<D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = bool>,
+{
+    type Data = S;
+}
diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs
index c0eb1f81..172b7e7d 100644
--- a/core/src/nn/mod.rs
+++ b/core/src/nn/mod.rs
@@ -2,19 +2,29 @@
    Appellation: nn <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::{error::ModelError, model::prelude::*};
+#[cfg(any(feature = "alloc", feature = "std"))]
+pub use self::types::*;
+pub use self::{dropout::*, error::ModelError, model::prelude::*};
 
+pub mod dropout;
 pub mod error;
+pub mod mask;
 pub mod model;
 
 pub(crate) mod prelude {
-    pub use super::error::ModelError;
+    pub use super::dropout::*;
+    pub use super::error::*;
+    pub use super::mask::prelude::*;
     pub use super::model::prelude::*;
 }
 
 #[cfg(any(feature = "alloc", feature = "std"))]
-pub type ForwardDyn<T = nd::Array2<f64>, O = T> =
-    crate::rust::Box<dyn crate::Forward<T, Output = O>>;
+mod types {
+    use crate::rust::Box;
+    use nd::prelude::Array2;
+
+    pub type ForwardDyn<T = Array2<f64>, O = T> = Box<dyn crate::Forward<T, Output = O>>;
+}
 
 #[cfg(test)]
 mod tests {}
diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs
index 8991d08e..316d03e7 100644
--- a/core/src/nn/model.rs
+++ b/core/src/nn/model.rs
@@ -6,6 +6,8 @@ pub use self::module::*;
 
 pub mod config;
 pub mod module;
+#[doc(hidden)]
+pub mod repo;
 
 pub(crate) mod prelude {
     pub use super::config::*;
@@ -26,3 +28,32 @@ where
 
     fn context(&self) -> Self::Ctx;
 }
+
+/// This trait describes any neural network or model that
+/// adheres to the deep neural network architecture.
+/// This design considers a single input and output layer, while
+/// allowing for any number of hidden layers to be persisted.
+///
+/// The `HIDDEN` constant is used to specify the number of hidden layers
+/// and is used to compute the total number of layers (HIDDEN + 2)
+pub trait DeepNeuralNetwork<S, T>: Forward<S, Output = T> {
+    const HIDDEN: Option<usize> = None;
+
+    type Input: Forward<S, Output = T>;
+    type Hidden: Forward<T, Output = T>; // The type of `hidden` layers; all hidden layers implement the same activation function
+    type Out: Forward<T, Output = T>;
+
+    fn input(&self) -> &Self::Input;
+
+    fn hidden(&self) -> &[Self::Hidden];
+
+    fn output(&self) -> &Self::Out;
+
+    fn nlayers(&self) -> usize {
+        self.nhidden() + 2
+    }
+
+    fn nhidden(&self) -> usize {
+        Self::HIDDEN.unwrap_or_else(|| self.hidden().len())
+    }
+}
diff --git a/core/src/nn/model/repo.rs b/core/src/nn/model/repo.rs
new file mode 100644
index 00000000..affd401a
--- /dev/null
+++ b/core/src/nn/model/repo.rs
@@ -0,0 +1,10 @@
+/*
+    Appellation: repo <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+#![allow(unused)]
+
+pub struct ModelRepo {
+    pub name: String,
+    pub(crate) store: String,
+}
diff --git a/core/src/traits/arr/create.rs b/core/src/traits/arr/create.rs
index b99a5eaa..8c45d927 100644
--- a/core/src/traits/arr/create.rs
+++ b/core/src/traits/arr/create.rs
@@ -86,11 +86,9 @@ where
     }
 }
 
-macro_rules! impl_like {
+macro_rules! impl_ndlike {
+
     ($name:ident::$method:ident.$call:ident: $($p:tt)*) => {
-        impl_like!(@impl $name::$method.$call: $($p)*);
-    };
-    (@impl $name:ident::$method:ident.$call:ident: $($p:tt)*) => {
         impl<A, S, D> $name for ArrayBase<S, D>
         where
             A: $($p)*,
@@ -106,6 +104,6 @@ macro_rules! impl_like {
     };
 }
 
-impl_like!(DefaultLike::default_like.default: Default);
-impl_like!(OnesLike::ones_like.ones: Clone + num::One);
-impl_like!(ZerosLike::zeros_like.zeros: Clone + num::Zero);
+impl_ndlike!(DefaultLike::default_like.default: Default);
+impl_ndlike!(OnesLike::ones_like.ones: Clone + num::One);
+impl_ndlike!(ZerosLike::zeros_like.zeros: Clone + num::Zero);
diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs
index 4cc76e4c..40857596 100644
--- a/core/src/traits/arr/misc.rs
+++ b/core/src/traits/arr/misc.rs
@@ -2,17 +2,18 @@
    Appellation: convert <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-use nd::Axis;
-use nd::{ArrayBase, Dimension, RawData};
+use nd::prelude::*;
+use nd::{DataMut, RawData};
 
-pub trait Dimensional<D> {
-    type Pattern;
-
-    fn dim(&self) -> Self::Pattern;
-
-    fn raw_dim(&self) -> D;
+/// This trait is used to fill an array with a value based on a mask.
+/// The mask is a boolean array of the same shape as the array.
+pub trait MaskFill<A, D>
+where
+    D: Dimension,
+{
+    type Output;
 
-    fn shape(&self) -> &[usize];
+    fn masked_fill(&self, mask: &Array<bool, D>, value: A) -> Self::Output;
 }
 
 pub trait IntoAxis {
@@ -26,23 +27,24 @@ pub trait IsSquare {
 /*
  ******** implementations ********
 */
-impl<S, D> Dimensional<D> for ArrayBase<S, D>
+
+impl<A, S, D> MaskFill<A, D> for ArrayBase<S, D>
 where
+    A: Clone,
     D: Dimension,
-    S: RawData,
+    S: DataMut<Elem = A>,
+    Self: Clone,
 {
-    type Pattern = D::Pattern;
-
-    fn shape(&self) -> &[usize] {
-        ArrayBase::shape(self)
-    }
-
-    fn dim(&self) -> Self::Pattern {
-        ArrayBase::dim(self)
-    }
+    type Output = ArrayBase<S, D>;
 
-    fn raw_dim(&self) -> D {
-        ArrayBase::raw_dim(self)
+    fn masked_fill(&self, mask: &Array<bool, D>, value: A) -> Self::Output {
+        let mut arr = self.clone();
+        arr.zip_mut_with(&mask, |x, &m| {
+            if m {
+                *x = value.clone();
+            }
+        });
+        arr
     }
 }
 
diff --git a/core/src/traits/arr/reshape.rs b/core/src/traits/arr/reshape.rs
new file mode 100644
index 00000000..7079f130
--- /dev/null
+++ b/core/src/traits/arr/reshape.rs
@@ -0,0 +1,40 @@
+/*
+    Appellation: reshape <module> [traits::arr]
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::prelude::*;
+use nd::{RawData, RawDataClone};
+
+pub trait Unsqueeze {
+    type Output;
+
+    fn unsqueeze(self, axis: usize) -> Self::Output;
+}
+
+/*
+ ************* Implementations *************
+*/
+
+impl<A, S, D> Unsqueeze for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output = ArrayBase<S, D::Larger>;
+
+    fn unsqueeze(self, axis: usize) -> Self::Output {
+        self.insert_axis(Axis(axis))
+    }
+}
+
+impl<'a, A, S, D> Unsqueeze for &'a ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawDataClone<Elem = A>,
+{
+    type Output = ArrayBase<S, D::Larger>;
+
+    fn unsqueeze(self, axis: usize) -> Self::Output {
+        self.clone().insert_axis(Axis(axis))
+    }
+}
diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs
deleted file mode 100644
index 22a00f99..00000000
--- a/core/src/traits/arr/tensor.rs
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
-    Appellation: generator <module>
-    Contrib: FL03 <jo3mccain@icloud.com>
-*/
-use nd::prelude::*;
-use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData};
-use num::{One, Zero};
-
-/// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase)
-pub trait NdBuilder<A = f64, D = Ix2>
-where
-    D: Dimension,
-{
-    type Data: RawData<Elem = A>;
-    type Store;
-
-    /// Create a new array with the given shape whose elements are set to the default value of the element type.
-    fn default<Sh>(shape: Sh) -> Self::Store
-    where
-        A: Default,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-
-    fn fill<Sh>(shape: Sh, elem: A) -> Self::Store
-    where
-        A: Clone,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-
-    fn ones<Sh>(shape: Sh) -> Self::Store
-    where
-        A: Clone + One,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-
-    fn zeros<Sh>(shape: Sh) -> Self::Store
-    where
-        A: Clone + Zero,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-}
-
-pub trait NdBuilderExt<A = f64, D = Ix2>: NdBuilder<A, D>
-where
-    D: Dimension,
-{
-    fn dim(&self) -> D::Pattern;
-
-    fn default_like<Sh>(&self) -> Self::Store
-    where
-        A: Default,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::default(self.dim())
-    }
-
-    fn fill_like<Sh>(&self, elem: A) -> Self::Store
-    where
-        A: Clone,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::fill(self.dim(), elem)
-    }
-
-    fn ones_like<Sh>(&self) -> Self::Store
-    where
-        A: Clone + One,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::ones(self.dim())
-    }
-
-    fn zeros_like<Sh>(&self) -> Self::Store
-    where
-        A: Clone + Zero,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::zeros(self.dim())
-    }
-}
-
-pub trait AsOwned<S, D = Ix2>
-where
-    D: Dimension,
-    S: RawData,
-{
-    type Output;
-
-    fn into_owned(self) -> Self::Output
-    where
-        S: Data,
-        S::Elem: Clone;
-
-    fn to_owned(&self) -> Self::Output
-    where
-        S: Data,
-        S::Elem: Clone;
-}
-
-pub trait AsShared<S, D = Ix2>
-where
-    D: Dimension,
-    S: RawData,
-{
-    type Output;
-
-    fn into_shared(self) -> Self::Output
-    where
-        S: DataOwned,
-        S::Elem: Clone;
-
-    fn to_shared(&self) -> Self::Output
-    where
-        S: DataOwned,
-        S::Elem: Clone;
-}
-
-pub trait NdView<A = f64, S = OwnedRepr<A>, D = Ix2>: AsOwned<S, D> + AsShared<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    fn view(&self) -> ArrayView<'_, A, D>
-    where
-        A: Clone,
-        S: Data;
-
-    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
-    where
-        A: Clone,
-        S: DataMut;
-}
-
-/*
- ************* Implementations *************
-*/
-impl<A, S, D> NdBuilder<A, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    type Data = S;
-    type Store = ArrayBase<S, D>;
-
-    fn default<Sh>(shape: Sh) -> Self
-    where
-        A: Default,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        ArrayBase::default(shape)
-    }
-
-    fn fill<Sh>(shape: Sh, elem: A) -> Self
-    where
-        A: Clone,
-        S: DataOwned,
-        Sh: ShapeBuilder<Dim = D>,
-    {
-        ArrayBase::from_elem(shape, elem)
-    }
-
-    fn ones<Sh>(shape: Sh) -> Self
-    where
-        A: Clone + One,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        ArrayBase::ones(shape)
-    }
-
-    fn zeros<Sh>(shape: Sh) -> Self
-    where
-        A: Clone + Zero,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        ArrayBase::zeros(shape)
-    }
-}
-
-impl<A, S, D> NdBuilderExt<A, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    fn dim(&self) -> D::Pattern {
-        ArrayBase::dim(self)
-    }
-}
-
-impl<A, S, D> AsOwned<S, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    type Output = Array<A, D>;
-
-    fn into_owned(self) -> Self::Output
-    where
-        A: Clone,
-        S: Data,
-    {
-        self.into_owned()
-    }
-
-    fn to_owned(&self) -> Self::Output
-    where
-        A: Clone,
-        S: Data,
-    {
-        self.to_owned()
-    }
-}
-
-impl<A, S, D> AsShared<S, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    type Output = ArcArray<A, D>;
-
-    fn into_shared(self) -> Self::Output
-    where
-        A: Clone,
-        S: DataOwned,
-    {
-        self.into_shared()
-    }
-
-    fn to_shared(&self) -> Self::Output
-    where
-        A: Clone,
-        S: DataOwned,
-    {
-        self.to_shared()
-    }
-}
-
-impl<A, S, D> NdView<A, S, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    fn view(&self) -> ArrayView<'_, A, D>
-    where
-        A: Clone,
-        S: Data,
-    {
-        self.view()
-    }
-
-    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
-    where
-        A: Clone,
-        S: DataMut,
-    {
-        self.view_mut()
-    }
-}
diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs
deleted file mode 100644
index 842d96db..00000000
--- a/core/src/traits/misc/container.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
-    Appellation: container <module>
-    Contrib: FL03 <jo3mccain@icloud.com>
-*/
-
-pub trait Container<T> {
-    type Data: Data<Item = T>;
-}
-
-pub trait Data {
-    type Item;
-}
diff --git a/core/src/traits/misc/sequential.rs b/core/src/traits/misc/sequential.rs
new file mode 100644
index 00000000..8e92192e
--- /dev/null
+++ b/core/src/traits/misc/sequential.rs
@@ -0,0 +1,63 @@
+/*
+    Appellation: sequential <module> [traits::misc]
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use num::traits::FromPrimitive;
+
+/// A trait for sequential data structures;
+/// This trait is implemented for containers that have a known length (`Vec<T>`, `[T]`, and `[T; N]`).
+pub trait Sequence<T> {
+    const LENGTH: Option<usize> = None;
+
+    fn len(&self) -> usize;
+
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    fn elems(&self) -> T
+    where
+        T: FromPrimitive,
+    {
+        T::from_usize(self.len()).unwrap()
+    }
+}
+
+pub trait SequenceIter {
+    type Item;
+
+    fn len(&self) -> usize;
+}
+/*
+ ************* Implementations *************
+*/
+impl<T, I> SequenceIter for I
+where
+    I: ExactSizeIterator<Item = T>,
+{
+    type Item = T;
+
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
+
+impl<T> Sequence<T> for Vec<T> {
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
+
+impl<T> Sequence<T> for [T] {
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
+
+impl<T, const N: usize> Sequence<T> for [T; N] {
+    const LENGTH: Option<usize> = Some(N);
+
+    fn len(&self) -> usize {
+        N
+    }
+}
diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs
index b6aa6b21..9dc12247 100644
--- a/core/src/traits/mod.rs
+++ b/core/src/traits/mod.rs
@@ -7,36 +7,38 @@ pub use self::prelude::*;
 pub mod num;
 pub mod ops;
 pub mod predict;
+pub mod setup;
 pub mod train;
 
 pub mod arr {
     pub use self::prelude::*;
 
-    pub(crate) mod create;
-    pub(crate) mod misc;
-    pub(crate) mod ops;
-    pub(crate) mod tensor;
+    mod create;
+    mod misc;
+    mod ops;
+    mod reshape;
 
     pub(crate) mod prelude {
         pub use super::create::*;
         pub use super::misc::*;
         pub use super::ops::*;
-        pub use super::tensor::*;
+        pub use super::reshape::*;
     }
 }
 
 pub mod misc {
-    pub mod adjust;
+    pub use self::prelude::*;
+
+    pub(crate) mod adjust;
+    #[doc(hidden)]
+    pub(crate) mod sequential;
     #[doc(hidden)]
-    pub mod container;
-    pub mod setup;
-    pub mod store;
-    pub mod toggle;
+    pub(crate) mod store;
+    pub(crate) mod toggle;
 
     pub(crate) mod prelude {
         pub use super::adjust::*;
-        pub use super::container::*;
-        pub use super::setup::*;
+        pub use super::sequential::*;
         pub use super::store::*;
         pub use super::toggle::*;
     }
@@ -48,5 +50,6 @@ pub(crate) mod prelude {
     pub use super::num::*;
     pub use super::ops::*;
     pub use super::predict::*;
+    pub use super::setup::*;
     pub use super::train::*;
 }
diff --git a/core/src/traits/misc/setup.rs b/core/src/traits/setup.rs
similarity index 100%
rename from core/src/traits/misc/setup.rs
rename to core/src/traits/setup.rs
diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs
index a639d5a4..d6347e49 100644
--- a/core/src/types/mod.rs
+++ b/core/src/types/mod.rs
@@ -7,6 +7,7 @@ pub use self::prelude::*;
 pub use self::std_types::*;
 
 pub mod propagate;
+pub mod shape;
 
 pub type NdResult<T> = core::result::Result<T, nd::ShapeError>;
 /// A type alias for a [Result](core::result::Result) with the crate's [Error](crate::error::Error) type.
@@ -23,6 +24,7 @@ mod std_types {
 
 pub(crate) mod prelude {
     pub use super::propagate::Propagate;
+    pub use super::shape::ModelShape;
     #[cfg(feature = "std")]
     pub use super::std_types::*;
     pub use super::{NdResult, Result};
diff --git a/core/src/types/shape.rs b/core/src/types/shape.rs
new file mode 100644
index 00000000..37e75047
--- /dev/null
+++ b/core/src/types/shape.rs
@@ -0,0 +1,166 @@
+/*
+    Appellation: shape <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::prelude::{Ix1, Ix2};
+use nd::{Dimension, ErrorKind, IntoDimension, RemoveAxis, ShapeBuilder, ShapeError};
+
+pub(crate) fn _from_dim<D>(dim: D) -> Result<Features, ShapeError>
+where
+    D: Dimension,
+{
+    if dim.ndim() == 1 {
+        Ok(Features::new(dim[0], 1))
+    } else if dim.ndim() >= 2 {
+        Ok(Features::new(dim[1], dim[0]))
+    } else {
+        Err(ShapeError::from_kind(ErrorKind::IncompatibleShape))
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct ModelShape {
+    pub(crate) features: Features,
+    pub(crate) network: usize,
+}
+
+impl ModelShape {
+    pub fn new(model: usize, network: usize) -> Self {
+        let features = Features::from_network(model, network);
+        Self { features, network }
+    }
+
+    pub fn from_features(features: Features) -> Self {
+        Self {
+            features,
+            network: features.size(),
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct Features {
+    pub(crate) inputs: usize,
+    pub(crate) outputs: usize,
+}
+
+impl Features {
+    /// Create a new, unchecked [Features] instance;
+    /// in debug builds, `inputs` and `outputs` are asserted to be non-zero.
+    pub fn new(inputs: usize, outputs: usize) -> Self {
+        debug_assert_ne!(inputs, 0);
+        debug_assert_ne!(outputs, 0);
+
+        Self { inputs, outputs }
+    }
+    /// Attempts to build a new [Features] instance from the given dimension ([`D`](Dimension))
+    pub fn from_dimension<D>(dim: D) -> Result<Self, ShapeError>
+    where
+        D: Dimension,
+    {
+        _from_dim(dim)
+    }
+    /// Builds a new instance from the given shape ([`Sh`](ShapeBuilder));
+    /// Unlike [Features::from_dimension], this method requires the dimension (`D`) to
+    /// additionally implement the [RemoveAxis] trait
+    pub fn from_shape<D, Sh>(shape: Sh) -> Self
+    where
+        D: RemoveAxis,
+        Sh: ShapeBuilder<Dim = D>,
+    {
+        let dim = shape.into_shape().raw_dim().clone();
+        _from_dim(dim).unwrap()
+    }
+    /// Creates a new instance given the model size (`inputs`, `d_model`) and total number of nodes within the network (`size`, `network`, `d_network`)
+    pub fn from_network(model: usize, network: usize) -> Self {
+        let outputs = network / model;
+        Self::new(model, outputs)
+    }
+
+    pub const fn as_array(&self) -> [usize; 2] {
+        [self.outputs(), self.inputs()]
+    }
+    /// Returns the features as a two-tuple ordered as (`outputs`, `inputs`);
+    pub const fn as_tuple(&self) -> (usize, usize) {
+        (self.outputs(), self.inputs())
+    }
+    pub fn check_dim<D>(&self, dim: D) -> bool
+    where
+        D: Dimension,
+    {
+        if dim.ndim() == 1 {
+            self.inputs() == dim[0]
+        } else if dim.ndim() >= 2 {
+            self.outputs() == dim[0] && self.inputs() == dim[1]
+        } else {
+            false
+        }
+    }
+    /// Forwards the [into_pattern](ndarray::Dimension::into_pattern) method from the [Dimension] trait
+    #[inline]
+    pub fn into_pattern(self) -> (usize, usize) {
+        self.into_dimension().into_pattern()
+    }
+    /// An aliased function that returns the number of input features
+    pub const fn d_model(&self) -> usize {
+        self.inputs()
+    }
+    /// Returns the number of input features
+    pub const fn inputs(&self) -> usize {
+        self.inputs
+    }
+    /// Checks to see if the features speak to a so-called `unit`;
+    /// i.e. see if the number of output features is equal to 1.
+    pub fn is_unit(&self) -> bool {
+        self.outputs() == 1
+    }
+    /// Returns the number of output features
+    pub const fn outputs(&self) -> usize {
+        self.outputs
+    }
+    /// Computes the total number of nodes in the network
+    pub fn size(&self) -> usize {
+        self.inputs() * self.outputs()
+    }
+    #[doc(hidden)]
+    pub fn uniform_scale(&self) -> f64 {
+        (self.inputs as f64).recip().sqrt()
+    }
+}
+
+impl IntoDimension for Features {
+    type Dim = Ix2;
+
+    fn into_dimension(self) -> Self::Dim {
+        (self.outputs, self.inputs).into_dimension()
+    }
+}
+
+impl From<Ix1> for Features {
+    fn from(dim: Ix1) -> Self {
+        Self::new(dim[0], 1) // 1-d: `dim[0]` inputs, one output (matches `_from_dim`)
+    }
+}
+
+impl From<Ix2> for Features {
+    fn from(dim: Ix2) -> Self {
+        Self::new(dim[1], dim[0])
+    }
+}
+
+impl From<Features> for Ix2 {
+    fn from(features: Features) -> Self {
+        features.into_dimension()
+    }
+}
+
+impl<U> PartialEq<U> for Features
+where
+    [usize; 2]: PartialEq<U>,
+{
+    fn eq(&self, other: &U) -> bool {
+        self.as_array() == *other
+    }
+}
diff --git a/core/tests/init.rs b/core/tests/init.rs
new file mode 100644
index 00000000..39d29e5d
--- /dev/null
+++ b/core/tests/init.rs
@@ -0,0 +1,45 @@
+/*
+   Appellation: init <test>
+   Contrib: FL03 <jo3mccain@icloud.com>
+*/
+extern crate concision_core as cnc;
+
+use cnc::init::distr::LecunNormal;
+use cnc::init::InitializeExt;
+use ndarray::prelude::*;
+
+#[test]
+fn test_init_ext() {
+    let shape = [3, 3];
+    let seed = 0u64;
+    let a = Array2::<f64>::stdnorm(shape);
+    let b = Array2::<f64>::stdnorm_from_seed(shape, seed);
+
+    assert_eq!(a.shape(), shape);
+    assert_eq!(a.shape(), b.shape());
+}
+
+#[test]
+fn test_lecun_normal() {
+    let n = 3;
+    let shape = (3, 3);
+
+    let distr = LecunNormal::new(n);
+
+    let bnd = 2f64 * distr.std_dev::<f64>();
+
+    let arr = Array2::<f64>::lecun_normal(shape, n);
+
+    assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd));
+
+    assert_eq!(arr.dim(), shape);
+}
+
+#[test]
+fn test_truncnorm() {
+    let (mean, std) = (0f64, 2f64);
+    let bnd = 2f64 * std;
+    let shape = (3, 3);
+    let arr = Array::truncnorm(shape, mean, std).unwrap();
+    assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd));
+}
diff --git a/core/tests/func.rs b/core/tests/nn.rs
similarity index 83%
rename from core/tests/func.rs
rename to core/tests/nn.rs
index e1a5ccef..55b51198 100644
--- a/core/tests/func.rs
+++ b/core/tests/nn.rs
@@ -1,7 +1,7 @@
 #![allow(unused_imports)]
 extern crate concision_core as concision;
 
-use concision::func::Dropout;
+use concision::nn::DropoutLayer;
 use concision::Forward;
 use ndarray::prelude::*;
 
@@ -10,7 +10,7 @@ use ndarray::prelude::*;
 fn test_dropout() {
     let shape = (512, 2048);
     let arr = Array2::<f64>::ones(shape);
-    let dropout = Dropout::new(0.5);
+    let dropout = DropoutLayer::new(0.5);
     let out = dropout.forward(&arr);
 
     assert!(arr.iter().all(|&x| x == 1.0));
diff --git a/core/tests/random.rs b/core/tests/random.rs
deleted file mode 100644
index daa76435..00000000
--- a/core/tests/random.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
-   Appellation: random <test>
-   Contrib: FL03 <jo3mccain@icloud.com>
-*/
-extern crate concision_core as cnc;
-
-use cnc::init::InitializeExt;
-use ndarray::prelude::*;
-
-#[test]
-fn test_stdnorm() {
-    let shape = [3, 3];
-    let seed = 0u64;
-    let a = Array2::<f64>::stdnorm(shape);
-    let b = Array2::<f64>::stdnorm_from_seed(shape, seed);
-
-    assert_eq!(a.shape(), shape);
-    assert_eq!(a.shape(), b.shape());
-}
diff --git a/core/tests/traits.rs b/core/tests/traits.rs
index 1778fefd..b1038f94 100644
--- a/core/tests/traits.rs
+++ b/core/tests/traits.rs
@@ -4,20 +4,32 @@
 */
 extern crate concision_core as cnc;
 
-use cnc::traits::{Affine, AsComplex, Matpow};
-use ndarray::prelude::{array, Array2};
+use cnc::linarr;
+use ndarray::prelude::*;
 use num::Complex;
 
 #[test]
 fn test_affine() {
+    use cnc::traits::Affine;
     let x = array![[0.0, 1.0], [2.0, 3.0]];
 
     let y = x.affine(4.0, -2.0);
     assert_eq!(y, array![[-2.0, 2.0], [6.0, 10.0]]);
 }
 
+#[test]
+fn test_masked_fill() {
+    use cnc::traits::MaskFill;
+    let shape = (2, 2);
+    let mask = array![[true, false], [false, true]];
+    let arr = linarr::<f64, Ix2>(shape).unwrap();
+    let a = arr.masked_fill(&mask, 0.0);
+    assert_eq!(a, array![[0.0, 1.0], [2.0, 0.0]]);
+}
+
 #[test]
 fn test_as_complex() {
+    use cnc::traits::AsComplex;
     let x = 1.0;
     let y = x.as_re();
     assert_eq!(y, Complex::new(1.0, 0.0));
@@ -25,8 +37,19 @@ fn test_as_complex() {
 
 #[test]
 fn test_matrix_power() {
+    use cnc::traits::Matpow;
     let x = array![[1.0, 2.0], [3.0, 4.0]];
     assert_eq!(x.pow(0), Array2::<f64>::eye(2));
     assert_eq!(x.pow(1), x);
     assert_eq!(x.pow(2), x.dot(&x));
 }
+
+#[test]
+fn test_unsqueeze() {
+    use cnc::traits::Unsqueeze;
+    let arr = array![1, 2, 3, 4];
+    let a = arr.clone().unsqueeze(0);
+    assert_eq!(a.dim(), (1, 4));
+    let b = arr.unsqueeze(1);
+    assert_eq!(b.dim(), (4, 1));
+}
diff --git a/data/src/kernel/mod.rs b/data/src/kernel/mod.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/data/src/lib.rs b/data/src/lib.rs
index 0186ca18..5f1d6ead 100644
--- a/data/src/lib.rs
+++ b/data/src/lib.rs
@@ -22,9 +22,11 @@ pub mod params;
 pub mod preproc;
 pub mod tensor;
 pub mod traits;
+pub mod types;
 
 pub mod prelude {
     pub use super::dataset::*;
     pub use super::params::prelude::*;
     pub use super::traits::prelude::*;
+    pub use super::types::prelude::*;
 }
diff --git a/data/src/tensor/mod.rs b/data/src/tensor/mod.rs
index 17945f2f..7d31345f 100644
--- a/data/src/tensor/mod.rs
+++ b/data/src/tensor/mod.rs
@@ -2,6 +2,6 @@
     Appellation: tensor <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::ndtensor::NdTensor;
+pub use self::ndtensor::NdContainer;
 
 pub mod ndtensor;
diff --git a/data/src/tensor/ndtensor/traits.rs b/data/src/tensor/ndtensor/traits.rs
index c55c3afe..b125c25b 100644
--- a/data/src/tensor/ndtensor/traits.rs
+++ b/data/src/tensor/ndtensor/traits.rs
@@ -11,7 +11,7 @@ pub trait TensorData {
     fn as_mut_slice(&mut self) -> &mut [Self::Elem];
 }
 
-pub trait NdTensor<T> {
+pub trait NdContainer<T> {
     const RANK: Option<usize> = None;
 
     type Data: TensorData<Elem = T>;
diff --git a/data/src/traits/build.rs b/data/src/traits/build.rs
new file mode 100644
index 00000000..7944014b
--- /dev/null
+++ b/data/src/traits/build.rs
@@ -0,0 +1,150 @@
+/*
+    Appellation: build <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use crate::traits::Dimensional;
+use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder};
+use num::{One, Zero};
+
+/// [NdBuilder] describes common creation routines for [ArrayBase]
+pub trait NdBuilder<A = f64, D = nd::Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+
+    /// Create a new array with the given shape whose elements are set to the default value of the element type.
+    fn default<Sh>(shape: Sh) -> Self
+    where
+        A: Default,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+
+    /// Create a new array with the given shape whose elements are all set to `elem`.
+    fn fill<Sh>(shape: Sh, elem: A) -> Self
+    where
+        A: Clone,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+
+    /// Create a new array with the given shape whose elements are all set to one.
+    fn ones<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + One,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+
+    /// Create a new array with the given shape whose elements are all set to zero.
+    fn zeros<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + Zero,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+}
+
+/// [NdBuilderExt] extends [NdBuilder] with `*_like` constructors that build a new
+/// instance from the dimension pattern reported by [NdBuilderExt::dim].
+pub trait NdBuilderExt<A = f64, D = nd::Ix2>: NdBuilder<A, D> + Sized
+where
+    D: Dimension,
+{
+    /// Returns the dimension pattern of the container.
+    fn dim(&self) -> D::Pattern;
+
+    /// Create a new instance shaped like `self` whose elements are the default value.
+    ///
+    /// The shape always comes from `self`, so no shape type parameter is exposed.
+    fn default_like(&self) -> Self
+    where
+        A: Default,
+        Self::Data: DataOwned,
+    {
+        Self::default(self.dim())
+    }
+
+    /// Create a new instance shaped like `self` whose elements are all set to `elem`.
+    fn fill_like(&self, elem: A) -> Self
+    where
+        A: Clone,
+        Self::Data: DataOwned,
+    {
+        Self::fill(self.dim(), elem)
+    }
+
+    /// Create a new instance shaped like `self` whose elements are all set to one.
+    fn ones_like(&self) -> Self
+    where
+        A: Clone + One,
+        Self::Data: DataOwned,
+    {
+        Self::ones(self.dim())
+    }
+
+    /// Create a new instance shaped like `self` whose elements are all set to zero.
+    fn zeros_like(&self) -> Self
+    where
+        A: Clone + Zero,
+        Self::Data: DataOwned,
+    {
+        Self::zeros(self.dim())
+    }
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<A, S, D> NdBuilder<A, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Data = S;
+
+    fn default<Sh>(shape: Sh) -> Self
+    where
+        A: Default,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        ArrayBase::default(shape)
+    }
+
+    fn fill<Sh>(shape: Sh, elem: A) -> Self
+    where
+        A: Clone,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        ArrayBase::from_elem(shape, elem)
+    }
+
+    fn ones<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + One,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        ArrayBase::ones(shape)
+    }
+
+    fn zeros<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + Zero,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        ArrayBase::zeros(shape)
+    }
+}
+
+// Every builder that also reports its dimensions gets the `*_like` constructors.
+impl<U, A, D> NdBuilderExt<A, D> for U
+where
+    U: Dimensional<Dim = D> + NdBuilder<A, D>,
+    D: Dimension,
+{
+    fn dim(&self) -> D::Pattern {
+        // Fully qualified so the call targets `Dimensional::dim`, never this method itself.
+        Dimensional::dim(self)
+    }
+}
diff --git a/data/src/traits/data/container.rs b/data/src/traits/data/container.rs
new file mode 100644
index 00000000..0d9e0044
--- /dev/null
+++ b/data/src/traits/data/container.rs
@@ -0,0 +1,30 @@
+/*
+    Appellation: container <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use crate::traits::{ContainerRepr, Dimensional};
+
+/// [Container] associates a type with the [ContainerRepr] holding its elements of type `T`.
+pub trait Container<T> {
+    type Data: ContainerRepr<Elem = T>;
+}
+
+/// This trait describes the basic operations for any n-dimensional container.
+pub trait NdContainer<A, D>: Dimensional<Dim = D> {
+    type Data: ContainerRepr<Elem = A>;
+
+    /// Borrows the elements of the container as a slice.
+    fn as_slice(&self) -> &[A];
+
+    /// Mutably borrows the elements of the container as a slice.
+    fn as_mut_slice(&mut self) -> &mut [A];
+}
+
+/*
+ ************* Implementations *************
+*/
+// A vector is a container of its own element type only; tying the trait parameter to
+// the vector's element type keeps `Vec<S>` from claiming to contain an unrelated `T`.
+impl<T> Container<T> for Vec<T> {
+    type Data = Vec<T>;
+}
diff --git a/data/src/traits/data/repr.rs b/data/src/traits/data/repr.rs
new file mode 100644
index 00000000..3c583d95
--- /dev/null
+++ b/data/src/traits/data/repr.rs
@@ -0,0 +1,15 @@
+/*
+    Appellation: data <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub trait ContainerRepr {
+    type Elem;
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<T> ContainerRepr for Vec<T> {
+    type Elem = T;
+}
diff --git a/data/src/traits/ext/ndarray.rs b/data/src/traits/ext/ndarray.rs
new file mode 100644
index 00000000..6d3e6ed8
--- /dev/null
+++ b/data/src/traits/ext/ndarray.rs
@@ -0,0 +1,45 @@
+/*
+    Appellation: ndarray <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::iter::{Iter, IterMut};
+use nd::{Dimension, RawData};
+
+pub trait NdArray<A, D>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+
+    fn as_slice(&self) -> &[A];
+
+    fn as_mut_slice(&mut self) -> &mut [A];
+
+    fn iter(&self) -> Iter<'_, A, D>;
+
+    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
+
+    fn map<F>(&self, f: F) -> Self
+    where
+        F: FnMut(&A) -> A;
+
+    fn mapv<F>(&mut self, f: F)
+    where
+        A: Clone,
+        F: FnMut(A) -> A;
+}
+
+pub trait NdIter<A, D>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+
+    fn iter(&self) -> Iter<'_, A, D>;
+
+    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
+}
+
+/*
+ ************* Implementations *************
+*/
diff --git a/data/src/traits/ext/ndtensor.rs b/data/src/traits/ext/ndtensor.rs
new file mode 100644
index 00000000..0edbd756
--- /dev/null
+++ b/data/src/traits/ext/ndtensor.rs
@@ -0,0 +1,59 @@
+/*
+    Appellation: ndtensor <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::{ArrayBase, Data, Dimension, RawData};
+use num::complex::ComplexFloat;
+use num::traits::Float;
+
+/// [Scalar] associates a type with a floating-point type `R`.
+pub trait Scalar {
+    type R: Float;
+}
+
+/// [NdTensor] exposes element-wise numerical routines for n-dimensional containers
+/// whose elements implement [ComplexFloat]; each routine produces a new `Output`.
+pub trait NdTensor<A, D>
+where
+    A: ComplexFloat,
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+    type Output;
+
+    /// Computes the complex conjugate of the container.
+    fn conj(&self) -> Self::Output;
+
+    /// Computes the cosine of the container.
+    fn cos(&self) -> Self::Output;
+
+    /// Computes the hyperbolic cosine of the container.
+    fn cosh(&self) -> Self::Output;
+}
+
+/*
+ ************* Implementations *************
+*/
+// `mapv` only reads the elements and allocates a fresh owned array, so the container
+// itself need not be `Clone`; any readable storage (including mutable views) qualifies.
+impl<A, S, D> NdTensor<A, D> for ArrayBase<S, D>
+where
+    A: ComplexFloat,
+    D: Dimension,
+    S: Data<Elem = A>,
+{
+    type Data = S;
+    type Output = nd::Array<A, D>;
+
+    fn conj(&self) -> Self::Output {
+        self.mapv(|x| x.conj())
+    }
+
+    fn cos(&self) -> Self::Output {
+        self.mapv(|x| x.cos())
+    }
+
+    fn cosh(&self) -> Self::Output {
+        self.mapv(|x| x.cosh())
+    }
+}
diff --git a/data/src/traits/ext/ndview.rs b/data/src/traits/ext/ndview.rs
new file mode 100644
index 00000000..56b88c3f
--- /dev/null
+++ b/data/src/traits/ext/ndview.rs
@@ -0,0 +1,153 @@
+/*
+    Appellation: ndview <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::prelude::*;
+use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData};
+
+pub trait AsOwned<A, S, D = Ix2>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output;
+
+    fn into_owned(self) -> Self::Output
+    where
+        A: Clone,
+        S: Data;
+
+    fn to_owned(&self) -> Self::Output
+    where
+        A: Clone,
+        S: Data;
+}
+
+pub trait AsShared<S, D = Ix2>
+where
+    D: Dimension,
+    S: RawData,
+{
+    type Output;
+
+    fn into_shared(self) -> Self::Output
+    where
+        S: DataOwned,
+        S::Elem: Clone;
+
+    fn to_shared(&self) -> Self::Output
+    where
+        S: DataOwned,
+        S::Elem: Clone;
+}
+
+pub trait NdView<A = f64, S = OwnedRepr<A>, D = Ix2>: AsOwned<A, S, D> + AsShared<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn view(&self) -> ArrayView<'_, A, D>
+    where
+        A: Clone,
+        S: Data;
+
+    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
+    where
+        A: Clone,
+        S: DataMut;
+}
+
+pub trait View<A = f64, D = Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+    type Output;
+
+    fn view(&self) -> Self::Output
+    where
+        A: Clone,
+        Self::Data: Data;
+}
+pub trait ViewMut<A = f64, D = Ix2>: View<A, D>
+where
+    D: Dimension,
+{
+    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
+    where
+        A: Clone,
+        Self::Data: DataMut;
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<A, S, D> AsOwned<A, S, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn into_owned(self) -> Self::Output
+    where
+        A: Clone,
+        S: Data,
+    {
+        self.into_owned()
+    }
+
+    fn to_owned(&self) -> Self::Output
+    where
+        A: Clone,
+        S: Data,
+    {
+        self.to_owned()
+    }
+}
+
+impl<A, S, D> AsShared<S, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output = ArcArray<A, D>;
+
+    fn into_shared(self) -> Self::Output
+    where
+        A: Clone,
+        S: DataOwned,
+    {
+        self.into_shared()
+    }
+
+    fn to_shared(&self) -> Self::Output
+    where
+        A: Clone,
+        S: DataOwned,
+    {
+        self.to_shared()
+    }
+}
+
+impl<A, S, D> NdView<A, S, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn view(&self) -> ArrayView<'_, A, D>
+    where
+        A: Clone,
+        S: Data,
+    {
+        self.view()
+    }
+
+    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
+    where
+        A: Clone,
+        S: DataMut,
+    {
+        self.view_mut()
+    }
+}
diff --git a/data/src/traits/mod.rs b/data/src/traits/mod.rs
index 83d994c3..14b24d38 100644
--- a/data/src/traits/mod.rs
+++ b/data/src/traits/mod.rs
@@ -2,10 +2,43 @@
     Appellation: traits <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::prelude::*;
+pub use self::{data::*, ext::*, records::*, shape::*};
+
+pub mod build;
 
 pub mod records;
+pub mod shape;
+
+#[doc(hidden)]
+pub mod data {
+    pub use self::{container::*, repr::*};
+
+    pub(crate) mod container;
+    pub(crate) mod repr;
+
+    pub(crate) mod prelude {
+        pub use super::container::*;
+        pub use super::repr::*;
+    }
+}
+
+pub mod ext {
+    pub use self::{ndarray::*, ndtensor::*, ndview::*};
+
+    pub(crate) mod ndarray;
+    pub(crate) mod ndtensor;
+    pub(crate) mod ndview;
+
+    pub(crate) mod prelude {
+        pub use super::ndarray::*;
+        pub use super::ndtensor::*;
+        pub use super::ndview::*;
+    }
+}
 
 pub(crate) mod prelude {
+    pub use super::data::prelude::*;
+    pub use super::ext::prelude::*;
     pub use super::records::*;
+    pub use super::shape::*;
 }
diff --git a/data/src/traits/shape.rs b/data/src/traits/shape.rs
new file mode 100644
index 00000000..a8127e46
--- /dev/null
+++ b/data/src/traits/shape.rs
@@ -0,0 +1,96 @@
+/*
+    Appellation: shape <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::{ArrayBase, Dimension, RawData};
+
+pub trait IntoPattern {
+    type Pattern;
+
+    fn into_pattern(self) -> Self::Pattern;
+}
+
+/// [Dimensional] provides a common interface for containers to access their shape and dimension.
+pub trait Dimensional {
+    const RANK: Option<usize> = None;
+
+    type Dim: IntoPattern;
+
+    fn dim(&self) -> <Self::Dim as IntoPattern>::Pattern {
+        self.raw_dim().into_pattern()
+    }
+
+    fn is_scalar(&self) -> bool {
+        self.rank() == 0 || self.shape().iter().all(|x| *x == 1)
+    }
+
+    fn rank(&self) -> usize {
+        Self::RANK.unwrap_or(self.shape().len())
+    }
+
+    fn raw_dim(&self) -> Self::Dim;
+
+    fn size(&self) -> usize {
+        self.shape().iter().product()
+    }
+
+    fn shape(&self) -> &[usize];
+}
+
+/*
+ ******** implementations ********
+*/
+impl<D> IntoPattern for D
+where
+    D: Dimension,
+{
+    type Pattern = D::Pattern;
+
+    fn into_pattern(self) -> Self::Pattern {
+        Dimension::into_pattern(self)
+    }
+}
+
+// impl<D> Dimensional for D
+// where
+//     D: Dimension + IntoPattern,
+// {
+//     type Dim = D;
+
+//     fn dim(&self) -> D::Pattern {
+//         self.clone().into_pattern()
+//     }
+
+//     fn raw_dim(&self) -> D {
+//         self.clone()
+//     }
+
+//     fn shape(&self) -> &[usize] {
+//         D::slice(self)
+//     }
+// }
+
+impl<S, D> Dimensional for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData,
+{
+    const RANK: Option<usize> = D::NDIM;
+    type Dim = D;
+
+    fn dim(&self) -> D::Pattern {
+        ArrayBase::dim(self)
+    }
+
+    fn raw_dim(&self) -> D {
+        ArrayBase::raw_dim(self)
+    }
+
+    fn shape(&self) -> &[usize] {
+        ArrayBase::shape(self)
+    }
+
+    fn size(&self) -> usize {
+        ArrayBase::len(self)
+    }
+}
diff --git a/data/src/types/kernel.rs b/data/src/types/kernel.rs
new file mode 100644
index 00000000..248ad95f
--- /dev/null
+++ b/data/src/types/kernel.rs
@@ -0,0 +1,6 @@
+/*
+    Appellation: kernel <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub struct Kernel;
diff --git a/data/src/types/mod.rs b/data/src/types/mod.rs
new file mode 100644
index 00000000..b8ca6da5
--- /dev/null
+++ b/data/src/types/mod.rs
@@ -0,0 +1,11 @@
+/*
+    Appellation: types <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+pub use self::kernel::Kernel;
+
+pub mod kernel;
+
+pub(crate) mod prelude {
+    pub use super::kernel::Kernel;
+}
diff --git a/models/linear/src/impls/impl_rand.rs b/models/linear/src/impls/impl_rand.rs
index f2e602e1..28bb0126 100644
--- a/models/linear/src/impls/impl_rand.rs
+++ b/models/linear/src/impls/impl_rand.rs
@@ -4,7 +4,7 @@
 */
 #![cfg(feature = "rand")]
 
-use crate::params::{ParamMode, ParamsBase};
+use crate::params::{LinearParams, ParamMode, ParamsBase};
 use crate::{bias_dim, Linear};
 use concision::init::rand::Rng;
 use concision::init::rand_distr::{uniform::SampleUniform, Distribution, StandardNormal};
@@ -12,30 +12,32 @@ use concision::{Initialize, InitializeExt};
 use nd::*;
 use num::Float;
 
-impl<A, D, K> Linear<A, K, D>
+impl<A, S, D, K> Linear<A, K, D, S>
 where
     A: Clone + Float,
     D: RemoveAxis,
     K: ParamMode,
+    S: DataOwned<Elem = A>,
     StandardNormal: Distribution<A>,
 {
-    pub fn uniform(self) -> Self
+    pub fn uniform(self) -> Linear<A, K, D, OwnedRepr<A>>
     where
         A: SampleUniform,
         <A as SampleUniform>::Sampler: Clone,
     {
-        Self {
+        Linear {
+            config: self.config,
             params: self.params.uniform(),
-            ..self
         }
     }
 }
 
-impl<A, K, D> crate::LinearParams<A, K, D>
+impl<A, S, D, K> ParamsBase<S, D, K>
 where
     A: Clone + Float + SampleUniform,
     D: RemoveAxis,
     K: ParamMode,
+    S: RawData<Elem = A>,
     StandardNormal: Distribution<A>,
     <A as SampleUniform>::Sampler: Clone,
 {
@@ -48,42 +50,42 @@ where
         self.dk().sqrt()
     }
 
-    pub fn uniform(self) -> Self {
+    pub fn uniform(self) -> LinearParams<A, K, D>
+    where
+        S: DataOwned,
+    {
         let dk = self.dk_sqrt();
         self.uniform_between(-dk, dk)
     }
 
-    pub fn uniform_between(self, low: A, high: A) -> Self {
-        if self.is_biased() && !self.bias.is_some() {
+    pub fn uniform_between(self, low: A, high: A) -> LinearParams<A, K, D>
+    where
+        S: DataOwned,
+    {
+        let weight = Array::uniform_between(self.raw_dim(), low, high);
+        let bias = if self.is_biased() && !self.bias.is_some() {
             let b_dim = bias_dim(self.raw_dim());
-            Self {
-                bias: Some(Array::uniform_between(b_dim, low, high)),
-                weight: Array::uniform_between(self.raw_dim(), low, high),
-                _mode: self._mode,
-            }
+            Some(Array::uniform_between(b_dim, low, high))
         } else if !self.is_biased() && self.bias.is_some() {
-            Self {
-                bias: None,
-                weight: Array::uniform_between(self.raw_dim(), low, high),
-                _mode: self._mode,
-            }
+            None
         } else {
-            Self {
-                bias: self
-                    .bias
-                    .as_ref()
-                    .map(|b| Array::uniform_between(b.raw_dim(), low, high)),
-                weight: Array::uniform_between(self.raw_dim(), low, high),
-                _mode: self._mode,
-            }
+            self.bias
+                .as_ref()
+                .map(|b| Array::uniform_between(b.raw_dim(), low, high))
+        };
+        LinearParams {
+            weight,
+            bias,
+            _mode: core::marker::PhantomData::<K>,
         }
     }
 }
 
-impl<A, K, D> Initialize<A, D> for Linear<A, K, D>
+impl<A, S, D, K> Initialize<A, D> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
     K: ParamMode,
+    S: DataOwned<Elem = A>,
     StandardNormal: Distribution<A>,
 {
     type Data = OwnedRepr<A>;
diff --git a/models/linear/src/impls/model/impl_linear.rs b/models/linear/src/impls/model/impl_linear.rs
index 03c97f82..49ee85ba 100644
--- a/models/linear/src/impls/model/impl_linear.rs
+++ b/models/linear/src/impls/model/impl_linear.rs
@@ -2,47 +2,52 @@
     Appellation: impl_linear <impls>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use crate::{Config, Linear, LinearParams, ParamMode};
+use crate::{Config, Linear, ParamMode, ParamsBase};
 use core::borrow::{Borrow, BorrowMut};
-use nd::RemoveAxis;
+use nd::{DataOwned, Ix2, RawData, RemoveAxis};
 
-impl<A, K> Linear<A, K>
+impl<A, K, S> Linear<A, K, Ix2, S>
 where
     K: ParamMode,
+    S: RawData<Elem = A>,
 {
     pub fn from_features(inputs: usize, outputs: usize) -> Self
     where
         A: Clone + Default,
+        S: DataOwned,
     {
         let config = Config::std(inputs, outputs);
-        let params = LinearParams::new(config.dim());
+        let params = ParamsBase::new(config.dim());
         Self { config, params }
     }
 }
 
-impl<A, K, D> Borrow<Config<K, D>> for Linear<A, K, D>
+impl<A, S, D, K> Borrow<Config<K, D>> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
     fn borrow(&self) -> &Config<K, D> {
         &self.config
     }
 }
 
-impl<A, K, D> Borrow<LinearParams<A, K, D>> for Linear<A, K, D>
+impl<A, S, D, K> Borrow<ParamsBase<S, D, K>> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
-    fn borrow(&self) -> &LinearParams<A, K, D> {
+    fn borrow(&self) -> &ParamsBase<S, D, K> {
         &self.params
     }
 }
 
-impl<A, K, D> BorrowMut<LinearParams<A, K, D>> for Linear<A, K, D>
+impl<A, S, D, K> BorrowMut<ParamsBase<S, D, K>> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
-    fn borrow_mut(&mut self) -> &mut LinearParams<A, K, D> {
+    fn borrow_mut(&mut self) -> &mut ParamsBase<S, D, K> {
         &mut self.params
     }
 }
diff --git a/models/linear/src/mlp/model.rs b/models/linear/src/mlp/model.rs
new file mode 100644
index 00000000..4128cb88
--- /dev/null
+++ b/models/linear/src/mlp/model.rs
@@ -0,0 +1,10 @@
+/*
+    Appellation: model <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub struct Mlp<I, H, O> {
+    input: I,
+    hidden: H,
+    output: O,
+}
\ No newline at end of file
diff --git a/models/linear/src/model/layer.rs b/models/linear/src/model/layer.rs
index 486bb43d..9bb00f52 100644
--- a/models/linear/src/model/layer.rs
+++ b/models/linear/src/model/layer.rs
@@ -3,39 +3,56 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 use super::{Config, Layout};
-use crate::{Biased, LinearParams, ParamMode, Unbiased};
+use crate::{Biased, LinearParams, ParamMode, ParamsBase, Unbiased};
 use concision::prelude::{Predict, Result};
 use nd::prelude::*;
-use nd::RemoveAxis;
+use nd::{DataOwned, OwnedRepr, RawData, RemoveAxis};
 
 /// An implementation of a linear model.
 ///
 /// In an effort to streamline the api, the [Linear] model relies upon a [ParamMode] type ([Biased] or [Unbiased](crate::params::mode::Unbiased))
 /// which enables the model to automatically determine whether or not to include a bias term. Doing so allows the model to inherit several methods
 /// familar to the underlying [ndarray](https://docs.rs/ndarray) crate.
-pub struct Linear<A = f64, K = Biased, D = Ix2>
+pub struct Linear<A = f64, K = Biased, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
+    S: RawData<Elem = A>,
 {
     pub(crate) config: Config<K, D>,
-    pub(crate) params: LinearParams<A, K, D>,
+    pub(crate) params: ParamsBase<S, D, K>,
 }
 
-impl<A, K, D> Linear<A, K, D>
+impl<A, K> Linear<A, K, Ix2, OwnedRepr<A>>
+where
+    K: ParamMode,
+{
+    pub fn std(inputs: usize, outputs: usize) -> Self
+    where
+        A: Default,
+    {
+        let config = Config::<K, Ix2>::new().with_shape((inputs, outputs));
+        let params = ParamsBase::new(config.features());
+        Linear { config, params }
+    }
+}
+
+impl<A, S, D, K> Linear<A, K, D, S>
 where
     D: RemoveAxis,
     K: ParamMode,
+    S: RawData<Elem = A>,
 {
-    mbuilder!(new where A: Default);
-    mbuilder!(ones where A: Clone + num::One);
-    mbuilder!(zeros where A: Clone + num::Zero);
+    mbuilder!(new where A: Default, S: DataOwned);
+    mbuilder!(ones where A: Clone + num::One, S: DataOwned);
+    mbuilder!(zeros where A: Clone + num::Zero, S: DataOwned);
 
     pub fn from_config(config: Config<K, D>) -> Self
     where
         A: Clone + Default,
         K: ParamMode,
+        S: DataOwned,
     {
-        let params = LinearParams::new(config.dim());
+        let params = ParamsBase::new(config.dim());
         Self { config, params }
     }
 
@@ -43,13 +60,14 @@ where
     where
         A: Clone + Default,
         K: ParamMode,
+        S: DataOwned,
     {
         let config = Config::<K, D>::new().with_layout(layout);
-        let params = LinearParams::new(config.dim());
+        let params = ParamsBase::new(config.dim());
         Self { config, params }
     }
 
-    pub fn from_params(params: LinearParams<A, K, D>) -> Self {
+    pub fn from_params(params: ParamsBase<S, D, K>) -> Self {
         let config = Config::<K, D>::new().with_shape(params.raw_dim());
         Self { config, params }
     }
@@ -67,26 +85,27 @@ where
         &self.config
     }
 
-    pub fn weights(&self) -> &Array<A, D> {
+    pub fn weights(&self) -> &ArrayBase<S, D> {
         self.params.weights()
     }
 
-    pub fn weights_mut(&mut self) -> &mut Array<A, D> {
+    pub fn weights_mut(&mut self) -> &mut ArrayBase<S, D> {
         self.params.weights_mut()
     }
 
-    pub const fn params(&self) -> &LinearParams<A, K, D> {
+    pub const fn params(&self) -> &ParamsBase<S, D, K> {
         &self.params
     }
 
-    pub fn params_mut(&mut self) -> &mut LinearParams<A, K, D> {
+    pub fn params_mut(&mut self) -> &mut ParamsBase<S, D, K> {
         &mut self.params
     }
 
-    pub fn into_biased(self) -> Linear<A, Biased, D>
+    pub fn into_biased(self) -> Linear<A, Biased, D, S>
     where
         A: Default,
         K: 'static,
+        S: DataOwned,
     {
         Linear {
             config: self.config.into_biased(),
@@ -94,10 +113,11 @@ where
         }
     }
 
-    pub fn into_unbiased(self) -> Linear<A, Unbiased, D>
+    pub fn into_unbiased(self) -> Linear<A, Unbiased, D, S>
     where
         A: Default,
         K: 'static,
+        S: DataOwned,
     {
         Linear {
             config: self.config.into_unbiased(),
@@ -130,40 +150,44 @@ where
     concision::dimensional!(params());
 }
 
-impl<A, D> Linear<A, Biased, D>
+impl<A, S, D> Linear<A, Biased, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
     pub fn biased<Sh>(shape: Sh) -> Self
     where
         A: Default,
+        S: DataOwned,
         Sh: ShapeBuilder<Dim = D>,
     {
         let config = Config::<Biased, D>::new().with_shape(shape);
-        let params = LinearParams::biased(config.dim());
+        let params = ParamsBase::biased(config.dim());
         Linear { config, params }
     }
 
-    pub fn bias(&self) -> &Array<A, D::Smaller> {
+    pub fn bias(&self) -> &ArrayBase<S, D::Smaller> {
         self.params().bias()
     }
 
-    pub fn bias_mut(&mut self) -> &mut Array<A, D::Smaller> {
+    pub fn bias_mut(&mut self) -> &mut ArrayBase<S, D::Smaller> {
         self.params_mut().bias_mut()
     }
 }
 
-impl<A, D> Linear<A, Unbiased, D>
+impl<A, S, D> Linear<A, Unbiased, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
     pub fn unbiased<Sh>(shape: Sh) -> Self
     where
         A: Default,
+        S: DataOwned,
         Sh: ShapeBuilder<Dim = D>,
     {
         let config = Config::<Unbiased, D>::new().with_shape(shape);
-        let params = LinearParams::unbiased(config.dim());
+        let params = ParamsBase::unbiased(config.dim());
         Linear { config, params }
     }
 }
diff --git a/models/linear/src/norm/layer/mod.rs b/models/linear/src/norm/layer/mod.rs
index 6b54d6e8..28254dc1 100644
--- a/models/linear/src/norm/layer/mod.rs
+++ b/models/linear/src/norm/layer/mod.rs
@@ -19,13 +19,15 @@ pub(crate) mod prelude {
 }
 
 pub(crate) mod utils {
-    use nd::{Array, Axis, Dimension, RemoveAxis};
+    use nd::prelude::*;
+    use nd::{Data, RemoveAxis};
     use num::traits::{Float, FromPrimitive};
 
-    pub(crate) fn layer_norm<A, D>(x: &Array<A, D>, eps: f64) -> Array<A, D>
+    pub(crate) fn layer_norm<A, S, D>(x: &ArrayBase<S, D>, eps: f64) -> Array<A, D>
     where
         A: Float + FromPrimitive,
         D: Dimension,
+        S: Data<Elem = A>,
     {
         let mean = x.mean().unwrap();
         let denom = {
@@ -36,10 +38,11 @@ pub(crate) mod utils {
         x.mapv(|xi| (xi - mean) / denom)
     }
 
-    pub(crate) fn layer_norm_axis<A, D>(x: &Array<A, D>, axis: Axis, eps: f64) -> Array<A, D>
+    pub(crate) fn layer_norm_axis<A, S, D>(x: &ArrayBase<S, D>, axis: Axis, eps: f64) -> Array<A, D>
     where
         A: Float + FromPrimitive,
         D: RemoveAxis,
+        S: Data<Elem = A>,
     {
         let eps = A::from(eps).unwrap();
         let mean = x.mean_axis(axis).unwrap();
diff --git a/models/linear/src/norm/layer/model.rs b/models/linear/src/norm/layer/model.rs
index e5dc6b67..1cca2419 100644
--- a/models/linear/src/norm/layer/model.rs
+++ b/models/linear/src/norm/layer/model.rs
@@ -6,7 +6,7 @@ use super::Config;
 use crate::{Biased, LinearParams, ParamMode, Unbiased};
 use concision::Forward;
 use nd::prelude::*;
-use nd::RemoveAxis;
+use nd::{Data, RemoveAxis};
 use num::traits::{Float, FromPrimitive, One, Zero};
 
 // #62
@@ -139,14 +139,15 @@ where
     }
 }
 
-impl<A, D> Forward<Array<A, D>> for LayerNorm<A, Biased, D>
+impl<A, S, D> Forward<ArrayBase<S, D>> for LayerNorm<A, Biased, D>
 where
     A: Float + FromPrimitive,
     D: RemoveAxis,
+    S: Data<Elem = A>,
 {
     type Output = Array<A, D>;
 
-    fn forward(&self, x: &Array<A, D>) -> Self::Output {
+    fn forward(&self, x: &ArrayBase<S, D>) -> Self::Output {
         let norm = if let Some(axis) = self.config().axis() {
             super::layer_norm_axis(x, *axis, self.eps())
         } else {
@@ -156,14 +157,15 @@ where
     }
 }
 
-impl<A, D> Forward<Array<A, D>> for LayerNorm<A, Unbiased, D>
+impl<A, S, D> Forward<ArrayBase<S, D>> for LayerNorm<A, Unbiased, D>
 where
     A: Float + FromPrimitive,
     D: RemoveAxis,
+    S: Data<Elem = A>,
 {
     type Output = Array<A, D>;
 
-    fn forward(&self, x: &Array<A, D>) -> Self::Output {
+    fn forward(&self, x: &ArrayBase<S, D>) -> Self::Output {
         let norm = if let Some(axis) = self.config().axis() {
             super::layer_norm_axis(x, *axis, self.eps())
         } else {
diff --git a/models/transformers/Cargo.toml b/models/transformers/Cargo.toml
index 00bf9fb7..7dc36953 100644
--- a/models/transformers/Cargo.toml
+++ b/models/transformers/Cargo.toml
@@ -95,6 +95,10 @@ crate-type = ["lib"]
 doctest = true
 test = true
 
+[[test]]
+name = "attention"
+required-features = ["approx", "rand"]
+
 [build-dependencies]
 
 [dependencies]
@@ -128,8 +132,8 @@ version = "1"
 optional = true
 version = "0.1"
 
-[dev-dependencies.lazy_static]
-workspace = true
+[dev-dependencies]
+lazy_static.workspace = true
 
 [package.metadata.docs.rs]
 all-features = true
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index c5146a34..e80fdda9 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -2,30 +2,68 @@
     Appellation: head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use super::{Score, _attention};
 use crate::params::QkvBase;
 use concision::getters;
-use core::borrow::{Borrow, BorrowMut};
+use concision::nn::DropoutLayer;
 use nd::linalg::Dot;
 use nd::*;
 use num::complex::ComplexFloat;
 
 // #68
+/// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in
+/// [Attention is all you need](https://arxiv.org/abs/1706.03762). The structure is designed to
+/// be flexible, relying upon the n-dimensional [QkvBase] to store the query, key, and value tensors.
+/// Moreover, the head may be configured with optional dropout and/or masking layers.
+///
+/// ### Dropout
+///
+/// The [DropoutLayer] is an optional layer applied after the softmax function is applied to the
+/// score. The layer is used to prevent overfitting by randomly setting a fraction of the input
+/// units to zero at each update during training time.
+///
+/// ### Masking
+///
+/// After computing the dot-product of the query and key tensors, an optional mask may be applied to
+/// the attention score. The mask is used to prevent the model from attending to certain parts of the
+/// input sequence. For example, in the case of a language model, the mask may be used to prevent the
+/// model from attending to the padding tokens.
 pub struct AttentionHead<A = f64, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
     S: RawData<Elem = A>,
 {
-    pub(crate) mask: Option<ArrayBase<S, D>>,
+    #[cfg(feature = "rand")]
+    pub(crate) dropout: Option<DropoutLayer>,
+    pub(crate) mask: Option<Array<bool, D>>,
     pub(crate) params: QkvBase<S, D>,
 }
 
+impl<A, S> AttentionHead<A, Ix2, S>
+where
+    S: RawData<Elem = A>,
+{
+    pub fn std(dm: usize, dk: usize) -> Self
+    where
+        A: Default,
+        S: DataOwned,
+    {
+        Self::from_params(QkvBase::new((dk, dm)))
+    }
+}
+
 impl<A, S, D> AttentionHead<A, D, S>
 where
     D: Dimension,
     S: RawData<Elem = A>,
 {
     pub fn from_params(params: QkvBase<S, D>) -> Self {
-        Self { mask: None, params }
+        Self {
+            #[cfg(feature = "rand")]
+            dropout: None,
+            mask: None,
+            params,
+        }
     }
 
     pub fn builder<Sh, F>(shape: Sh, builder: F) -> Self
@@ -44,8 +82,8 @@ where
     {
         Self::from_params(QkvBase::from_elem(shape, value))
     }
-
-    pub fn attention(&self) -> Array<A, D>
+    /// Computes the [Score] using scaled dot-product attention.
+    pub fn attention(&self) -> Score<A, D>
     where
         A: ComplexFloat + ScalarOperand,
         S: Data,
@@ -53,7 +91,11 @@ where
         Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
         let (q, k, v) = self.qkv();
-        crate::attention::scaled_dot_product_attention(q, k, v)
+        _attention(q, k, v, self.mask(), self.dropout())
+    }
+    /// Returns an immutable reference to the, optional, mask.
+    pub fn mask(&self) -> Option<&Array<bool, D>> {
+        self.mask.as_ref()
     }
     /// Returns an immuable reference to the underlying parameters.
     pub const fn params(&self) -> &QkvBase<S, D> {
@@ -71,6 +113,30 @@ where
     pub fn into_qkv(self) -> (ArrayBase<S, D>, ArrayBase<S, D>, ArrayBase<S, D>) {
         self.params.into_qkv()
     }
+    /// Sets the dropout layer for the [AttentionHead]
+    #[cfg(feature = "rand")]
+    pub fn set_dropout(&mut self, dropout: Option<DropoutLayer>) {
+        self.dropout = dropout;
+    }
+    /// Sets the mask for the [AttentionHead]
+    pub fn set_mask(&mut self, mask: Option<Array<bool, D>>) {
+        self.mask = mask;
+    }
+    /// Configure the [AttentionHead] with a [DropoutLayer]
+    #[cfg(feature = "rand")]
+    pub fn with_dropout(self, dropout: DropoutLayer) -> Self {
+        Self {
+            dropout: Some(dropout),
+            ..self
+        }
+    }
+    /// Consume and store a mask for the [AttentionHead]
+    pub fn with_mask(self, mask: Array<bool, D>) -> Self {
+        Self {
+            mask: Some(mask),
+            ..self
+        }
+    }
 
     getters!(params::<[q, k, v]> => ArrayBase<S, D>);
     ndbuilder!(new::default() where A: Default, S: DataOwned);
@@ -78,22 +144,31 @@ where
     ndbuilder!(zeros() where A: Clone + num::Zero, S: DataOwned);
 }
 
-impl<A, S, D> Borrow<QkvBase<S, D>> for AttentionHead<A, D, S>
+#[cfg(feature = "rand")]
+impl<A, S, D> AttentionHead<A, D, S>
 where
     D: Dimension,
     S: RawData<Elem = A>,
 {
-    fn borrow(&self) -> &QkvBase<S, D> {
-        self.params()
+    /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer.
+    /// With the `rand` feature flag disabled, the dropout layer is
+    /// unavailable and returns `None`.
+    pub fn dropout(&self) -> Option<&DropoutLayer> {
+        self.dropout.as_ref()
     }
 }
 
-impl<A, S, D> BorrowMut<QkvBase<S, D>> for AttentionHead<A, D, S>
+#[cfg(not(feature = "rand"))]
+impl<A, S, D> AttentionHead<A, D, S>
 where
     D: Dimension,
     S: RawData<Elem = A>,
 {
-    fn borrow_mut(&mut self) -> &mut QkvBase<S, D> {
-        self.params_mut()
+    /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer.
+    /// With the `rand` feature flag disabled, the dropout layer is
+    /// unavailable and returns `None`.
+    #[cfg(not(feature = "rand"))]
+    pub fn dropout(&self) -> Option<&DropoutLayer> {
+        None
     }
 }
diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs
index 80a264c7..a500b5f5 100644
--- a/models/transformers/src/attention/mod.rs
+++ b/models/transformers/src/attention/mod.rs
@@ -8,47 +8,117 @@
 //! Today, these mechanisms are found in several state-of-the-art models, such as
 //! the Transformer model, primarily due to its capabilities in natural language
 //! processing (NLP) domains
+pub(crate) use self::_impl_methods::*;
 pub use self::head::AttentionHead;
+pub use self::score::Score;
 pub use self::utils::*;
 
 pub(crate) mod head;
+pub(crate) mod score;
 
 // #69: Multi-Head Attention implementation
 pub mod multi;
 
 pub(crate) mod prelude {
     pub use super::head::AttentionHead;
+    pub use super::multi::prelude::*;
+    pub use super::score::Score;
     pub use super::utils::*;
 }
 
+/// A common interface for types that can compute attention from their own state.
+pub trait Attention {
+    /// The value produced by [Attention::attention].
+    type Output;
+
+    /// Computes the attention output for the implementor.
+    fn attention(&self) -> Self::Output;
+}
+
 pub(crate) mod utils {
-    use concision::func::activate::Softmax;
+    use super::Score;
+    use concision::nn::DropoutLayer;
     use nd::linalg::Dot;
-    use nd::prelude::{Array, ArrayBase, ArrayView, Axis, Dimension};
-    use nd::{Data, ScalarOperand};
+    use nd::prelude::*;
     use num::complex::ComplexFloat;
 
-    pub(crate) fn scale<A>(dk: usize) -> A
+    /// A functional implementation of the scaled dot-product attention mechanism.
+    ///
+    /// Forwards its arguments unchanged to the crate-internal implementation and returns the
+    /// resulting [Score], which holds both the attention output and the attention weights.
+    pub fn scaled_dot_product_attention<A, S, D>(
+        q: &ArrayBase<S, D>,
+        k: &ArrayBase<S, D>,
+        v: &ArrayBase<S, D>,
+        mask: Option<&Array<bool, D>>,
+        dropout: Option<&DropoutLayer>,
+    ) -> Score<A, D>
     where
-        A: ComplexFloat,
+        A: ComplexFloat + nd::ScalarOperand,
+        S: nd::Data<Elem = A>,
+        D: Dimension,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
-        A::from(dk).unwrap().sqrt().recip()
+        super::_attention(q, k, v, mask, dropout)
     }
+}
 
-    /// A functional implementation of the scaled dot-product attention mechanism;
-    pub fn scaled_dot_product_attention<A, S, D>(
+mod _impl_methods {
+    use super::Score;
+    use concision::prelude::{DropoutLayer, MaskFill, Softmax};
+    use nd::linalg::Dot;
+    use nd::prelude::*;
+    use num::complex::ComplexFloat;
+
+    /// Computes scaled dot-product attention: `softmax(q.dot(k^T) * scale).dot(v)`.
+    ///
+    /// `scale` is `1 / sqrt(dk)`, where `dk` is the length of `k` along axis 1. When a `mask`
+    /// is supplied it is applied to the scaled scores before the softmax; when a `dropout`
+    /// layer is supplied (and the `rand` feature is enabled) it is applied after the softmax.
+    ///
+    /// Returns a [Score] built from the pair `(weights.dot(v), weights)`.
+    pub(crate) fn _attention<A, S, D>(
         q: &ArrayBase<S, D>,
         k: &ArrayBase<S, D>,
         v: &ArrayBase<S, D>,
-    ) -> Array<A, D>
+        mask: Option<&Array<bool, D>>,
+        dropout: Option<&DropoutLayer>,
+    ) -> Score<A, D>
     where
-        A: ComplexFloat + ScalarOperand,
-        S: Data<Elem = A>,
+        A: ComplexFloat + nd::ScalarOperand,
+        S: nd::Data<Elem = A>,
         D: Dimension,
         ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
         Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
-        let dk = scale::<A>(k.len_of(Axis(1)));
-        (q.dot(&k.t()) * dk).softmax().dot(&v)
+        use concision::Forward;
+        let dk = scale::<A>(k.len_of(nd::Axis(1)));
+        let mut z = q.dot(&k.t()) * dk;
+        if let Some(mask) = mask {
+            // Masked logits must be -inf so that they vanish under the softmax; a fill of
+            // zero would still receive exp(0) = 1 worth of weight.
+            // NOTE(review): assumes `masked_fill` overwrites where the mask is `true` - confirm.
+            z = z.masked_fill(mask, A::from(f64::NEG_INFINITY).unwrap());
+        }
+        z = z.softmax();
+        #[cfg(feature = "rand")]
+        if let Some(dropout) = dropout {
+            z = dropout.forward(&z);
+        }
+        // Dropout is compiled out without `rand`; consume the argument to avoid a warning.
+        #[cfg(not(feature = "rand"))]
+        let _ = dropout;
+        (z.dot(&v), z).into()
+    }
+
+    /// Returns the scaling factor `1 / sqrt(dk)`.
+    ///
+    /// Panics if `dk` cannot be converted into `A`.
+    pub(crate) fn scale<A>(dk: usize) -> A
+    where
+        A: ComplexFloat,
+    {
+        A::from(dk).unwrap().sqrt().recip()
     }
 }
diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs
new file mode 100644
index 00000000..58c510c6
--- /dev/null
+++ b/models/transformers/src/attention/multi/config.rs
@@ -0,0 +1,49 @@
+/*
+    Appellation: config <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub(crate) fn dk(d_model: usize, heads: usize) -> usize {
+    d_model / heads
+}
+
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct Config {
+    pub d_model: usize,
+    pub heads: usize,
+}
+
+impl Config {
+    pub fn new() -> ConfigBuilder {
+        ConfigBuilder::new()
+    }
+
+    pub fn d_model(&self) -> usize {
+        self.d_model
+    }
+
+    pub fn dk(&self) -> usize {
+        dk(self.d_model(), self.heads())
+    }
+
+    pub fn heads(&self) -> usize {
+        self.heads
+    }
+}
+
+impl Default for Config {
+    fn default() -> Self {
+        Self {
+            d_model: crate::D_MODEL,
+            heads: crate::HEADS,
+        }
+    }
+}
+
+concision::builder! {
+    ConfigBuilder(Config) {
+        d_model: usize,
+        heads: usize,
+    }
+}
diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs
index 014e29b9..e101f032 100644
--- a/models/transformers/src/attention/multi/mod.rs
+++ b/models/transformers/src/attention/multi/mod.rs
@@ -5,6 +5,12 @@
 //! # Multi-Head Attention
 //!
 //!
-pub use self::multi_head::*;
+pub use self::{config::Config, multi_head::*};
 
+pub(crate) mod config;
 pub(crate) mod multi_head;
+
+pub(crate) mod prelude {
+    pub use super::config::Config as MultiHeadAttentionConfig;
+    pub use super::multi_head::MultiHeadAttention;
+}
diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs
index ad36fe45..36a4051d 100644
--- a/models/transformers/src/attention/multi/multi_head.rs
+++ b/models/transformers/src/attention/multi/multi_head.rs
@@ -2,5 +2,76 @@
     Appellation: multi_head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use super::Config;
+use crate::AttentionHead;
+use linear::{Biased, Linear};
+use nd::prelude::*;
+use nd::{DataOwned, OwnedRepr, RawData};
 
-pub struct MultiHeadAttention;
+pub struct MultiHeadAttention<A = f64, D = Ix2, S = OwnedRepr<A>>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    pub(crate) config: Config,
+    pub(crate) head: AttentionHead<A, D, S>,
+    pub(crate) linears: Vec<Linear<A, Biased, D, S>>,
+}
+
+impl<A, S, D> MultiHeadAttention<A, D, S>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    pub const fn config(&self) -> &Config {
+        &self.config
+    }
+
+    pub const fn head(&self) -> &AttentionHead<A, D, S> {
+        &self.head
+    }
+
+    pub fn head_mut(&mut self) -> &mut AttentionHead<A, D, S> {
+        &mut self.head
+    }
+
+    pub fn linears(&self) -> &[Linear<A, Biased, D, S>] {
+        &self.linears
+    }
+}
+
+impl<A, S> MultiHeadAttention<A, Ix2, S>
+where
+    S: RawData<Elem = A>,
+{
+    pub fn std(d_model: usize, heads: usize) -> Self
+    where
+        A: Clone + Default,
+        S: DataOwned,
+    {
+        let config = Config::new().d_model(d_model).heads(heads).build();
+        let linears = (0..4)
+            .map(|_| Linear::from_features(d_model, d_model))
+            .collect();
+        Self {
+            config,
+            head: AttentionHead::std(d_model, config.dk()),
+            linears,
+        }
+    }
+}
+
+impl<A, S, D> Default for MultiHeadAttention<A, D, S>
+where
+    A: Default,
+    D: Dimension,
+    S: DataOwned<Elem = A>,
+{
+    fn default() -> Self {
+        Self {
+            config: Config::default(),
+            head: AttentionHead::default(),
+            linears: Vec::new(),
+        }
+    }
+}
diff --git a/models/transformers/src/attention/score.rs b/models/transformers/src/attention/score.rs
new file mode 100644
index 00000000..3e1df96e
--- /dev/null
+++ b/models/transformers/src/attention/score.rs
@@ -0,0 +1,94 @@
+/*
+    Appellation: score <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use core::fmt;
+use nd::{Array, Dimension};
+
+/// [Score] is created as a result of invoking an attention mechanism;
+///
+/// - attention: the actual result; returns the dot product of the score with the value tensor
+/// - score: the attention score tensor
+#[derive(Clone, Eq, Hash, PartialEq)]
+pub struct Score<A, D>
+where
+    D: Dimension,
+{
+    pub(crate) attention: Array<A, D>,
+    pub(crate) score: Array<A, D>,
+}
+
+impl<A, D> Score<A, D>
+where
+    D: Dimension,
+{
+    pub(crate) fn new(attention: Array<A, D>, score: Array<A, D>) -> Self {
+        Self { attention, score }
+    }
+    /// Consumes the instance and returns the attention tensor.
+    pub fn into_attention(self) -> Array<A, D> {
+        self.attention
+    }
+    /// Consumes the container and returns the score tensor.
+    pub fn into_score(self) -> Array<A, D> {
+        self.score
+    }
+
+    /// Retrieve the attention tensor.
+    pub fn attention(&self) -> &Array<A, D> {
+        &self.attention
+    }
+    /// Retrieve the score tensor
+    pub fn score(&self) -> &Array<A, D> {
+        &self.score
+    }
+}
+
+impl<A, D> Copy for Score<A, D>
+where
+    A: Copy,
+    D: Copy + Dimension,
+    Array<A, D>: Copy,
+{
+}
+
+impl<A, D> fmt::Debug for Score<A, D>
+where
+    A: fmt::Debug,
+    D: Dimension,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Score")
+            .field("attention", &self.attention)
+            .field("score", &self.score)
+            .finish()
+    }
+}
+
+impl<A, D> fmt::Display for Score<A, D>
+where
+    A: fmt::Display,
+    D: Dimension,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "({}, {})", self.attention, self.score)
+    }
+}
+
+impl<A, D> From<(Array<A, D>, Array<A, D>)> for Score<A, D>
+where
+    D: Dimension,
+{
+    fn from((attention, score): (Array<A, D>, Array<A, D>)) -> Self {
+        Self::new(attention, score)
+    }
+}
+
+impl<A, D> From<Score<A, D>> for (Array<A, D>, Array<A, D>)
+where
+    D: Dimension,
+{
+    fn from(score: Score<A, D>) -> Self {
+        (score.attention, score.score)
+    }
+}
diff --git a/models/transformers/src/codec/encoder/layer.rs b/models/transformers/src/codec/encoder/layer.rs
index 10821bd3..5c00ebcf 100644
--- a/models/transformers/src/codec/encoder/layer.rs
+++ b/models/transformers/src/codec/encoder/layer.rs
@@ -2,12 +2,25 @@
     Appellation: layer <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::attention::multi::MultiHeadAttention;
 
 #[derive(Default)]
-pub struct EncoderLayer {}
+pub struct EncoderLayer {
+    pub(crate) attention: MultiHeadAttention,
+}
 
 impl EncoderLayer {
     pub fn new() -> Self {
-        Self {}
+        let attention = MultiHeadAttention::default();
+
+        Self { attention }
+    }
+    /// Returns an immutable reference to the multi-head, self-attention layer.
+    pub fn attention(&self) -> &MultiHeadAttention {
+        &self.attention
+    }
+    /// Returns a mutable reference to the multi-head, self-attention layer.
+    pub fn attention_mut(&mut self) -> &mut MultiHeadAttention {
+        &mut self.attention
     }
 }
diff --git a/models/transformers/src/codec/mod.rs b/models/transformers/src/codec/mod.rs
index 3a7e3f77..52e34740 100644
--- a/models/transformers/src/codec/mod.rs
+++ b/models/transformers/src/codec/mod.rs
@@ -2,6 +2,13 @@
     Appellation: codec <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+//! # Codec
+//!
+//! The `codec` module implements the [Decoder] and [Encoder] layers of the [Transformer](crate::Transformer) model.
+//! Each layer has two sublayers, namely:
+//! - multi-head, self-attention layer
+//! - fully-connected, position-wise feed-forward network.
+//!
 pub use self::{decoder::Decoder, encoder::Encoder, model::*};
 
 pub(crate) mod model;
diff --git a/models/transformers/src/codec/model.rs b/models/transformers/src/codec/model.rs
index 494c0a0e..470938a5 100644
--- a/models/transformers/src/codec/model.rs
+++ b/models/transformers/src/codec/model.rs
@@ -24,11 +24,13 @@ impl Codec {
     );
 }
 
-builder!(CodecBuilder::<Codec> {
-    ctx: Context,
-    decoder: Decoder,
-    encoder: Encoder,
-});
+builder! {
+    CodecBuilder(Codec) {
+        ctx: Context,
+        decoder: Decoder,
+        encoder: Encoder,
+    }
+}
 
 #[derive(Default)]
 pub struct Generator {
diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs
index fa22f80a..4160975d 100644
--- a/models/transformers/src/impls/impl_head.rs
+++ b/models/transformers/src/impls/impl_head.rs
@@ -2,10 +2,48 @@
     Appellation: impl_head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use crate::attention::AttentionHead;
+use crate::attention::{Attention, AttentionHead, Score};
 use crate::params::QkvBase;
+use core::borrow::{Borrow, BorrowMut};
+use nd::linalg::Dot;
 use nd::prelude::*;
-use nd::{DataOwned, RawDataClone};
+use nd::{Data, DataOwned, RawData, RawDataClone, ScalarOperand};
+use num::complex::ComplexFloat;
+
+impl<A, S, D> Attention for AttentionHead<A, D, S>
+where
+    A: ComplexFloat + ScalarOperand,
+    D: Dimension,
+    S: Data<Elem = A>,
+    ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+    Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+{
+    type Output = Score<A, D>;
+
+    fn attention(&self) -> Self::Output {
+        self.attention()
+    }
+}
+
+impl<A, S, D> Borrow<QkvBase<S, D>> for AttentionHead<A, D, S>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn borrow(&self) -> &QkvBase<S, D> {
+        self.params()
+    }
+}
+
+impl<A, S, D> BorrowMut<QkvBase<S, D>> for AttentionHead<A, D, S>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn borrow_mut(&mut self) -> &mut QkvBase<S, D> {
+        self.params_mut()
+    }
+}
 
 impl<A, S, D> Clone for AttentionHead<A, D, S>
 where
@@ -15,6 +53,8 @@ where
 {
     fn clone(&self) -> Self {
         Self {
+            #[cfg(feature = "rand")]
+            dropout: self.dropout.clone(),
             mask: self.mask.clone(),
             params: self.params.clone(),
         }
@@ -26,6 +66,7 @@ where
     A: Copy,
     D: Copy + Dimension,
     S: Copy + RawDataClone<Elem = A>,
+    Array<bool, D>: Copy,
 {
 }
 
@@ -39,3 +80,13 @@ where
         Self::from_params(QkvBase::default())
     }
 }
+
+impl<A, S, D> From<QkvBase<S, D>> for AttentionHead<A, D, S>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn from(params: QkvBase<S, D>) -> Self {
+        Self::from_params(params)
+    }
+}
diff --git a/models/transformers/src/impls/impl_init.rs b/models/transformers/src/impls/impl_init.rs
new file mode 100644
index 00000000..1ed7effd
--- /dev/null
+++ b/models/transformers/src/impls/impl_init.rs
@@ -0,0 +1,73 @@
+/*
+    Appellation: init <impls>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+#![cfg(feature = "rand")]
+use crate::QkvBase;
+use concision::Initialize;
+use concision::init::rand::Rng;
+use concision::init::rand_distr::{Distribution, StandardNormal};
+use concision::init::rand_distr::uniform::SampleUniform;
+use nd::{ArrayBase, DataOwned, Dimension, RemoveAxis, ShapeBuilder};
+
+/// Random initialization for [QkvBase].
+///
+/// Each constructor fills the query, key, and value tensors through three separate sampling
+/// calls that share one shape and one (cloned) distribution.
+impl<A, S, D> Initialize for QkvBase<S, D>
+where
+    D: RemoveAxis,
+    S: DataOwned<Elem = A>,
+    StandardNormal: Distribution<A>,
+{
+    type Data = S;
+
+    /// Creates parameters of the given `shape` with every tensor sampled from `distr`.
+    fn rand<Sh, Dstr>(shape: Sh, distr: Dstr) -> Self
+    where
+        Sh: ShapeBuilder<Dim = D>,
+        Dstr: Clone + Distribution<A>,
+    {
+        let dim = shape.into_shape().raw_dim().clone();
+        Self {
+            q: ArrayBase::rand(dim.clone(), distr.clone()),
+            k: ArrayBase::rand(dim.clone(), distr.clone()),
+            v: ArrayBase::rand(dim, distr),
+        }
+    }
+
+    /// Like `rand`, but draws every sample from the caller-supplied `rng`.
+    fn rand_with<Sh, Ds, R>(shape: Sh, distr: Ds, rng: &mut R) -> Self
+    where
+        R: Rng + ?Sized,
+        Ds: Clone + Distribution<A>,
+        Sh: ShapeBuilder<Dim = D>,
+    {
+        let dim = shape.into_shape().raw_dim().clone();
+        // `rng` is already a mutable reference: reborrow it for each call rather than taking
+        // `&mut rng`, which would require a `mut` binding that the signature does not provide.
+        Self {
+            q: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut *rng),
+            k: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut *rng),
+            v: ArrayBase::rand_with(dim, distr, rng),
+        }
+    }
+
+    /// Consumes `self` and returns freshly sampled parameters sized by `self.dim()`.
+    fn init_rand<Ds>(self, distr: Ds) -> Self
+    where
+        Ds: Clone + Distribution<A>,
+        Self: Sized,
+    {
+        Self::rand(self.dim(), distr)
+    }
+
+    /// Like `init_rand`, but draws every sample from the caller-supplied `rng`.
+    fn init_rand_with<Ds, R>(self, distr: Ds, rng: &mut R) -> Self
+    where
+        R: Rng + ?Sized,
+        Ds: Clone + Distribution<A>,
+    {
+        Self::rand_with(self.dim(), distr, rng)
+    }
+}
diff --git a/models/transformers/src/impls/impl_linalg.rs b/models/transformers/src/impls/impl_linalg.rs
index ce069afe..c2ab8812 100644
--- a/models/transformers/src/impls/impl_linalg.rs
+++ b/models/transformers/src/impls/impl_linalg.rs
@@ -2,7 +2,7 @@
     Appellation: impl_linalg <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use crate::params::{Params, QkvBase};
+use crate::params::{Qkv, QkvBase};
 use concision::Matmul;
 use nd::linalg::Dot;
 use nd::*;
@@ -17,7 +17,7 @@ where
     T: Data<Elem = A>,
     ArrayBase<S, D>: Dot<ArrayBase<T, E>, Output = Array<A, F>>,
 {
-    type Output = Params<A, F>;
+    type Output = Qkv<A, F>;
 
     fn matmul(&self, rhs: &QkvBase<T, E>) -> Self::Output {
         QkvBase {
@@ -38,7 +38,7 @@ where
     T: Data<Elem = A>,
     ArrayBase<S, D>: Dot<ArrayBase<T, E>, Output = Array<A, F>>,
 {
-    type Output = Params<A, F>;
+    type Output = Qkv<A, F>;
 
     fn matmul(&self, rhs: &ArrayBase<T, E>) -> Self::Output {
         QkvBase {
diff --git a/models/transformers/src/impls/impl_params.rs b/models/transformers/src/impls/impl_params.rs
index 9736c1b0..2ea7dec4 100644
--- a/models/transformers/src/impls/impl_params.rs
+++ b/models/transformers/src/impls/impl_params.rs
@@ -6,10 +6,12 @@ use crate::params::QkvBase;
 use nd::prelude::*;
 use nd::{Data, DataOwned, RawDataClone};
 
-impl<S, D> Clone for QkvBase<S, D>
+pub(crate) type ThreeTuple<A, B = A, C = B> = (A, B, C);
+
+impl<A, S, D> Clone for QkvBase<S, D>
 where
     D: Dimension,
-    S: RawDataClone,
+    S: RawDataClone<Elem = A>,
 {
     fn clone(&self) -> Self {
         Self {
@@ -20,18 +22,18 @@ where
     }
 }
 
-impl<S, D> Copy for QkvBase<S, D>
+impl<A, S, D> Copy for QkvBase<S, D>
 where
     D: Copy + Dimension,
-    S: Copy + RawDataClone,
+    S: Copy + RawDataClone<Elem = A>,
 {
 }
 
-impl<S, D> Default for QkvBase<S, D>
+impl<A, S, D> Default for QkvBase<S, D>
 where
+    A: Default,
     D: Dimension,
-    S: DataOwned,
-    S::Elem: Default,
+    S: DataOwned<Elem = A>,
 {
     fn default() -> Self {
         Self {
@@ -49,7 +51,7 @@ where
     S: Data<Elem = A>,
 {
     fn eq(&self, other: &Self) -> bool {
-        self.q == *other.q() && self.k == *other.k() && self.v == *other.v()
+        self.q() == other.q() && self.k() == other.k() && self.v() == other.v()
     }
 }
 
@@ -64,6 +66,21 @@ where
     ArrayBase<S, D>: PartialEq<ArrayBase<S2, D2>>,
 {
     fn eq(&self, other: &ArrayBase<S2, D2>) -> bool {
-        self.q == *other && self.k == *other && self.v == *other
+        self.q() == other && self.k() == other && self.v() == other
+    }
+}
+
+impl<A, B, S, D, S2, D2> PartialEq<ThreeTuple<ArrayBase<S2, D2>>> for QkvBase<S, D>
+where
+    A: PartialEq,
+    B: PartialEq,
+    D: Dimension,
+    S: Data<Elem = A>,
+    S2: Data<Elem = B>,
+    D2: Dimension,
+    ArrayBase<S, D>: PartialEq<ArrayBase<S2, D2>>,
+{
+    fn eq(&self, (q, k, v): &ThreeTuple<ArrayBase<S2, D2>>) -> bool {
+        self.q() == q && self.k() == k && self.v() == v
     }
 }
diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs
index ed9cf63e..89cc41f1 100644
--- a/models/transformers/src/lib.rs
+++ b/models/transformers/src/lib.rs
@@ -17,7 +17,7 @@ extern crate concision_core as concision;
 extern crate concision_linear as linear;
 extern crate ndarray as nd;
 
-pub use self::attention::AttentionHead;
+pub use self::attention::{scaled_dot_product_attention, AttentionHead};
 pub use self::params::*;
 pub use self::primitives::*;
 pub use self::transformer::Transformer;
@@ -29,6 +29,7 @@ pub(crate) mod transformer;
 
 pub mod attention;
 pub mod codec;
+pub mod model;
 pub mod ops;
 pub mod params;
 
@@ -40,6 +41,5 @@ pub(crate) mod impls {
 
 pub mod prelude {
     pub use super::attention::prelude::*;
-    pub use super::primitives::*;
     pub use super::Transformer;
 }
diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs
index fd05142e..e25dafae 100644
--- a/models/transformers/src/macros.rs
+++ b/models/transformers/src/macros.rs
@@ -3,81 +3,90 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 
+#[macro_use]
+mod params;
+
 macro_rules! ndbuilder {
-    ($method:ident$(::$call:ident)?() where $($rest:tt)*) => {
-        ndbuilder!(@impl $method$(::$call)?() where $($rest)*);
+    ($method:ident$(::$call:ident)?() $($where:tt)*) => {
+        ndbuilder!(@impl $method$(::$call)?() $($where)*);
     };
-    (@impl $method:ident() where $($rest:tt)*) => {
-        ndbuilder!(@impl $method::$method() where $($rest)*);
+    (@impl $method:ident() $($where:tt)*) => {
+        ndbuilder!(@impl $method::$method() $($where)*);
     };
-    (@impl $method:ident::$call:ident() where $($rest:tt)*) => {
-        pub fn $method<Sh: ndarray::ShapeBuilder<Dim = D>>(shape: Sh) -> Self where $($rest)* {
+    (@impl $method:ident::$call:ident() $($where:tt)*) => {
+        pub fn $method<Sh: ndarray::ShapeBuilder<Dim = D>>(shape: Sh) -> Self $($where)* {
             Self::builder(shape, ndarray::ArrayBase::$call)
         }
     };
 }
 
-// # TODO:
-macro_rules! ndview {
-    ($method:ident::$($rest:tt)*) => {
-        ndview!(@impl $method.$method::$($rest)*);
-    };
-    ($method:ident.$call:ident::$($rest:tt)*) => {
-        ndview!(@impl $method.$call::$($rest)*);
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(self) where $($rest:tt)*) => {
-        pub fn $method(self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(mut self) where $($rest:tt)*) => {
-        pub fn $method(mut self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
+/// Generates a configuration struct (`@impl`) or its companion builder (`@builder`).
+///
+/// The builder stores every field as an `Option` so that unset fields can fall back to a
+/// default when `build` is called.
+// NOTE(review): the `@builder` arm hard-codes `Config` as the build target and reads its
+// defaults from `crate::$field`; confirm both before using it with another type.
+#[allow(unused_macros)]
+macro_rules! cbuilder {
+    (@impl derive: [$($D:ident),* $(,)?], $name:ident {$($vis:vis $field:ident: $type:ty),*}) => {
+        #[derive(Clone, Debug, PartialEq, $($D),*)]
+        #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+        pub struct $name {
+            $($vis $field: $type),*
         }
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(&self) where $($rest:tt)*) => {
-        pub fn $method(&self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(&mut self) where $($rest:tt)*) => {
-        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
+        impl $name {
+            paste::paste! {
+                pub fn new() -> [<$name Builder>] {
+                    [<$name Builder>]::new()
+                }
+            }
+
+            $(
+                pub fn $field(mut self, $field: $type) -> Self {
+                    self.$field = $field;
+                    self
+                }
+            )*
         }
     };
-    (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) where $($rest:tt)*) => {
-        pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
+    (@builder derive: [$($D:ident),* $(,)?], $name:ident {$($field:ident: $type:ty),*}) => {
+        pub struct $name {
+            // Optional so that `new` can start empty and `build` can detect unset fields.
+            $(pub(crate) $field: Option<$type>),*
         }
-    };
-    (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) where $($rest:tt)*) => {
-        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<&'_ mut A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
+
+        impl $name {
+            pub fn new() -> Self {
+                Self {
+                    $($field: None),*
+                }
+            }
+
+            $(
+                pub fn $field(mut self, $field: $type) -> Self {
+                    self.$field = Some($field);
+                    self
+                }
+            )*
+
+            pub fn build(&self) -> Config {
+                Config {
+                    $($field: self.$field.unwrap_or_else(|| crate::$field),)*
+                }
+            }
         }
-    };
-    (@apply $call:ident($self:expr)) => {
-        $crate::params::QkvBase {
-            q: $self.q.$call(),
-            k: $self.k.$call(),
-            v: $self.v.$call(),
+
+        impl Default for $name {
+            fn default() -> Self {
+                Self::new()
+            }
         }
     };
 }
+
+/// This macro helps create a stack of identical sublayers.
+///
+#[allow(unused_macros)]
+macro_rules! sublayer {
+    (@impl heads: $heads:expr) => {};
+}
diff --git a/models/transformers/src/macros/params.rs b/models/transformers/src/macros/params.rs
new file mode 100644
index 00000000..f7e12e32
--- /dev/null
+++ b/models/transformers/src/macros/params.rs
@@ -0,0 +1,50 @@
+/*
+    Appellation: params <macros>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+macro_rules! qkv_view {
+    ($method:ident$(.$call:ident)?::$($rest:tt)*) => {
+        qkv_view!(@impl $method$(.$call)?::$($rest)*);
+    };
+    (@impl $method:ident::$($rest:tt)*) => {
+        qkv_view!(@impl $method.$method::$($rest)*);
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(self) $($rest:tt)*) => {
+        pub fn $method(self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(mut self) $($rest:tt)*) => {
+        pub fn $method(mut self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(&self) $($rest:tt)*) => {
+        pub fn $method(&self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(&mut self) $($rest:tt)*) => {
+        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) $($rest:tt)*) => {
+        pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) $($rest:tt)*) => {
+        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<&'_ mut A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@apply $call:ident($self:expr)) => {
+        $crate::params::QkvBase {
+            q: $self.q.$call(),
+            k: $self.k.$call(),
+            v: $self.v.$call(),
+        }
+    };
+}
diff --git a/models/transformers/src/model/mod.rs b/models/transformers/src/model/mod.rs
new file mode 100644
index 00000000..ac227da3
--- /dev/null
+++ b/models/transformers/src/model/mod.rs
@@ -0,0 +1,6 @@
+/*
+    Appellation: model <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub mod sublayer;
diff --git a/models/transformers/src/model/sublayer.rs b/models/transformers/src/model/sublayer.rs
new file mode 100644
index 00000000..a1a5fbe7
--- /dev/null
+++ b/models/transformers/src/model/sublayer.rs
@@ -0,0 +1,74 @@
+/*
+    Appellation: sublayer <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+#![cfg(feature = "rand")]
+use concision::nn::DropoutLayer;
+use concision::Forward;
+use linear::{Biased, LayerNorm, ParamMode, Unbiased};
+use nd::prelude::*;
+use nd::{DataOwned, RemoveAxis, ScalarOperand};
+use num::traits::{Float, FromPrimitive};
+
+/// A residual connection around a [layer norm](LayerNorm) followed by dropout, i.e.
+/// `input + dropout(norm(input))`; see also [Transformer](crate::Transformer).
+pub struct Sublayer<A = f64, K = Biased, D = Ix2>
+where
+    D: Dimension,
+{
+    pub(crate) dropout: DropoutLayer,
+    pub(crate) norm: LayerNorm<A, K, D>,
+}
+
+impl<A, K, D> Sublayer<A, K, D>
+where
+    D: RemoveAxis,
+{
+    pub fn new<Sh>(shape: Sh, dropout: f64) -> Self
+    where
+        A: Default,
+        K: ParamMode,
+        Sh: ShapeBuilder<Dim = D>,
+    {
+        Self {
+            dropout: DropoutLayer::new(dropout),
+            norm: LayerNorm::new(shape),
+        }
+    }
+
+    pub fn dropout(&self) -> &DropoutLayer {
+        &self.dropout
+    }
+
+    pub fn norm(&self) -> &LayerNorm<A, K, D> {
+        &self.norm
+    }
+}
+
+impl<A, S, D> Forward<ArrayBase<S, D>> for Sublayer<A, Biased, D>
+where
+    A: Float + FromPrimitive + ScalarOperand,
+    D: RemoveAxis,
+    S: DataOwned<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn forward(&self, input: &ArrayBase<S, D>) -> Self::Output {
+        let normal = self.norm().forward(input);
+        input + self.dropout().forward(&normal)
+    }
+}
+
+impl<A, S, D> Forward<ArrayBase<S, D>> for Sublayer<A, Unbiased, D>
+where
+    A: Float + FromPrimitive + ScalarOperand,
+    D: RemoveAxis,
+    S: DataOwned<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn forward(&self, input: &ArrayBase<S, D>) -> Self::Output {
+        let normal = self.norm().forward(input);
+        input + self.dropout().forward(&normal)
+    }
+}
diff --git a/models/transformers/src/ops/merge.rs b/models/transformers/src/ops/merge.rs
index 747cae82..c7e66e3e 100644
--- a/models/transformers/src/ops/merge.rs
+++ b/models/transformers/src/ops/merge.rs
@@ -2,9 +2,15 @@
    Appellation: merge <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
+use super::_merge_dim;
 use concision::NdResult;
-use nd::prelude::*;
-use nd::{Data, RemoveAxis};
+use nd::{Array, ArrayBase, Data, Dimension, RemoveAxis};
+
+pub trait DimMerge {
+    type Output;
+
+    fn merge(&self, tgt: usize) -> Self::Output;
+}
 
 // #67: Optimize the Merge trait
 pub trait Merge {
@@ -20,6 +26,19 @@ pub trait Merge {
 /*
  ************* Implementations *************
 */
+impl<D> DimMerge for D
+where
+    D: RemoveAxis,
+    D::Smaller: Dimension,
+    D::Larger: Dimension,
+{
+    type Output = D::Smaller;
+
+    fn merge(&self, tgt: usize) -> Self::Output {
+        _merge_dim(self, tgt)
+    }
+}
+
 impl<A, S, D, E> Merge for ArrayBase<S, D>
 where
     A: Clone,
@@ -36,7 +55,6 @@ where
     }
 
     fn merge_along(&self, swap: usize) -> NdResult<Self::Output> {
-        use ndarray::Order;
-        super::merger(self, swap, swap + 1, Order::RowMajor)
+        super::_merge(self, swap, swap + 1, super::ORDER)
     }
 }
diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs
index 778e4abc..6612af22 100644
--- a/models/transformers/src/ops/mod.rs
+++ b/models/transformers/src/ops/mod.rs
@@ -2,21 +2,29 @@
    Appellation: ops <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::{merge::*, split::*, utils::*};
+pub use self::prelude::*;
 
-pub(crate) mod merge;
-pub(crate) mod split;
+mod merge;
+mod split;
+
+pub(crate) mod prelude {
+    pub use super::merge::*;
+    pub use super::split::*;
+    pub(crate) use super::utils::*;
+}
+
+pub(crate) const ORDER: nd::Order = nd::Order::RowMajor;
 
 pub(crate) mod utils {
     use concision::NdResult;
     use nd::prelude::*;
     use nd::{Data, Order, RemoveAxis};
 
-    #[doc(hidden)]
-    pub fn merge<A, S, D>(
+    pub(crate) fn _merge<A, S, D>(
         arr: &ArrayBase<S, D>,
         src: usize,
         tgt: usize,
+        order: Order,
     ) -> NdResult<Array<A, D::Smaller>>
     where
         A: Clone,
@@ -25,83 +33,68 @@ pub(crate) mod utils {
         D::Smaller: Dimension,
         ArrayBase<S, D>: Clone,
     {
-        merger(arr, src, tgt, Order::RowMajor)
+        let shape = _merge_dim(&arr.raw_dim(), src);
+        let mut head = arr.clone();
+        head.swap_axes(src, tgt);
+        head.to_shape((shape, order)).map(|x| x.to_owned())
     }
 
-    pub(crate) fn merger<A, S, D>(
+    pub(crate) fn _split<A, S, D, E>(
         arr: &ArrayBase<S, D>,
-        src: usize,
-        tgt: usize,
+        h: usize,
         order: Order,
-    ) -> NdResult<Array<A, D::Smaller>>
+    ) -> NdResult<Array<A, E>>
     where
         A: Clone,
-        D: RemoveAxis,
+        D: Dimension<Larger = E>,
+        E: RemoveAxis<Smaller = D>,
         S: Data<Elem = A>,
-        D::Smaller: Dimension,
         ArrayBase<S, D>: Clone,
     {
-        let shape = merge_dims(arr.raw_dim(), src);
-        let mut head = arr.clone();
+        let src = if arr.ndim() >= 2 { arr.ndim() - 2 } else { 0 };
+        let tgt = src + 1;
+        let shape: E = _split_dim(&arr.raw_dim(), h);
+        let mut head = arr.to_shape((shape, order))?.to_owned();
         head.swap_axes(src, tgt);
-        head.to_shape((shape, order)).map(|x| x.to_owned())
+        Ok(head)
     }
-
-    #[doc(hidden)]
-    pub fn merge_dims<D>(dim: D, src: usize) -> D::Smaller
+    /// Creates the new dimension after merging two axes.
+    pub(crate) fn _merge_dim<D>(dim: &D, axis: usize) -> D::Smaller
     where
         D: RemoveAxis,
         D::Smaller: Dimension,
     {
         // create a new dimension with one less axis; initialized with zeros
-        let mut new_dim = <D as Dimension>::Smaller::zeros(dim.ndim() - 1);
+        let mut dn = <D as Dimension>::Smaller::zeros(dim.ndim() - 1);
         // create a mutable vector from the slice
         let mut shape = dim.slice().to_vec();
         // multiply the last axis by the target
-        shape[new_dim.ndim()] *= shape[src];
+        shape[dn.ndim()] *= shape[axis];
         // remove the last dimension
-        shape.remove(src);
-
-        new_dim.slice_mut().copy_from_slice(&shape);
-        new_dim
-    }
-
-    #[doc(hidden)]
-    pub fn merge_batch<T>(heads: &Array4<T>) -> NdResult<Array3<T>>
-    where
-        T: Clone,
-    {
-        let (batch, n, seq, query) = heads.dim();
-        let mut tmp = heads.clone();
-        // swap the head and sequence axes
-        tmp.swap_axes(1, 2);
-        // reshape the qkv matrix into a 2d array
-        tmp.into_shape((batch, seq, n * query))
-    }
+        shape.remove(axis);
 
-    pub fn split_heads<T>(param: &Array2<T>, h: usize) -> NdResult<Array3<T>>
-    where
-        T: Clone,
-    {
-        let dim = param.shape().last().unwrap() / h;
-        // reshape the qkv matrix into a 3d array
-        let mut res = param.clone().into_shape((param.shape()[0], h, dim))?;
-        // swap the sequence and head axes
-        res.swap_axes(0, 1);
-        Ok(res)
+        dn.slice_mut().copy_from_slice(&shape);
+        dn
     }
 
-    pub fn split_batch<T>(param: &Array3<T>, h: usize) -> NdResult<Array4<T>>
+    pub(crate) fn _split_dim<D>(dim: &D::Smaller, h: usize) -> D
     where
-        T: Clone,
+        D: RemoveAxis,
+        D::Smaller: Dimension,
     {
-        let dim = param.shape().last().unwrap() / h;
-        // reshape the qkv matrix into a 3d array
-        let mut res = param
-            .clone()
-            .into_shape((param.shape()[0], param.shape()[1], h, dim))?;
-        // swap the sequence and head axes
-        res.swap_axes(1, 2);
-        Ok(res)
+        let rank = dim.ndim() + 1;
+        // create a new dimension with one more axis; initialized with zeros
+        let mut new_dim = D::zeros(rank);
+        // create a mutable vector from the slice
+        let mut shape = dim.slice().to_vec();
+        // get and remove the last axis
+        let bx = shape.pop().unwrap() / h;
+        // extend the shape with the new axes
+        shape.push(h);
+        shape.push(bx);
+        // shape.swap(rank - 2, rank - 3);
+        // copy the values into the new dimension
+        new_dim.slice_mut().copy_from_slice(&shape);
+        new_dim
     }
 }
diff --git a/models/transformers/src/ops/split.rs b/models/transformers/src/ops/split.rs
index 3a182710..d98d861d 100644
--- a/models/transformers/src/ops/split.rs
+++ b/models/transformers/src/ops/split.rs
@@ -2,49 +2,76 @@
    Appellation: split <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-use ndarray::prelude::{Array2, Array3, Array4};
-use ndarray::ShapeError;
-
-// pub fn split<D: Dimension, T: Clone>(param: &Array<T, D>, heads: usize) -> Result<Array3<T>, ShapeError> {
-//     let mut dim = param.dim()
-//     let query = param.shape().last().unwrap() / heads;
-//     // reshape the qkv matrix into a 3d array
-//     let mut res = param.clone().into_shape((param.shape()[0], heads, query))?;
-//     // swap the sequence and head axes
-//     res.swap_axes(0, 1);
-//     Ok(res)
-// }
+use ndarray::{Array, ArrayBase, Data, Dimension, RemoveAxis, ShapeError};
+
+/// Split the last axis of a dimension into two axes: `h` and `last / h`
+pub trait DimSplit {
+    type Output;
+
+    fn split(&self, h: usize) -> Self::Output;
+}
 
-pub trait Split {
+pub trait SplitHead {
     type Output;
 
     fn split(&self, heads: usize) -> Result<Self::Output, ShapeError>;
 }
 
-impl<T: Clone> Split for Array2<T> {
-    type Output = Array3<T>;
-
-    fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
-        let (seq, model) = self.dim();
-        let query = model / heads;
-        // reshape the qkv matrix into a 3d array
-        let mut res = self.clone().into_shape((seq, heads, query))?;
-        // swap the sequence and head axes
-        res.swap_axes(0, 1);
-        Ok(res)
+/*
+ ************* Implementations *************
+*/
+
+impl<D, E> DimSplit for D
+where
+    D: Dimension<Larger = E>,
+    E: RemoveAxis<Smaller = D>,
+{
+    type Output = E;
+
+    fn split(&self, h: usize) -> Self::Output {
+        super::utils::_split_dim(self, h)
     }
 }
 
-impl<T: Clone> Split for Array3<T> {
-    type Output = Array4<T>;
-
-    fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
-        let (batch, seq, model) = self.dim();
-        let query = model / heads;
-        // reshape the qkv matrix into a 3d array
-        let mut res = self.clone().into_shape((batch, seq, heads, query))?;
-        // swap the sequence and head axes
-        res.swap_axes(1, 2);
-        Ok(res)
+impl<A, S, D, E> SplitHead for ArrayBase<S, D>
+where
+    A: Clone,
+    D: Dimension<Larger = E>,
+    E: RemoveAxis<Smaller = D>,
+    S: Data<Elem = A>,
+    ArrayBase<S, D>: Clone,
+{
+    type Output = Array<A, E>;
+
+    fn split(&self, h: usize) -> Result<Self::Output, ShapeError> {
+        super::_split(self, h, super::ORDER)
     }
 }
+
+// impl<T: Clone> Split for Array2<T> {
+//     type Output = Array3<T>;
+
+//     fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
+//         let (seq, model) = self.dim();
+//         let query = model / heads;
+//         // reshape the qkv matrix into a 3d array
+//         let mut res = self.clone().into_shape((seq, heads, query))?;
+//         // swap the sequence and head axes
+//         res.swap_axes(0, 1);
+//         Ok(res)
+//     }
+// }
+
+// impl<T: Clone> Split for Array3<T> {
+//     type Output = Array4<T>;
+
+//     fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
+//         let (batch, seq, model) = self.dim();
+//         let query = model / heads;
+//         // reshape the qkv matrix into a 3d array
+//         let mut res = self.clone().into_shape((batch, seq, heads, query))?;
+//         // swap the sequence and head axes
+//         res.swap_axes(1, 2);
+//         Ok(res)
+//     }
+// }
diff --git a/models/transformers/src/params/mod.rs b/models/transformers/src/params/mod.rs
index 367f8b2a..ba79e10f 100644
--- a/models/transformers/src/params/mod.rs
+++ b/models/transformers/src/params/mod.rs
@@ -4,11 +4,12 @@
 */
 pub use self::{item::*, store::QkvBase};
 
-pub(crate) mod item;
-pub(crate) mod store;
+mod store;
+
+pub mod item;
 
 macro_rules! params_ty {
-    ($target:ident: [$($name:ident<$(&$lt:lifetime)?$repr:ident>),* $(,)?]) => {
+    ($target:ident {$($name:ident: $(&$lt:lifetime)? $repr:ident),* $(,)?}) => {
         $(params_ty!(@impl $target: $name<$(&$lt)? $repr>);)*
     };
     (@impl $target:ident: $name:ident<$repr:ident>) => {
@@ -20,16 +21,17 @@ macro_rules! params_ty {
 }
 
 params_ty!(
-    QkvBase: [
-        Params<OwnedRepr>,
-        ArcParams<OwnedArcRepr>,
-        ParamsView<&'a ViewRepr>,
-    ]
+    QkvBase {
+        Qkv: OwnedRepr,
+        ArcQkv: OwnedArcRepr,
+        ViewQkv: &'a ViewRepr,
+
+    }
 );
 
 #[allow(unused_imports)]
 pub(crate) mod prelude {
     pub use super::item::{Entry, QKV};
     pub use super::store::QkvBase;
-    pub use super::{ArcParams, Params};
+    pub use super::{ArcQkv, Qkv, ViewQkv};
 }
diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs
index 90c13693..f59ee6eb 100644
--- a/models/transformers/src/params/store.rs
+++ b/models/transformers/src/params/store.rs
@@ -2,10 +2,16 @@
     Appellation: params <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::attention::{Score, _attention};
+use concision::nn::DropoutLayer;
 use concision::{dimensional, getters};
+use nd::linalg::Dot;
 use nd::*;
+use num::complex::ComplexFloat;
 use num::traits::{One, Zero};
 
+/// [QkvBase] is a container for the query, key, and value arrays used in the
+/// attention mechanism of the transformer model.
 pub struct QkvBase<S = OwnedRepr<f64>, D = Ix2>
 where
     D: Dimension,
@@ -72,12 +78,53 @@ where
 
     dimensional!(q());
 
-    ndview!(into_owned::<OwnedRepr>(self) where A: Clone, S: Data);
-    ndview!(to_owned::<OwnedRepr>(&self) where A: Clone, S: Data);
+    qkv_view!(into_owned::<OwnedRepr>(self) where A: Clone, S: Data);
+    qkv_view!(to_owned::<OwnedRepr>(&self) where A: Clone, S: Data);
 
-    ndview!(into_shared::<OwnedArcRepr>(self) where A: Clone, S: DataOwned);
-    ndview!(to_shared::<OwnedArcRepr>(&self) where A: Clone, S: DataShared);
+    qkv_view!(into_shared::<OwnedArcRepr>(self) where A: Clone, S: DataOwned);
+    qkv_view!(to_shared::<OwnedArcRepr>(&self) where A: Clone, S: DataShared);
 
-    ndview!(view::<'a, ViewRepr>(&self) where S: Data);
-    ndview!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut);
+    qkv_view!(view::<'a, ViewRepr>(&self) where S: Data);
+    qkv_view!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut);
+}
+
+#[cfg(not(feature = "rand"))]
+impl<A, S, D> QkvBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+    A: Clone,
+{
+    /// Computes the [Score] via scaled dot-product attention; `dropout` is unused without `rand`.
+    pub fn attention(&self, dropout: Option<f64>, mask: Option<&Array<bool, D>>) -> Score<A, D>
+    where
+        A: ComplexFloat + ScalarOperand,
+        S: Data,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+    {
+        let (q, k, v) = self.qkv();
+        _attention(q, k, v, mask, None)
+    }
+}
+
+#[cfg(feature = "rand")]
+impl<A, S, D> QkvBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+    A: Clone,
+{
+    /// Computes the [Score] via scaled dot-product attention; `Some(dropout)` builds a [DropoutLayer].
+    pub fn attention(&self, dropout: Option<f64>, mask: Option<&Array<bool, D>>) -> Score<A, D>
+    where
+        A: ComplexFloat + ScalarOperand,
+        S: Data,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+    {
+        let dropout = dropout.map(DropoutLayer::new);
+        let (q, k, v) = self.qkv();
+        _attention(q, k, v, mask, dropout.as_ref())
+    }
+}
diff --git a/models/transformers/src/primitives.rs b/models/transformers/src/primitives.rs
index 96db829b..3b30e7aa 100644
--- a/models/transformers/src/primitives.rs
+++ b/models/transformers/src/primitives.rs
@@ -5,6 +5,18 @@
 pub use self::consts::*;
 
 pub mod consts {
+    /// The default dimension of the model; i.e. the number of inputs
+    pub const D_MODEL: usize = 512;
+    /// The default size of the network; i.e. the number of neurons in the network
+    pub const D_NETWORK: usize = 2048;
+    /// The default dimension of the key and query vectors
+    pub const DK: usize = D_MODEL / HEADS;
+    /// The default number of attention heads
+    pub const HEADS: usize = 8;
     /// The default number of layers used for the encoder / decoder.
     pub const N: usize = 6;
 }
+
+pub fn outputs_from_ratio(model: usize, network: usize) -> usize {
+    network / model
+}
diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs
index db1efe2a..6bc023af 100644
--- a/models/transformers/tests/attention.rs
+++ b/models/transformers/tests/attention.rs
@@ -5,39 +5,18 @@
 extern crate concision_core as concision;
 extern crate concision_transformers as transformers;
 
-use concision::{linarr, Matmul};
-use transformers::{AttentionHead, Params};
+use approx::AbsDiffEq;
+use transformers::AttentionHead;
 
 use ndarray::prelude::*;
 
 #[test]
-fn test_qkv() {
-    let shape = (2048, 10);
-    let params = Params::<f64>::new(shape);
-    assert_eq!(params.q(), &Array::default(shape));
-}
-
-#[test]
-fn test_qkv_matmul() {
-    let shape = (2048, 10);
-    // generate some sample data
-    let data = linarr(shape).unwrap();
-    // initialize the parameters
-    let params = Params::<f64>::ones(shape);
-    // calculate the expected result
-    let exp = Array2::<f64>::ones(shape).dot(&data.t());
-    // calculate the result
-    let res = params.matmul(&data.t());
-    // compare the results
-    assert_eq!(res.q(), &exp);
-    assert_eq!(res.k(), &exp);
-    assert_eq!(res.v(), &exp);
-}
-
-#[test]
-fn test_attention_head() {
-    let shape = (30, 3);
+fn attention_head() {
+    let shape = (3, 3);
 
     let head = AttentionHead::<f64>::ones(shape);
     assert_eq!(head.q(), &Array::ones(shape));
+    let exp = Array2::from_elem(shape, 1f64 / 3f64);
+    let score = head.attention();
+    assert!(score.attention().abs_diff_eq(&exp, 1e-6));
 }
diff --git a/models/transformers/tests/ops.rs b/models/transformers/tests/ops.rs
index c39b8efa..687b50db 100644
--- a/models/transformers/tests/ops.rs
+++ b/models/transformers/tests/ops.rs
@@ -4,22 +4,24 @@
 */
 extern crate concision_core as concision;
 extern crate concision_transformers as transformers;
+extern crate ndarray as nd;
 
 use concision::linarr;
-use ndarray::prelude::*;
+use nd::prelude::*;
 use transformers::ops::*;
 
+pub const HEADS: usize = 2;
+pub const ORDER: nd::Order = nd::Order::RowMajor;
+
 #[test]
 fn test_merge() {
     let shape = (3, 4, 5);
     let dout = (4, 15);
     let arr = linarr::<f64, Ix3>(shape.clone()).unwrap();
     let a = arr.clone().merge().unwrap();
-    let b = merge(&arr, 0, 1).unwrap();
 
     assert_eq!(a.dim(), dout);
-    assert_eq!(a.dim(), b.dim());
-    assert_eq!(a, b);
+    assert_eq!(a, utils::merge3(&arr).unwrap());
 }
 
 #[test]
@@ -28,25 +30,100 @@ fn test_merge_batch() {
     let dout = (2, 4, 15);
     let arr = linarr::<f64, Ix4>(shape).unwrap();
     let a = arr.merge().unwrap();
-    let b = merge(&arr, 1, 2).unwrap();
 
     assert_eq!(a.dim(), dout);
-    assert_eq!(a, b);
+    assert_eq!(a, utils::merge4(&arr).unwrap());
+}
+
+#[test]
+fn test_split() {
+    let heads = 2;
+    let shape = (4, 6);
+    let arr = linarr::<f64, Ix2>(shape).unwrap();
+    let a = arr.split(heads).unwrap();
+
+    assert_eq!(a.dim(), (heads, 4, 3));
+    assert_eq!(a, utils::split_heads(&arr, heads).unwrap());
+}
+
+#[test]
+fn test_split_batch() {
+    let heads = 2;
+    let shape = (3, 4, 6);
+    let arr = linarr::<f64, Ix3>(shape).unwrap();
+    let a = arr.split(heads).unwrap();
+
+    assert_eq!(a.dim(), (3, heads, 4, 3));
+    assert_eq!(a, utils::split_batch(&arr, heads).unwrap());
 }
 
 #[test]
 fn reshape_ops() {
-    let dim_input: [usize; 3] = [2, 4, 6]; // (batch, seq, model)
-    let dim_split = [2, 2, 4, 3]; // (batch, heads, seq, model)
-    let data = linarr::<f64, Ix3>(dim_input).unwrap();
-
-    let a = split_batch(&data, 2).unwrap();
-    let b = a.merge().unwrap(); // merge_batch(&a).unwrap();
-
-    assert_eq!(a.shape(), &dim_split);
-    assert_eq!(b.shape(), &dim_input);
-    assert_eq!(a, data.split(2).unwrap());
-    for (i, &j) in b.indexed_iter() {
-        assert_eq!(j, data[i]);
+    let shape = (2, 4, 6);
+    let data = linarr::<f64, Ix3>(shape).unwrap();
+
+    let a = data.split(HEADS).unwrap();
+    assert_eq!(a.dim(), (2, HEADS, 4, 3));
+    let b = a.merge().unwrap();
+    assert_eq!(b.dim(), shape);
+    // verify that doing the ops consecutively is the identity
+    assert_eq!(b, data);
+}
+
+#[allow(dead_code)]
+pub(crate) mod utils {
+    use concision::NdResult;
+    use ndarray::*;
+
+    pub fn merge3<T>(heads: &Array3<T>) -> NdResult<Array2<T>>
+    where
+        T: Clone,
+    {
+        let (n, seq, query) = heads.dim();
+        let shape = (seq, n * query);
+        let mut tmp = heads.clone();
+        // swap the head and sequence axes
+        tmp.swap_axes(0, 1);
+        // reshape the qkv matrix into a 2d array
+        tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned())
+    }
+
+    pub fn merge4<T>(heads: &Array4<T>) -> NdResult<Array3<T>>
+    where
+        T: Clone,
+    {
+        let (batch, n, seq, query) = heads.dim();
+        let shape = (batch, seq, n * query);
+        let mut tmp = heads.clone();
+        // swap the head and sequence axes
+        tmp.swap_axes(1, 2);
+        // reshape the qkv matrix into a 3d array of shape (batch, seq, n * query)
+        tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned())
+    }
+
+    pub fn split_heads<T>(param: &Array2<T>, h: usize) -> NdResult<Array3<T>>
+    where
+        T: Clone,
+    {
+        let dim = param.shape().last().unwrap() / h;
+        // reshape the qkv matrix into a 3d array
+        let mut res = param.clone().into_shape((param.shape()[0], h, dim))?;
+        // swap the sequence and head axes
+        res.swap_axes(0, 1);
+        Ok(res)
+    }
+
+    pub fn split_batch<T>(param: &Array3<T>, h: usize) -> NdResult<Array4<T>>
+    where
+        T: Clone,
+    {
+        let dim = param.shape().last().unwrap() / h;
+        // reshape the qkv matrix into a 4d array, splitting the last axis into (h, dim)
+        let mut res = param
+            .clone()
+            .into_shape((param.shape()[0], param.shape()[1], h, dim))?;
+        // swap the sequence and head axes
+        res.swap_axes(1, 2);
+        Ok(res)
     }
 }
diff --git a/models/transformers/tests/params.rs b/models/transformers/tests/params.rs
new file mode 100644
index 00000000..18656be8
--- /dev/null
+++ b/models/transformers/tests/params.rs
@@ -0,0 +1,35 @@
+/*
+    Appellation: params <test>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+extern crate concision_core as concision;
+extern crate concision_transformers as transformers;
+
+use concision::{linarr, Matmul};
+use transformers::Qkv;
+
+use ndarray::prelude::*;
+
+#[test]
+fn test_qkv() {
+    let shape = (2048, 10);
+    let params = Qkv::<f64>::new(shape);
+    assert_eq!(params.q(), &Array::default(shape));
+}
+
+#[test]
+fn test_qkv_matmul() {
+    let shape = (2048, 10);
+    // generate some sample data
+    let data = linarr(shape).unwrap();
+    // initialize the parameters
+    let params = Qkv::<f64>::ones(shape);
+    // calculate the expected result
+    let exp = Array2::<f64>::ones(shape).dot(&data.t());
+    // calculate the result
+    let res = params.matmul(&data.t());
+    // compare the results
+    assert_eq!(res.q(), &exp);
+    assert_eq!(res.k(), &exp);
+    assert_eq!(res.v(), &exp);
+}