From 7a8a7871f73711aebffa2aeebcf9c6d36e3f2992 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Sun, 19 May 2024 13:44:39 -0500
Subject: [PATCH 01/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 models/transformers/src/ops/merge.rs | 26 ++++++--
 models/transformers/src/ops/mod.rs   | 89 ++++++++++++++++++++++------
 models/transformers/src/ops/split.rs | 84 ++++++++++++++++++--------
 models/transformers/tests/ops.rs     | 78 +++++++++++++++++++-----
 4 files changed, 217 insertions(+), 60 deletions(-)
diff --git a/models/transformers/src/ops/merge.rs b/models/transformers/src/ops/merge.rs
index 747cae82..2ca6296c 100644
--- a/models/transformers/src/ops/merge.rs
+++ b/models/transformers/src/ops/merge.rs
@@ -2,9 +2,15 @@
    Appellation: merge <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
+use super::_merge_dim;
 use concision::NdResult;
-use nd::prelude::*;
-use nd::{Data, RemoveAxis};
+use nd::{Array, ArrayBase, Data, Dimension, RemoveAxis};
+
+pub trait DimMerge {
+    type Output;
+
+    fn merge(&self, tgt: usize) -> Self::Output;
+}
 
 // #67: Optimize the Merge trait
 pub trait Merge {
@@ -20,6 +26,19 @@ pub trait Merge {
 /*
  ************* Implementations *************
 */
+impl<D> DimMerge for D
+where
+    D: RemoveAxis,
+    D::Smaller: Dimension,
+    D::Larger: Dimension,
+{
+    type Output = D::Smaller;
+
+    fn merge(&self, tgt: usize) -> Self::Output {
+        _merge_dim(self, tgt)
+    }
+}
+
 impl<A, S, D, E> Merge for ArrayBase<S, D>
 where
     A: Clone,
@@ -36,7 +55,6 @@ where
     }
 
     fn merge_along(&self, swap: usize) -> NdResult<Self::Output> {
-        use ndarray::Order;
-        super::merger(self, swap, swap + 1, Order::RowMajor)
+        super::_merge(self, swap, swap + 1, false)
     }
 }
diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs
index 778e4abc..4af970b3 100644
--- a/models/transformers/src/ops/mod.rs
+++ b/models/transformers/src/ops/mod.rs
@@ -12,7 +12,18 @@ pub(crate) mod utils {
     use nd::prelude::*;
     use nd::{Data, Order, RemoveAxis};
 
-    #[doc(hidden)]
+    pub(crate) fn order(row_major: bool) -> Order {
+        if row_major {
+            Order::RowMajor
+        } else {
+            Order::ColumnMajor
+        }
+    }
+
+    #[deprecated(
+        since = "0.1.14",
+        note = "Please use the `Merge::merge` method instead"
+    )]
     pub fn merge<A, S, D>(
         arr: &ArrayBase<S, D>,
         src: usize,
@@ -25,14 +36,28 @@ pub(crate) mod utils {
         D::Smaller: Dimension,
         ArrayBase<S, D>: Clone,
     {
-        merger(arr, src, tgt, Order::RowMajor)
+        _merge(arr, src, tgt, false)
+    }
+    #[deprecated(
+        since = "0.1.14",
+        note = "Please use the `Split::Split` method instead"
+    )]
+    pub fn split<A, S, D, E>(arr: &ArrayBase<S, D>, h: usize) -> NdResult<Array<A, E>>
+    where
+        A: Clone,
+        D: Dimension<Larger = E>,
+        E: RemoveAxis<Smaller = D>,
+        S: Data<Elem = A>,
+        ArrayBase<S, D>: Clone,
+    {
+        _split(arr, h, true)
     }
 
-    pub(crate) fn merger<A, S, D>(
+    pub(crate) fn _merge<A, S, D>(
         arr: &ArrayBase<S, D>,
         src: usize,
         tgt: usize,
-        order: Order,
+        row_major: bool,
     ) -> NdResult<Array<A, D::Smaller>>
     where
         A: Clone,
@@ -41,14 +66,36 @@ pub(crate) mod utils {
         D::Smaller: Dimension,
         ArrayBase<S, D>: Clone,
     {
-        let shape = merge_dims(arr.raw_dim(), src);
+        let shape = _merge_dim(&arr.raw_dim(), src);
         let mut head = arr.clone();
         head.swap_axes(src, tgt);
-        head.to_shape((shape, order)).map(|x| x.to_owned())
+        head.to_shape((shape, order(row_major)))
+            .map(|x| x.to_owned())
     }
 
-    #[doc(hidden)]
-    pub fn merge_dims<D>(dim: D, src: usize) -> D::Smaller
+    pub(crate) fn _split<A, S, D, E>(
+        arr: &ArrayBase<S, D>,
+        h: usize,
+        row_major: bool,
+    ) -> NdResult<Array<A, E>>
+    where
+        A: Clone,
+        D: Dimension<Larger = E>,
+        E: RemoveAxis<Smaller = D>,
+        S: Data<Elem = A>,
+        ArrayBase<S, D>: Clone,
+    {
+        let src = if arr.ndim() >= 2 { arr.ndim() - 2 } else { 0 };
+        let tgt = src + 1;
+        let shape: E = _split_dim(&arr.raw_dim(), h);
+        let mut head = arr.clone();
+
+        head.swap_axes(src, tgt);
+        head.to_shape((shape, order(row_major)))
+            .map(|x| x.to_owned())
+    }
+    /// Creates the new dimension after merging two axes.
+    pub(crate) fn _merge_dim<D>(dim: &D, src: usize) -> D::Smaller
     where
         D: RemoveAxis,
         D::Smaller: Dimension,
@@ -66,17 +113,25 @@ pub(crate) mod utils {
         new_dim
     }
 
-    #[doc(hidden)]
-    pub fn merge_batch<T>(heads: &Array4<T>) -> NdResult<Array3<T>>
+    pub(crate) fn _split_dim<D>(dim: &D::Smaller, h: usize) -> D
     where
-        T: Clone,
+        D: RemoveAxis,
+        D::Smaller: Dimension,
     {
-        let (batch, n, seq, query) = heads.dim();
-        let mut tmp = heads.clone();
-        // swap the head and sequence axes
-        tmp.swap_axes(1, 2);
-        // reshape the qkv matrix into a 2d array
-        tmp.into_shape((batch, seq, n * query))
+        let rank = dim.ndim() + 1;
+        // create a new dimension with one more axis; initialized with zeros
+        let mut new_dim = D::zeros(rank);
+        // create a mutable vector from the slice
+        let mut shape = dim.slice().to_vec();
+        // get and remove the last axis
+        let bx = shape.pop().unwrap() / h;
+        // extend the shape with the new axes
+        shape.push(h);
+        shape.push(bx);
+        shape.swap(rank - 2, rank - 3);
+        // copy the values into the new dimension
+        new_dim.slice_mut().copy_from_slice(&shape);
+        new_dim
     }
 
     pub fn split_heads<T>(param: &Array2<T>, h: usize) -> NdResult<Array3<T>>
diff --git a/models/transformers/src/ops/split.rs b/models/transformers/src/ops/split.rs
index 3a182710..2a1f96c7 100644
--- a/models/transformers/src/ops/split.rs
+++ b/models/transformers/src/ops/split.rs
@@ -2,8 +2,7 @@
    Appellation: split <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-use ndarray::prelude::{Array2, Array3, Array4};
-use ndarray::ShapeError;
+use ndarray::{Array, ArrayBase, Data, Dimension, RemoveAxis, ShapeError};
 
 // pub fn split<D: Dimension, T: Clone>(param: &Array<T, D>, heads: usize) -> Result<Array3<T>, ShapeError> {
 //     let mut dim = param.dim()
@@ -15,36 +14,73 @@ use ndarray::ShapeError;
 //     Ok(res)
 // }
 
+pub trait DimSplit {
+    type Output;
+
+    fn split(&self, h: usize) -> Self::Output;
+}
+
 pub trait Split {
     type Output;
 
     fn split(&self, heads: usize) -> Result<Self::Output, ShapeError>;
 }
 
-impl<T: Clone> Split for Array2<T> {
-    type Output = Array3<T>;
-
-    fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
-        let (seq, model) = self.dim();
-        let query = model / heads;
-        // reshape the qkv matrix into a 3d array
-        let mut res = self.clone().into_shape((seq, heads, query))?;
-        // swap the sequence and head axes
-        res.swap_axes(0, 1);
-        Ok(res)
+/*
+ ************* Implementations *************
+*/
+
+impl<D, E> DimSplit for D
+where
+    D: Dimension<Larger = E>,
+    E: RemoveAxis<Smaller = D>,
+{
+    type Output = E;
+
+    fn split(&self, h: usize) -> Self::Output {
+        super::utils::_split_dim(self, h)
     }
 }
 
-impl<T: Clone> Split for Array3<T> {
-    type Output = Array4<T>;
-
-    fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
-        let (batch, seq, model) = self.dim();
-        let query = model / heads;
-        // reshape the qkv matrix into a 3d array
-        let mut res = self.clone().into_shape((batch, seq, heads, query))?;
-        // swap the sequence and head axes
-        res.swap_axes(1, 2);
-        Ok(res)
+impl<A, S, D, E> Split for ArrayBase<S, D>
+where
+    A: Clone,
+    D: Dimension<Larger = E>,
+    E: RemoveAxis<Smaller = D>,
+    S: Data<Elem = A>,
+    ArrayBase<S, D>: Clone,
+{
+    type Output = Array<A, E>;
+
+    fn split(&self, h: usize) -> Result<Self::Output, ShapeError> {
+        super::_split(self, h, false)
     }
 }
+
+// impl<T: Clone> Split for Array2<T> {
+//     type Output = Array3<T>;
+
+//     fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
+//         let (seq, model) = self.dim();
+//         let query = model / heads;
+//         // reshape the qkv matrix into a 3d array
+//         let mut res = self.clone().into_shape((seq, heads, query))?;
+//         // swap the sequence and head axes
+//         res.swap_axes(0, 1);
+//         Ok(res)
+//     }
+// }
+
+// impl<T: Clone> Split for Array3<T> {
+//     type Output = Array4<T>;
+
+//     fn split(&self, heads: usize) -> Result<Self::Output, ShapeError> {
+//         let (batch, seq, model) = self.dim();
+//         let query = model / heads;
+//         // reshape the qkv matrix into a 3d array
+//         let mut res = self.clone().into_shape((batch, seq, heads, query))?;
+//         // swap the sequence and head axes
+//         res.swap_axes(1, 2);
+//         Ok(res)
+//     }
+// }
diff --git a/models/transformers/tests/ops.rs b/models/transformers/tests/ops.rs
index c39b8efa..8227d8b2 100644
--- a/models/transformers/tests/ops.rs
+++ b/models/transformers/tests/ops.rs
@@ -5,21 +5,54 @@
 extern crate concision_core as concision;
 extern crate concision_transformers as transformers;
 
-use concision::linarr;
+use concision::prelude::{linarr, NdResult};
 use ndarray::prelude::*;
+use ndarray::Order;
 use transformers::ops::*;
 
+fn order(row_major: bool) -> Order {
+    if row_major {
+        Order::RowMajor
+    } else {
+        Order::ColumnMajor
+    }
+}
+
+fn merge3<T>(heads: &Array3<T>, row_major: bool) -> NdResult<Array2<T>>
+where
+    T: Clone,
+{
+    let (n, seq, query) = heads.dim();
+    let mut tmp = heads.clone();
+    // swap the head and sequence axes
+    tmp.swap_axes(0, 1);
+    // reshape the qkv matrix into a 2d array
+    tmp.to_shape(((seq, n * query), order(row_major)))
+        .map(|x| x.to_owned())
+}
+
+fn merge4<T>(heads: &Array4<T>, row_major: bool) -> NdResult<Array3<T>>
+where
+    T: Clone,
+{
+    let (batch, n, seq, query) = heads.dim();
+    let mut tmp = heads.clone();
+    // swap the head and sequence axes
+    tmp.swap_axes(1, 2);
+    // reshape the qkv matrix into a 2d array
+    tmp.to_shape(((batch, seq, n * query), order(row_major)))
+        .map(|x| x.to_owned())
+}
+
 #[test]
 fn test_merge() {
     let shape = (3, 4, 5);
     let dout = (4, 15);
     let arr = linarr::<f64, Ix3>(shape.clone()).unwrap();
     let a = arr.clone().merge().unwrap();
-    let b = merge(&arr, 0, 1).unwrap();
 
     assert_eq!(a.dim(), dout);
-    assert_eq!(a.dim(), b.dim());
-    assert_eq!(a, b);
+    assert_eq!(a, merge3(&arr, false).unwrap());
 }
 
 #[test]
@@ -28,25 +61,40 @@ fn test_merge_batch() {
     let dout = (2, 4, 15);
     let arr = linarr::<f64, Ix4>(shape).unwrap();
     let a = arr.merge().unwrap();
-    let b = merge(&arr, 1, 2).unwrap();
 
     assert_eq!(a.dim(), dout);
-    assert_eq!(a, b);
+    assert_eq!(a, merge4(&arr, false).unwrap());
 }
 
 #[test]
+fn test_split() {
+    let heads = 2;
+    let shape = (3, 4, 6);
+    let dout = (3, heads, 4, 3);
+    let arr = linarr::<f64, Ix3>(shape).unwrap();
+    let a = arr.split(heads).unwrap();
+
+    assert_eq!(a.dim(), dout);
+}
+
+#[test]
+#[ignore = "Needs to be fixed; currently fails when trying to recreate the original data."]
 fn reshape_ops() {
-    let dim_input: [usize; 3] = [2, 4, 6]; // (batch, seq, model)
-    let dim_split = [2, 2, 4, 3]; // (batch, heads, seq, model)
+    let heads = 2;
+    let dim_input = (2, 4, 6); // (batch, seq, model)
+    let dim_split = (2, heads, 4, 3); // (batch, heads, seq, model)
     let data = linarr::<f64, Ix3>(dim_input).unwrap();
 
-    let a = split_batch(&data, 2).unwrap();
+    let a = data.split(heads).unwrap(); // split_batch(&data, heads).unwrap();
     let b = a.merge().unwrap(); // merge_batch(&a).unwrap();
 
-    assert_eq!(a.shape(), &dim_split);
-    assert_eq!(b.shape(), &dim_input);
-    assert_eq!(a, data.split(2).unwrap());
-    for (i, &j) in b.indexed_iter() {
-        assert_eq!(j, data[i]);
-    }
+    assert_eq!(a.dim(), dim_split);
+    assert_eq!(b.dim(), dim_input);
+    assert_eq!(b, data);
+    // for (i, &j) in data.split(heads).unwrap().indexed_iter() {
+    //     assert_eq!(j, a[i]);
+    // }
+    // for (i, &j) in b.indexed_iter() {
+    //     assert_eq!(j, data[i]);
+    // }
 }

From 465eafa31506324a5f631cff2c5508bd56f5e5c2 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Sun, 19 May 2024 14:17:20 -0500
Subject: [PATCH 02/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 models/transformers/src/ops/merge.rs |   2 +-
 models/transformers/src/ops/mod.rs   | 110 +++++----------------
 models/transformers/src/ops/split.rs |  17 +---
 models/transformers/tests/ops.rs     | 141 ++++++++++++++++-----------
 4 files changed, 114 insertions(+), 156 deletions(-)

diff --git a/models/transformers/src/ops/merge.rs b/models/transformers/src/ops/merge.rs
index 2ca6296c..c7e66e3e 100644
--- a/models/transformers/src/ops/merge.rs
+++ b/models/transformers/src/ops/merge.rs
@@ -55,6 +55,6 @@ where
     }
 
     fn merge_along(&self, swap: usize) -> NdResult<Self::Output> {
-        super::_merge(self, swap, swap + 1, false)
+        super::_merge(self, swap, swap + 1, super::ORDER)
     }
 }
diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs
index 4af970b3..b177d89c 100644
--- a/models/transformers/src/ops/mod.rs
+++ b/models/transformers/src/ops/mod.rs
@@ -2,62 +2,29 @@
    Appellation: ops <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::{merge::*, split::*, utils::*};
+pub use self::prelude::*;
 
-pub(crate) mod merge;
-pub(crate) mod split;
+mod merge;
+mod split;
+
+pub(crate) mod prelude {
+    pub use super::merge::*;
+    pub use super::split::*;
+    pub(crate) use super::utils::*;
+}
+
+pub(crate) const ORDER: nd::Order = nd::Order::RowMajor;
 
 pub(crate) mod utils {
     use concision::NdResult;
     use nd::prelude::*;
     use nd::{Data, Order, RemoveAxis};
-
-    pub(crate) fn order(row_major: bool) -> Order {
-        if row_major {
-            Order::RowMajor
-        } else {
-            Order::ColumnMajor
-        }
-    }
-
-    #[deprecated(
-        since = "0.1.14",
-        note = "Please use the `Merge::merge` method instead"
-    )]
-    pub fn merge<A, S, D>(
-        arr: &ArrayBase<S, D>,
-        src: usize,
-        tgt: usize,
-    ) -> NdResult<Array<A, D::Smaller>>
-    where
-        A: Clone,
-        D: RemoveAxis,
-        S: Data<Elem = A>,
-        D::Smaller: Dimension,
-        ArrayBase<S, D>: Clone,
-    {
-        _merge(arr, src, tgt, false)
-    }
-    #[deprecated(
-        since = "0.1.14",
-        note = "Please use the `Split::Split` method instead"
-    )]
-    pub fn split<A, S, D, E>(arr: &ArrayBase<S, D>, h: usize) -> NdResult<Array<A, E>>
-    where
-        A: Clone,
-        D: Dimension<Larger = E>,
-        E: RemoveAxis<Smaller = D>,
-        S: Data<Elem = A>,
-        ArrayBase<S, D>: Clone,
-    {
-        _split(arr, h, true)
-    }
-
+    
     pub(crate) fn _merge<A, S, D>(
         arr: &ArrayBase<S, D>,
         src: usize,
         tgt: usize,
-        row_major: bool,
+        order: Order,
     ) -> NdResult<Array<A, D::Smaller>>
     where
         A: Clone,
@@ -69,14 +36,13 @@ pub(crate) mod utils {
         let shape = _merge_dim(&arr.raw_dim(), src);
         let mut head = arr.clone();
         head.swap_axes(src, tgt);
-        head.to_shape((shape, order(row_major)))
-            .map(|x| x.to_owned())
+        head.to_shape((shape, order)).map(|x| x.to_owned())
     }
 
     pub(crate) fn _split<A, S, D, E>(
         arr: &ArrayBase<S, D>,
         h: usize,
-        row_major: bool,
+        order: Order,
     ) -> NdResult<Array<A, E>>
     where
         A: Clone,
@@ -88,29 +54,27 @@ pub(crate) mod utils {
         let src = if arr.ndim() >= 2 { arr.ndim() - 2 } else { 0 };
         let tgt = src + 1;
         let shape: E = _split_dim(&arr.raw_dim(), h);
-        let mut head = arr.clone();
-
+        let mut head = arr.to_shape((shape, order))?.to_owned();
         head.swap_axes(src, tgt);
-        head.to_shape((shape, order(row_major)))
-            .map(|x| x.to_owned())
+        Ok(head)
     }
     /// Creates the new dimension after merging two axes.
-    pub(crate) fn _merge_dim<D>(dim: &D, src: usize) -> D::Smaller
+    pub(crate) fn _merge_dim<D>(dim: &D, axis: usize) -> D::Smaller
     where
         D: RemoveAxis,
         D::Smaller: Dimension,
     {
         // create a new dimension with one less axis; initialized with zeros
-        let mut new_dim = <D as Dimension>::Smaller::zeros(dim.ndim() - 1);
+        let mut dn = <D as Dimension>::Smaller::zeros(dim.ndim() - 1);
         // create a mutable vector from the slice
         let mut shape = dim.slice().to_vec();
         // multiply the last axis by the target
-        shape[new_dim.ndim()] *= shape[src];
+        shape[dn.ndim()] *= shape[axis];
         // remove the last dimension
-        shape.remove(src);
+        shape.remove(axis);
 
-        new_dim.slice_mut().copy_from_slice(&shape);
-        new_dim
+        dn.slice_mut().copy_from_slice(&shape);
+        dn
     }
 
     pub(crate) fn _split_dim<D>(dim: &D::Smaller, h: usize) -> D
@@ -128,35 +92,9 @@ pub(crate) mod utils {
         // extend the shape with the new axes
         shape.push(h);
         shape.push(bx);
-        shape.swap(rank - 2, rank - 3);
+        // shape.swap(rank - 2, rank - 3);
         // copy the values into the new dimension
         new_dim.slice_mut().copy_from_slice(&shape);
         new_dim
     }
-
-    pub fn split_heads<T>(param: &Array2<T>, h: usize) -> NdResult<Array3<T>>
-    where
-        T: Clone,
-    {
-        let dim = param.shape().last().unwrap() / h;
-        // reshape the qkv matrix into a 3d array
-        let mut res = param.clone().into_shape((param.shape()[0], h, dim))?;
-        // swap the sequence and head axes
-        res.swap_axes(0, 1);
-        Ok(res)
-    }
-
-    pub fn split_batch<T>(param: &Array3<T>, h: usize) -> NdResult<Array4<T>>
-    where
-        T: Clone,
-    {
-        let dim = param.shape().last().unwrap() / h;
-        // reshape the qkv matrix into a 3d array
-        let mut res = param
-            .clone()
-            .into_shape((param.shape()[0], param.shape()[1], h, dim))?;
-        // swap the sequence and head axes
-        res.swap_axes(1, 2);
-        Ok(res)
-    }
 }
diff --git a/models/transformers/src/ops/split.rs b/models/transformers/src/ops/split.rs
index 2a1f96c7..d98d861d 100644
--- a/models/transformers/src/ops/split.rs
+++ b/models/transformers/src/ops/split.rs
@@ -4,23 +4,14 @@
 */
 use ndarray::{Array, ArrayBase, Data, Dimension, RemoveAxis, ShapeError};
 
-// pub fn split<D: Dimension, T: Clone>(param: &Array<T, D>, heads: usize) -> Result<Array3<T>, ShapeError> {
-//     let mut dim = param.dim()
-//     let query = param.shape().last().unwrap() / heads;
-//     // reshape the qkv matrix into a 3d array
-//     let mut res = param.clone().into_shape((param.shape()[0], heads, query))?;
-//     // swap the sequence and head axes
-//     res.swap_axes(0, 1);
-//     Ok(res)
-// }
-
+/// Split a dimension into two parts
 pub trait DimSplit {
     type Output;
 
     fn split(&self, h: usize) -> Self::Output;
 }
 
-pub trait Split {
+pub trait SplitHead {
     type Output;
 
     fn split(&self, heads: usize) -> Result<Self::Output, ShapeError>;
@@ -42,7 +33,7 @@ where
     }
 }
 
-impl<A, S, D, E> Split for ArrayBase<S, D>
+impl<A, S, D, E> SplitHead for ArrayBase<S, D>
 where
     A: Clone,
     D: Dimension<Larger = E>,
@@ -53,7 +44,7 @@ where
     type Output = Array<A, E>;
 
     fn split(&self, h: usize) -> Result<Self::Output, ShapeError> {
-        super::_split(self, h, false)
+        super::_split(self, h, super::ORDER)
     }
 }
 
diff --git a/models/transformers/tests/ops.rs b/models/transformers/tests/ops.rs
index 8227d8b2..687b50db 100644
--- a/models/transformers/tests/ops.rs
+++ b/models/transformers/tests/ops.rs
@@ -4,45 +4,14 @@
 */
 extern crate concision_core as concision;
 extern crate concision_transformers as transformers;
+extern crate ndarray as nd;
 
-use concision::prelude::{linarr, NdResult};
-use ndarray::prelude::*;
-use ndarray::Order;
+use concision::linarr;
+use nd::prelude::*;
 use transformers::ops::*;
 
-fn order(row_major: bool) -> Order {
-    if row_major {
-        Order::RowMajor
-    } else {
-        Order::ColumnMajor
-    }
-}
-
-fn merge3<T>(heads: &Array3<T>, row_major: bool) -> NdResult<Array2<T>>
-where
-    T: Clone,
-{
-    let (n, seq, query) = heads.dim();
-    let mut tmp = heads.clone();
-    // swap the head and sequence axes
-    tmp.swap_axes(0, 1);
-    // reshape the qkv matrix into a 2d array
-    tmp.to_shape(((seq, n * query), order(row_major)))
-        .map(|x| x.to_owned())
-}
-
-fn merge4<T>(heads: &Array4<T>, row_major: bool) -> NdResult<Array3<T>>
-where
-    T: Clone,
-{
-    let (batch, n, seq, query) = heads.dim();
-    let mut tmp = heads.clone();
-    // swap the head and sequence axes
-    tmp.swap_axes(1, 2);
-    // reshape the qkv matrix into a 2d array
-    tmp.to_shape(((batch, seq, n * query), order(row_major)))
-        .map(|x| x.to_owned())
-}
+pub const HEADS: usize = 2;
+pub const ORDER: nd::Order = nd::Order::RowMajor;
 
 #[test]
 fn test_merge() {
@@ -52,7 +21,7 @@ fn test_merge() {
     let a = arr.clone().merge().unwrap();
 
     assert_eq!(a.dim(), dout);
-    assert_eq!(a, merge3(&arr, false).unwrap());
+    assert_eq!(a, utils::merge3(&arr).unwrap());
 }
 
 #[test]
@@ -63,38 +32,98 @@ fn test_merge_batch() {
     let a = arr.merge().unwrap();
 
     assert_eq!(a.dim(), dout);
-    assert_eq!(a, merge4(&arr, false).unwrap());
+    assert_eq!(a, utils::merge4(&arr).unwrap());
 }
 
 #[test]
 fn test_split() {
+    let heads = 2;
+    let shape = (4, 6);
+    let arr = linarr::<f64, Ix2>(shape).unwrap();
+    let a = arr.split(heads).unwrap();
+
+    assert_eq!(a.dim(), (heads, 4, 3));
+    assert_eq!(a, utils::split_heads(&arr, heads).unwrap());
+}
+
+#[test]
+fn test_split_batch() {
     let heads = 2;
     let shape = (3, 4, 6);
-    let dout = (3, heads, 4, 3);
     let arr = linarr::<f64, Ix3>(shape).unwrap();
     let a = arr.split(heads).unwrap();
 
-    assert_eq!(a.dim(), dout);
+    assert_eq!(a.dim(), (3, heads, 4, 3));
+    assert_eq!(a, utils::split_batch(&arr, heads).unwrap());
 }
 
 #[test]
-#[ignore = "Needs to be fixed; currently fails when trying to recreate the original data."]
 fn reshape_ops() {
-    let heads = 2;
-    let dim_input = (2, 4, 6); // (batch, seq, model)
-    let dim_split = (2, heads, 4, 3); // (batch, heads, seq, model)
-    let data = linarr::<f64, Ix3>(dim_input).unwrap();
-
-    let a = data.split(heads).unwrap(); // split_batch(&data, heads).unwrap();
-    let b = a.merge().unwrap(); // merge_batch(&a).unwrap();
+    let shape = (2, 4, 6);
+    let data = linarr::<f64, Ix3>(shape).unwrap();
 
-    assert_eq!(a.dim(), dim_split);
-    assert_eq!(b.dim(), dim_input);
+    let a = data.split(HEADS).unwrap();
+    assert_eq!(a.dim(), (2, HEADS, 4, 3));
+    let b = a.merge().unwrap();
+    assert_eq!(b.dim(), shape);
+    // verify that doing the ops consecutively is the identity
     assert_eq!(b, data);
-    // for (i, &j) in data.split(heads).unwrap().indexed_iter() {
-    //     assert_eq!(j, a[i]);
-    // }
-    // for (i, &j) in b.indexed_iter() {
-    //     assert_eq!(j, data[i]);
-    // }
+}
+
+#[allow(dead_code)]
+pub(crate) mod utils {
+    use concision::NdResult;
+    use ndarray::*;
+
+    pub fn merge3<T>(heads: &Array3<T>) -> NdResult<Array2<T>>
+    where
+        T: Clone,
+    {
+        let (n, seq, query) = heads.dim();
+        let shape = (seq, n * query);
+        let mut tmp = heads.clone();
+        // swap the head and sequence axes
+        tmp.swap_axes(0, 1);
+        // reshape the qkv matrix into a 2d array
+        tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned())
+    }
+
+    pub fn merge4<T>(heads: &Array4<T>) -> NdResult<Array3<T>>
+    where
+        T: Clone,
+    {
+        let (batch, n, seq, query) = heads.dim();
+        let shape = (batch, seq, n * query);
+        let mut tmp = heads.clone();
+        // swap the head and sequence axes
+        tmp.swap_axes(1, 2);
+        // reshape the qkv matrix into a 3d array
+        tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned())
+    }
+
+    pub fn split_heads<T>(param: &Array2<T>, h: usize) -> NdResult<Array3<T>>
+    where
+        T: Clone,
+    {
+        let dim = param.shape().last().unwrap() / h;
+        // reshape the qkv matrix into a 3d array
+        let mut res = param.clone().into_shape((param.shape()[0], h, dim))?;
+        // swap the sequence and head axes
+        res.swap_axes(0, 1);
+        Ok(res)
+    }
+
+    pub fn split_batch<T>(param: &Array3<T>, h: usize) -> NdResult<Array4<T>>
+    where
+        T: Clone,
+    {
+        let dim = param.shape().last().unwrap() / h;
+        // reshape the qkv matrix into a 4d array
+        let mut res = param
+            .clone()
+            .into_shape((param.shape()[0], param.shape()[1], h, dim))?;
+        // swap the sequence and head axes
+        res.swap_axes(1, 2);
+        Ok(res)
+    }
 }

From 7ee384b3c789fb5c030f31118de31d63c0de092a Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Sun, 19 May 2024 14:43:22 -0500
Subject: [PATCH 03/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/traits/arr/misc.rs                  | 34 ++++++++++++++++++--
 models/transformers/src/attention/head.rs    | 15 +++++++++
 models/transformers/src/attention/mod.rs     |  6 ++++
 models/transformers/src/impls/impl_linalg.rs |  6 ++--
 models/transformers/src/ops/mod.rs           |  2 +-
 models/transformers/src/params/mod.rs        | 20 ++++++------
 models/transformers/tests/attention.rs       |  6 ++--
 7 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs
index 4cc76e4c..a38b28e1 100644
--- a/core/src/traits/arr/misc.rs
+++ b/core/src/traits/arr/misc.rs
@@ -2,8 +2,8 @@
    Appellation: convert <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-use nd::Axis;
-use nd::{ArrayBase, Dimension, RawData};
+use nd::prelude::*;
+use nd::{DataMut, RawData};
 
 pub trait Dimensional<D> {
     type Pattern;
@@ -14,6 +14,16 @@ pub trait Dimensional<D> {
 
     fn shape(&self) -> &[usize];
 }
+/// This trait is used to fill an array with a value based on a mask.
+/// The mask is a boolean array of the same shape as the array.
+pub trait MaskFill<A, D>
+where
+    D: Dimension,
+{
+    type Output;
+
+    fn mask_fill(&self, mask: Array<bool, D>, value: A) -> Self::Output;
+}
 
 pub trait IntoAxis {
     fn into_axis(self) -> Axis;
@@ -46,6 +56,26 @@ where
     }
 }
 
+impl<A, S, D> MaskFill<A, D> for ArrayBase<S, D>
+where
+    A: Clone,
+    D: Dimension,
+    S: DataMut<Elem = A>,
+    Self: Clone,
+{
+    type Output = ArrayBase<S, D>;
+
+    fn mask_fill(&self, mask: Array<bool, D>, value: A) -> Self::Output {
+        let mut arr = self.clone();
+        arr.zip_mut_with(&mask, |x, &m| {
+            if m {
+                *x = value.clone();
+            }
+        });
+        arr
+    }
+}
+
 impl<S> IntoAxis for S
 where
     S: AsRef<usize>,
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index c5146a34..8bd27400 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -78,6 +78,21 @@ where
     ndbuilder!(zeros() where A: Clone + num::Zero, S: DataOwned);
 }
 
+impl<A, S, D> super::Attention for AttentionHead<A, D, S>
+where
+    A: ComplexFloat + ScalarOperand,
+    D: Dimension,
+    S: Data<Elem = A>,
+    ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+    Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+{
+    type Output = Array<A, D>;
+
+    fn attention(&self) -> Self::Output {
+        self.attention()
+    }
+}
+
 impl<A, S, D> Borrow<QkvBase<S, D>> for AttentionHead<A, D, S>
 where
     D: Dimension,
diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs
index 80a264c7..443f8204 100644
--- a/models/transformers/src/attention/mod.rs
+++ b/models/transformers/src/attention/mod.rs
@@ -21,6 +21,12 @@ pub(crate) mod prelude {
     pub use super::utils::*;
 }
 
+pub trait Attention {
+    type Output;
+
+    fn attention(&self) -> Self::Output;
+}
+
 pub(crate) mod utils {
     use concision::func::activate::Softmax;
     use nd::linalg::Dot;
diff --git a/models/transformers/src/impls/impl_linalg.rs b/models/transformers/src/impls/impl_linalg.rs
index ce069afe..c2ab8812 100644
--- a/models/transformers/src/impls/impl_linalg.rs
+++ b/models/transformers/src/impls/impl_linalg.rs
@@ -2,7 +2,7 @@
     Appellation: impl_linalg <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use crate::params::{Params, QkvBase};
+use crate::params::{Qkv, QkvBase};
 use concision::Matmul;
 use nd::linalg::Dot;
 use nd::*;
@@ -17,7 +17,7 @@ where
     T: Data<Elem = A>,
     ArrayBase<S, D>: Dot<ArrayBase<T, E>, Output = Array<A, F>>,
 {
-    type Output = Params<A, F>;
+    type Output = Qkv<A, F>;
 
     fn matmul(&self, rhs: &QkvBase<T, E>) -> Self::Output {
         QkvBase {
@@ -38,7 +38,7 @@ where
     T: Data<Elem = A>,
     ArrayBase<S, D>: Dot<ArrayBase<T, E>, Output = Array<A, F>>,
 {
-    type Output = Params<A, F>;
+    type Output = Qkv<A, F>;
 
     fn matmul(&self, rhs: &ArrayBase<T, E>) -> Self::Output {
         QkvBase {
diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs
index b177d89c..6612af22 100644
--- a/models/transformers/src/ops/mod.rs
+++ b/models/transformers/src/ops/mod.rs
@@ -19,7 +19,7 @@ pub(crate) mod utils {
     use concision::NdResult;
     use nd::prelude::*;
     use nd::{Data, Order, RemoveAxis};
-    
+
     pub(crate) fn _merge<A, S, D>(
         arr: &ArrayBase<S, D>,
         src: usize,
diff --git a/models/transformers/src/params/mod.rs b/models/transformers/src/params/mod.rs
index 367f8b2a..ba79e10f 100644
--- a/models/transformers/src/params/mod.rs
+++ b/models/transformers/src/params/mod.rs
@@ -4,11 +4,12 @@
 */
 pub use self::{item::*, store::QkvBase};
 
-pub(crate) mod item;
-pub(crate) mod store;
+mod store;
+
+pub mod item;
 
 macro_rules! params_ty {
-    ($target:ident: [$($name:ident<$(&$lt:lifetime)?$repr:ident>),* $(,)?]) => {
+    ($target:ident {$($name:ident: $(&$lt:lifetime)? $repr:ident),* $(,)?}) => {
         $(params_ty!(@impl $target: $name<$(&$lt)? $repr>);)*
     };
     (@impl $target:ident: $name:ident<$repr:ident>) => {
@@ -20,16 +21,17 @@ macro_rules! params_ty {
 }
 
 params_ty!(
-    QkvBase: [
-        Params<OwnedRepr>,
-        ArcParams<OwnedArcRepr>,
-        ParamsView<&'a ViewRepr>,
-    ]
+    QkvBase {
+        Qkv: OwnedRepr,
+        ArcQkv: OwnedArcRepr,
+        ViewQkv: &'a ViewRepr,
+
+    }
 );
 
 #[allow(unused_imports)]
 pub(crate) mod prelude {
     pub use super::item::{Entry, QKV};
     pub use super::store::QkvBase;
-    pub use super::{ArcParams, Params};
+    pub use super::{ArcQkv, Qkv, ViewQkv};
 }
diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs
index db1efe2a..bd7f16a6 100644
--- a/models/transformers/tests/attention.rs
+++ b/models/transformers/tests/attention.rs
@@ -6,14 +6,14 @@ extern crate concision_core as concision;
 extern crate concision_transformers as transformers;
 
 use concision::{linarr, Matmul};
-use transformers::{AttentionHead, Params};
+use transformers::{AttentionHead, Qkv};
 
 use ndarray::prelude::*;
 
 #[test]
 fn test_qkv() {
     let shape = (2048, 10);
-    let params = Params::<f64>::new(shape);
+    let params = Qkv::<f64>::new(shape);
     assert_eq!(params.q(), &Array::default(shape));
 }
 
@@ -23,7 +23,7 @@ fn test_qkv_matmul() {
     // generate some sample data
     let data = linarr(shape).unwrap();
     // initialize the parameters
-    let params = Params::<f64>::ones(shape);
+    let params = Qkv::<f64>::ones(shape);
     // calculate the expected result
     let exp = Array2::<f64>::ones(shape).dot(&data.t());
     // calculate the result

From 465da83804359d0797c13d0c618963cc81ea8942 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Sun, 19 May 2024 14:59:55 -0500
Subject: [PATCH 04/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/math/mod.rs          |  5 +++--
 core/src/traits/arr/create.rs | 12 +++++-------
 core/src/traits/arr/misc.rs   |  4 ++--
 core/tests/traits.rs          | 14 ++++++++++++--
 4 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs
index bc9dc16f..16220e0f 100644
--- a/core/src/math/mod.rs
+++ b/core/src/math/mod.rs
@@ -4,8 +4,9 @@
 */
 //! # Mathematics
 //!
-//! This module focuses on implementing various mathematical objects and operations that are
-//! critical to the development of machine learning algorithms.
+//! This module focuses on providing the mathematical foundation for the library.
+//! Any defined operation is designed to extend the functionality of the basic primitives
+//! as well as the `ndarray` crate. 
 pub use self::traits::*;
 
 pub mod traits;
diff --git a/core/src/traits/arr/create.rs b/core/src/traits/arr/create.rs
index b99a5eaa..8c45d927 100644
--- a/core/src/traits/arr/create.rs
+++ b/core/src/traits/arr/create.rs
@@ -86,11 +86,9 @@ where
     }
 }
 
-macro_rules! impl_like {
+macro_rules! impl_ndlike {
+
     ($name:ident::$method:ident.$call:ident: $($p:tt)*) => {
-        impl_like!(@impl $name::$method.$call: $($p)*);
-    };
-    (@impl $name:ident::$method:ident.$call:ident: $($p:tt)*) => {
         impl<A, S, D> $name for ArrayBase<S, D>
         where
             A: $($p)*,
@@ -106,6 +104,6 @@ macro_rules! impl_like {
     };
 }
 
-impl_like!(DefaultLike::default_like.default: Default);
-impl_like!(OnesLike::ones_like.ones: Clone + num::One);
-impl_like!(ZerosLike::zeros_like.zeros: Clone + num::Zero);
+impl_ndlike!(DefaultLike::default_like.default: Default);
+impl_ndlike!(OnesLike::ones_like.ones: Clone + num::One);
+impl_ndlike!(ZerosLike::zeros_like.zeros: Clone + num::Zero);
diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs
index a38b28e1..0b3b04e2 100644
--- a/core/src/traits/arr/misc.rs
+++ b/core/src/traits/arr/misc.rs
@@ -22,7 +22,7 @@ where
 {
     type Output;
 
-    fn mask_fill(&self, mask: Array<bool, D>, value: A) -> Self::Output;
+    fn masked_fill(&self, mask: &Array<bool, D>, value: A) -> Self::Output;
 }
 
 pub trait IntoAxis {
@@ -65,7 +65,7 @@ where
 {
     type Output = ArrayBase<S, D>;
 
-    fn mask_fill(&self, mask: Array<bool, D>, value: A) -> Self::Output {
+    fn masked_fill(&self, mask: &Array<bool, D>, value: A) -> Self::Output {
         let mut arr = self.clone();
         arr.zip_mut_with(&mask, |x, &m| {
             if m {
diff --git a/core/tests/traits.rs b/core/tests/traits.rs
index 1778fefd..ab7bd44d 100644
--- a/core/tests/traits.rs
+++ b/core/tests/traits.rs
@@ -4,8 +4,9 @@
 */
 extern crate concision_core as cnc;
 
-use cnc::traits::{Affine, AsComplex, Matpow};
-use ndarray::prelude::{array, Array2};
+use cnc::linarr;
+use cnc::traits::{Affine, AsComplex, MaskFill, Matpow};
+use ndarray::prelude::*;
 use num::Complex;
 
 #[test]
@@ -16,6 +17,15 @@ fn test_affine() {
     assert_eq!(y, array![[-2.0, 2.0], [6.0, 10.0]]);
 }
 
+#[test]
+fn test_masked_fill() {
+    let shape = (2, 2);
+    let mask = array![[true, false], [false, true]];
+    let arr = linarr::<f64, Ix2>(shape).unwrap();
+    let a = arr.masked_fill(&mask, 0.0);
+    assert_eq!(a, array![[0.0, 1.0], [2.0, 0.0]]);
+}
+
 #[test]
 fn test_as_complex() {
     let x = 1.0;

From 72f79eb3d10dfcd7edf82ff2b073ecd53d354f9a Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Sun, 19 May 2024 15:12:07 -0500
Subject: [PATCH 05/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/traits/arr/tensor.rs | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs
index 22a00f99..06a99781 100644
--- a/core/src/traits/arr/tensor.rs
+++ b/core/src/traits/arr/tensor.rs
@@ -2,6 +2,7 @@
     Appellation: generator <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use super::Dimensional;
 use nd::prelude::*;
 use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData};
 use num::{One, Zero};
@@ -40,12 +41,10 @@ where
         Self::Data: DataOwned;
 }
 
-pub trait NdBuilderExt<A = f64, D = Ix2>: NdBuilder<A, D>
+pub trait NdBuilderExt<A = f64, D = Ix2>: Dimensional<D, Pattern = D::Pattern> + NdBuilder<A, D>
 where
     D: Dimension,
 {
-    fn dim(&self) -> D::Pattern;
-
     fn default_like<Sh>(&self) -> Self::Store
     where
         A: Default,
@@ -183,14 +182,11 @@ where
     }
 }
 
-impl<A, S, D> NdBuilderExt<A, D> for ArrayBase<S, D>
+impl<U, A, D> NdBuilderExt<A, D> for U
 where
+    U: Dimensional<D, Pattern = D::Pattern> + NdBuilder<A, D>,
     D: Dimension,
-    S: RawData<Elem = A>,
 {
-    fn dim(&self) -> D::Pattern {
-        ArrayBase::dim(self)
-    }
 }
 
 impl<A, S, D> AsOwned<S, D> for ArrayBase<S, D>

From 44f3966633909426ad0fcaf37edf5a684fc3a742 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Sun, 19 May 2024 15:26:10 -0500
Subject: [PATCH 06/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/traits/arr/misc.rs   | 16 ++++++++++++++++
 core/src/traits/arr/tensor.rs |  9 +++++++++
 2 files changed, 25 insertions(+)

diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs
index 0b3b04e2..51a5996d 100644
--- a/core/src/traits/arr/misc.rs
+++ b/core/src/traits/arr/misc.rs
@@ -5,13 +5,28 @@
 use nd::prelude::*;
 use nd::{DataMut, RawData};
 
+/// [Dimensional] provides a common interface for containers to access their shape and dimension.
 pub trait Dimensional<D> {
+    const RANK: Option<usize> = None;
+
     type Pattern;
 
     fn dim(&self) -> Self::Pattern;
 
+    fn is_scalar(&self) -> bool {
+        self.rank() == 0 || self.shape().iter().all(|x| *x == 1)
+    }
+
+    fn rank(&self) -> usize {
+        Self::RANK.unwrap_or(self.shape().len())
+    }
+
     fn raw_dim(&self) -> D;
 
+    fn size(&self) -> usize {
+        self.shape().iter().product()
+    }
+
     fn shape(&self) -> &[usize];
 }
 /// This trait is used to fill an array with a value based on a mask.
@@ -41,6 +56,7 @@ where
     D: Dimension,
     S: RawData,
 {
+    const RANK: Option<usize> = D::NDIM;
     type Pattern = D::Pattern;
 
     fn shape(&self) -> &[usize] {
diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs
index 06a99781..41de9471 100644
--- a/core/src/traits/arr/tensor.rs
+++ b/core/src/traits/arr/tensor.rs
@@ -7,6 +7,15 @@ use nd::prelude::*;
 use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData};
 use num::{One, Zero};
 
+/// This trait describes the basic operations for any n-dimensional container.
+pub trait NdContainer<A = f64, D = Ix2>: Dimensional<D> {
+    type Data;
+
+    fn as_slice(&self) -> &[A];
+
+    fn as_mut_slice(&mut self) -> &mut [A];
+}
+
 /// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase)
 pub trait NdBuilder<A = f64, D = Ix2>
 where

From 4b67274ef9cb037d1dd3663113ea475d1bead0b0 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Mon, 20 May 2024 09:39:09 -0500
Subject: [PATCH 07/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/init/{gen => distr}/lecun.rs |  13 +--
 core/src/init/distr/trunc.rs          |  81 ++++++++++++++++++
 core/src/init/distr/xavier.rs         | 116 ++++++++++++++++++++++++++
 core/src/init/initialize.rs           |  28 ++++---
 core/src/init/mod.rs                  |   8 +-
 core/src/math/mod.rs                  |   2 +-
 core/src/traits/arr/tensor.rs         |   3 +-
 core/tests/random.rs                  |  28 ++++++-
 8 files changed, 258 insertions(+), 21 deletions(-)
 rename core/src/init/{gen => distr}/lecun.rs (73%)
 create mode 100644 core/src/init/distr/trunc.rs
 create mode 100644 core/src/init/distr/xavier.rs

diff --git a/core/src/init/gen/lecun.rs b/core/src/init/distr/lecun.rs
similarity index 73%
rename from core/src/init/gen/lecun.rs
rename to core/src/init/distr/lecun.rs
index b8cae16c..0c4763c5 100644
--- a/core/src/init/gen/lecun.rs
+++ b/core/src/init/distr/lecun.rs
@@ -1,10 +1,11 @@
 /*
-    Appellation: lecun <module>
+    Appellation: lecun <distr>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::init::distr::TruncatedNormal;
 use num::Float;
 use rand::Rng;
-use rand_distr::{Distribution, Normal, NormalError, StandardNormal};
+use rand_distr::{Distribution, NormalError, StandardNormal};
 
 /// [LecunNormal] is a truncated [normal](rand_distr::Normal) distribution centered at 0
 /// with a standard deviation that is calculated as `σ = sqrt(1/n_in)`
@@ -18,14 +19,14 @@ impl LecunNormal {
     pub fn new(n: usize) -> Self {
         Self { n }
     }
-    /// Create a [normal](rand_distr::Normal) [distribution](Distribution) centered at 0;
+    /// Create a [truncated normal](TruncatedNormal) [distribution](Distribution) centered at 0;
     /// See [Self::std_dev] for the standard deviation calculations.
-    pub fn distr<F>(&self) -> Result<Normal<F>, NormalError>
+    pub fn distr<F>(&self) -> Result<TruncatedNormal<F>, NormalError>
     where
         F: Float,
         StandardNormal: Distribution<F>,
     {
-        Normal::new(F::zero(), self.std_dev())
+        TruncatedNormal::new(F::zero(), self.std_dev())
     }
     /// Calculate the standard deviation (`σ`) of the distribution.
     /// This is done by computing the root of the reciprocal of the number of inputs
@@ -48,6 +49,6 @@ where
     where
         R: Rng + ?Sized,
     {
-        self.distr().unwrap().sample(rng)
+        self.distr().expect("NormalError").sample(rng)
     }
 }
diff --git a/core/src/init/distr/trunc.rs b/core/src/init/distr/trunc.rs
new file mode 100644
index 00000000..fc94f0b9
--- /dev/null
+++ b/core/src/init/distr/trunc.rs
@@ -0,0 +1,81 @@
+/*
+    Appellation: trunc <distr>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use num::traits::Float;
+use rand::Rng;
+use rand_distr::{Distribution, Normal, NormalError, StandardNormal};
+
+/// A truncated normal distribution is similar to a [normal](rand_distr::Normal) [distribution](rand_distr::Distribution), however,
+/// any generated value over two standard deviations from the mean is discarded and re-generated.
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct TruncatedNormal<F>
+where
+    StandardNormal: Distribution<F>,
+{
+    mean: F,
+    std: F,
+}
+
+impl<F> TruncatedNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    /// Create a new truncated normal distribution; errors if [Normal::new] rejects `(mean, std)`.
+    pub fn new(mean: F, std: F) -> Result<Self, NormalError> {
+        Normal::new(mean, std).map(Self::from)
+    }
+    /// The upper truncation bound: two standard deviations above the mean.
+    pub(crate) fn boundary(&self) -> F {
+        self.mean() + self.std_dev() * F::from(2).unwrap()
+    }
+    /// Rescale a standard-normal draw `x` to this distribution: `mean - std * x`.
+    pub(crate) fn score(&self, x: F) -> F {
+        self.mean() - self.std_dev() * x
+    }
+    /// The untruncated [Normal] distribution sharing this mean and standard deviation.
+    pub fn distr(&self) -> Normal<F> {
+        Normal::new(self.mean(), self.std_dev()).unwrap()
+    }
+    /// The mean of the distribution.
+    pub fn mean(&self) -> F {
+        self.mean
+    }
+    /// The standard deviation of the distribution.
+    pub fn std_dev(&self) -> F {
+        self.std
+    }
+}
+
+impl<F> Distribution<F> for TruncatedNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    fn sample<R>(&self, rng: &mut R) -> F
+    where
+        R: Rng + ?Sized,
+    {
+        // accept only draws within two standard deviations of the mean (either side); else re-sample
+        let width = (self.boundary() - self.mean()).abs();
+        let mut x = self.score(rng.sample(StandardNormal));
+        while (x - self.mean()).abs() > width {
+            x = self.score(rng.sample(StandardNormal));
+        }
+        x
+    }
+}
+
+impl<F> From<Normal<F>> for TruncatedNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    fn from(normal: Normal<F>) -> Self {
+        Self {
+            mean: normal.mean(),
+            std: normal.std_dev(),
+        }
+    }
+}
diff --git a/core/src/init/distr/xavier.rs b/core/src/init/distr/xavier.rs
new file mode 100644
index 00000000..37f43251
--- /dev/null
+++ b/core/src/init/distr/xavier.rs
@@ -0,0 +1,116 @@
+/*
+    Appellation: xavier <distr>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+//! # Xavier
+//!
+//! Xavier initialization techniques were developed in 2010 by Xavier Glorot.
+//! These methods are designed to initialize the weights of a neural network in a way that
+//! prevents the vanishing and exploding gradient problems. The initialization technique
+//! manifests into two distributions: [XavierNormal] and [XavierUniform].
+use num::Float;
+use rand::Rng;
+use rand_distr::uniform::{SampleUniform, Uniform};
+use rand_distr::{Distribution, Normal, NormalError, StandardNormal};
+
+pub(crate) fn std_dev<F>(inputs: usize, outputs: usize) -> F
+where
+    F: Float,
+{
+    (F::from(2).unwrap() / F::from(inputs + outputs).unwrap()).sqrt()
+}
+
+pub(crate) fn boundary<F>(inputs: usize, outputs: usize) -> F
+where
+    F: Float,
+{
+    (F::from(6).unwrap() / F::from(inputs + outputs).unwrap()).sqrt()
+}
+/// Normal Xavier initializers leverage a normal distribution with a mean of 0 and a standard deviation (`σ`)
+/// computed by the formula: `σ = sqrt(2/(d_in + d_out))`
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct XavierNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    std: F,
+}
+
+impl<F> XavierNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    pub fn new(inputs: usize, outputs: usize) -> Self {
+        Self {
+            std: std_dev(inputs, outputs),
+        }
+    }
+    /// Build the zero-mean [Normal] with this standard deviation; errors if [Normal::new] rejects it.
+    pub fn distr(&self) -> Result<Normal<F>, NormalError> {
+        Normal::new(F::zero(), self.std_dev())
+    }
+    /// The standard deviation (`σ`) computed in [Self::new] from `inputs` and `outputs`.
+    pub fn std_dev(&self) -> F {
+        self.std
+    }
+}
+
+impl<F> Distribution<F> for XavierNormal<F>
+where
+    F: Float,
+    StandardNormal: Distribution<F>,
+{
+    fn sample<R>(&self, rng: &mut R) -> F
+    where
+        R: Rng + ?Sized,
+    {
+        self.distr().unwrap().sample(rng)
+    }
+}
+
+/// Uniform Xavier initializers use a uniform distribution to initialize the weights of a neural network
+/// within a given range.
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct XavierUniform<X>
+where
+    X: SampleUniform,
+{
+    boundary: X,
+}
+
+impl<X> XavierUniform<X>
+where
+    X: Float + SampleUniform,
+{
+    pub fn new(inputs: usize, outputs: usize) -> Self {
+        Self {
+            boundary: boundary(inputs, outputs),
+        }
+    }
+    /// The bound stored by [Self::new]: `sqrt(6 / (inputs + outputs))`.
+    pub fn boundary(&self) -> X {
+        self.boundary
+    }
+    /// Build the [Uniform] distribution spanning `-boundary` to `boundary`.
+    pub fn distr(&self) -> Uniform<X>
+    where
+        X: Float,
+    {
+        let bnd = self.boundary();
+        Uniform::new(-bnd, bnd)
+    }
+}
+
+impl<X> Distribution<X> for XavierUniform<X>
+where
+    X: Float + SampleUniform,
+{
+    fn sample<R>(&self, rng: &mut R) -> X
+    where
+        R: Rng + ?Sized,
+    {
+        self.distr().sample(rng)
+    }
+}
diff --git a/core/src/init/initialize.rs b/core/src/init/initialize.rs
index 91b41b13..4d2eb51d 100644
--- a/core/src/init/initialize.rs
+++ b/core/src/init/initialize.rs
@@ -2,16 +2,17 @@
     Appellation: initialize <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::init::distr::*;
+
 use core::ops::Neg;
 use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder};
 use ndrand::RandomExt;
 use num::complex::ComplexDistribution;
 use num::traits::Float;
-use rand::{rngs, Rng, SeedableRng};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
 use rand_distr::uniform::{SampleUniform, Uniform};
-use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, StandardNormal};
-
-use super::LecunNormal;
+use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, NormalError, StandardNormal};
 
 /// This trait provides the base methods required for initializing an [ndarray](ndarray::ArrayBase) with random values.
 /// [Initialize] is similar to [RandomExt](ndarray_rand::RandomExt), however, it focuses on flexibility while implementing additional
@@ -79,7 +80,7 @@ where
         Self::rand(shape, distr)
     }
     /// Given a shape, mean, and standard deviation generate a new object using the [Normal](rand_distr::Normal) distribution
-    fn normal<Sh>(shape: Sh, mean: A, std: A) -> Result<Self, rand_distr::NormalError>
+    fn normal<Sh>(shape: Sh, mean: A, std: A) -> Result<Self, NormalError>
     where
         A: Float,
         S: DataOwned,
@@ -115,11 +116,18 @@ where
         Sh: ShapeBuilder<Dim = D>,
         StandardNormal: Distribution<A>,
     {
-        Self::rand_with(
-            shape,
-            StandardNormal,
-            &mut rngs::StdRng::seed_from_u64(seed),
-        )
+        Self::rand_with(shape, StandardNormal, &mut StdRng::seed_from_u64(seed))
+    }
+    /// Initialize the object using the [TruncatedNormal](crate::init::distr::TruncatedNormal) distribution
+    fn truncnorm<Sh>(shape: Sh, mean: A, std: A) -> Result<Self, NormalError>
+    where
+        A: Float,
+        S: DataOwned,
+        Sh: ShapeBuilder<Dim = D>,
+        StandardNormal: Distribution<A>,
+    {
+        let distr = TruncatedNormal::new(mean, std)?;
+        Ok(Self::rand(shape, distr))
     }
     /// A [uniform](rand_distr::uniform::Uniform) generator with values between u(-dk, dk)
     fn uniform<Sh>(shape: Sh, dk: A) -> Self
diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs
index 22ee1bac..2ebcd583 100644
--- a/core/src/init/mod.rs
+++ b/core/src/init/mod.rs
@@ -16,13 +16,17 @@ pub use self::prelude::*;
 pub(crate) mod initialize;
 pub(crate) mod utils;
 
-pub mod gen {
+pub mod distr {
     pub use self::prelude::*;
 
     pub mod lecun;
+    pub mod trunc;
+    pub mod xavier;
 
     pub(crate) mod prelude {
         pub use super::lecun::*;
+        pub use super::trunc::*;
+        pub use super::xavier::*;
     }
 }
 
@@ -34,7 +38,7 @@ pub use rand;
 pub use rand_distr;
 
 pub(crate) mod prelude {
-    pub use super::gen::prelude::*;
+    pub use super::distr::prelude::*;
     pub use super::initialize::{Initialize, InitializeExt};
     pub use super::utils::*;
 }
diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs
index 16220e0f..d8084442 100644
--- a/core/src/math/mod.rs
+++ b/core/src/math/mod.rs
@@ -6,7 +6,7 @@
 //!
 //! This module focuses on providing the mathematical foundation for the library.
 //! Any defined operation is designed to extend the functionality of the basic primitives
-//! as well as the `ndarray` crate. 
+//! as well as the `ndarray` crate.
 pub use self::traits::*;
 
 pub mod traits;
diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs
index 41de9471..9bd5d863 100644
--- a/core/src/traits/arr/tensor.rs
+++ b/core/src/traits/arr/tensor.rs
@@ -50,7 +50,8 @@ where
         Self::Data: DataOwned;
 }
 
-pub trait NdBuilderExt<A = f64, D = Ix2>: Dimensional<D, Pattern = D::Pattern> + NdBuilder<A, D>
+pub trait NdBuilderExt<A = f64, D = Ix2>:
+    Dimensional<D, Pattern = D::Pattern> + NdBuilder<A, D>
 where
     D: Dimension,
 {
diff --git a/core/tests/random.rs b/core/tests/random.rs
index daa76435..39d29e5d 100644
--- a/core/tests/random.rs
+++ b/core/tests/random.rs
@@ -4,11 +4,12 @@
 */
 extern crate concision_core as cnc;
 
+use cnc::init::distr::LecunNormal;
 use cnc::init::InitializeExt;
 use ndarray::prelude::*;
 
 #[test]
-fn test_stdnorm() {
+fn test_init_ext() {
     let shape = [3, 3];
     let seed = 0u64;
     let a = Array2::<f64>::stdnorm(shape);
@@ -17,3 +18,28 @@ fn test_stdnorm() {
     assert_eq!(a.shape(), shape);
     assert_eq!(a.shape(), b.shape());
 }
+
+#[test]
+fn test_lecun_normal() {
+    let n = 3;
+    let shape = (3, 3);
+
+    let distr = LecunNormal::new(n);
+
+    let bnd = 2f64 * distr.std_dev::<f64>();
+
+    let arr = Array2::<f64>::lecun_normal(shape, n);
+
+    assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd));
+
+    assert_eq!(arr.dim(), shape);
+}
+
+#[test]
+fn test_truncnorm() {
+    let (mean, std) = (0f64, 2f64);
+    let bnd = 2f64 * std;
+    let shape = (3, 3);
+    let arr = Array::truncnorm(shape, mean, std).unwrap();
+    assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd));
+}

From 6f8d88ee84f6c0cf060c930e5530026adb186786 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Mon, 20 May 2024 09:40:04 -0500
Subject: [PATCH 08/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/init/distr/xavier.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/src/init/distr/xavier.rs b/core/src/init/distr/xavier.rs
index 37f43251..ecc1ee0c 100644
--- a/core/src/init/distr/xavier.rs
+++ b/core/src/init/distr/xavier.rs
@@ -8,6 +8,7 @@
 //! These methods are designed to initialize the weights of a neural network in a way that
 //! prevents the vanishing and exploding gradient problems. The initialization technique
 //! manifests into two distributions: [XavierNormal] and [XavierUniform].
+// #76
 use num::Float;
 use rand::Rng;
 use rand_distr::uniform::{SampleUniform, Uniform};

From 02080dc50921237bcbc03cf3fc035e75eade41a7 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Mon, 20 May 2024 11:11:53 -0500
Subject: [PATCH 09/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/Cargo.toml                            | 12 +++-
 core/src/func/mod.rs                       |  3 -
 core/src/lib.rs                            |  1 +
 core/src/{func => nn}/dropout.rs           | 68 ++++++++++++----------
 core/src/nn/mod.rs                         |  6 +-
 core/src/types/mask.rs                     | 10 ++++
 core/src/types/mod.rs                      |  2 +
 core/tests/{func.rs => nn.rs}              |  4 +-
 models/transformers/src/attention/head.rs  | 34 +++++++++--
 models/transformers/src/attention/mod.rs   | 54 ++++++++++++++++-
 models/transformers/src/impls/impl_head.rs |  2 +
 models/transformers/src/params/store.rs    |  2 +
 12 files changed, 150 insertions(+), 48 deletions(-)
 rename core/src/{func => nn}/dropout.rs (56%)
 create mode 100644 core/src/types/mask.rs
 rename core/tests/{func.rs => nn.rs} (83%)

diff --git a/core/Cargo.toml b/core/Cargo.toml
index b0891b64..a4d06c91 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -103,14 +103,20 @@ crate-type = ["lib"]
 doctest = false
 test = true
 
-[[test]]
-name = "random"
-required-features = ["rand"]
+
 
 [[test]]
 name = "fft"
 required-features = ["approx"]
 
+[[test]]
+name = "nn"
+
+[[test]]
+name = "random"
+required-features = ["rand", "std"]
+
+
 [build-dependencies]
 
 [dependencies]
diff --git a/core/src/func/mod.rs b/core/src/func/mod.rs
index bb99ccba..96513d96 100644
--- a/core/src/func/mod.rs
+++ b/core/src/func/mod.rs
@@ -7,12 +7,9 @@ pub use self::prelude::*;
 
 #[macro_use]
 pub mod activate;
-pub mod dropout;
 pub mod loss;
 
 pub(crate) mod prelude {
     pub use super::activate::prelude::*;
-    #[cfg(feature = "rand")]
-    pub use super::dropout::*;
     pub use super::loss::prelude::*;
 }
diff --git a/core/src/lib.rs b/core/src/lib.rs
index 5906aa6f..a09d48e1 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -34,6 +34,7 @@ pub mod types;
 pub mod utils;
 
 pub mod prelude {
+    #[allow(unused_imports)]
     pub(crate) use super::primitives::rust::*;
 
     pub use super::error::prelude::*;
diff --git a/core/src/func/dropout.rs b/core/src/nn/dropout.rs
similarity index 56%
rename from core/src/func/dropout.rs
rename to core/src/nn/dropout.rs
index 00b24b13..19acdbc0 100644
--- a/core/src/func/dropout.rs
+++ b/core/src/nn/dropout.rs
@@ -2,14 +2,15 @@
     Appellation: dropout <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-#![cfg(feature = "rand")]
+#![allow(unused_imports)]
 use crate::Forward;
 use nd::prelude::*;
-use nd::{DataOwned, RemoveAxis, ScalarOperand};
-use ndrand::rand_distr::Bernoulli;
-use ndrand::RandomExt;
+use nd::{DataOwned, ScalarOperand};
+#[cfg(feature = "rand")]
+use ndrand::{rand_distr::Bernoulli, RandomExt};
 use num::traits::Num;
 
+#[cfg(feature = "rand")]
 pub fn dropout<A, S, D>(array: &ArrayBase<S, D>, p: f64) -> Array<A, D>
 where
     A: Num + ScalarOperand,
@@ -27,44 +28,46 @@ where
     array * mask
 }
 
-pub fn dropout_axis<A, S, D>(array: &ArrayBase<S, D>, _axis: Axis, p: f64) -> Array<A, D>
-where
-    A: Num + ScalarOperand,
-    D: RemoveAxis,
-    S: DataOwned<Elem = A>,
-{
-    // Create a Bernoulli distribution for dropout
-    let distribution = Bernoulli::new(p).unwrap();
+/// [Dropout] randomly zeroizes elements with a given probability (`p`).
+pub trait Dropout {
+    type Output;
 
-    // Create a mask of the same shape as the input array
-    let _mask: Array<bool, D> = Array::random(array.dim(), distribution);
-
-    unimplemented!()
+    fn dropout(&self, p: f64) -> Self::Output;
 }
 
-/// The [Dropout] layer is randomly zeroizes inputs with a given probability (`p`).
+/// The [DropoutLayer] randomly zeroizes inputs with a given probability (`p`).
 /// This regularization technique is often used to prevent overfitting.
 ///
 ///
 /// ### Config
 ///
 /// - (p) Probability of dropping an element
-pub struct Dropout {
-    p: f64,
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct DropoutLayer {
+    pub(crate) p: f64,
 }
 
-impl Dropout {
-    pub fn new(p: f64) -> Self {
-        Self { p }
+/*
+ ************* Implementations *************
+*/
+#[cfg(feature = "rand")]
+impl<A, S, D> Dropout for ArrayBase<S, D>
+where
+    A: Num + ScalarOperand,
+    D: Dimension,
+    S: DataOwned<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn dropout(&self, p: f64) -> Self::Output {
+        dropout(self, p)
     }
+}
 
-    pub fn dropout<A, S, D>(&self, array: &ArrayBase<S, D>) -> Array<A, D>
-    where
-        A: Num + ScalarOperand,
-        D: Dimension,
-        S: DataOwned<Elem = A>,
-    {
-        dropout(array, self.p)
+impl DropoutLayer {
+    pub fn new(p: f64) -> Self {
+        Self { p }
     }
 
     pub fn scale(&self) -> f64 {
@@ -72,13 +75,14 @@ impl Dropout {
     }
 }
 
-impl Default for Dropout {
+impl Default for DropoutLayer {
     fn default() -> Self {
         Self::new(0.5)
     }
 }
 
-impl<A, S, D> Forward<ArrayBase<S, D>> for Dropout
+#[cfg(feature = "rand")]
+impl<A, S, D> Forward<ArrayBase<S, D>> for DropoutLayer
 where
     A: Num + ScalarOperand,
     D: Dimension,
@@ -87,6 +91,6 @@ where
     type Output = Array<A, D>;
 
     fn forward(&self, input: &ArrayBase<S, D>) -> Self::Output {
-        dropout(input, self.p)
+        input.dropout(self.p)
     }
 }
diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs
index c0eb1f81..d4c7fa48 100644
--- a/core/src/nn/mod.rs
+++ b/core/src/nn/mod.rs
@@ -2,13 +2,15 @@
    Appellation: nn <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::{error::ModelError, model::prelude::*};
+pub use self::{dropout::*, error::ModelError, model::prelude::*};
 
+pub mod dropout;
 pub mod error;
 pub mod model;
 
 pub(crate) mod prelude {
-    pub use super::error::ModelError;
+    pub use super::dropout::*;
+    pub use super::error::*;
     pub use super::model::prelude::*;
 }
 
diff --git a/core/src/types/mask.rs b/core/src/types/mask.rs
new file mode 100644
index 00000000..87494557
--- /dev/null
+++ b/core/src/types/mask.rs
@@ -0,0 +1,10 @@
+/*
+    Appellation: mask <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::*;
+
+pub struct Mask<S, D>(ArrayBase<S, D>)
+where
+    D: Dimension,
+    S: RawData<Elem = bool>;
diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs
index a639d5a4..e015605a 100644
--- a/core/src/types/mod.rs
+++ b/core/src/types/mod.rs
@@ -6,6 +6,7 @@ pub use self::prelude::*;
 #[cfg(feature = "std")]
 pub use self::std_types::*;
 
+pub mod mask;
 pub mod propagate;
 
 pub type NdResult<T> = core::result::Result<T, nd::ShapeError>;
@@ -22,6 +23,7 @@ mod std_types {
 }
 
 pub(crate) mod prelude {
+    pub use super::mask::*;
     pub use super::propagate::Propagate;
     #[cfg(feature = "std")]
     pub use super::std_types::*;
diff --git a/core/tests/func.rs b/core/tests/nn.rs
similarity index 83%
rename from core/tests/func.rs
rename to core/tests/nn.rs
index e1a5ccef..55b51198 100644
--- a/core/tests/func.rs
+++ b/core/tests/nn.rs
@@ -1,7 +1,7 @@
 #![allow(unused_imports)]
 extern crate concision_core as concision;
 
-use concision::func::Dropout;
+use concision::nn::DropoutLayer;
 use concision::Forward;
 use ndarray::prelude::*;
 
@@ -10,7 +10,7 @@ use ndarray::prelude::*;
 fn test_dropout() {
     let shape = (512, 2048);
     let arr = Array2::<f64>::ones(shape);
-    let dropout = Dropout::new(0.5);
+    let dropout = DropoutLayer::new(0.5);
     let out = dropout.forward(&arr);
 
     assert!(arr.iter().all(|&x| x == 1.0));
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index 8bd27400..0fb67382 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -4,6 +4,8 @@
 */
 use crate::params::QkvBase;
 use concision::getters;
+use concision::nn::DropoutLayer;
+
 use core::borrow::{Borrow, BorrowMut};
 use nd::linalg::Dot;
 use nd::*;
@@ -15,7 +17,8 @@ where
     D: Dimension,
     S: RawData<Elem = A>,
 {
-    pub(crate) mask: Option<ArrayBase<S, D>>,
+    pub(crate) dropout: Option<DropoutLayer>,
+    pub(crate) mask: Option<Array<bool, D>>,
     pub(crate) params: QkvBase<S, D>,
 }
 
@@ -25,7 +28,11 @@ where
     S: RawData<Elem = A>,
 {
     pub fn from_params(params: QkvBase<S, D>) -> Self {
-        Self { mask: None, params }
+        Self {
+            dropout: None,
+            mask: None,
+            params,
+        }
     }
 
     pub fn builder<Sh, F>(shape: Sh, builder: F) -> Self
@@ -44,7 +51,18 @@ where
     {
         Self::from_params(QkvBase::from_elem(shape, value))
     }
-
+    #[cfg(not(feature = "rand"))]
+    pub fn attention(&self) -> Array<A, D>
+    where
+        A: ComplexFloat + ScalarOperand,
+        S: Data,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+    {
+        let (q, k, v) = self.qkv();
+        super::_attention_no_dropout(q, k, v, self.mask())
+    }
+    #[cfg(feature = "rand")]
     pub fn attention(&self) -> Array<A, D>
     where
         A: ComplexFloat + ScalarOperand,
@@ -53,7 +71,15 @@ where
         Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
         let (q, k, v) = self.qkv();
-        crate::attention::scaled_dot_product_attention(q, k, v)
+        super::_attention(q, k, v, self.mask(), self.dropout())
+    }
+    /// Returns an immutable reference to the optional [DropoutLayer], if one is set
+    pub fn dropout(&self) -> Option<&DropoutLayer> {
+        self.dropout.as_ref()
+    }
+    /// Returns an immutable reference to the optional boolean attention mask, if one is set
+    pub fn mask(&self) -> Option<&Array<bool, D>> {
+        self.mask.as_ref()
+    }
     /// Returns an immuable reference to the underlying parameters.
     pub const fn params(&self) -> &QkvBase<S, D> {
diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs
index 443f8204..33dea032 100644
--- a/models/transformers/src/attention/mod.rs
+++ b/models/transformers/src/attention/mod.rs
@@ -28,7 +28,9 @@ pub trait Attention {
 }
 
 pub(crate) mod utils {
-    use concision::func::activate::Softmax;
+    use concision::func::Softmax;
+    use concision::nn::DropoutLayer;
+    use concision::MaskFill;
     use nd::linalg::Dot;
     use nd::prelude::{Array, ArrayBase, ArrayView, Axis, Dimension};
     use nd::{Data, ScalarOperand};
@@ -46,6 +48,7 @@ pub(crate) mod utils {
         q: &ArrayBase<S, D>,
         k: &ArrayBase<S, D>,
         v: &ArrayBase<S, D>,
+        mask: Option<&Array<bool, D>>,
     ) -> Array<A, D>
     where
         A: ComplexFloat + ScalarOperand,
@@ -54,7 +57,54 @@ pub(crate) mod utils {
         ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
         Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
+        _attention_no_dropout(q, k, v, mask)
+    }
+
+    pub(crate) fn _attention_no_dropout<A, S, D>(
+        q: &ArrayBase<S, D>,
+        k: &ArrayBase<S, D>,
+        v: &ArrayBase<S, D>,
+        mask: Option<&Array<bool, D>>,
+    ) -> Array<A, D>
+    where
+        A: ComplexFloat + ScalarOperand,
+        S: Data<Elem = A>,
+        D: Dimension,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+    {
+        let dk = scale::<A>(k.len_of(Axis(1)));
+        let mut z = q.dot(&k.t()) * dk;
+        if let Some(mask) = mask {
+            z = z.masked_fill(mask, A::zero());
+        }
+        z.softmax().dot(&v)
+    }
+    #[cfg(feature = "rand")]
+    pub(crate) fn _attention<A, S, D>(
+        q: &ArrayBase<S, D>,
+        k: &ArrayBase<S, D>,
+        v: &ArrayBase<S, D>,
+        mask: Option<&Array<bool, D>>,
+        dropout: Option<&DropoutLayer>,
+    ) -> Array<A, D>
+    where
+        A: ComplexFloat + ScalarOperand,
+        S: Data<Elem = A>,
+        D: Dimension,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+    {
+        use concision::Forward;
         let dk = scale::<A>(k.len_of(Axis(1)));
-        (q.dot(&k.t()) * dk).softmax().dot(&v)
+        let mut z = q.dot(&k.t()) * dk;
+        if let Some(mask) = mask {
+            z = z.masked_fill(mask, A::zero());
+        }
+        z = z.softmax();
+        if let Some(dropout) = dropout {
+            z = dropout.forward(&z);
+        }
+        z.dot(&v)
     }
 }
diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs
index fa22f80a..c8fc92da 100644
--- a/models/transformers/src/impls/impl_head.rs
+++ b/models/transformers/src/impls/impl_head.rs
@@ -15,6 +15,7 @@ where
 {
     fn clone(&self) -> Self {
         Self {
+            dropout: self.dropout.clone(),
             mask: self.mask.clone(),
             params: self.params.clone(),
         }
@@ -26,6 +27,7 @@ where
     A: Copy,
     D: Copy + Dimension,
     S: Copy + RawDataClone<Elem = A>,
+    Array<bool, D>: Copy,
 {
 }
 
diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs
index 90c13693..db99143a 100644
--- a/models/transformers/src/params/store.rs
+++ b/models/transformers/src/params/store.rs
@@ -6,6 +6,8 @@ use concision::{dimensional, getters};
 use nd::*;
 use num::traits::{One, Zero};
 
+/// [QkvBase] is a container for the query, key, and value arrays used in the
+/// attention mechanism of the transformer model.
 pub struct QkvBase<S = OwnedRepr<f64>, D = Ix2>
 where
     D: Dimension,

From a7fac83aa066aec2fcb9599d0ea11dd64d3778c2 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Mon, 20 May 2024 12:58:22 -0500
Subject: [PATCH 10/19] Add Features/ModelShape types and Mask impls; gate attention dropout behind `rand`

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 .github/ISSUE_TEMPLATE/issue.md            |  17 +++
 .github/ISSUE_TEMPLATE/proposal.md         |  11 +-
 .github/ISSUE_TEMPLATE/tracking.md         |  17 +++
 core/src/types/mask.rs                     | 116 +++++++++++++-
 core/src/types/mod.rs                      |   4 +-
 core/src/types/shape.rs                    | 166 +++++++++++++++++++++
 models/transformers/src/attention/head.rs  |  55 ++-----
 models/transformers/src/attention/mod.rs   |  62 +++-----
 models/transformers/src/impls/impl_head.rs |  43 +++++-
 models/transformers/src/lib.rs             |   1 -
 models/transformers/src/primitives.rs      |  11 ++
 11 files changed, 415 insertions(+), 88 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/issue.md
 create mode 100644 .github/ISSUE_TEMPLATE/tracking.md
 create mode 100644 core/src/types/shape.rs

diff --git a/.github/ISSUE_TEMPLATE/issue.md b/.github/ISSUE_TEMPLATE/issue.md
new file mode 100644
index 00000000..925cfede
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/issue.md
@@ -0,0 +1,17 @@
+---
+about: A generic issue template
+assignees:
+    - FL03
+labels: []
+projects: ['@FL03/concision:features']
+name: Generic Issue
+title: ''
+---
+
+**Describe the proposal or feature that this issue is tracking.**
+
+## Issues
+
+- [ ]
+
+## Pull Requests
diff --git a/.github/ISSUE_TEMPLATE/proposal.md b/.github/ISSUE_TEMPLATE/proposal.md
index d7bacdf8..d8370f22 100644
--- a/.github/ISSUE_TEMPLATE/proposal.md
+++ b/.github/ISSUE_TEMPLATE/proposal.md
@@ -1,15 +1,14 @@
 ---
-name: Improvement Proposal
 about: A formal proposal discussing any new features, changes, or improvements to the project.
-title: 'CNC-0000:'
-labels: ['proposal']
-projects: ['@FL03/concision:features', '@FL03/concision:roadmap']
 assignees:
     - FL03
-
+labels: ['proposal']
+name: Improvement Proposal
+projects: ['@FL03/concision:features', '@FL03/concision:roadmap']
+title: 'CNC-0000:'
 ---
 
 
 ### Resources
 
-- [Google](https://google.com)
\ No newline at end of file
+- [company](https://github.com/scattered-systems)
diff --git a/.github/ISSUE_TEMPLATE/tracking.md b/.github/ISSUE_TEMPLATE/tracking.md
new file mode 100644
index 00000000..0139c486
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/tracking.md
@@ -0,0 +1,17 @@
+---
+about: Create a new tracking issue to track the progress of a proposal or feature.
+assignees:
+    - FL03
+labels: ['tracking']
+projects: ['@FL03/concision:features']
+name: Tracking Issue
+title: 'Tracking Issue:'
+---
+
+**Describe the proposal or feature that this issue is tracking.**
+
+## Issues
+
+- [ ]
+
+## Pull Requests
diff --git a/core/src/types/mask.rs b/core/src/types/mask.rs
index 87494557..9be85784 100644
--- a/core/src/types/mask.rs
+++ b/core/src/types/mask.rs
@@ -2,9 +2,123 @@
     Appellation: mask <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use nd::*;
+
+use nd::prelude::*;
+use nd::RawData;
+
+pub trait NdMask<D = Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = bool>;
+}
+
+
 
 pub struct Mask<S, D>(ArrayBase<S, D>)
 where
     D: Dimension,
     S: RawData<Elem = bool>;
+
+impl<S, D> Mask<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = bool>,
+{
+    pub fn new(data: ArrayBase<S, D>) -> Self {
+        Self(data)
+    }
+}
+
+
+/*
+    ************* Implementations *************
+*/
+mod impls {
+    use super::*;
+    use core::borrow::{Borrow, BorrowMut};
+    use core::ops::{Deref, DerefMut};
+
+    impl<S, D> AsRef<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn as_ref(&self) -> &ArrayBase<S, D> {
+            &self.0
+        }
+    }
+
+    impl<S, D> AsMut<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn as_mut(&mut self) -> &mut ArrayBase<S, D> {
+            &mut self.0
+        }
+    }
+
+    impl<S, D> Borrow<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn borrow(&self) -> &ArrayBase<S, D> {
+            &self.0
+        }
+    }
+
+    impl<S, D> BorrowMut<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn borrow_mut(&mut self) -> &mut ArrayBase<S, D> {
+            &mut self.0
+        }
+    }
+
+    impl<S, D> Deref for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        type Target = ArrayBase<S, D>;
+
+        fn deref(&self) -> &Self::Target {
+            &self.0
+        }
+    }
+
+    impl<S, D> DerefMut for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn deref_mut(&mut self) -> &mut Self::Target {
+            &mut self.0
+        }
+    }
+
+    impl<S, D> From<ArrayBase<S, D>> for Mask<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn from(mask: ArrayBase<S, D>) -> Self {
+            Mask(mask)
+        }
+    }
+
+    impl<S, D> From<Mask<S, D>> for ArrayBase<S, D>
+    where
+        D: Dimension,
+        S: RawData<Elem = bool>,
+    {
+        fn from(mask: Mask<S, D>) -> Self {
+            mask.0
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs
index e015605a..75cddd52 100644
--- a/core/src/types/mod.rs
+++ b/core/src/types/mod.rs
@@ -8,6 +8,7 @@ pub use self::std_types::*;
 
 pub mod mask;
 pub mod propagate;
+pub mod shape;
 
 pub type NdResult<T> = core::result::Result<T, nd::ShapeError>;
 /// A type alias for a [Result](core::result::Result) with the crate's [Error](crate::error::Error) type.
@@ -23,8 +24,9 @@ mod std_types {
 }
 
 pub(crate) mod prelude {
-    pub use super::mask::*;
+    pub use super::mask::Mask;
     pub use super::propagate::Propagate;
+    pub use super::shape::ModelShape;
     #[cfg(feature = "std")]
     pub use super::std_types::*;
     pub use super::{NdResult, Result};
diff --git a/core/src/types/shape.rs b/core/src/types/shape.rs
new file mode 100644
index 00000000..37e75047
--- /dev/null
+++ b/core/src/types/shape.rs
@@ -0,0 +1,166 @@
+/*
+    Appellation: shape <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::prelude::{Ix1, Ix2};
+use nd::{Dimension, ErrorKind, IntoDimension, RemoveAxis, ShapeBuilder, ShapeError};
+
+pub(crate) fn _from_dim<D>(dim: D) -> Result<Features, ShapeError>
+where
+    D: Dimension,
+{
+    if dim.ndim() == 1 {
+        Ok(Features::new(dim[0], 1))
+    } else if dim.ndim() >= 2 {
+        Ok(Features::new(dim[1], dim[0]))
+    } else {
+        Err(ShapeError::from_kind(ErrorKind::IncompatibleShape))
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct ModelShape {
+    pub(crate) features: Features,
+    pub(crate) network: usize,
+}
+
+impl ModelShape {
+    pub fn new(model: usize, network: usize) -> Self {
+        let features = Features::from_network(model, network);
+        Self { features, network }
+    }
+
+    pub fn from_features(features: Features) -> Self {
+        Self {
+            features,
+            network: features.size(),
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct Features {
+    pub(crate) inputs: usize,
+    pub(crate) outputs: usize,
+}
+
+impl Features {
+    /// Create a new, unchecked [Features] instance.
+    ///
+    pub fn new(inputs: usize, outputs: usize) -> Self {
+        debug_assert_ne!(inputs, 0);
+        debug_assert_ne!(outputs, 0);
+
+        Self { inputs, outputs }
+    }
+    /// Attempts to build a new [Features] instance from the given dimension ([`D`](Dimension))
+    pub fn from_dimension<D>(dim: D) -> Result<Self, ShapeError>
+    where
+        D: Dimension,
+    {
+        _from_dim(dim)
+    }
+    /// Builds a new instance from the given shape ([`Sh`](ShapeBuilder));
+    /// Unlike [Features::from_dimension], this method requires the dimension (`D`) to
+    /// additionally implement the [RemoveAxis] trait
+    pub fn from_shape<D, Sh>(shape: Sh) -> Self
+    where
+        D: RemoveAxis,
+        Sh: ShapeBuilder<Dim = D>,
+    {
+        let dim = shape.into_shape().raw_dim().clone();
+        _from_dim(dim).unwrap()
+    }
+    /// Creates a new instance given the model size (`inputs`, `d_model`) and total number of nodes within the network (`size`, `network`, `d_network`)
+    pub fn from_network(model: usize, network: usize) -> Self {
+        let outputs = network / model;
+        Self::new(model, outputs)
+    }
+
+    pub const fn as_array(&self) -> [usize; 2] {
+        [self.outputs(), self.inputs()]
+    }
+    /// Returns the features as a two-tuple in `(outputs, inputs)` order;
+    pub const fn as_tuple(&self) -> (usize, usize) {
+        (self.outputs(), self.inputs())
+    }
+    pub fn check_dim<D>(&self, dim: D) -> bool
+    where
+        D: Dimension,
+    {
+        if dim.ndim() == 1 {
+            self.inputs() == dim[0]
+        } else if dim.ndim() >= 2 {
+            self.outputs() == dim[0] && self.inputs() == dim[1]
+        } else {
+            false
+        }
+    }
+    /// Forwards the [into_pattern](ndarray::Dimension::into_pattern) method from the [Dimension] trait
+    #[inline]
+    pub fn into_pattern(self) -> (usize, usize) {
+        self.into_dimension().into_pattern()
+    }
+    /// An aliased function that returns the number of input features
+    pub const fn d_model(&self) -> usize {
+        self.inputs()
+    }
+    /// Returns the number of input features
+    pub const fn inputs(&self) -> usize {
+        self.inputs
+    }
+    /// Checks to see if the features speak to a so-called `unit`;
+    /// i.e. see if the number of output features is equal to 1.
+    pub fn is_unit(&self) -> bool {
+        self.outputs() == 1
+    }
+    /// Returns the number of output features
+    pub const fn outputs(&self) -> usize {
+        self.outputs
+    }
+    /// Computes the total number of nodes in the network
+    pub fn size(&self) -> usize {
+        self.inputs() * self.outputs()
+    }
+    #[doc(hidden)]
+    pub fn uniform_scale(&self) -> f64 {
+        (self.inputs as f64).recip().sqrt()
+    }
+}
+
+impl IntoDimension for Features {
+    type Dim = Ix2;
+
+    fn into_dimension(self) -> Self::Dim {
+        (self.outputs, self.inputs).into_dimension()
+    }
+}
+
+impl From<Ix1> for Features {
+    fn from(dim: Ix1) -> Self {
+        Self::new(1, dim[0])
+    }
+}
+
+impl From<Ix2> for Features {
+    fn from(dim: Ix2) -> Self {
+        Self::new(dim[1], dim[0])
+    }
+}
+
+impl From<Features> for Ix2 {
+    fn from(features: Features) -> Self {
+        features.into_dimension()
+    }
+}
+
+impl<U> PartialEq<U> for Features
+where
+    [usize; 2]: PartialEq<U>,
+{
+    fn eq(&self, other: &U) -> bool {
+        self.as_array() == *other
+    }
+}
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index 0fb67382..7e24804c 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -5,18 +5,21 @@
 use crate::params::QkvBase;
 use concision::getters;
 use concision::nn::DropoutLayer;
-
-use core::borrow::{Borrow, BorrowMut};
 use nd::linalg::Dot;
 use nd::*;
 use num::complex::ComplexFloat;
 
 // #68
+/// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in
+/// [Attention is all you need](https://arxiv.org/abs/1706.03762).
+///
+/// [DropoutLayer]: requires the `rand` feature
 pub struct AttentionHead<A = f64, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
     S: RawData<Elem = A>,
 {
+    #[cfg(feature = "rand")]
     pub(crate) dropout: Option<DropoutLayer>,
     pub(crate) mask: Option<Array<bool, D>>,
     pub(crate) params: QkvBase<S, D>,
@@ -29,6 +32,7 @@ where
 {
     pub fn from_params(params: QkvBase<S, D>) -> Self {
         Self {
+            #[cfg(feature = "rand")]
             dropout: None,
             mask: None,
             params,
@@ -51,18 +55,7 @@ where
     {
         Self::from_params(QkvBase::from_elem(shape, value))
     }
-    #[cfg(not(feature = "rand"))]
-    pub fn attention(&self) -> Array<A, D>
-    where
-        A: ComplexFloat + ScalarOperand,
-        S: Data,
-        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
-        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
-    {
-        let (q, k, v) = self.qkv();
-        super::_attention_no_dropout(q, k, v, self.mask())
-    }
-    #[cfg(feature = "rand")]
+    /// Computes the score using scaled dot-product attention.
     pub fn attention(&self) -> Array<A, D>
     where
         A: ComplexFloat + ScalarOperand,
@@ -73,10 +66,6 @@ where
         let (q, k, v) = self.qkv();
         super::_attention(q, k, v, self.mask(), self.dropout())
     }
-
-    pub fn dropout(&self) -> Option<&DropoutLayer> {
-        self.dropout.as_ref()
-    }
     /// Returns an immutable reference to the, optional, [Dropout] layer
     pub fn mask(&self) -> Option<&Array<bool, D>> {
         self.mask.as_ref()
@@ -104,37 +93,25 @@ where
     ndbuilder!(zeros() where A: Clone + num::Zero, S: DataOwned);
 }
 
-impl<A, S, D> super::Attention for AttentionHead<A, D, S>
-where
-    A: ComplexFloat + ScalarOperand,
-    D: Dimension,
-    S: Data<Elem = A>,
-    ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
-    Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
-{
-    type Output = Array<A, D>;
-
-    fn attention(&self) -> Self::Output {
-        self.attention()
-    }
-}
-
-impl<A, S, D> Borrow<QkvBase<S, D>> for AttentionHead<A, D, S>
+#[cfg(feature = "rand")]
+impl<A, S, D> AttentionHead<A, D, S>
 where
     D: Dimension,
     S: RawData<Elem = A>,
 {
-    fn borrow(&self) -> &QkvBase<S, D> {
-        self.params()
+    pub fn dropout(&self) -> Option<&DropoutLayer> {
+        self.dropout.as_ref()
     }
 }
 
-impl<A, S, D> BorrowMut<QkvBase<S, D>> for AttentionHead<A, D, S>
+#[cfg(not(feature = "rand"))]
+impl<A, S, D> AttentionHead<A, D, S>
 where
     D: Dimension,
     S: RawData<Elem = A>,
 {
-    fn borrow_mut(&mut self) -> &mut QkvBase<S, D> {
-        self.params_mut()
+    #[cfg(not(feature = "rand"))]
+    pub fn dropout(&self) -> Option<&DropoutLayer> {
+        None
     }
 }
diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs
index 33dea032..279793fe 100644
--- a/models/transformers/src/attention/mod.rs
+++ b/models/transformers/src/attention/mod.rs
@@ -8,6 +8,7 @@
 //! Today, these mechanisms are found in several state-of-the-art models, such as
 //! the Transformer model, primarily due to its capabilities in natural language
 //! processing (NLP) domains
+pub(crate) use self::_impl_methods::*;
 pub use self::head::AttentionHead;
 pub use self::utils::*;
 
@@ -28,59 +29,36 @@ pub trait Attention {
 }
 
 pub(crate) mod utils {
-    use concision::func::Softmax;
     use concision::nn::DropoutLayer;
-    use concision::MaskFill;
     use nd::linalg::Dot;
-    use nd::prelude::{Array, ArrayBase, ArrayView, Axis, Dimension};
-    use nd::{Data, ScalarOperand};
+    use nd::prelude::*;
     use num::complex::ComplexFloat;
 
-    pub(crate) fn scale<A>(dk: usize) -> A
-    where
-        A: ComplexFloat,
-    {
-        A::from(dk).unwrap().sqrt().recip()
-    }
-
     /// A functional implementation of the scaled dot-product attention mechanism;
     pub fn scaled_dot_product_attention<A, S, D>(
         q: &ArrayBase<S, D>,
         k: &ArrayBase<S, D>,
         v: &ArrayBase<S, D>,
         mask: Option<&Array<bool, D>>,
+        dropout: Option<&DropoutLayer>,
     ) -> Array<A, D>
     where
-        A: ComplexFloat + ScalarOperand,
-        S: Data<Elem = A>,
+        A: ComplexFloat + nd::ScalarOperand,
+        S: nd::Data<Elem = A>,
         D: Dimension,
         ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
         Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
-        _attention_no_dropout(q, k, v, mask)
+        super::_attention(q, k, v, mask, dropout)
     }
+}
+
+mod _impl_methods {
+    use concision::prelude::{DropoutLayer, MaskFill, Softmax};
+    use nd::linalg::Dot;
+    use nd::prelude::*;
+    use num::complex::ComplexFloat;
 
-    pub(crate) fn _attention_no_dropout<A, S, D>(
-        q: &ArrayBase<S, D>,
-        k: &ArrayBase<S, D>,
-        v: &ArrayBase<S, D>,
-        mask: Option<&Array<bool, D>>,
-    ) -> Array<A, D>
-    where
-        A: ComplexFloat + ScalarOperand,
-        S: Data<Elem = A>,
-        D: Dimension,
-        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
-        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
-    {
-        let dk = scale::<A>(k.len_of(Axis(1)));
-        let mut z = q.dot(&k.t()) * dk;
-        if let Some(mask) = mask {
-            z = z.masked_fill(mask, A::zero());
-        }
-        z.softmax().dot(&v)
-    }
-    #[cfg(feature = "rand")]
     pub(crate) fn _attention<A, S, D>(
         q: &ArrayBase<S, D>,
         k: &ArrayBase<S, D>,
@@ -89,22 +67,30 @@ pub(crate) mod utils {
         dropout: Option<&DropoutLayer>,
     ) -> Array<A, D>
     where
-        A: ComplexFloat + ScalarOperand,
-        S: Data<Elem = A>,
+        A: ComplexFloat + nd::ScalarOperand,
+        S: nd::Data<Elem = A>,
         D: Dimension,
         ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
         Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
         use concision::Forward;
-        let dk = scale::<A>(k.len_of(Axis(1)));
+        let dk = scale::<A>(k.len_of(nd::Axis(1)));
         let mut z = q.dot(&k.t()) * dk;
         if let Some(mask) = mask {
             z = z.masked_fill(mask, A::zero());
         }
         z = z.softmax();
+        #[cfg(feature = "rand")]
         if let Some(dropout) = dropout {
             z = dropout.forward(&z);
         }
         z.dot(&v)
     }
+
+    pub(crate) fn scale<A>(dk: usize) -> A
+    where
+        A: ComplexFloat,
+    {
+        A::from(dk).unwrap().sqrt().recip()
+    }
 }
diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs
index c8fc92da..9b8e61a7 100644
--- a/models/transformers/src/impls/impl_head.rs
+++ b/models/transformers/src/impls/impl_head.rs
@@ -2,10 +2,48 @@
     Appellation: impl_head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use crate::attention::AttentionHead;
+use crate::attention::{Attention, AttentionHead};
 use crate::params::QkvBase;
+use core::borrow::{Borrow, BorrowMut};
+use nd::linalg::Dot;
 use nd::prelude::*;
-use nd::{DataOwned, RawDataClone};
+use nd::{Data, DataOwned, RawData, RawDataClone, ScalarOperand};
+use num::complex::ComplexFloat;
+
+impl<A, S, D> Attention for AttentionHead<A, D, S>
+where
+    A: ComplexFloat + ScalarOperand,
+    D: Dimension,
+    S: Data<Elem = A>,
+    ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+    Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+{
+    type Output = Array<A, D>;
+
+    fn attention(&self) -> Self::Output {
+        self.attention()
+    }
+}
+
+impl<A, S, D> Borrow<QkvBase<S, D>> for AttentionHead<A, D, S>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn borrow(&self) -> &QkvBase<S, D> {
+        self.params()
+    }
+}
+
+impl<A, S, D> BorrowMut<QkvBase<S, D>> for AttentionHead<A, D, S>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn borrow_mut(&mut self) -> &mut QkvBase<S, D> {
+        self.params_mut()
+    }
+}
 
 impl<A, S, D> Clone for AttentionHead<A, D, S>
 where
@@ -15,6 +53,7 @@ where
 {
     fn clone(&self) -> Self {
         Self {
+            #[cfg(feature = "rand")]
             dropout: self.dropout.clone(),
             mask: self.mask.clone(),
             params: self.params.clone(),
diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs
index ed9cf63e..37c5c1ce 100644
--- a/models/transformers/src/lib.rs
+++ b/models/transformers/src/lib.rs
@@ -40,6 +40,5 @@ pub(crate) mod impls {
 
 pub mod prelude {
     pub use super::attention::prelude::*;
-    pub use super::primitives::*;
     pub use super::Transformer;
 }
diff --git a/models/transformers/src/primitives.rs b/models/transformers/src/primitives.rs
index 96db829b..5f5c1bcf 100644
--- a/models/transformers/src/primitives.rs
+++ b/models/transformers/src/primitives.rs
@@ -5,6 +5,17 @@
 pub use self::consts::*;
 
 pub mod consts {
+    /// The default dimension of the model; i.e. the number of inputs
+    pub const D_MODEL: usize = 512;
+    /// The default size of the network; i.e. the number of neurons in the network
+    pub const D_NETWORK: usize = 2048;
+
+    /// The default number of attention heads
+    pub const H: usize = 8;
     /// The default number of layers used for the encoder / decoder.
     pub const N: usize = 6;
 }
+
+pub fn outputs_from_ratio(model: usize, network: usize) -> usize {
+    network / model
+}

From e6bb98019ac878408097c880adb9488572b6fdc0 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Mon, 20 May 2024 14:55:27 -0500
Subject: [PATCH 11/19] Rename `transformers` feature to `transformer`; add attention score, initializer, and IterStats

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 concision/Cargo.toml                          |  9 +-
 concision/examples/transformer.rs             | 39 ++++++++
 concision/src/lib.rs                          |  6 +-
 core/src/init/initializer.rs                  | 25 ++++++
 core/src/init/mod.rs                          |  2 +
 core/src/math/traits.rs                       | 39 +++++++-
 data/src/tensor/mod.rs                        |  2 +-
 data/src/tensor/ndtensor/traits.rs            |  4 +-
 models/transformers/Cargo.toml                |  9 +-
 models/transformers/src/attention/head.rs     | 14 ++-
 models/transformers/src/attention/mod.rs      | 12 ++-
 .../transformers/src/attention/multi/mod.rs   |  4 +
 .../src/attention/multi/multi_head.rs         | 14 ++-
 models/transformers/src/attention/score.rs    | 89 +++++++++++++++++++
 models/transformers/src/impls/impl_head.rs    | 14 ++-
 models/transformers/src/impls/impl_init.rs    | 61 +++++++++++++
 models/transformers/src/macros.rs             | 82 +++--------------
 models/transformers/src/macros/params.rs      | 51 +++++++++++
 models/transformers/src/params/store.rs       | 12 +--
 models/transformers/tests/attention.rs        | 34 ++-----
 models/transformers/tests/params.rs           | 35 ++++++++
 21 files changed, 433 insertions(+), 124 deletions(-)
 create mode 100644 concision/examples/transformer.rs
 create mode 100644 core/src/init/initializer.rs
 create mode 100644 models/transformers/src/attention/score.rs
 create mode 100644 models/transformers/src/impls/impl_init.rs
 create mode 100644 models/transformers/src/macros/params.rs
 create mode 100644 models/transformers/tests/params.rs

diff --git a/concision/Cargo.toml b/concision/Cargo.toml
index e8ddbdfb..1cd286ac 100644
--- a/concision/Cargo.toml
+++ b/concision/Cargo.toml
@@ -45,7 +45,7 @@ models = [
     "gnn",
     "kan",
     "linear",
-    "transformers",
+    "transformer",
 ]
 
 gnn = [
@@ -60,7 +60,7 @@ linear = [
     "dep:concision-linear",
 ]
 
-transformers = [
+transformer = [
     "dep:concision-transformers",
 ]
 
@@ -184,6 +184,10 @@ test = true
 name = "linear"
 required-features = ["linear", "rand", "serde", "tracing"]
 
+[[example]]
+name = "transformer"
+required-features = ["transformer", "rand", "serde", "tracing"]
+
 [build-dependencies]
 
 [dependencies.concision-core]
@@ -229,6 +233,7 @@ version = "0.1.14"
 
 [dev-dependencies]
 anyhow = "1"
+approx.workspace = true
 lazy_static.workspace = true
 ndarray.workspace = true
 num = { features = ["rand", "serde"], version = "0.4" }
diff --git a/concision/examples/transformer.rs b/concision/examples/transformer.rs
new file mode 100644
index 00000000..c1a36fc7
--- /dev/null
+++ b/concision/examples/transformer.rs
@@ -0,0 +1,39 @@
+/*
+    Appellation: transformer <example>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+extern crate concision as cnc;
+
+use approx::AbsDiffEq;
+use cnc::transformer::AttentionHead;
+use cnc::prelude::Result;
+use ndarray::Array2;
+
+fn tracing() {
+    use tracing::Level;
+    use tracing_subscriber::fmt::time;
+
+    tracing_subscriber::fmt()
+        .compact()
+        .with_ansi(true)
+        .with_max_level(Level::DEBUG)
+        .with_target(false)
+        .with_timer(time::uptime())
+        .init();
+}
+
+fn main() -> Result<()> {
+    tracing();
+    tracing::info!("Starting up the transformer model example...");
+
+    let shape = (3, 3);
+    let head = AttentionHead::<f64>::ones(shape);
+    let score = head.attention();
+    assert!(score.attention().abs_diff_eq(&Array2::from_elem(shape, 1f64/3f64), 1e-6));
+    println!("{:?}", score);
+
+
+    
+
+    Ok(())
+}
diff --git a/concision/src/lib.rs b/concision/src/lib.rs
index ced1df1d..ae868246 100644
--- a/concision/src/lib.rs
+++ b/concision/src/lib.rs
@@ -27,9 +27,9 @@ pub use concision_kan as kan;
 pub use concision_linear as linear;
 #[cfg(feature = "macros")]
 pub use concision_macros::*;
-#[cfg(feature = "transformers")]
+#[cfg(feature = "transformer")]
 #[doc(inline)]
-pub use concision_transformers as transformers;
+pub use concision_transformers as transformer;
 
 pub mod prelude {
     pub use concision_core::prelude::*;
@@ -45,4 +45,6 @@ pub mod prelude {
     pub use concision_linear::prelude::*;
     #[cfg(feature = "macros")]
     pub use concision_macros::*;
+    #[cfg(feature = "transformer")]
+    pub use concision_transformers::prelude::*;
 }
diff --git a/core/src/init/initializer.rs b/core/src/init/initializer.rs
new file mode 100644
index 00000000..75c20579
--- /dev/null
+++ b/core/src/init/initializer.rs
@@ -0,0 +1,25 @@
+/*
+    Appellation: initializer <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use super::Initialize;
+use core::marker::PhantomData;
+use nd::prelude::*;
+use nd::DataOwned;
+use rand_distr::{Distribution, StandardNormal};
+
+pub struct InitializerBase<A = f64, D = Ix2, Dst = StandardNormal> where D: Dimension, Dst: Clone + Distribution<A> {
+    pub(crate) dim: D,
+    pub(crate) distr: Dst,
+    pub(crate) _dtype: PhantomData<A>,
+}
+
+impl<A, D, Dst> InitializerBase<A, D, Dst> where D: Dimension, Dst: Clone + Distribution<A> {
+    pub fn new(dim: D, distr: Dst) -> Self {
+        Self { dim, distr, _dtype: PhantomData::<A> }
+    }
+
+    pub fn init<S>(self) -> ArrayBase<S, D> where S: DataOwned<Elem = A> {
+        ArrayBase::rand(self.dim, self.distr)
+    }
+}
diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs
index 2ebcd583..f2bb4731 100644
--- a/core/src/init/mod.rs
+++ b/core/src/init/mod.rs
@@ -16,6 +16,8 @@ pub use self::prelude::*;
 pub(crate) mod initialize;
 pub(crate) mod utils;
 
+pub mod initializer;
+
 pub mod distr {
     pub use self::prelude::*;
 
diff --git a/core/src/math/traits.rs b/core/src/math/traits.rs
index d71d433d..78e67337 100644
--- a/core/src/math/traits.rs
+++ b/core/src/math/traits.rs
@@ -2,9 +2,46 @@
     Appellation: traits <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use core::iter::Sum;
 use nd::{Array, ArrayBase, Data, Dimension};
 use num::complex::{Complex, ComplexFloat};
-use num::traits::Signed;
+use num::traits::{FromPrimitive, Num, Signed};
+
+pub trait IterStats<T> where T: FromPrimitive {
+    type Output;
+
+    fn elems(&self) -> T;
+
+    fn mean(&self) -> Self::Output;
+
+    fn std(&self) -> Self::Output where T: ComplexFloat;
+
+    fn var(&self) -> Self::Output where T: ComplexFloat;
+}
+
+impl<T, I> IterStats<T> for I where I: Clone + ExactSizeIterator<Item = T>, T: Clone + FromPrimitive + Num + Sum {
+    type Output = T;
+
+    fn elems(&self) -> T {
+        T::from_usize(self.len()).unwrap()
+    }
+
+    fn mean(&self) -> Self::Output {
+        self.clone().sum::<T>() / self.elems()
+    }
+
+    fn std(&self) -> Self::Output where T: ComplexFloat {
+        let mean = self.mean();
+        let sum = self.clone().map(|x| (x - mean).powi(2)).sum::<T>();
+        (sum / self.elems()).sqrt()
+    }
+
+    fn var(&self) -> Self::Output where T: ComplexFloat {
+        let mean = self.mean();
+        let sum = self.clone().map(|x| (x - mean).powi(2)).sum::<T>();
+        sum / self.elems()
+    }
+}
 
 unary!(
     Abs::abs(self),
diff --git a/data/src/tensor/mod.rs b/data/src/tensor/mod.rs
index 17945f2f..7d31345f 100644
--- a/data/src/tensor/mod.rs
+++ b/data/src/tensor/mod.rs
@@ -2,6 +2,6 @@
     Appellation: tensor <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::ndtensor::NdTensor;
+pub use self::ndtensor::NdContainer;
 
 pub mod ndtensor;
diff --git a/data/src/tensor/ndtensor/traits.rs b/data/src/tensor/ndtensor/traits.rs
index c55c3afe..67af23b1 100644
--- a/data/src/tensor/ndtensor/traits.rs
+++ b/data/src/tensor/ndtensor/traits.rs
@@ -11,7 +11,7 @@ pub trait TensorData {
     fn as_mut_slice(&mut self) -> &mut [Self::Elem];
 }
 
-pub trait NdTensor<T> {
+pub trait NdContainer<T> {
     const RANK: Option<usize> = None;
 
     type Data: TensorData<Elem = T>;
@@ -28,6 +28,8 @@ pub trait NdTensor<T> {
     fn shape(&self) -> &[usize];
 }
 
+
+
 /*
  ******** implementations ********
 */
diff --git a/models/transformers/Cargo.toml b/models/transformers/Cargo.toml
index 00bf9fb7..d0558100 100644
--- a/models/transformers/Cargo.toml
+++ b/models/transformers/Cargo.toml
@@ -95,6 +95,10 @@ crate-type = ["lib"]
 doctest = true
 test = true
 
+[[test]]
+name = "attention"
+required-features = ["approx", "rand"]
+
 [build-dependencies]
 
 [dependencies]
@@ -128,8 +132,9 @@ version = "1"
 optional = true
 version = "0.1"
 
-[dev-dependencies.lazy_static]
-workspace = true
+[dev-dependencies]
+approx.workspace = true
+lazy_static.workspace = true
 
 [package.metadata.docs.rs]
 all-features = true
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index 7e24804c..cc42c531 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -2,6 +2,7 @@
     Appellation: head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use super::{_attention, Score};
 use crate::params::QkvBase;
 use concision::getters;
 use concision::nn::DropoutLayer;
@@ -9,11 +10,16 @@ use nd::linalg::Dot;
 use nd::*;
 use num::complex::ComplexFloat;
 
+
+
 // #68
 /// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in
 /// [Attention is all you need](https://arxiv.org/abs/1706.03762).
 ///
-/// [DropoutLayer]: requires the `rand` feature
+/// ### Fields
+/// 
+/// [dropout](DropoutLayer): requires the `rand` feature
+/// 
 pub struct AttentionHead<A = f64, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
@@ -55,8 +61,8 @@ where
     {
         Self::from_params(QkvBase::from_elem(shape, value))
     }
-    /// Computes the score using scaled dot-product attention.
-    pub fn attention(&self) -> Array<A, D>
+    /// Computes the [Score] using scaled dot-product attention.
+    pub fn attention(&self) -> Score<A, D>
     where
         A: ComplexFloat + ScalarOperand,
         S: Data,
@@ -64,7 +70,7 @@ where
         Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
     {
         let (q, k, v) = self.qkv();
-        super::_attention(q, k, v, self.mask(), self.dropout())
+        _attention(q, k, v, self.mask(), self.dropout())
     }
     /// Returns an immutable reference to the, optional, [Dropout] layer
     pub fn mask(&self) -> Option<&Array<bool, D>> {
diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs
index 279793fe..a500b5f5 100644
--- a/models/transformers/src/attention/mod.rs
+++ b/models/transformers/src/attention/mod.rs
@@ -10,15 +10,19 @@
 //! processing (NLP) domains
 pub(crate) use self::_impl_methods::*;
 pub use self::head::AttentionHead;
+pub use self::score::Score;
 pub use self::utils::*;
 
 pub(crate) mod head;
+pub(crate) mod score;
 
 // #69: Multi-Head Attention implementation
 pub mod multi;
 
 pub(crate) mod prelude {
     pub use super::head::AttentionHead;
+    pub use super::multi::prelude::*;
+    pub use super::score::Score;
     pub use super::utils::*;
 }
 
@@ -29,6 +33,7 @@ pub trait Attention {
 }
 
 pub(crate) mod utils {
+    use super::Score;
     use concision::nn::DropoutLayer;
     use nd::linalg::Dot;
     use nd::prelude::*;
@@ -41,7 +46,7 @@ pub(crate) mod utils {
         v: &ArrayBase<S, D>,
         mask: Option<&Array<bool, D>>,
         dropout: Option<&DropoutLayer>,
-    ) -> Array<A, D>
+    ) -> Score<A, D>
     where
         A: ComplexFloat + nd::ScalarOperand,
         S: nd::Data<Elem = A>,
@@ -54,6 +59,7 @@ pub(crate) mod utils {
 }
 
 mod _impl_methods {
+    use super::Score;
     use concision::prelude::{DropoutLayer, MaskFill, Softmax};
     use nd::linalg::Dot;
     use nd::prelude::*;
@@ -65,7 +71,7 @@ mod _impl_methods {
         v: &ArrayBase<S, D>,
         mask: Option<&Array<bool, D>>,
         dropout: Option<&DropoutLayer>,
-    ) -> Array<A, D>
+    ) -> Score<A, D>
     where
         A: ComplexFloat + nd::ScalarOperand,
         S: nd::Data<Elem = A>,
@@ -84,7 +90,7 @@ mod _impl_methods {
         if let Some(dropout) = dropout {
             z = dropout.forward(&z);
         }
-        z.dot(&v)
+        (z.dot(&v), z).into()
     }
 
     pub(crate) fn scale<A>(dk: usize) -> A
diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs
index 014e29b9..67592c2d 100644
--- a/models/transformers/src/attention/multi/mod.rs
+++ b/models/transformers/src/attention/multi/mod.rs
@@ -8,3 +8,7 @@
 pub use self::multi_head::*;
 
 pub(crate) mod multi_head;
+
+pub(crate) mod prelude {
+    pub use super::multi_head::MultiHeadAttention;
+}
\ No newline at end of file
diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs
index ad36fe45..e59061ae 100644
--- a/models/transformers/src/attention/multi/multi_head.rs
+++ b/models/transformers/src/attention/multi/multi_head.rs
@@ -3,4 +3,16 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 
-pub struct MultiHeadAttention;
+pub struct Config {
+    pub heads: usize,
+}
+
+pub struct MultiHeadAttention {
+    pub(crate) config: Config,
+}
+
+impl MultiHeadAttention {
+    pub const fn config(&self) -> &Config {
+        &self.config
+    }
+}
diff --git a/models/transformers/src/attention/score.rs b/models/transformers/src/attention/score.rs
new file mode 100644
index 00000000..c2548040
--- /dev/null
+++ b/models/transformers/src/attention/score.rs
@@ -0,0 +1,89 @@
+/*
+    Appellation: score <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use core::fmt;
+use nd::{Array, Dimension};
+
+/// [Score] is a created as a result of invoking an attention mechanism;
+/// 
+/// - attention: the actual result; returns the dot product of the score with the value tensor
+/// - score: the attention score tensor
+#[derive(Clone, Eq, Hash, PartialEq)]
+pub struct Score<A, D> where D: Dimension {
+    pub(crate) attention: Array<A, D>,
+    pub(crate) score: Array<A, D>,
+}
+
+impl<A, D> Score<A, D>
+where
+    D: Dimension,
+{
+    pub(crate) fn new(attention: Array<A, D>, score: Array<A, D>) -> Self {
+        Self { attention, score }
+    }
+    /// Consumes the instance and returns the attention tensor.
+    pub fn into_attention(self) -> Array<A, D> {
+        self.attention
+    }
+    /// Consumes the container and returns the score tensor.
+    pub fn into_score(self) -> Array<A, D> {
+        self.score
+    }
+
+    /// Retrieve the attention tensor.
+    pub fn attention(&self) -> &Array<A, D> {
+        &self.attention
+    }
+    /// Retrieve the score tensor
+    pub fn score(&self) -> &Array<A, D> {
+        &self.score
+    }
+}
+
+impl<A, D> Copy for Score<A, D> where A: Copy, D: Copy + Dimension, Array<A, D>: Copy {
+
+}
+
+impl<A, D> fmt::Debug for Score<A, D>
+where
+    A: fmt::Debug,
+    D: Dimension,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Score")
+            .field("attention", &self.attention)
+            .field("score", &self.score)
+            .finish()
+    }
+}
+
+impl<A, D> fmt::Display for Score<A, D>
+where
+    A: fmt::Display,
+    D: Dimension,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "({}, {})", self.attention, self.score)
+    }
+}
+
+
+
+impl<A, D> From<(Array<A, D>, Array<A, D>)> for Score<A, D>
+where
+    D: Dimension,
+{
+    fn from((attention, score): (Array<A, D>, Array<A, D>)) -> Self {
+        Self::new(attention, score)
+    }
+}
+
+impl<A, D> From<Score<A, D>> for (Array<A, D>, Array<A, D>)
+where
+    D: Dimension,
+{
+    fn from(score: Score<A, D>) -> Self {
+        (score.attention, score.score)
+    }
+}
\ No newline at end of file
diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs
index 9b8e61a7..cbb381af 100644
--- a/models/transformers/src/impls/impl_head.rs
+++ b/models/transformers/src/impls/impl_head.rs
@@ -2,7 +2,7 @@
     Appellation: impl_head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use crate::attention::{Attention, AttentionHead};
+use crate::attention::{Attention, AttentionHead, Score};
 use crate::params::QkvBase;
 use core::borrow::{Borrow, BorrowMut};
 use nd::linalg::Dot;
@@ -18,7 +18,7 @@ where
     ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
     Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
 {
-    type Output = Array<A, D>;
+    type Output = Score<A, D>;
 
     fn attention(&self) -> Self::Output {
         self.attention()
@@ -80,3 +80,13 @@ where
         Self::from_params(QkvBase::default())
     }
 }
+
+impl<A, S, D> From<QkvBase<S, D>> for AttentionHead<A, D, S>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn from(params: QkvBase<S, D>) -> Self {
+        Self::from_params(params)
+    }
+}
\ No newline at end of file
diff --git a/models/transformers/src/impls/impl_init.rs b/models/transformers/src/impls/impl_init.rs
new file mode 100644
index 00000000..cfde2c15
--- /dev/null
+++ b/models/transformers/src/impls/impl_init.rs
@@ -0,0 +1,61 @@
+#![cfg(feature = "rand")]
+
+use crate::QkvBase;
+use concision::Initialize;
+use concision::init::rand::Rng;
+use concision::init::rand_distr::{Distribution, StandardNormal};
+use concision::init::rand_distr::uniform::SampleUniform;
+use nd::{ArrayBase, DataOwned, Dimension, ShapeBuilder};
+
+impl<A, S, D> Initialize for QkvBase<S, D> where
+    D: RemoveAxis,
+    S: DataOwned<Elem = A>,
+    StandardNormal: Distribution<A>,
+{
+    type Data = S;
+
+    fn rand<Sh, Dstr>(shape: Sh, distr: Dstr) -> Self
+    where
+        Sh: ShapeBuilder<Dim = D>,
+        Dstr: Clone + Distribution<A>,
+    {
+        let dim = shape.into_shape().raw_dim().clone();
+        Self {
+            q: ArrayBase::rand(dim.clone(), distr.clone()),
+            k: ArrayBase::rand(dim.clone(), distr.clone()),
+            v: ArrayBase::rand(dim, distr)
+        }
+    }
+
+    fn rand_with<Sh, Ds, R>(shape: Sh, distr: Ds, rng: &mut R) -> Self
+    where
+        R: Rng + ?Sized,
+        Ds: Clone + Distribution<A>,
+        Sh: ShapeBuilder<Dim = D>,
+    {
+        let dim = shape.into_shape().raw_dim().clone();
+        Self {
+            q: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut rng),
+            k: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut rng),
+            v: ArrayBase::rand_with(dim, distr, &mut rng)
+        }
+    }
+
+    fn init_rand<Ds>(self, distr: Ds) -> Self
+    where
+        Ds: Clone + Distribution<A>,
+        Self: Sized,
+    {
+        Self::rand(self.dim(), distr)
+    }
+
+    fn init_rand_with<Ds, R>(self, distr: Ds, rng: &mut R) -> Self
+    where
+        R: Rng + ?Sized,
+        Ds: Clone + Distribution<A>,
+    {
+        Self::rand_with(self.dim(), distr, rng)
+    }
+}
+
+
diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs
index fd05142e..ff070c13 100644
--- a/models/transformers/src/macros.rs
+++ b/models/transformers/src/macros.rs
@@ -3,81 +3,19 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 
+#[macro_use]
+mod params;
+
 macro_rules! ndbuilder {
-    ($method:ident$(::$call:ident)?() where $($rest:tt)*) => {
-        ndbuilder!(@impl $method$(::$call)?() where $($rest)*);
+    ($method:ident$(::$call:ident)?() $($where:tt)*) => {
+        ndbuilder!(@impl $method$(::$call)?() $($where)*);
     };
-    (@impl $method:ident() where $($rest:tt)*) => {
-        ndbuilder!(@impl $method::$method() where $($rest)*);
+    (@impl $method:ident() $($where:tt)*) => {
+        ndbuilder!(@impl $method::$method() $($where)*);
     };
-    (@impl $method:ident::$call:ident() where $($rest:tt)*) => {
-        pub fn $method<Sh: ndarray::ShapeBuilder<Dim = D>>(shape: Sh) -> Self where $($rest)* {
+    (@impl $method:ident::$call:ident() $($where:tt)*) => {
+        pub fn $method<Sh: ndarray::ShapeBuilder<Dim = D>>(shape: Sh) -> Self $($where)* {
             Self::builder(shape, ndarray::ArrayBase::$call)
         }
     };
-}
-
-// # TODO:
-macro_rules! ndview {
-    ($method:ident::$($rest:tt)*) => {
-        ndview!(@impl $method.$method::$($rest)*);
-    };
-    ($method:ident.$call:ident::$($rest:tt)*) => {
-        ndview!(@impl $method.$call::$($rest)*);
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(self) where $($rest:tt)*) => {
-        pub fn $method(self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(mut self) where $($rest:tt)*) => {
-        pub fn $method(mut self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(&self) where $($rest:tt)*) => {
-        pub fn $method(&self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@impl $method:ident.$call:ident::<$view:ident>(&mut self) where $($rest:tt)*) => {
-        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) where $($rest:tt)*) => {
-        pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) where $($rest:tt)*) => {
-        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<&'_ mut A>, D>
-        where
-            $($rest)*
-        {
-            ndview!(@apply $call(self))
-        }
-    };
-    (@apply $call:ident($self:expr)) => {
-        $crate::params::QkvBase {
-            q: $self.q.$call(),
-            k: $self.k.$call(),
-            v: $self.v.$call(),
-        }
-    };
-}
+}
\ No newline at end of file
diff --git a/models/transformers/src/macros/params.rs b/models/transformers/src/macros/params.rs
new file mode 100644
index 00000000..c8bdcd52
--- /dev/null
+++ b/models/transformers/src/macros/params.rs
@@ -0,0 +1,51 @@
+/*
+    Appellation: params <macros>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+
+macro_rules! qkv_view {
+    ($method:ident$(.$call:ident)?::$($rest:tt)*) => {
+        qkv_view!(@impl $method$(.$call)?::$($rest)*);
+    };
+    (@impl $method:ident::$($rest:tt)*) => {
+        qkv_view!(@impl $method.$method::$($rest)*);
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(self) $($rest:tt)*) => {
+        pub fn $method(self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(mut self) $($rest:tt)*) => {
+        pub fn $method(mut self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(&self) $($rest:tt)*) => {
+        pub fn $method(&self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<$view:ident>(&mut self) $($rest:tt)*) => {
+        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) $($rest:tt)*) => {
+        pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) $($rest:tt)*) => {
+        pub fn $method(&mut self) -> $crate::params::QkvBase<$view<&'_ mut A>, D> $($rest)* {
+            qkv_view!(@apply $call(self))
+        }
+    };
+    (@apply $call:ident($self:expr)) => {
+        $crate::params::QkvBase {
+            q: $self.q.$call(),
+            k: $self.k.$call(),
+            v: $self.v.$call(),
+        }
+    };
+}
diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs
index db99143a..d62fb7cb 100644
--- a/models/transformers/src/params/store.rs
+++ b/models/transformers/src/params/store.rs
@@ -74,12 +74,12 @@ where
 
     dimensional!(q());
 
-    ndview!(into_owned::<OwnedRepr>(self) where A: Clone, S: Data);
-    ndview!(to_owned::<OwnedRepr>(&self) where A: Clone, S: Data);
+    qkv_view!(into_owned::<OwnedRepr>(self) where A: Clone, S: Data);
+    qkv_view!(to_owned::<OwnedRepr>(&self) where A: Clone, S: Data);
 
-    ndview!(into_shared::<OwnedArcRepr>(self) where A: Clone, S: DataOwned);
-    ndview!(to_shared::<OwnedArcRepr>(&self) where A: Clone, S: DataShared);
+    qkv_view!(into_shared::<OwnedArcRepr>(self) where A: Clone, S: DataOwned);
+    qkv_view!(to_shared::<OwnedArcRepr>(&self) where A: Clone, S: DataShared);
 
-    ndview!(view::<'a, ViewRepr>(&self) where S: Data);
-    ndview!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut);
+    qkv_view!(view::<'a, ViewRepr>(&self) where S: Data);
+    qkv_view!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut);
 }
diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs
index bd7f16a6..eb527659 100644
--- a/models/transformers/tests/attention.rs
+++ b/models/transformers/tests/attention.rs
@@ -5,39 +5,19 @@
 extern crate concision_core as concision;
 extern crate concision_transformers as transformers;
 
-use concision::{linarr, Matmul};
-use transformers::{AttentionHead, Qkv};
+use approx::AbsDiffEq;
+use transformers::AttentionHead;
 
 use ndarray::prelude::*;
 
 #[test]
-fn test_qkv() {
-    let shape = (2048, 10);
-    let params = Qkv::<f64>::new(shape);
-    assert_eq!(params.q(), &Array::default(shape));
-}
 
-#[test]
-fn test_qkv_matmul() {
-    let shape = (2048, 10);
-    // generate some sample data
-    let data = linarr(shape).unwrap();
-    // initialize the parameters
-    let params = Qkv::<f64>::ones(shape);
-    // calculate the expected result
-    let exp = Array2::<f64>::ones(shape).dot(&data.t());
-    // calculate the result
-    let res = params.matmul(&data.t());
-    // compare the results
-    assert_eq!(res.q(), &exp);
-    assert_eq!(res.k(), &exp);
-    assert_eq!(res.v(), &exp);
-}
-
-#[test]
-fn test_attention_head() {
-    let shape = (30, 3);
+fn attention_head() {
+    let shape = (3, 3);
 
     let head = AttentionHead::<f64>::ones(shape);
     assert_eq!(head.q(), &Array::ones(shape));
+    let exp = Array2::from_elem(shape, 1f64/3f64);
+    let score = head.attention();
+    assert!(score.attention().abs_diff_eq(&exp, 1e-6));
 }
diff --git a/models/transformers/tests/params.rs b/models/transformers/tests/params.rs
new file mode 100644
index 00000000..18656be8
--- /dev/null
+++ b/models/transformers/tests/params.rs
@@ -0,0 +1,35 @@
+/*
+    Appellation: params <test>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+extern crate concision_core as concision;
+extern crate concision_transformers as transformers;
+
+use concision::{linarr, Matmul};
+use transformers::Qkv;
+
+use ndarray::prelude::*;
+
+#[test]
+fn test_qkv() {
+    let shape = (2048, 10);
+    let params = Qkv::<f64>::new(shape);
+    assert_eq!(params.q(), &Array::default(shape));
+}
+
+#[test]
+fn test_qkv_matmul() {
+    let shape = (2048, 10);
+    // generate some sample data
+    let data = linarr(shape).unwrap();
+    // initialize the parameters
+    let params = Qkv::<f64>::ones(shape);
+    // calculate the expected result
+    let exp = Array2::<f64>::ones(shape).dot(&data.t());
+    // calculate the result
+    let res = params.matmul(&data.t());
+    // compare the results
+    assert_eq!(res.q(), &exp);
+    assert_eq!(res.k(), &exp);
+    assert_eq!(res.v(), &exp);
+}

From e0d07eab75ab47d662fb31ea7cf39ba46154997b Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Mon, 20 May 2024 15:18:03 -0500
Subject: [PATCH 12/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 concision/examples/transformer.rs             |  9 ++--
 core/src/func/activate/nl.rs                  | 45 ++++++++++++-------
 core/src/init/initializer.rs                  | 23 ++++++++--
 core/src/math/traits.rs                       | 29 +++++++++---
 core/src/types/mask.rs                        |  8 +---
 data/src/tensor/ndtensor/traits.rs            |  2 -
 models/transformers/src/attention/head.rs     |  8 ++--
 .../transformers/src/attention/multi/mod.rs   |  2 +-
 models/transformers/src/attention/score.rs    | 19 +++++---
 models/transformers/src/impls/impl_head.rs    |  2 +-
 models/transformers/src/macros.rs             |  2 +-
 models/transformers/src/macros/params.rs      |  1 -
 models/transformers/tests/attention.rs        |  3 +-
 13 files changed, 96 insertions(+), 57 deletions(-)

diff --git a/concision/examples/transformer.rs b/concision/examples/transformer.rs
index c1a36fc7..cc24879b 100644
--- a/concision/examples/transformer.rs
+++ b/concision/examples/transformer.rs
@@ -5,8 +5,8 @@
 extern crate concision as cnc;
 
 use approx::AbsDiffEq;
-use cnc::transformer::AttentionHead;
 use cnc::prelude::Result;
+use cnc::transformer::AttentionHead;
 use ndarray::Array2;
 
 fn tracing() {
@@ -29,11 +29,10 @@ fn main() -> Result<()> {
     let shape = (3, 3);
     let head = AttentionHead::<f64>::ones(shape);
     let score = head.attention();
-    assert!(score.attention().abs_diff_eq(&Array2::from_elem(shape, 1f64/3f64), 1e-6));
+    assert!(score
+        .attention()
+        .abs_diff_eq(&Array2::from_elem(shape, 1f64 / 3f64), 1e-6));
     println!("{:?}", score);
 
-
-    
-
     Ok(())
 }
diff --git a/core/src/func/activate/nl.rs b/core/src/func/activate/nl.rs
index 694145c7..1e861170 100644
--- a/core/src/func/activate/nl.rs
+++ b/core/src/func/activate/nl.rs
@@ -7,7 +7,7 @@ use ndarray::*;
 use num::complex::{Complex, ComplexFloat};
 use num::traits::Zero;
 
-pub fn relu<T>(args: T) -> T
+fn _relu<T>(args: T) -> T
 where
     T: PartialOrd + Zero,
 {
@@ -17,23 +17,33 @@ where
     T::zero()
 }
 
-pub fn sigmoid<T>(args: T) -> T
+fn _sigmoid<T>(args: T) -> T
 where
     T: ComplexFloat,
 {
     (T::one() + args.neg().exp()).recip()
 }
 
-pub fn softmax<A, S, D>(args: &ArrayBase<S, D>) -> Array<A, D>
+fn _softmax<A, S, D>(args: &ArrayBase<S, D>) -> Array<A, D>
 where
     A: ComplexFloat + ScalarOperand,
     D: Dimension,
     S: Data<Elem = A>,
 {
-    args.exp() / args.exp().sum()
+    let e = args.exp();
+    &e / e.sum()
 }
 
-pub fn tanh<T>(args: T) -> T
+// fn __softmax<T, I>(args: &I) -> I 
+// where 
+//     I: Clone + core::ops::Div<T, Output = I> + Exp<Output = I>, T: Exp<Output = T> + core::iter::Sum ,
+//     for<'a> I: IntoIterator<Item = &'a T>,
+// {
+//     let e = args.exp();
+//     e.clone() / e.into_iter().sum::<T>()
+// }
+
+fn _tanh<T>(args: T) -> T
 where
     T: ComplexFloat,
 {
@@ -64,22 +74,25 @@ macro_rules! nonlinear {
         nonlinear!(@arr $rho::$call);
     };
     (@impl $rho:ident::$call:ident<$T:ty>) => {
-        impl $rho for $T {
-            type Output = $T;
+        paste::paste! {
+            impl $rho for $T {
+                type Output = $T;
 
-            fn $call(self) -> Self::Output {
-                $call(self)
+                fn $call(self) -> Self::Output {
+                    [<_ $call>](self)
+                }
             }
-        }
 
-        impl<'a> $rho for &'a $T {
-            type Output = $T;
+            impl<'a> $rho for &'a $T {
+                type Output = $T;
 
-            fn $call(self) -> Self::Output {
-                $call(*self)
+                fn $call(self) -> Self::Output {
+                    [<_ $call>](*self)
+                }
             }
         }
 
+
     };
     (@arr $name:ident::$call:ident) => {
         impl<A, S, D> $name for ArrayBase<S, D>
@@ -150,7 +163,7 @@ where
     type Output = Array<A, D>;
 
     fn softmax(self) -> Self::Output {
-        softmax(&self)
+        _softmax(&self)
     }
 }
 
@@ -163,6 +176,6 @@ where
     type Output = Array<A, D>;
 
     fn softmax(self) -> Self::Output {
-        softmax(self)
+        _softmax(self)
     }
 }
diff --git a/core/src/init/initializer.rs b/core/src/init/initializer.rs
index 75c20579..2de38df9 100644
--- a/core/src/init/initializer.rs
+++ b/core/src/init/initializer.rs
@@ -8,18 +8,33 @@ use nd::prelude::*;
 use nd::DataOwned;
 use rand_distr::{Distribution, StandardNormal};
 
-pub struct InitializerBase<A = f64, D = Ix2, Dst = StandardNormal> where D: Dimension, Dst: Clone + Distribution<A> {
+pub struct InitializerBase<A = f64, D = Ix2, Dst = StandardNormal>
+where
+    D: Dimension,
+    Dst: Clone + Distribution<A>,
+{
     pub(crate) dim: D,
     pub(crate) distr: Dst,
     pub(crate) _dtype: PhantomData<A>,
 }
 
-impl<A, D, Dst> InitializerBase<A, D, Dst> where D: Dimension, Dst: Clone + Distribution<A> {
+impl<A, D, Dst> InitializerBase<A, D, Dst>
+where
+    D: Dimension,
+    Dst: Clone + Distribution<A>,
+{
     pub fn new(dim: D, distr: Dst) -> Self {
-        Self { dim, distr, _dtype: PhantomData::<A> }
+        Self {
+            dim,
+            distr,
+            _dtype: PhantomData::<A>,
+        }
     }
 
-    pub fn init<S>(self) -> ArrayBase<S, D> where S: DataOwned<Elem = A> {
+    pub fn init<S>(self) -> ArrayBase<S, D>
+    where
+        S: DataOwned<Elem = A>,
+    {
         ArrayBase::rand(self.dim, self.distr)
     }
 }
diff --git a/core/src/math/traits.rs b/core/src/math/traits.rs
index 78e67337..51e3c1d6 100644
--- a/core/src/math/traits.rs
+++ b/core/src/math/traits.rs
@@ -7,19 +7,30 @@ use nd::{Array, ArrayBase, Data, Dimension};
 use num::complex::{Complex, ComplexFloat};
 use num::traits::{FromPrimitive, Num, Signed};
 
-pub trait IterStats<T> where T: FromPrimitive {
+pub trait IterStats<T>
+where
+    T: FromPrimitive,
+{
     type Output;
 
     fn elems(&self) -> T;
 
     fn mean(&self) -> Self::Output;
 
-    fn std(&self) -> Self::Output where T: ComplexFloat;
+    fn std(&self) -> Self::Output
+    where
+        T: ComplexFloat;
 
-    fn var(&self) -> Self::Output where T: ComplexFloat;
+    fn var(&self) -> Self::Output
+    where
+        T: ComplexFloat;
 }
 
-impl<T, I> IterStats<T> for I where I: Clone + ExactSizeIterator<Item = T>, T: Clone + FromPrimitive + Num + Sum {
+impl<T, I> IterStats<T> for I
+where
+    I: Clone + ExactSizeIterator<Item = T>,
+    T: Clone + FromPrimitive + Num + Sum,
+{
     type Output = T;
 
     fn elems(&self) -> T {
@@ -30,13 +41,19 @@ impl<T, I> IterStats<T> for I where I: Clone + ExactSizeIterator<Item = T>, T: C
         self.clone().sum::<T>() / self.elems()
     }
 
-    fn std(&self) -> Self::Output where T: ComplexFloat {
+    fn std(&self) -> Self::Output
+    where
+        T: ComplexFloat,
+    {
         let mean = self.mean();
         let sum = self.clone().map(|x| (x - mean).powi(2)).sum::<T>();
         (sum / self.elems()).sqrt()
     }
 
-    fn var(&self) -> Self::Output where T: ComplexFloat {
+    fn var(&self) -> Self::Output
+    where
+        T: ComplexFloat,
+    {
         let mean = self.mean();
         let sum = self.clone().map(|x| (x - mean).powi(2)).sum::<T>();
         sum / self.elems()
diff --git a/core/src/types/mask.rs b/core/src/types/mask.rs
index 9be85784..ab587861 100644
--- a/core/src/types/mask.rs
+++ b/core/src/types/mask.rs
@@ -13,8 +13,6 @@ where
     type Data: RawData<Elem = bool>;
 }
 
-
-
 pub struct Mask<S, D>(ArrayBase<S, D>)
 where
     D: Dimension,
@@ -30,9 +28,8 @@ where
     }
 }
 
-
 /*
-    ************* Implementations *************
+ ************* Implementations *************
 */
 mod impls {
     use super::*;
@@ -120,5 +117,4 @@ mod impls {
             mask.0
         }
     }
-
-}
\ No newline at end of file
+}
diff --git a/data/src/tensor/ndtensor/traits.rs b/data/src/tensor/ndtensor/traits.rs
index 67af23b1..b125c25b 100644
--- a/data/src/tensor/ndtensor/traits.rs
+++ b/data/src/tensor/ndtensor/traits.rs
@@ -28,8 +28,6 @@ pub trait NdContainer<T> {
     fn shape(&self) -> &[usize];
 }
 
-
-
 /*
  ******** implementations ********
 */
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index cc42c531..14887a77 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -2,7 +2,7 @@
     Appellation: head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use super::{_attention, Score};
+use super::{Score, _attention};
 use crate::params::QkvBase;
 use concision::getters;
 use concision::nn::DropoutLayer;
@@ -10,16 +10,14 @@ use nd::linalg::Dot;
 use nd::*;
 use num::complex::ComplexFloat;
 
-
-
 // #68
 /// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in
 /// [Attention is all you need](https://arxiv.org/abs/1706.03762).
 ///
 /// ### Fields
-/// 
+///
 /// [dropout](DropoutLayer): requires the `rand` feature
-/// 
+///
 pub struct AttentionHead<A = f64, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs
index 67592c2d..0b33a303 100644
--- a/models/transformers/src/attention/multi/mod.rs
+++ b/models/transformers/src/attention/multi/mod.rs
@@ -11,4 +11,4 @@ pub(crate) mod multi_head;
 
 pub(crate) mod prelude {
     pub use super::multi_head::MultiHeadAttention;
-}
\ No newline at end of file
+}
diff --git a/models/transformers/src/attention/score.rs b/models/transformers/src/attention/score.rs
index c2548040..3e1df96e 100644
--- a/models/transformers/src/attention/score.rs
+++ b/models/transformers/src/attention/score.rs
@@ -6,11 +6,14 @@ use core::fmt;
 use nd::{Array, Dimension};
 
 /// [Score] is a created as a result of invoking an attention mechanism;
-/// 
+///
 /// - attention: the actual result; returns the dot product of the score with the value tensor
 /// - score: the attention score tensor
 #[derive(Clone, Eq, Hash, PartialEq)]
-pub struct Score<A, D> where D: Dimension {
+pub struct Score<A, D>
+where
+    D: Dimension,
+{
     pub(crate) attention: Array<A, D>,
     pub(crate) score: Array<A, D>,
 }
@@ -41,8 +44,12 @@ where
     }
 }
 
-impl<A, D> Copy for Score<A, D> where A: Copy, D: Copy + Dimension, Array<A, D>: Copy {
-
+impl<A, D> Copy for Score<A, D>
+where
+    A: Copy,
+    D: Copy + Dimension,
+    Array<A, D>: Copy,
+{
 }
 
 impl<A, D> fmt::Debug for Score<A, D>
@@ -68,8 +75,6 @@ where
     }
 }
 
-
-
 impl<A, D> From<(Array<A, D>, Array<A, D>)> for Score<A, D>
 where
     D: Dimension,
@@ -86,4 +91,4 @@ where
     fn from(score: Score<A, D>) -> Self {
         (score.attention, score.score)
     }
-}
\ No newline at end of file
+}
diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs
index cbb381af..4160975d 100644
--- a/models/transformers/src/impls/impl_head.rs
+++ b/models/transformers/src/impls/impl_head.rs
@@ -89,4 +89,4 @@ where
     fn from(params: QkvBase<S, D>) -> Self {
         Self::from_params(params)
     }
-}
\ No newline at end of file
+}
diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs
index ff070c13..7763caa9 100644
--- a/models/transformers/src/macros.rs
+++ b/models/transformers/src/macros.rs
@@ -18,4 +18,4 @@ macro_rules! ndbuilder {
             Self::builder(shape, ndarray::ArrayBase::$call)
         }
     };
-}
\ No newline at end of file
+}
diff --git a/models/transformers/src/macros/params.rs b/models/transformers/src/macros/params.rs
index c8bdcd52..f7e12e32 100644
--- a/models/transformers/src/macros/params.rs
+++ b/models/transformers/src/macros/params.rs
@@ -3,7 +3,6 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 
-
 macro_rules! qkv_view {
     ($method:ident$(.$call:ident)?::$($rest:tt)*) => {
         qkv_view!(@impl $method$(.$call)?::$($rest)*);
diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs
index eb527659..6bc023af 100644
--- a/models/transformers/tests/attention.rs
+++ b/models/transformers/tests/attention.rs
@@ -11,13 +11,12 @@ use transformers::AttentionHead;
 use ndarray::prelude::*;
 
 #[test]
-
 fn attention_head() {
     let shape = (3, 3);
 
     let head = AttentionHead::<f64>::ones(shape);
     assert_eq!(head.q(), &Array::ones(shape));
-    let exp = Array2::from_elem(shape, 1f64/3f64);
+    let exp = Array2::from_elem(shape, 1f64 / 3f64);
     let score = head.attention();
     assert!(score.attention().abs_diff_eq(&exp, 1e-6));
 }

From 207072a4794e64eab2ecb51e5627be7d57443e96 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Fri, 24 May 2024 03:17:55 -0500
Subject: [PATCH 13/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/func/activate/nl.rs                  |  4 +-
 models/linear/src/norm/layer/mod.rs           |  9 ++-
 models/linear/src/norm/layer/model.rs         | 12 +--
 models/transformers/Cargo.toml                |  1 -
 models/transformers/src/attention/head.rs     | 15 ++++
 .../src/attention/multi/config.rs             | 49 ++++++++++++
 .../transformers/src/attention/multi/mod.rs   |  4 +-
 .../src/attention/multi/multi_head.rs         |  6 +-
 .../transformers/src/codec/encoder/layer.rs   | 17 ++++-
 models/transformers/src/codec/mod.rs          |  7 ++
 models/transformers/src/lib.rs                |  1 +
 models/transformers/src/macros.rs             | 64 ++++++++++++++++
 models/transformers/src/model/mod.rs          |  6 ++
 models/transformers/src/model/sublayer.rs     | 74 +++++++++++++++++++
 models/transformers/src/primitives.rs         |  5 +-
 15 files changed, 254 insertions(+), 20 deletions(-)
 create mode 100644 models/transformers/src/attention/multi/config.rs
 create mode 100644 models/transformers/src/model/mod.rs
 create mode 100644 models/transformers/src/model/sublayer.rs

diff --git a/core/src/func/activate/nl.rs b/core/src/func/activate/nl.rs
index 1e861170..d9e70fed 100644
--- a/core/src/func/activate/nl.rs
+++ b/core/src/func/activate/nl.rs
@@ -34,8 +34,8 @@ where
     &e / e.sum()
 }
 
-// fn __softmax<T, I>(args: &I) -> I 
-// where 
+// fn __softmax<T, I>(args: &I) -> I
+// where
 //     I: Clone + core::ops::Div<T, Output = I> + Exp<Output = I>, T: Exp<Output = T> + core::iter::Sum ,
 //     for<'a> I: IntoIterator<Item = &'a T>,
 // {
diff --git a/models/linear/src/norm/layer/mod.rs b/models/linear/src/norm/layer/mod.rs
index 6b54d6e8..28254dc1 100644
--- a/models/linear/src/norm/layer/mod.rs
+++ b/models/linear/src/norm/layer/mod.rs
@@ -19,13 +19,15 @@ pub(crate) mod prelude {
 }
 
 pub(crate) mod utils {
-    use nd::{Array, Axis, Dimension, RemoveAxis};
+    use nd::prelude::*;
+    use nd::{Data, RemoveAxis};
     use num::traits::{Float, FromPrimitive};
 
-    pub(crate) fn layer_norm<A, D>(x: &Array<A, D>, eps: f64) -> Array<A, D>
+    pub(crate) fn layer_norm<A, S, D>(x: &ArrayBase<S, D>, eps: f64) -> Array<A, D>
     where
         A: Float + FromPrimitive,
         D: Dimension,
+        S: Data<Elem = A>,
     {
         let mean = x.mean().unwrap();
         let denom = {
@@ -36,10 +38,11 @@ pub(crate) mod utils {
         x.mapv(|xi| (xi - mean) / denom)
     }
 
-    pub(crate) fn layer_norm_axis<A, D>(x: &Array<A, D>, axis: Axis, eps: f64) -> Array<A, D>
+    pub(crate) fn layer_norm_axis<A, S, D>(x: &ArrayBase<S, D>, axis: Axis, eps: f64) -> Array<A, D>
     where
         A: Float + FromPrimitive,
         D: RemoveAxis,
+        S: Data<Elem = A>,
     {
         let eps = A::from(eps).unwrap();
         let mean = x.mean_axis(axis).unwrap();
diff --git a/models/linear/src/norm/layer/model.rs b/models/linear/src/norm/layer/model.rs
index e5dc6b67..1cca2419 100644
--- a/models/linear/src/norm/layer/model.rs
+++ b/models/linear/src/norm/layer/model.rs
@@ -6,7 +6,7 @@ use super::Config;
 use crate::{Biased, LinearParams, ParamMode, Unbiased};
 use concision::Forward;
 use nd::prelude::*;
-use nd::RemoveAxis;
+use nd::{Data, RemoveAxis};
 use num::traits::{Float, FromPrimitive, One, Zero};
 
 // #62
@@ -139,14 +139,15 @@ where
     }
 }
 
-impl<A, D> Forward<Array<A, D>> for LayerNorm<A, Biased, D>
+impl<A, S, D> Forward<ArrayBase<S, D>> for LayerNorm<A, Biased, D>
 where
     A: Float + FromPrimitive,
     D: RemoveAxis,
+    S: Data<Elem = A>,
 {
     type Output = Array<A, D>;
 
-    fn forward(&self, x: &Array<A, D>) -> Self::Output {
+    fn forward(&self, x: &ArrayBase<S, D>) -> Self::Output {
         let norm = if let Some(axis) = self.config().axis() {
             super::layer_norm_axis(x, *axis, self.eps())
         } else {
@@ -156,14 +157,15 @@ where
     }
 }
 
-impl<A, D> Forward<Array<A, D>> for LayerNorm<A, Unbiased, D>
+impl<A, S, D> Forward<ArrayBase<S, D>> for LayerNorm<A, Unbiased, D>
 where
     A: Float + FromPrimitive,
     D: RemoveAxis,
+    S: Data<Elem = A>,
 {
     type Output = Array<A, D>;
 
-    fn forward(&self, x: &Array<A, D>) -> Self::Output {
+    fn forward(&self, x: &ArrayBase<S, D>) -> Self::Output {
         let norm = if let Some(axis) = self.config().axis() {
             super::layer_norm_axis(x, *axis, self.eps())
         } else {
diff --git a/models/transformers/Cargo.toml b/models/transformers/Cargo.toml
index d0558100..7dc36953 100644
--- a/models/transformers/Cargo.toml
+++ b/models/transformers/Cargo.toml
@@ -133,7 +133,6 @@ optional = true
 version = "0.1"
 
 [dev-dependencies]
-approx.workspace = true
 lazy_static.workspace = true
 
 [package.metadata.docs.rs]
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index 14887a77..80da457b 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -59,6 +59,21 @@ where
     {
         Self::from_params(QkvBase::from_elem(shape, value))
     }
+    /// Setup the [AttentionHead] with a [DropoutLayer]
+    #[cfg(feature = "rand")]
+    pub fn with_dropout(self, dropout: DropoutLayer) -> Self {
+        Self {
+            dropout: Some(dropout),
+            ..self
+        }
+    }
+    /// Setup the [AttentionHead] with a mask
+    pub fn with_mask(self, mask: Array<bool, D>) -> Self {
+        Self {
+            mask: Some(mask),
+            ..self
+        }
+    }
     /// Computes the [Score] using scaled dot-product attention.
     pub fn attention(&self) -> Score<A, D>
     where
diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs
new file mode 100644
index 00000000..4fa5302c
--- /dev/null
+++ b/models/transformers/src/attention/multi/config.rs
@@ -0,0 +1,49 @@
+/*
+    Appellation: config <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct Config {
+    pub heads: usize,
+}
+
+impl Config {
+    pub fn new() -> ConfigBuilder {
+        ConfigBuilder::new()
+    }
+    pub fn heads(&self) -> usize {
+        self.heads
+    }
+}
+
+impl Default for Config {
+    fn default() -> Self {
+        Self {
+            heads: crate::HEADS,
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct ConfigBuilder {
+    heads: Option<usize>,
+}
+
+impl ConfigBuilder {
+    pub fn new() -> Self {
+        Self { heads: None }
+    }
+
+    pub fn heads(mut self, heads: usize) -> Self {
+        self.heads = Some(heads);
+        self
+    }
+
+    pub fn build(&self) -> Config {
+        Config {
+            heads: self.heads.unwrap_or(crate::HEADS),
+        }
+    }
+}
diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs
index 0b33a303..e101f032 100644
--- a/models/transformers/src/attention/multi/mod.rs
+++ b/models/transformers/src/attention/multi/mod.rs
@@ -5,10 +5,12 @@
 //! # Multi-Head Attention
 //!
 //!
-pub use self::multi_head::*;
+pub use self::{config::Config, multi_head::*};
 
+pub(crate) mod config;
 pub(crate) mod multi_head;
 
 pub(crate) mod prelude {
+    pub use super::config::Config as MultiHeadAttentionConfig;
     pub use super::multi_head::MultiHeadAttention;
 }
diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs
index e59061ae..47b2a8e2 100644
--- a/models/transformers/src/attention/multi/multi_head.rs
+++ b/models/transformers/src/attention/multi/multi_head.rs
@@ -2,11 +2,9 @@
     Appellation: multi_head <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use super::Config;
 
-pub struct Config {
-    pub heads: usize,
-}
-
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct MultiHeadAttention {
     pub(crate) config: Config,
 }
diff --git a/models/transformers/src/codec/encoder/layer.rs b/models/transformers/src/codec/encoder/layer.rs
index 10821bd3..5c00ebcf 100644
--- a/models/transformers/src/codec/encoder/layer.rs
+++ b/models/transformers/src/codec/encoder/layer.rs
@@ -2,12 +2,25 @@
     Appellation: layer <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::attention::multi::MultiHeadAttention;
 
 #[derive(Default)]
-pub struct EncoderLayer {}
+pub struct EncoderLayer {
+    pub(crate) attention: MultiHeadAttention,
+}
 
 impl EncoderLayer {
     pub fn new() -> Self {
-        Self {}
+        let attention = MultiHeadAttention::default();
+
+        Self { attention }
+    }
+    /// Returns an immutable reference to the multi-head, self-attention layer.
+    pub fn attention(&self) -> &MultiHeadAttention {
+        &self.attention
+    }
+    /// Returns a mutable reference to the multi-head, self-attention layer.
+    pub fn attention_mut(&mut self) -> &mut MultiHeadAttention {
+        &mut self.attention
     }
 }
diff --git a/models/transformers/src/codec/mod.rs b/models/transformers/src/codec/mod.rs
index 3a7e3f77..52e34740 100644
--- a/models/transformers/src/codec/mod.rs
+++ b/models/transformers/src/codec/mod.rs
@@ -2,6 +2,13 @@
     Appellation: codec <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+//! # Codec
+//!
+//! The `codec` module implements the [Decoder] and [Encoder] layers of the [Transformer](crate::Transformer) model.
+//! Each layer has two sublayers, namely:
+//! - multi-head, self-attention layer
+//! - fully-connected, position-wise feed-forward network.
+//!
 pub use self::{decoder::Decoder, encoder::Encoder, model::*};
 
 pub(crate) mod model;
diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs
index 37c5c1ce..9e512f24 100644
--- a/models/transformers/src/lib.rs
+++ b/models/transformers/src/lib.rs
@@ -29,6 +29,7 @@ pub(crate) mod transformer;
 
 pub mod attention;
 pub mod codec;
+pub mod model;
 pub mod ops;
 pub mod params;
 
diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs
index 7763caa9..e25dafae 100644
--- a/models/transformers/src/macros.rs
+++ b/models/transformers/src/macros.rs
@@ -19,3 +19,67 @@ macro_rules! ndbuilder {
         }
     };
 }
+
+#[allow(unused_macros)]
+macro_rules! cbuilder {
+    (@impl derive: [$($D:ident),* $(,)?], $name:ident {$($vis:vis $field:ident: $type:ty),*}) => {
+        #[derive(Clone, Debug, PartialEq, $($D),*)]
+        #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+        pub struct $name {
+            $($vis $field: $type),*
+        }
+        impl $name {
+            paste::paste! {
+                pub fn new() -> [<$name Builder>] {
+                    [<$name Builder>]::new()
+                }
+            }
+
+            $(
+                pub fn $field(mut self, $field: $type) -> Self {
+                    self.$field = $field;
+                    self
+                }
+            )*
+        }
+    };
+    (@builder derive: [$($D:ident),* $(,)?], $name:ident {$($field:ident: $type:ty),*}) => {
+        pub struct $name {
+            $(pub(crate) $field: $type),*
+        }
+
+        impl $name {
+            pub fn new() -> Self {
+                Self {
+                    $($field: None),*
+                }
+            }
+
+            $(
+                pub fn $field(mut self, $field: $type) -> Self {
+                    self.$field = Some($field);
+                    self
+                }
+            )*
+
+            pub fn build(&self) -> Config {
+                Config {
+                    $($field: self.$field.unwrap_or_else(|| crate::$field),)*
+                }
+            }
+        }
+
+        impl Default for $name {
+            fn default() -> Self {
+                Self::new()
+            }
+        }
+    };
+}
+
+/// This macro helps create a stack of identical sublayers.
+///
+#[allow(unused_macros)]
+macro_rules! sublayer {
+    (@impl heads: $heads:expr) => {};
+}
diff --git a/models/transformers/src/model/mod.rs b/models/transformers/src/model/mod.rs
new file mode 100644
index 00000000..ac227da3
--- /dev/null
+++ b/models/transformers/src/model/mod.rs
@@ -0,0 +1,6 @@
+/*
+    Appellation: model <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub mod sublayer;
diff --git a/models/transformers/src/model/sublayer.rs b/models/transformers/src/model/sublayer.rs
new file mode 100644
index 00000000..a1a5fbe7
--- /dev/null
+++ b/models/transformers/src/model/sublayer.rs
@@ -0,0 +1,74 @@
+/*
+    Appellation: sublayer <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+#![cfg(feature = "rand")]
+use concision::nn::DropoutLayer;
+use concision::Forward;
+use linear::{Biased, LayerNorm, ParamMode, Unbiased};
+use nd::prelude::*;
+use nd::{DataOwned, RemoveAxis, ScalarOperand};
+use num::traits::{Float, FromPrimitive};
+
+/// A residual connection wrapped around a [layer norm](LayerNorm) followed by dropout;
+/// intended for use within the [Transformer](crate::Transformer) model.
+pub struct Sublayer<A = f64, K = Biased, D = Ix2>
+where
+    D: Dimension,
+{
+    pub(crate) dropout: DropoutLayer,
+    pub(crate) norm: LayerNorm<A, K, D>,
+}
+
+impl<A, K, D> Sublayer<A, K, D>
+where
+    D: RemoveAxis,
+{
+    pub fn new<Sh>(shape: Sh, dropout: f64) -> Self
+    where
+        A: Default,
+        K: ParamMode,
+        Sh: ShapeBuilder<Dim = D>,
+    {
+        Self {
+            dropout: DropoutLayer::new(dropout),
+            norm: LayerNorm::new(shape),
+        }
+    }
+
+    pub fn dropout(&self) -> &DropoutLayer {
+        &self.dropout
+    }
+
+    pub fn norm(&self) -> &LayerNorm<A, K, D> {
+        &self.norm
+    }
+}
+
+impl<A, S, D> Forward<ArrayBase<S, D>> for Sublayer<A, Biased, D>
+where
+    A: Float + FromPrimitive + ScalarOperand,
+    D: RemoveAxis,
+    S: DataOwned<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn forward(&self, input: &ArrayBase<S, D>) -> Self::Output {
+        let normal = self.norm().forward(input);
+        input + self.dropout().forward(&normal)
+    }
+}
+
+impl<A, S, D> Forward<ArrayBase<S, D>> for Sublayer<A, Unbiased, D>
+where
+    A: Float + FromPrimitive + ScalarOperand,
+    D: RemoveAxis,
+    S: DataOwned<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn forward(&self, input: &ArrayBase<S, D>) -> Self::Output {
+        let normal = self.norm().forward(input);
+        input + self.dropout().forward(&normal)
+    }
+}
diff --git a/models/transformers/src/primitives.rs b/models/transformers/src/primitives.rs
index 5f5c1bcf..3b30e7aa 100644
--- a/models/transformers/src/primitives.rs
+++ b/models/transformers/src/primitives.rs
@@ -9,9 +9,10 @@ pub mod consts {
     pub const D_MODEL: usize = 512;
     /// The default size of the network; i.e. the number of neurons in the network
     pub const D_NETWORK: usize = 2048;
-
+    /// The default dimension of the key and query vectors
+    pub const DK: usize = D_MODEL / HEADS;
     /// The default number of attention heads
-    pub const H: usize = 8;
+    pub const HEADS: usize = 8;
     /// The default number of layers used for the encoder / decoder.
     pub const N: usize = 6;
 }

From 4dbc5e439b5e24a6f8af6ce2edfa14d6334aee17 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Fri, 24 May 2024 06:22:17 -0500
Subject: [PATCH 14/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/macros/builder.rs                    |  6 ++--
 core/src/nn/mod.rs                            |  1 +
 core/src/nn/model.rs                          |  2 ++
 core/src/nn/model/repo.rs                     | 10 ++++++
 core/src/traits/mod.rs                        |  4 +--
 core/src/traits/{misc => }/setup.rs           |  0
 models/transformers/src/attention/head.rs     |  2 +-
 .../src/attention/multi/config.rs             | 36 +++++++++----------
 .../src/attention/multi/multi_head.rs         | 34 ++++++++++++++++--
 models/transformers/src/params/store.rs       | 10 ++++++
 10 files changed, 76 insertions(+), 29 deletions(-)
 create mode 100644 core/src/nn/model/repo.rs
 rename core/src/traits/{misc => }/setup.rs (100%)

diff --git a/core/src/macros/builder.rs b/core/src/macros/builder.rs
index 8fba06d2..b840fea6 100644
--- a/core/src/macros/builder.rs
+++ b/core/src/macros/builder.rs
@@ -6,10 +6,10 @@
 #[macro_export]
 macro_rules! builder {
     ($(#[derive($($d:ident),+)])?$name:ident::<$inner:ty> {$($k:ident: $v:ty),* $(,)?}) => {
-        builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
+        $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
     };
     ($(#[derive($($d:ident),+)])? $name:ident($inner:ty) {$($k:ident: $v:ty),* $(,)?}) => {
-        builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
+        $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
     };
     (@loop builder: $name:ident, derive: [$($d:ident),* $(,)?], inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => {
 
@@ -18,7 +18,7 @@ macro_rules! builder {
             inner: $inner,
         }
 
-        builder!(@impl builder: $name, inner: $inner {$($k: $v),*});
+        $crate::builder!(@impl builder: $name, inner: $inner {$($k: $v),*});
     };
     (@impl builder: $name:ident, inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => {
         impl $name {
diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs
index d4c7fa48..0b8f8707 100644
--- a/core/src/nn/mod.rs
+++ b/core/src/nn/mod.rs
@@ -8,6 +8,7 @@ pub mod dropout;
 pub mod error;
 pub mod model;
 
+
 pub(crate) mod prelude {
     pub use super::dropout::*;
     pub use super::error::*;
diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs
index 8991d08e..d989cb9b 100644
--- a/core/src/nn/model.rs
+++ b/core/src/nn/model.rs
@@ -6,6 +6,8 @@ pub use self::module::*;
 
 pub mod config;
 pub mod module;
+#[doc(hidden)]
+pub mod repo;
 
 pub(crate) mod prelude {
     pub use super::config::*;
diff --git a/core/src/nn/model/repo.rs b/core/src/nn/model/repo.rs
new file mode 100644
index 00000000..bb0c8b6c
--- /dev/null
+++ b/core/src/nn/model/repo.rs
@@ -0,0 +1,10 @@
+/*
+    Appellation: repo <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+#![allow(unused)]
+
+pub struct ModelRepo {
+    pub name: String,
+    pub(crate) store: String,
+}
\ No newline at end of file
diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs
index b6aa6b21..f7a13549 100644
--- a/core/src/traits/mod.rs
+++ b/core/src/traits/mod.rs
@@ -7,6 +7,7 @@ pub use self::prelude::*;
 pub mod num;
 pub mod ops;
 pub mod predict;
+pub mod setup;
 pub mod train;
 
 pub mod arr {
@@ -29,14 +30,12 @@ pub mod misc {
     pub mod adjust;
     #[doc(hidden)]
     pub mod container;
-    pub mod setup;
     pub mod store;
     pub mod toggle;
 
     pub(crate) mod prelude {
         pub use super::adjust::*;
         pub use super::container::*;
-        pub use super::setup::*;
         pub use super::store::*;
         pub use super::toggle::*;
     }
@@ -48,5 +47,6 @@ pub(crate) mod prelude {
     pub use super::num::*;
     pub use super::ops::*;
     pub use super::predict::*;
+    pub use super::setup::*;
     pub use super::train::*;
 }
diff --git a/core/src/traits/misc/setup.rs b/core/src/traits/setup.rs
similarity index 100%
rename from core/src/traits/misc/setup.rs
rename to core/src/traits/setup.rs
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index 80da457b..fe9a34d8 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -85,7 +85,7 @@ where
         let (q, k, v) = self.qkv();
         _attention(q, k, v, self.mask(), self.dropout())
     }
-    /// Returns an immutable reference to the, optional, [Dropout] layer
+    /// Returns an immutable reference to the, optional, mask.
     pub fn mask(&self) -> Option<&Array<bool, D>> {
         self.mask.as_ref()
     }
diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs
index 4fa5302c..6b5c70c9 100644
--- a/models/transformers/src/attention/multi/config.rs
+++ b/models/transformers/src/attention/multi/config.rs
@@ -6,6 +6,7 @@
 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct Config {
+    pub d_model: usize,
     pub heads: usize,
 }
 
@@ -13,6 +14,15 @@ impl Config {
     pub fn new() -> ConfigBuilder {
         ConfigBuilder::new()
     }
+
+    pub fn d_model(&self) -> usize {
+        self.d_model
+    }
+
+    pub fn dk(&self) -> usize {
+        self.d_model() / self.heads()
+    }
+
     pub fn heads(&self) -> usize {
         self.heads
     }
@@ -21,29 +31,15 @@ impl Config {
 impl Default for Config {
     fn default() -> Self {
         Self {
+            d_model: crate::D_MODEL,
             heads: crate::HEADS,
         }
     }
 }
 
-#[derive(Default)]
-pub struct ConfigBuilder {
-    heads: Option<usize>,
-}
-
-impl ConfigBuilder {
-    pub fn new() -> Self {
-        Self { heads: None }
+concision::builder! {
+    ConfigBuilder(Config) {
+        d_model: usize,
+        heads: usize,
     }
-
-    pub fn heads(mut self, heads: usize) -> Self {
-        self.heads = Some(heads);
-        self
-    }
-
-    pub fn build(&self) -> Config {
-        Config {
-            heads: self.heads.unwrap_or(crate::HEADS),
-        }
-    }
-}
+}
\ No newline at end of file
diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs
index 47b2a8e2..ea37ffee 100644
--- a/models/transformers/src/attention/multi/multi_head.rs
+++ b/models/transformers/src/attention/multi/multi_head.rs
@@ -3,14 +3,42 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 use super::Config;
+use crate::AttentionHead;
+use linear::{Biased, Linear};
+use nd::prelude::*;
 
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
-pub struct MultiHeadAttention {
+
+#[derive(Default)]
+pub struct MultiHeadAttention<A = f64, D = Ix2> where D: Dimension, {
+    pub(crate) attention: Option<AttentionHead<A, D>>,
     pub(crate) config: Config,
+    pub(crate) linears: Vec<Linear<A, Biased, D>>,
 }
 
-impl MultiHeadAttention {
+impl<A, D> MultiHeadAttention<A, D> where D: Dimension, {
+
+    pub fn head(&self) -> Option<&AttentionHead<A, D>> {
+        self.attention.as_ref()
+    }
+    
     pub const fn config(&self) -> &Config {
         &self.config
     }
+
+    pub fn linears(&self) -> &[Linear<A, Biased, D>] {
+        &self.linears
+    }
 }
+
+impl<A> MultiHeadAttention<A, Ix2> {
+    pub fn std(config: Config) -> Self where A: Clone + Default {
+        let linears = (0..4)
+            .map(|_| Linear::from_features(config.d_model(), config.d_model()))
+            .collect();
+        Self {
+            attention: None,
+            config,
+            linears
+        }
+    }
+}
\ No newline at end of file
diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs
index d62fb7cb..87208081 100644
--- a/models/transformers/src/params/store.rs
+++ b/models/transformers/src/params/store.rs
@@ -57,6 +57,16 @@ where
         (self.q.view(), self.k.view(), self.v.view())
     }
 
+    // pub fn attention(&self, mask: Option<Array<bool, D>>) -> Array<A, D>
+    // where
+    //     A: ComplexFloat,
+    //     S: Data,
+    // {
+    //     let (q, k, v) = self.as_qkv();
+    //     let (q, k, v) = _attention(q, k, v, mask);
+    //     q.dot(&v)
+    // }
+
     /// Consumes the store and returns a three-tuple consisting of the query, key, and value arrays respectively.
     pub fn into_qkv(self) -> (ArrayBase<S, D>, ArrayBase<S, D>, ArrayBase<S, D>) {
         (self.q, self.k, self.v)

From ba80a2dc1650bf9cc488038f3bdfa9d8c4e2a46a Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Fri, 24 May 2024 08:44:06 -0500
Subject: [PATCH 15/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/init/mod.rs                          |  8 +-
 core/src/init/{initialize.rs => traits.rs}    | 14 ++++
 core/src/init/utils.rs                        | 35 +++++----
 core/src/macros/builder.rs                    |  3 -
 core/src/{types => nn/mask}/mask.rs           | 20 +++--
 core/src/nn/mask/mod.rs                       | 29 +++++++
 core/src/nn/mod.rs                            | 13 +++-
 core/src/nn/model/repo.rs                     |  2 +-
 core/src/traits/arr/reshape.rs                | 40 ++++++++++
 core/src/traits/arr/tensor.rs                 | 77 +++++++++++++++----
 core/src/traits/misc/container.rs             | 14 +++-
 core/src/traits/mod.rs                        | 10 ++-
 core/src/types/mod.rs                         |  2 -
 core/tests/traits.rs                          | 15 +++-
 models/linear/src/mlp/model.rs                | 10 +++
 models/transformers/src/attention/head.rs     | 61 ++++++++++-----
 .../src/attention/multi/config.rs             |  2 +-
 .../src/attention/multi/multi_head.rs         | 35 ++++++---
 models/transformers/src/codec/model.rs        | 12 +--
 models/transformers/src/impls/impl_init.rs    |  5 +-
 models/transformers/src/impls/impl_params.rs  | 35 ++++++---
 models/transformers/src/lib.rs                |  2 +-
 models/transformers/src/params/store.rs       | 55 ++++++++++---
 23 files changed, 380 insertions(+), 119 deletions(-)
 rename core/src/init/{initialize.rs => traits.rs} (95%)
 rename core/src/{types => nn/mask}/mask.rs (89%)
 create mode 100644 core/src/nn/mask/mod.rs
 create mode 100644 core/src/traits/arr/reshape.rs
 create mode 100644 models/linear/src/mlp/model.rs

diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs
index f2bb4731..7f5bc4b4 100644
--- a/core/src/init/mod.rs
+++ b/core/src/init/mod.rs
@@ -11,9 +11,11 @@
 //! better suited for machine-learning workloads.
 #![cfg(feature = "rand")]
 
-pub use self::prelude::*;
+pub use self::distr::prelude::*;
+pub use self::traits::*;
+pub use self::utils::*;
 
-pub(crate) mod initialize;
+pub(crate) mod traits;
 pub(crate) mod utils;
 
 pub mod initializer;
@@ -41,6 +43,6 @@ pub use rand_distr;
 
 pub(crate) mod prelude {
     pub use super::distr::prelude::*;
-    pub use super::initialize::{Initialize, InitializeExt};
+    pub use super::traits::{Initialize, InitializeExt};
     pub use super::utils::*;
 }
diff --git a/core/src/init/initialize.rs b/core/src/init/traits.rs
similarity index 95%
rename from core/src/init/initialize.rs
rename to core/src/init/traits.rs
index 4d2eb51d..a01ca7d9 100644
--- a/core/src/init/initialize.rs
+++ b/core/src/init/traits.rs
@@ -139,6 +139,20 @@ where
     {
         Self::rand(shape, Uniform::new(dk.clone().neg(), dk))
     }
+
+    fn uniform_from_seed<Sh>(shape: Sh, start: A, stop: A, key: u64) -> Self
+    where
+        A: SampleUniform,
+        S: DataOwned,
+        Sh: ShapeBuilder<Dim = D>,
+        <A as SampleUniform>::Sampler: Clone,
+    {
+        Self::rand_with(
+            shape,
+            Uniform::new(start, stop),
+            &mut StdRng::seed_from_u64(key),
+        )
+    }
     /// Generate a random array with values between u(-a, a) where a is the reciprocal of the value at the given axis
     fn uniform_along<Sh>(shape: Sh, axis: usize) -> Self
     where
diff --git a/core/src/init/utils.rs b/core/src/init/utils.rs
index 3994589c..dacb3df2 100644
--- a/core/src/init/utils.rs
+++ b/core/src/init/utils.rs
@@ -22,23 +22,7 @@ where
     let distr = ComplexDistribution::<A, A>::new(A::one(), A::one());
     ArrayBase::random(shape, distr)
 }
-/// Creates a random array from a uniform distribution using a given key
-pub fn seeded_uniform<T, D>(
-    key: u64,
-    start: T,
-    stop: T,
-    shape: impl IntoDimension<Dim = D>,
-) -> Array<T, D>
-where
-    D: Dimension,
-    T: SampleUniform,
-{
-    Array::random_using(
-        shape,
-        Uniform::new(start, stop),
-        &mut rngs::StdRng::seed_from_u64(key),
-    )
-}
+
 /// Given a shape, generate a random array using the StandardNormal distribution
 pub fn stdnorm<S, D, Sh>(shape: Sh) -> ArrayBase<S, D>
 where
@@ -59,3 +43,20 @@ where
 {
     ArrayBase::random_using(shape, StandardNormal, &mut StdRng::seed_from_u64(seed))
 }
+/// Creates a random array from a uniform distribution using a given key
+pub fn uniform_from_seed<T, D>(
+    key: u64,
+    start: T,
+    stop: T,
+    shape: impl IntoDimension<Dim = D>,
+) -> Array<T, D>
+where
+    D: Dimension,
+    T: SampleUniform,
+{
+    Array::random_using(
+        shape,
+        Uniform::new(start, stop),
+        &mut rngs::StdRng::seed_from_u64(key),
+    )
+}
diff --git a/core/src/macros/builder.rs b/core/src/macros/builder.rs
index b840fea6..6e545c9f 100644
--- a/core/src/macros/builder.rs
+++ b/core/src/macros/builder.rs
@@ -5,9 +5,6 @@
 
 #[macro_export]
 macro_rules! builder {
-    ($(#[derive($($d:ident),+)])?$name:ident::<$inner:ty> {$($k:ident: $v:ty),* $(,)?}) => {
-        $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
-    };
     ($(#[derive($($d:ident),+)])? $name:ident($inner:ty) {$($k:ident: $v:ty),* $(,)?}) => {
         $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*});
     };
diff --git a/core/src/types/mask.rs b/core/src/nn/mask/mask.rs
similarity index 89%
rename from core/src/types/mask.rs
rename to core/src/nn/mask/mask.rs
index ab587861..263cf016 100644
--- a/core/src/types/mask.rs
+++ b/core/src/nn/mask/mask.rs
@@ -2,18 +2,10 @@
     Appellation: mask <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-
 use nd::prelude::*;
-use nd::RawData;
-
-pub trait NdMask<D = Ix2>
-where
-    D: Dimension,
-{
-    type Data: RawData<Elem = bool>;
-}
+use nd::{OwnedRepr, RawData};
 
-pub struct Mask<S, D>(ArrayBase<S, D>)
+pub struct Mask<S = OwnedRepr<bool>, D = Ix2>(ArrayBase<S, D>)
 where
     D: Dimension,
     S: RawData<Elem = bool>;
@@ -32,9 +24,10 @@ where
  ************* Implementations *************
 */
 mod impls {
-    use super::*;
+    use super::Mask;
     use core::borrow::{Borrow, BorrowMut};
     use core::ops::{Deref, DerefMut};
+    use nd::{ArrayBase, Dimension, RawData};
 
     impl<S, D> AsRef<ArrayBase<S, D>> for Mask<S, D>
     where
@@ -97,6 +90,11 @@ mod impls {
             &mut self.0
         }
     }
+}
+
+mod impl_from {
+    use super::Mask;
+    use nd::{ArrayBase, Dimension, RawData};
 
     impl<S, D> From<ArrayBase<S, D>> for Mask<S, D>
     where
diff --git a/core/src/nn/mask/mod.rs b/core/src/nn/mask/mod.rs
new file mode 100644
index 00000000..5a3aaa2b
--- /dev/null
+++ b/core/src/nn/mask/mod.rs
@@ -0,0 +1,29 @@
+/*
+    Appellation: mask <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+pub use self::mask::*;
+
+pub(crate) mod mask;
+
+pub(crate) mod prelude {
+    pub use super::mask::Mask;
+    pub use super::NdMask;
+}
+
+use nd::{ArrayBase, Dimension, Ix2, RawData};
+
+pub trait NdMask<D = Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = bool>;
+}
+
+impl<S, D> NdMask<D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = bool>,
+{
+    type Data = S;
+}
diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs
index 0b8f8707..172b7e7d 100644
--- a/core/src/nn/mod.rs
+++ b/core/src/nn/mod.rs
@@ -2,22 +2,29 @@
    Appellation: nn <mod>
    Contrib: FL03 <jo3mccain@icloud.com>
 */
+#[cfg(any(feature = "alloc", feature = "std"))]
+pub use self::types::*;
 pub use self::{dropout::*, error::ModelError, model::prelude::*};
 
 pub mod dropout;
 pub mod error;
+pub mod mask;
 pub mod model;
 
-
 pub(crate) mod prelude {
     pub use super::dropout::*;
     pub use super::error::*;
+    pub use super::mask::prelude::*;
     pub use super::model::prelude::*;
 }
 
 #[cfg(any(feature = "alloc", feature = "std"))]
-pub type ForwardDyn<T = nd::Array2<f64>, O = T> =
-    crate::rust::Box<dyn crate::Forward<T, Output = O>>;
+mod types {
+    use crate::rust::Box;
+    use nd::prelude::Array2;
+
+    pub type ForwardDyn<T = Array2<f64>, O = T> = Box<dyn crate::Forward<T, Output = O>>;
+}
 
 #[cfg(test)]
 mod tests {}
diff --git a/core/src/nn/model/repo.rs b/core/src/nn/model/repo.rs
index bb0c8b6c..affd401a 100644
--- a/core/src/nn/model/repo.rs
+++ b/core/src/nn/model/repo.rs
@@ -7,4 +7,4 @@
 pub struct ModelRepo {
     pub name: String,
     pub(crate) store: String,
-}
\ No newline at end of file
+}
diff --git a/core/src/traits/arr/reshape.rs b/core/src/traits/arr/reshape.rs
new file mode 100644
index 00000000..7079f130
--- /dev/null
+++ b/core/src/traits/arr/reshape.rs
@@ -0,0 +1,40 @@
+/*
+    Appellation: reshape <module> [traits::arr]
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::prelude::*;
+use nd::{RawData, RawDataClone};
+
+pub trait Unsqueeze {
+    type Output;
+
+    fn unsqueeze(self, axis: usize) -> Self::Output;
+}
+
+/*
+ ************* Implementations *************
+*/
+
+impl<A, S, D> Unsqueeze for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output = ArrayBase<S, D::Larger>;
+
+    fn unsqueeze(self, axis: usize) -> Self::Output {
+        self.insert_axis(Axis(axis))
+    }
+}
+
+impl<'a, A, S, D> Unsqueeze for &'a ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawDataClone<Elem = A>,
+{
+    type Output = ArrayBase<S, D::Larger>;
+
+    fn unsqueeze(self, axis: usize) -> Self::Output {
+        self.clone().insert_axis(Axis(axis))
+    }
+}
diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs
index 9bd5d863..12571e0f 100644
--- a/core/src/traits/arr/tensor.rs
+++ b/core/src/traits/arr/tensor.rs
@@ -3,17 +3,44 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 use super::Dimensional;
+use nd::iter::{Iter, IterMut};
 use nd::prelude::*;
 use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData};
 use num::{One, Zero};
 
-/// This trait describes the basic operations for any n-dimensional container.
-pub trait NdContainer<A = f64, D = Ix2>: Dimensional<D> {
-    type Data;
+pub trait NdArray<A, D>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
 
     fn as_slice(&self) -> &[A];
 
     fn as_mut_slice(&mut self) -> &mut [A];
+
+    fn iter(&self) -> Iter<'_, A, D>;
+
+    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
+
+    fn map<F>(&self, f: F) -> Self
+    where
+        F: FnMut(&A) -> A;
+
+    fn mapv<F>(&mut self, f: F)
+    where
+        A: Clone,
+        F: FnMut(A) -> A;
+}
+
+pub trait NdIter<A, D>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+
+    fn iter(&self) -> Iter<'_, A, D>;
+
+    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
 }
 
 /// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase)
@@ -22,40 +49,39 @@ where
     D: Dimension,
 {
     type Data: RawData<Elem = A>;
-    type Store;
 
     /// Create a new array with the given shape whose elements are set to the default value of the element type.
-    fn default<Sh>(shape: Sh) -> Self::Store
+    fn default<Sh>(shape: Sh) -> Self
     where
         A: Default,
         Sh: ShapeBuilder<Dim = D>,
         Self::Data: DataOwned;
 
-    fn fill<Sh>(shape: Sh, elem: A) -> Self::Store
+    fn fill<Sh>(shape: Sh, elem: A) -> Self
     where
         A: Clone,
         Sh: ShapeBuilder<Dim = D>,
         Self::Data: DataOwned;
 
-    fn ones<Sh>(shape: Sh) -> Self::Store
+    fn ones<Sh>(shape: Sh) -> Self
     where
         A: Clone + One,
         Sh: ShapeBuilder<Dim = D>,
         Self::Data: DataOwned;
 
-    fn zeros<Sh>(shape: Sh) -> Self::Store
+    fn zeros<Sh>(shape: Sh) -> Self
     where
         A: Clone + Zero,
         Sh: ShapeBuilder<Dim = D>,
         Self::Data: DataOwned;
 }
 
-pub trait NdBuilderExt<A = f64, D = Ix2>:
-    Dimensional<D, Pattern = D::Pattern> + NdBuilder<A, D>
+pub trait NdBuilderExt<A = f64, D = Ix2>: NdBuilder<A, D>
 where
     D: Dimension,
+    Self: Dimensional<D, Pattern = D::Pattern> + Sized,
 {
-    fn default_like<Sh>(&self) -> Self::Store
+    fn default_like<Sh>(&self) -> Self
     where
         A: Default,
         Sh: ShapeBuilder<Dim = D>,
@@ -64,7 +90,7 @@ where
         Self::default(self.dim())
     }
 
-    fn fill_like<Sh>(&self, elem: A) -> Self::Store
+    fn fill_like<Sh>(&self, elem: A) -> Self
     where
         A: Clone,
         Sh: ShapeBuilder<Dim = D>,
@@ -73,7 +99,7 @@ where
         Self::fill(self.dim(), elem)
     }
 
-    fn ones_like<Sh>(&self) -> Self::Store
+    fn ones_like<Sh>(&self) -> Self
     where
         A: Clone + One,
         Sh: ShapeBuilder<Dim = D>,
@@ -82,7 +108,7 @@ where
         Self::ones(self.dim())
     }
 
-    fn zeros_like<Sh>(&self) -> Self::Store
+    fn zeros_like<Sh>(&self) -> Self
     where
         A: Clone + Zero,
         Sh: ShapeBuilder<Dim = D>,
@@ -144,6 +170,28 @@ where
         S: DataMut;
 }
 
+pub trait View<A = f64, D = Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+    type Output;
+
+    fn view(&self) -> Self::Output
+    where
+        A: Clone,
+        Self::Data: Data;
+}
+pub trait ViewMut<A = f64, D = Ix2>: View<A, D>
+where
+    D: Dimension,
+{
+    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
+    where
+        A: Clone,
+        Self::Data: DataMut;
+}
+
 /*
  ************* Implementations *************
 */
@@ -153,7 +201,6 @@ where
     S: RawData<Elem = A>,
 {
     type Data = S;
-    type Store = ArrayBase<S, D>;
 
     fn default<Sh>(shape: Sh) -> Self
     where
diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs
index 842d96db..a295d90f 100644
--- a/core/src/traits/misc/container.rs
+++ b/core/src/traits/misc/container.rs
@@ -2,11 +2,21 @@
     Appellation: container <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::traits::Dimensional;
 
 pub trait Container<T> {
-    type Data: Data<Item = T>;
+    type Data: Data<Elem = T>;
 }
 
 pub trait Data {
-    type Item;
+    type Elem;
+}
+
+/// This trait describes the basic operations for any n-dimensional container.
+pub trait NdContainer<A = f64, D = nd::Ix2>: Dimensional<D> {
+    type Data: Data<Elem = A>;
+
+    fn as_slice(&self) -> &[A];
+
+    fn as_mut_slice(&mut self) -> &mut [A];
 }
diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs
index f7a13549..2ae4d47a 100644
--- a/core/src/traits/mod.rs
+++ b/core/src/traits/mod.rs
@@ -13,15 +13,17 @@ pub mod train;
 pub mod arr {
     pub use self::prelude::*;
 
-    pub(crate) mod create;
-    pub(crate) mod misc;
-    pub(crate) mod ops;
-    pub(crate) mod tensor;
+    mod create;
+    mod misc;
+    mod ops;
+    mod reshape;
+    mod tensor;
 
     pub(crate) mod prelude {
         pub use super::create::*;
         pub use super::misc::*;
         pub use super::ops::*;
+        pub use super::reshape::*;
         pub use super::tensor::*;
     }
 }
diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs
index 75cddd52..d6347e49 100644
--- a/core/src/types/mod.rs
+++ b/core/src/types/mod.rs
@@ -6,7 +6,6 @@ pub use self::prelude::*;
 #[cfg(feature = "std")]
 pub use self::std_types::*;
 
-pub mod mask;
 pub mod propagate;
 pub mod shape;
 
@@ -24,7 +23,6 @@ mod std_types {
 }
 
 pub(crate) mod prelude {
-    pub use super::mask::Mask;
     pub use super::propagate::Propagate;
     pub use super::shape::ModelShape;
     #[cfg(feature = "std")]
diff --git a/core/tests/traits.rs b/core/tests/traits.rs
index ab7bd44d..b1038f94 100644
--- a/core/tests/traits.rs
+++ b/core/tests/traits.rs
@@ -5,12 +5,12 @@
 extern crate concision_core as cnc;
 
 use cnc::linarr;
-use cnc::traits::{Affine, AsComplex, MaskFill, Matpow};
 use ndarray::prelude::*;
 use num::Complex;
 
 #[test]
 fn test_affine() {
+    use cnc::traits::Affine;
     let x = array![[0.0, 1.0], [2.0, 3.0]];
 
     let y = x.affine(4.0, -2.0);
@@ -19,6 +19,7 @@ fn test_affine() {
 
 #[test]
 fn test_masked_fill() {
+    use cnc::traits::MaskFill;
     let shape = (2, 2);
     let mask = array![[true, false], [false, true]];
     let arr = linarr::<f64, Ix2>(shape).unwrap();
@@ -28,6 +29,7 @@ fn test_masked_fill() {
 
 #[test]
 fn test_as_complex() {
+    use cnc::traits::AsComplex;
     let x = 1.0;
     let y = x.as_re();
     assert_eq!(y, Complex::new(1.0, 0.0));
@@ -35,8 +37,19 @@ fn test_as_complex() {
 
 #[test]
 fn test_matrix_power() {
+    use cnc::traits::Matpow;
     let x = array![[1.0, 2.0], [3.0, 4.0]];
     assert_eq!(x.pow(0), Array2::<f64>::eye(2));
     assert_eq!(x.pow(1), x);
     assert_eq!(x.pow(2), x.dot(&x));
 }
+
+#[test]
+fn test_unsqueeze() {
+    use cnc::traits::Unsqueeze;
+    let arr = array![1, 2, 3, 4];
+    let a = arr.clone().unsqueeze(0);
+    assert_eq!(a.dim(), (1, 4));
+    let b = arr.unsqueeze(1);
+    assert_eq!(b.dim(), (4, 1));
+}
diff --git a/models/linear/src/mlp/model.rs b/models/linear/src/mlp/model.rs
new file mode 100644
index 00000000..4128cb88
--- /dev/null
+++ b/models/linear/src/mlp/model.rs
@@ -0,0 +1,10 @@
+/*
+    Appellation: model <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub struct Mlp<I, H, O> {
+    input: I,
+    hidden: H,
+    output: O,
+}
\ No newline at end of file
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index fe9a34d8..1deebb6f 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -12,12 +12,22 @@ use num::complex::ComplexFloat;
 
 // #68
 /// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in
-/// [Attention is all you need](https://arxiv.org/abs/1706.03762).
+/// [Attention is all you need](https://arxiv.org/abs/1706.03762). The structure is designed to
+/// be flexible, relying upon the n-dimensional [QkvBase] to store the query, key, and value tensors.
+/// Moreover, the head may be configured with optional dropout and/or masking layers.
 ///
-/// ### Fields
+/// ### Dropout
 ///
-/// [dropout](DropoutLayer): requires the `rand` feature
+/// The [DropoutLayer] is an optional layer applied after the softmax function is applied to the
+/// score. The layer is used to prevent overfitting by randomly setting a fraction of the input
+/// units to zero at each update during training time.
 ///
+/// ### Masking
+///
+/// After computing the dot-product of the query and key tensors, an optional mask may be applied to
+/// the attention score. The mask is used to prevent the model from attending to certain parts of the
+/// input sequence. For example, in the case of a language model, the mask may be used to prevent the
+/// model from attending to the padding tokens.
 pub struct AttentionHead<A = f64, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
@@ -59,21 +69,6 @@ where
     {
         Self::from_params(QkvBase::from_elem(shape, value))
     }
-    /// Setup the [AttentionHead] with a [DropoutLayer]
-    #[cfg(feature = "rand")]
-    pub fn with_dropout(self, dropout: DropoutLayer) -> Self {
-        Self {
-            dropout: Some(dropout),
-            ..self
-        }
-    }
-    /// Setup the [AttentionHead] with a mask
-    pub fn with_mask(self, mask: Array<bool, D>) -> Self {
-        Self {
-            mask: Some(mask),
-            ..self
-        }
-    }
     /// Computes the [Score] using scaled dot-product attention.
     pub fn attention(&self) -> Score<A, D>
     where
@@ -105,6 +100,30 @@ where
     pub fn into_qkv(self) -> (ArrayBase<S, D>, ArrayBase<S, D>, ArrayBase<S, D>) {
         self.params.into_qkv()
     }
+    /// Sets the dropout layer for the [AttentionHead]
+    #[cfg(feature = "rand")]
+    pub fn set_dropout(&mut self, dropout: Option<DropoutLayer>) {
+        self.dropout = dropout;
+    }
+    /// Sets the mask for the [AttentionHead]
+    pub fn set_mask(&mut self, mask: Option<Array<bool, D>>) {
+        self.mask = mask;
+    }
+    /// Configure the [AttentionHead] with a [DropoutLayer]
+    #[cfg(feature = "rand")]
+    pub fn with_dropout(self, dropout: DropoutLayer) -> Self {
+        Self {
+            dropout: Some(dropout),
+            ..self
+        }
+    }
+    /// Consume and store a mask for the [AttentionHead]
+    pub fn with_mask(self, mask: Array<bool, D>) -> Self {
+        Self {
+            mask: Some(mask),
+            ..self
+        }
+    }
 
     getters!(params::<[q, k, v]> => ArrayBase<S, D>);
     ndbuilder!(new::default() where A: Default, S: DataOwned);
@@ -118,6 +137,9 @@ where
     D: Dimension,
     S: RawData<Elem = A>,
 {
+    /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer.
+    /// With the `rand` feature flag disabled, the dropout layer is
+    /// unavailable and returns `None`.
     pub fn dropout(&self) -> Option<&DropoutLayer> {
         self.dropout.as_ref()
     }
@@ -129,6 +151,9 @@ where
     D: Dimension,
     S: RawData<Elem = A>,
 {
+    /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer.
+    /// With the `rand` feature flag disabled, the dropout layer is
+    /// unavailable and returns `None`.
     #[cfg(not(feature = "rand"))]
     pub fn dropout(&self) -> Option<&DropoutLayer> {
         None
diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs
index 6b5c70c9..2fa53c3f 100644
--- a/models/transformers/src/attention/multi/config.rs
+++ b/models/transformers/src/attention/multi/config.rs
@@ -42,4 +42,4 @@ concision::builder! {
         d_model: usize,
         heads: usize,
     }
-}
\ No newline at end of file
+}
diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs
index ea37ffee..be427e4d 100644
--- a/models/transformers/src/attention/multi/multi_head.rs
+++ b/models/transformers/src/attention/multi/multi_head.rs
@@ -7,22 +7,30 @@ use crate::AttentionHead;
 use linear::{Biased, Linear};
 use nd::prelude::*;
 
-
 #[derive(Default)]
-pub struct MultiHeadAttention<A = f64, D = Ix2> where D: Dimension, {
-    pub(crate) attention: Option<AttentionHead<A, D>>,
+pub struct MultiHeadAttention<A = f64, D = Ix2>
+where
+    D: Dimension,
+{
     pub(crate) config: Config,
+    pub(crate) head: Option<AttentionHead<A, D>>,
     pub(crate) linears: Vec<Linear<A, Biased, D>>,
 }
 
-impl<A, D> MultiHeadAttention<A, D> where D: Dimension, {
+impl<A, D> MultiHeadAttention<A, D>
+where
+    D: Dimension,
+{
+    pub const fn config(&self) -> &Config {
+        &self.config
+    }
 
     pub fn head(&self) -> Option<&AttentionHead<A, D>> {
-        self.attention.as_ref()
+        self.head.as_ref()
     }
-    
-    pub const fn config(&self) -> &Config {
-        &self.config
+
+    pub fn head_mut(&mut self) -> Option<&mut AttentionHead<A, D>> {
+        self.head.as_mut()
     }
 
     pub fn linears(&self) -> &[Linear<A, Biased, D>] {
@@ -31,14 +39,17 @@ impl<A, D> MultiHeadAttention<A, D> where D: Dimension, {
 }
 
 impl<A> MultiHeadAttention<A, Ix2> {
-    pub fn std(config: Config) -> Self where A: Clone + Default {
+    pub fn std(config: Config) -> Self
+    where
+        A: Clone + Default,
+    {
         let linears = (0..4)
             .map(|_| Linear::from_features(config.d_model(), config.d_model()))
             .collect();
         Self {
-            attention: None,
+            head: None,
             config,
-            linears
+            linears,
         }
     }
-}
\ No newline at end of file
+}
diff --git a/models/transformers/src/codec/model.rs b/models/transformers/src/codec/model.rs
index 494c0a0e..470938a5 100644
--- a/models/transformers/src/codec/model.rs
+++ b/models/transformers/src/codec/model.rs
@@ -24,11 +24,13 @@ impl Codec {
     );
 }
 
-builder!(CodecBuilder::<Codec> {
-    ctx: Context,
-    decoder: Decoder,
-    encoder: Encoder,
-});
+builder! {
+    CodecBuilder(Codec) {
+        ctx: Context,
+        decoder: Decoder,
+        encoder: Encoder,
+    }
+}
 
 #[derive(Default)]
 pub struct Generator {
diff --git a/models/transformers/src/impls/impl_init.rs b/models/transformers/src/impls/impl_init.rs
index cfde2c15..1ed7effd 100644
--- a/models/transformers/src/impls/impl_init.rs
+++ b/models/transformers/src/impls/impl_init.rs
@@ -1,5 +1,8 @@
+/*
+    Appellation: init <impls>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
 #![cfg(feature = "rand")]
-
 use crate::QkvBase;
 use concision::Initialize;
 use concision::init::rand::Rng;
diff --git a/models/transformers/src/impls/impl_params.rs b/models/transformers/src/impls/impl_params.rs
index 9736c1b0..2ea7dec4 100644
--- a/models/transformers/src/impls/impl_params.rs
+++ b/models/transformers/src/impls/impl_params.rs
@@ -6,10 +6,12 @@ use crate::params::QkvBase;
 use nd::prelude::*;
 use nd::{Data, DataOwned, RawDataClone};
 
-impl<S, D> Clone for QkvBase<S, D>
+pub(crate) type ThreeTuple<A, B = A, C = B> = (A, B, C);
+
+impl<A, S, D> Clone for QkvBase<S, D>
 where
     D: Dimension,
-    S: RawDataClone,
+    S: RawDataClone<Elem = A>,
 {
     fn clone(&self) -> Self {
         Self {
@@ -20,18 +22,18 @@ where
     }
 }
 
-impl<S, D> Copy for QkvBase<S, D>
+impl<A, S, D> Copy for QkvBase<S, D>
 where
     D: Copy + Dimension,
-    S: Copy + RawDataClone,
+    S: Copy + RawDataClone<Elem = A>,
 {
 }
 
-impl<S, D> Default for QkvBase<S, D>
+impl<A, S, D> Default for QkvBase<S, D>
 where
+    A: Default,
     D: Dimension,
-    S: DataOwned,
-    S::Elem: Default,
+    S: DataOwned<Elem = A>,
 {
     fn default() -> Self {
         Self {
@@ -49,7 +51,7 @@ where
     S: Data<Elem = A>,
 {
     fn eq(&self, other: &Self) -> bool {
-        self.q == *other.q() && self.k == *other.k() && self.v == *other.v()
+        self.q() == other.q() && self.k() == other.k() && self.v() == other.v()
     }
 }
 
@@ -64,6 +66,21 @@ where
     ArrayBase<S, D>: PartialEq<ArrayBase<S2, D2>>,
 {
     fn eq(&self, other: &ArrayBase<S2, D2>) -> bool {
-        self.q == *other && self.k == *other && self.v == *other
+        self.q() == other && self.k() == other && self.v() == other
+    }
+}
+
+impl<A, B, S, D, S2, D2> PartialEq<ThreeTuple<ArrayBase<S2, D2>>> for QkvBase<S, D>
+where
+    A: PartialEq,
+    B: PartialEq,
+    D: Dimension,
+    S: Data<Elem = A>,
+    S2: Data<Elem = B>,
+    D2: Dimension,
+    ArrayBase<S, D>: PartialEq<ArrayBase<S2, D2>>,
+{
+    fn eq(&self, (q, k, v): &ThreeTuple<ArrayBase<S2, D2>>) -> bool {
+        self.q() == q && self.k() == k && self.v() == v
     }
 }
diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs
index 9e512f24..89cc41f1 100644
--- a/models/transformers/src/lib.rs
+++ b/models/transformers/src/lib.rs
@@ -17,7 +17,7 @@ extern crate concision_core as concision;
 extern crate concision_linear as linear;
 extern crate ndarray as nd;
 
-pub use self::attention::AttentionHead;
+pub use self::attention::{scaled_dot_product_attention, AttentionHead};
 pub use self::params::*;
 pub use self::primitives::*;
 pub use self::transformer::Transformer;
diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs
index 87208081..f59ee6eb 100644
--- a/models/transformers/src/params/store.rs
+++ b/models/transformers/src/params/store.rs
@@ -2,8 +2,12 @@
     Appellation: params <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use crate::attention::{Score, _attention};
+use concision::nn::DropoutLayer;
 use concision::{dimensional, getters};
+use nd::linalg::Dot;
 use nd::*;
+use num::complex::ComplexFloat;
 use num::traits::{One, Zero};
 
 /// [QkvBase] is a container for the query, key, and value arrays used in the
@@ -57,16 +61,6 @@ where
         (self.q.view(), self.k.view(), self.v.view())
     }
 
-    // pub fn attention(&self, mask: Option<Array<bool, D>>) -> Array<A, D>
-    // where
-    //     A: ComplexFloat,
-    //     S: Data,
-    // {
-    //     let (q, k, v) = self.as_qkv();
-    //     let (q, k, v) = _attention(q, k, v, mask);
-    //     q.dot(&v)
-    // }
-
     /// Consumes the store and returns a three-tuple consisting of the query, key, and value arrays respectively.
     pub fn into_qkv(self) -> (ArrayBase<S, D>, ArrayBase<S, D>, ArrayBase<S, D>) {
         (self.q, self.k, self.v)
@@ -93,3 +87,44 @@ where
     qkv_view!(view::<'a, ViewRepr>(&self) where S: Data);
     qkv_view!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut);
 }
+
+#[cfg(not(feature = "rand"))]
+impl<A, S, D> QkvBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+    A: Clone,
+{
+    /// Computes the [Score] using scaled dot-product attention.
+    pub fn attention(&self, dropout: Option<f64>, mask: Option<&Array<bool, D>>) -> Score<A, D>
+    where
+        A: ComplexFloat + ScalarOperand,
+        S: Data,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+    {
+        let (q, k, v) = self.qkv();
+        _attention(q, k, v, mask, None)
+    }
+}
+
+#[cfg(feature = "rand")]
+impl<A, S, D> QkvBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+    A: Clone,
+{
+    /// Computes the [Score] using scaled dot-product attention.
+    pub fn attention(&self, dropout: Option<f64>, mask: Option<&Array<bool, D>>) -> Score<A, D>
+    where
+        A: ComplexFloat + ScalarOperand,
+        S: Data,
+        ArrayBase<S, D>: for<'a> Dot<ArrayView<'a, A, D>, Output = Array<A, D>>,
+        Array<A, D>: Dot<ArrayBase<S, D>, Output = Array<A, D>>,
+    {
+        let dropout = dropout.map(DropoutLayer::new);
+        let (q, k, v) = self.qkv();
+        _attention(q, k, v, mask, dropout.as_ref())
+    }
+}

From ead13404d3cc23d6c4f08c338529a5c03ed6ac10 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Fri, 24 May 2024 09:23:37 -0500
Subject: [PATCH 16/19] core: extend Mask API, add DeepNeuralNetwork trait, rename random test to init

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/Cargo.toml                   |  13 ++--
 core/src/nn/mask/mask.rs          | 122 +++++++++++++++++++++++++++++-
 core/src/nn/model.rs              |  29 +++++++
 core/tests/{random.rs => init.rs} |   0
 4 files changed, 153 insertions(+), 11 deletions(-)
 rename core/tests/{random.rs => init.rs} (100%)

diff --git a/core/Cargo.toml b/core/Cargo.toml
index a4d06c91..c8f85779 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -110,15 +110,17 @@ name = "fft"
 required-features = ["approx"]
 
 [[test]]
-name = "nn"
-
-[[test]]
-name = "random"
+name = "init"
 required-features = ["rand", "std"]
 
+[[test]]
+name = "nn"
 
 [build-dependencies]
 
+[dev-dependencies]
+lazy_static.workspace = true
+
 [dependencies]
 ndarray.workspace = true
 num.workspace = true
@@ -160,9 +162,6 @@ default-features = false
 features = ["v5", "v8"]
 version = "1"
 
-[dev-dependencies]
-lazy_static = "1"
-
 [package.metadata.docs.rs]
 all-features = true
 rustc-args = ["--cfg", "docsrs"]
diff --git a/core/src/nn/mask/mask.rs b/core/src/nn/mask/mask.rs
index 263cf016..083fc0b9 100644
--- a/core/src/nn/mask/mask.rs
+++ b/core/src/nn/mask/mask.rs
@@ -2,8 +2,9 @@
     Appellation: mask <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
+use nd::iter::{Iter, IterMut};
 use nd::prelude::*;
-use nd::{OwnedRepr, RawData};
+use nd::{Data, DataMut, OwnedRepr, RawData, RawDataClone};
 
 pub struct Mask<S = OwnedRepr<bool>, D = Ix2>(ArrayBase<S, D>)
 where
@@ -15,9 +16,98 @@ where
     D: Dimension,
     S: RawData<Elem = bool>,
 {
-    pub fn new(data: ArrayBase<S, D>) -> Self {
+    pub fn from_arr(data: ArrayBase<S, D>) -> Self {
         Self(data)
     }
+
+    pub fn apply<A, T, F>(&mut self, data: &ArrayBase<T, D>, fill: A) -> ArrayBase<T, D>
+    where
+        A: Clone,
+        S: Data,
+        T: DataMut<Elem = A> + RawDataClone,
+    {
+        let mut res = data.clone();
+        res.zip_mut_with(self.as_mut(), |x, &m| {
+            if m {
+                *x = fill.clone();
+            }
+        });
+        res
+    }
+
+    pub fn mask_inplace<'a, A, T, F>(&mut self, data: &'a mut ArrayBase<T, D>, fill: A) -> &'a mut ArrayBase<T, D>
+    where
+        A: Clone,
+        S: Data,
+        T: DataMut<Elem = A>,
+    {
+        data.zip_mut_with(&mut self.0, |x, &m| {
+            if m {
+                *x = fill.clone();
+            }
+        });
+        data
+    }
+
+    pub fn as_slice(&self) -> &[bool]
+    where
+        S: Data,
+    {
+        self.get().as_slice().unwrap()
+    }
+
+    pub fn as_mut_slice(&mut self) -> &mut [bool]
+    where
+        S: DataMut,
+    {
+        self.get_mut().as_slice_mut().unwrap()
+    }
+
+    pub fn dim(&self) -> D::Pattern {
+        self.get().dim()
+    }
+
+    pub fn iter(&self) -> Iter<'_, bool, D>
+    where
+        S: Data,
+    {
+        self.get().iter()
+    }
+
+    pub fn iter_mut(&mut self) -> IterMut<'_, bool, D>
+    where
+        S: DataMut,
+    {
+        self.get_mut().iter_mut()
+    }
+
+    pub fn get(&self) -> &ArrayBase<S, D> {
+        &self.0
+    }
+
+    pub fn get_mut(&mut self) -> &mut ArrayBase<S, D> {
+        &mut self.0
+    }
+
+    pub fn into_inner(self) -> ArrayBase<S, D> {
+        self.0
+    }
+
+    pub fn ndim(&self) -> usize {
+        self.get().ndim()
+    }
+
+    pub fn raw_dim(&self) -> D {
+        self.get().raw_dim()
+    }
+
+    pub fn set(&mut self, data: ArrayBase<S, D>) {
+        self.0 = data;
+    }
+
+    pub fn shape(&self) -> D {
+        self.get().raw_dim()
+    }
 }
 
 /*
@@ -26,8 +116,8 @@ where
 mod impls {
     use super::Mask;
     use core::borrow::{Borrow, BorrowMut};
-    use core::ops::{Deref, DerefMut};
-    use nd::{ArrayBase, Dimension, RawData};
+    use core::ops::{Deref, DerefMut, Index, IndexMut};
+    use nd::{ArrayBase, Data, DataMut, Dimension, NdIndex, RawData};
 
     impl<S, D> AsRef<ArrayBase<S, D>> for Mask<S, D>
     where
@@ -90,6 +180,30 @@ mod impls {
             &mut self.0
         }
     }
+
+    impl<S, D, I> Index<I> for Mask<S, D>
+    where
+        D: Dimension,
+        I: NdIndex<D>,
+        S: Data<Elem = bool>,
+    {
+        type Output = <ArrayBase<S, D> as Index<I>>::Output;
+
+        fn index(&self, index: I) -> &Self::Output {
+            &self.0[index]
+        }
+    }
+
+    impl<S, D, I> IndexMut<I> for Mask<S, D>
+    where
+        D: Dimension,
+        I: NdIndex<D>,
+        S: DataMut<Elem = bool>,
+    {
+        fn index_mut(&mut self, index: I) -> &mut Self::Output {
+            &mut self.0[index]
+        }
+    }
 }
 
 mod impl_from {
diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs
index d989cb9b..47d7f485 100644
--- a/core/src/nn/model.rs
+++ b/core/src/nn/model.rs
@@ -28,3 +28,32 @@ where
 
     fn context(&self) -> Self::Ctx;
 }
+
+/// This trait describes any neural networks or models that 
+/// adhears to the deep netural network architecture.
+/// This design considers a single input and output layer, while
+/// allowing for any number of hidden layers to be persisted.
+/// 
+/// The `HIDDEN` constant is used to specify the number of hidden layers
+/// and is used to compute the total number of layers (HIDDEN + 2)
+pub trait DeepNeuralNetwork<S, T>: Forward<S, Output = T> {
+    const HIDDEN: Option<usize> = None;
+
+    type Input: Forward<S, Output = T>;
+    type Hidden: Forward<T, Output = T>; // The type of `hidden` layers; all hidden layers implement the same activation function
+    type Out: Forward<T, Output = T>;
+
+    fn input(&self) -> &Self::Input;
+
+    fn hidden(&self) -> &[Self::Hidden];
+
+    fn output(&self) -> &Self::Out;
+
+    fn nlayers(&self) -> usize  {
+        self.nhidden() + 2
+    }
+
+    fn nhidden(&self) -> usize {
+        Self::HIDDEN.unwrap_or_else(|| self.hidden().len())
+    }
+}
\ No newline at end of file
diff --git a/core/tests/random.rs b/core/tests/init.rs
similarity index 100%
rename from core/tests/random.rs
rename to core/tests/init.rs

From 417beb793f0e096212fd3823c43766597808bced Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Fri, 24 May 2024 10:51:47 -0500
Subject: [PATCH 17/19] core: add math arith/stats modules and sequential traits; update linear and attention models

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/math/arith.rs                        |  68 ++++++++
 core/src/math/mod.rs                          |   3 +
 core/src/math/stats/mod.rs                    |  13 ++
 core/src/math/stats/summary.rs                | 151 ++++++++++++++++++
 core/src/math/traits.rs                       |  56 +------
 core/src/nn/mask/mask.rs                      |   6 +-
 core/src/nn/model.rs                          |   8 +-
 core/src/traits/misc/container.rs             |  17 +-
 core/src/traits/misc/sequential.rs            |  63 ++++++++
 core/src/traits/mod.rs                        |   4 +
 models/linear/src/impls/impl_rand.rs          |  58 +++----
 models/linear/src/impls/model/impl_linear.rs  |  23 +--
 models/linear/src/model/layer.rs              |  70 +++++---
 models/transformers/src/attention/head.rs     |  13 ++
 .../src/attention/multi/config.rs             |   6 +-
 .../src/attention/multi/multi_head.rs         |  50 ++++--
 16 files changed, 471 insertions(+), 138 deletions(-)
 create mode 100644 core/src/math/arith.rs
 create mode 100644 core/src/math/stats/mod.rs
 create mode 100644 core/src/math/stats/summary.rs
 create mode 100644 core/src/traits/misc/sequential.rs

diff --git a/core/src/math/arith.rs b/core/src/math/arith.rs
new file mode 100644
index 00000000..04c88c1a
--- /dev/null
+++ b/core/src/math/arith.rs
@@ -0,0 +1,68 @@
+/*
+    Appellation: arith <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use num::integer::Roots;
+use num::traits::FromPrimitive;
+
+/// Real `n`-th roots; `sqrt` and `cbrt` are provided in terms of [`Root::nth_root`].
+pub trait Root {
+    type Output;
+
+    /// Returns the `n`-th root of `self`.
+    fn nth_root(&self, n: u32) -> Self::Output;
+
+    /// Returns the square root, i.e. `self.nth_root(2)`.
+    fn sqrt(&self) -> Self::Output {
+        self.nth_root(2)
+    }
+
+    /// Returns the cube root, i.e. `self.nth_root(3)`.
+    fn cbrt(&self) -> Self::Output {
+        self.nth_root(3)
+    }
+}
+
+macro_rules! impl_root {
+    (float $($T:ty),* $(,)?) => {
+        $(
+            impl_root!(@float $T);
+        )*
+    };
+    ($($T:ty),* $(,)?) => {
+        $(
+            impl_root!(@impl $T);
+        )*
+    };
+
+    (@impl $T:ty) => {
+        impl Root for $T {
+            type Output = $T;
+
+            fn nth_root(&self, n: u32) -> Self::Output {
+                Roots::nth_root(self, n)
+            }
+        }
+    };
+    (@float $T:ty) => {
+        impl Root for $T {
+            type Output = $T;
+
+            fn nth_root(&self, n: u32) -> Self::Output {
+                let exp = <$T>::from_u32(n).unwrap().recip();
+                // `powf` returns NaN for every negative base, yet odd roots of
+                // negative values are real (cf. the inherent `cbrt`): take the
+                // root of the magnitude and negate it.
+                if *self < 0.0 && n % 2 == 1 {
+                    -(-*self).powf(exp)
+                } else {
+                    self.powf(exp)
+                }
+            }
+        }
+    };
+}
+
+impl_root!(float f32, f64);
+impl_root!(i8, i16, i32, i64, i128, isize);
+impl_root!(u8, u16, u32, u64, u128, usize);
diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs
index d8084442..da193f09 100644
--- a/core/src/math/mod.rs
+++ b/core/src/math/mod.rs
@@ -9,8 +9,11 @@
 //! as well as the `ndarray` crate.
 pub use self::traits::*;
 
+pub mod arith;
+pub mod stats;
 pub mod traits;
 
 pub(crate) mod prelude {
+    pub use super::stats::prelude::*;
     pub use super::traits::*;
 }
diff --git a/core/src/math/stats/mod.rs b/core/src/math/stats/mod.rs
new file mode 100644
index 00000000..7a0a3892
--- /dev/null
+++ b/core/src/math/stats/mod.rs
@@ -0,0 +1,13 @@
+/*
+    Appellation: stats <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+//! # Statistics
+//! Exposes [`SummaryStatistics`]: sum, product, mean, variance and standard deviation.
+pub use self::summary::*;
+
+mod summary;
+
+pub(crate) mod prelude {
+    pub use super::summary::*;
+}
diff --git a/core/src/math/stats/summary.rs b/core/src/math/stats/summary.rs
new file mode 100644
index 00000000..35b5821d
--- /dev/null
+++ b/core/src/math/stats/summary.rs
@@ -0,0 +1,151 @@
+/*
+    Appellation: summary <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use crate::math::arith::Root;
+use core::iter::{Product, Sum};
+use nd::{ArrayBase, Data, Dimension};
+use num::traits::{FromPrimitive, Num, NumOps, Pow};
+
+/// This trait describes the fundamental methods of summary statistics.
+/// These include the mean, standard deviation, variance, and more.
+pub trait SummaryStatistics
+where
+    Self::Item: FromPrimitive,
+    Self::Output: NumOps<Self::Item, Self::Output>,
+{
+    type Item;
+    type Output;
+
+    fn elems(&self) -> Self::Item {
+        Self::Item::from_usize(self.len()).unwrap()
+    }
+
+    fn len(&self) -> usize;
+
+    fn mean(&self) -> Self::Output {
+        self.sum() / self.elems()
+    }
+
+    fn product(&self) -> Self::Output;
+
+    fn sum(&self) -> Self::Output;
+
+    fn std(&self) -> Self::Output;
+
+    fn var(&self) -> Self::Output;
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<'a, T, I> SummaryStatistics for &'a I
+where
+    I: Clone + ExactSizeIterator<Item = T>,
+    T: Copy + FromPrimitive + Num + Pow<i32, Output = T> + Product + Root<Output = T> + Sum,
+{
+    type Item = T;
+    type Output = T;
+
+    fn len(&self) -> usize {
+        ExactSizeIterator::len(*self)
+    }
+
+    fn product(&self) -> Self::Output {
+        (*self).clone().product()
+    }
+
+    fn sum(&self) -> Self::Output {
+        (*self).clone().sum()
+    }
+
+    fn std(&self) -> Self::Output {
+        // The standard deviation is the square root of the (population) variance,
+        // so reuse `var` instead of repeating its accumulation here.
+        self.var().sqrt()
+    }
+
+    fn var(&self) -> Self::Output {
+        let mean = self.mean();
+        let sum = (*self).clone().map(|x| (x - mean).pow(2)).sum::<T>();
+        sum / self.elems()
+    }
+}
+
+macro_rules! impl_summary {
+    ($($T:ty),* $(,)?) => {
+        $(
+            impl_summary!(@impl $T);
+        )*
+    };
+    (@impl $T:ty) => {
+
+        impl<T> SummaryStatistics for $T
+        where
+            T: Copy + FromPrimitive + Num + Pow<i32, Output = T> + Product + Root<Output = T> + Sum,
+        {
+            type Item = T;
+            type Output = T;
+
+            fn len(&self) -> usize {
+                self.len()
+            }
+
+            fn product(&self) -> Self::Output {
+                self.iter().copied().product::<T>()
+            }
+
+            fn sum(&self) -> Self::Output {
+                self.iter().copied().sum::<T>()
+            }
+
+            fn std(&self) -> Self::Output {
+                // The standard deviation is the square root of the (population)
+                // variance, so reuse `var` instead of repeating its accumulation.
+                self.var().sqrt()
+            }
+
+            fn var(&self) -> Self::Output {
+                let mean = self.mean();
+                let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::<T>();
+                sum / self.elems()
+            }
+        }
+    };
+}
+
+impl_summary!(Vec<T>, [T]);
+
+impl<A, S, D> SummaryStatistics for ArrayBase<S, D>
+where
+    A: Copy + FromPrimitive + Num + Pow<i32, Output = A> + Product + Root<Output = A> + Sum,
+    D: Dimension,
+    S: Data<Elem = A>,
+{
+    type Item = A;
+    type Output = A;
+
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    fn product(&self) -> Self::Output {
+        self.iter().copied().product::<A>()
+    }
+
+    fn sum(&self) -> Self::Output {
+        self.iter().copied().sum::<A>()
+    }
+
+    fn std(&self) -> Self::Output {
+        // Square root of the population variance. The call is fully qualified because
+        // `ArrayBase` has an inherent `var` (taking a `ddof` argument) that would win.
+        SummaryStatistics::var(self).sqrt()
+    }
+
+    fn var(&self) -> Self::Output {
+        let mean = self.mean().unwrap_or_else(A::zero);
+        let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::<A>();
+        sum / self.elems()
+    }
+}
diff --git a/core/src/math/traits.rs b/core/src/math/traits.rs
index 51e3c1d6..d71d433d 100644
--- a/core/src/math/traits.rs
+++ b/core/src/math/traits.rs
@@ -2,63 +2,9 @@
     Appellation: traits <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use core::iter::Sum;
 use nd::{Array, ArrayBase, Data, Dimension};
 use num::complex::{Complex, ComplexFloat};
-use num::traits::{FromPrimitive, Num, Signed};
-
-pub trait IterStats<T>
-where
-    T: FromPrimitive,
-{
-    type Output;
-
-    fn elems(&self) -> T;
-
-    fn mean(&self) -> Self::Output;
-
-    fn std(&self) -> Self::Output
-    where
-        T: ComplexFloat;
-
-    fn var(&self) -> Self::Output
-    where
-        T: ComplexFloat;
-}
-
-impl<T, I> IterStats<T> for I
-where
-    I: Clone + ExactSizeIterator<Item = T>,
-    T: Clone + FromPrimitive + Num + Sum,
-{
-    type Output = T;
-
-    fn elems(&self) -> T {
-        T::from_usize(self.len()).unwrap()
-    }
-
-    fn mean(&self) -> Self::Output {
-        self.clone().sum::<T>() / self.elems()
-    }
-
-    fn std(&self) -> Self::Output
-    where
-        T: ComplexFloat,
-    {
-        let mean = self.mean();
-        let sum = self.clone().map(|x| (x - mean).powi(2)).sum::<T>();
-        (sum / self.elems()).sqrt()
-    }
-
-    fn var(&self) -> Self::Output
-    where
-        T: ComplexFloat,
-    {
-        let mean = self.mean();
-        let sum = self.clone().map(|x| (x - mean).powi(2)).sum::<T>();
-        sum / self.elems()
-    }
-}
+use num::traits::Signed;
 
 unary!(
     Abs::abs(self),
diff --git a/core/src/nn/mask/mask.rs b/core/src/nn/mask/mask.rs
index 083fc0b9..94da711a 100644
--- a/core/src/nn/mask/mask.rs
+++ b/core/src/nn/mask/mask.rs
@@ -35,7 +35,11 @@ where
         res
     }
 
-    pub fn mask_inplace<'a, A, T, F>(&mut self, data: &'a mut ArrayBase<T, D>, fill: A) -> &'a mut ArrayBase<T, D>
+    pub fn mask_inplace<'a, A, T, F>(
+        &mut self,
+        data: &'a mut ArrayBase<T, D>,
+        fill: A,
+    ) -> &'a mut ArrayBase<T, D>
     where
         A: Clone,
         S: Data,
diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs
index 47d7f485..316d03e7 100644
--- a/core/src/nn/model.rs
+++ b/core/src/nn/model.rs
@@ -29,11 +29,11 @@ where
     fn context(&self) -> Self::Ctx;
 }
 
-/// This trait describes any neural networks or models that 
+/// This trait describes any neural networks or models that
 /// adhears to the deep netural network architecture.
 /// This design considers a single input and output layer, while
 /// allowing for any number of hidden layers to be persisted.
-/// 
+///
 /// The `HIDDEN` constant is used to specify the number of hidden layers
 /// and is used to compute the total number of layers (HIDDEN + 2)
 pub trait DeepNeuralNetwork<S, T>: Forward<S, Output = T> {
@@ -49,11 +49,11 @@ pub trait DeepNeuralNetwork<S, T>: Forward<S, Output = T> {
 
     fn output(&self) -> &Self::Out;
 
-    fn nlayers(&self) -> usize  {
+    fn nlayers(&self) -> usize {
         self.nhidden() + 2
     }
 
     fn nhidden(&self) -> usize {
         Self::HIDDEN.unwrap_or_else(|| self.hidden().len())
     }
-}
\ No newline at end of file
+}
diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs
index a295d90f..5243fc79 100644
--- a/core/src/traits/misc/container.rs
+++ b/core/src/traits/misc/container.rs
@@ -5,18 +5,29 @@
 use crate::traits::Dimensional;
 
 pub trait Container<T> {
-    type Data: Data<Elem = T>;
+    type Data: ContainerData<Elem = T>;
 }
 
-pub trait Data {
+pub trait ContainerData {
     type Elem;
 }
 
 /// This trait describes the basic operations for any n-dimensional container.
 pub trait NdContainer<A = f64, D = nd::Ix2>: Dimensional<D> {
-    type Data: Data<Elem = A>;
+    type Data: ContainerData<Elem = A>;
 
     fn as_slice(&self) -> &[A];
 
     fn as_mut_slice(&mut self) -> &mut [A];
 }
+
+/*
+ ************* Implementations *************
+*/
+impl<T> ContainerData for Vec<T> {
+    type Elem = T;
+}
+
+impl<S, T> Container<T> for Vec<S> {
+    type Data = Vec<T>;
+}
diff --git a/core/src/traits/misc/sequential.rs b/core/src/traits/misc/sequential.rs
new file mode 100644
index 00000000..8e92192e
--- /dev/null
+++ b/core/src/traits/misc/sequential.rs
@@ -0,0 +1,63 @@
+/*
+    Appellation: sequential <module> [traits::misc]
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use num::traits::FromPrimitive;
+
+/// A trait for sequential data structures;
+/// This trait is implemented for iterators that have a known length.
+pub trait Sequence<T> {
+    const LENGTH: Option<usize> = None;
+
+    fn len(&self) -> usize;
+
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    fn elems(&self) -> T
+    where
+        T: FromPrimitive,
+    {
+        T::from_usize(self.len()).unwrap()
+    }
+}
+
+pub trait SequenceIter {
+    type Item;
+
+    fn len(&self) -> usize;
+}
+/*
+ ************* Implementations *************
+*/
+impl<T, I> SequenceIter for I
+where
+    I: ExactSizeIterator<Item = T>,
+{
+    type Item = T;
+
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
+
+impl<T> Sequence<T> for Vec<T> {
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
+
+impl<T> Sequence<T> for [T] {
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
+
+impl<T, const N: usize> Sequence<T> for [T; N] {
+    const LENGTH: Option<usize> = Some(N);
+
+    fn len(&self) -> usize {
+        N
+    }
+}
diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs
index 2ae4d47a..5c48f8af 100644
--- a/core/src/traits/mod.rs
+++ b/core/src/traits/mod.rs
@@ -32,12 +32,16 @@ pub mod misc {
     pub mod adjust;
     #[doc(hidden)]
     pub mod container;
+    #[doc(hidden)]
+    pub mod sequential;
+    #[doc(hidden)]
     pub mod store;
     pub mod toggle;
 
     pub(crate) mod prelude {
         pub use super::adjust::*;
         pub use super::container::*;
+        pub use super::sequential::*;
         pub use super::store::*;
         pub use super::toggle::*;
     }
diff --git a/models/linear/src/impls/impl_rand.rs b/models/linear/src/impls/impl_rand.rs
index f2e602e1..28bb0126 100644
--- a/models/linear/src/impls/impl_rand.rs
+++ b/models/linear/src/impls/impl_rand.rs
@@ -4,7 +4,7 @@
 */
 #![cfg(feature = "rand")]
 
-use crate::params::{ParamMode, ParamsBase};
+use crate::params::{LinearParams, ParamMode, ParamsBase};
 use crate::{bias_dim, Linear};
 use concision::init::rand::Rng;
 use concision::init::rand_distr::{uniform::SampleUniform, Distribution, StandardNormal};
@@ -12,30 +12,32 @@ use concision::{Initialize, InitializeExt};
 use nd::*;
 use num::Float;
 
-impl<A, D, K> Linear<A, K, D>
+impl<A, S, D, K> Linear<A, K, D, S>
 where
     A: Clone + Float,
     D: RemoveAxis,
     K: ParamMode,
+    S: DataOwned<Elem = A>,
     StandardNormal: Distribution<A>,
 {
-    pub fn uniform(self) -> Self
+    pub fn uniform(self) -> Linear<A, K, D, OwnedRepr<A>>
     where
         A: SampleUniform,
         <A as SampleUniform>::Sampler: Clone,
     {
-        Self {
+        Linear {
+            config: self.config,
             params: self.params.uniform(),
-            ..self
         }
     }
 }
 
-impl<A, K, D> crate::LinearParams<A, K, D>
+impl<A, S, D, K> ParamsBase<S, D, K>
 where
     A: Clone + Float + SampleUniform,
     D: RemoveAxis,
     K: ParamMode,
+    S: RawData<Elem = A>,
     StandardNormal: Distribution<A>,
     <A as SampleUniform>::Sampler: Clone,
 {
@@ -48,42 +50,42 @@ where
         self.dk().sqrt()
     }
 
-    pub fn uniform(self) -> Self {
+    pub fn uniform(self) -> LinearParams<A, K, D>
+    where
+        S: DataOwned,
+    {
         let dk = self.dk_sqrt();
         self.uniform_between(-dk, dk)
     }
 
-    pub fn uniform_between(self, low: A, high: A) -> Self {
-        if self.is_biased() && !self.bias.is_some() {
+    pub fn uniform_between(self, low: A, high: A) -> LinearParams<A, K, D>
+    where
+        S: DataOwned,
+    {
+        let weight = Array::uniform_between(self.raw_dim(), low, high);
+        let bias = if self.is_biased() && !self.bias.is_some() {
             let b_dim = bias_dim(self.raw_dim());
-            Self {
-                bias: Some(Array::uniform_between(b_dim, low, high)),
-                weight: Array::uniform_between(self.raw_dim(), low, high),
-                _mode: self._mode,
-            }
+            Some(Array::uniform_between(b_dim, low, high))
         } else if !self.is_biased() && self.bias.is_some() {
-            Self {
-                bias: None,
-                weight: Array::uniform_between(self.raw_dim(), low, high),
-                _mode: self._mode,
-            }
+            None
         } else {
-            Self {
-                bias: self
-                    .bias
-                    .as_ref()
-                    .map(|b| Array::uniform_between(b.raw_dim(), low, high)),
-                weight: Array::uniform_between(self.raw_dim(), low, high),
-                _mode: self._mode,
-            }
+            self.bias
+                .as_ref()
+                .map(|b| Array::uniform_between(b.raw_dim(), low, high))
+        };
+        LinearParams {
+            weight,
+            bias,
+            _mode: core::marker::PhantomData::<K>,
         }
     }
 }
 
-impl<A, K, D> Initialize<A, D> for Linear<A, K, D>
+impl<A, S, D, K> Initialize<A, D> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
     K: ParamMode,
+    S: DataOwned<Elem = A>,
     StandardNormal: Distribution<A>,
 {
     type Data = OwnedRepr<A>;
diff --git a/models/linear/src/impls/model/impl_linear.rs b/models/linear/src/impls/model/impl_linear.rs
index 03c97f82..49ee85ba 100644
--- a/models/linear/src/impls/model/impl_linear.rs
+++ b/models/linear/src/impls/model/impl_linear.rs
@@ -2,47 +2,52 @@
     Appellation: impl_linear <impls>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-use crate::{Config, Linear, LinearParams, ParamMode};
+use crate::{Config, Linear, ParamMode, ParamsBase};
 use core::borrow::{Borrow, BorrowMut};
-use nd::RemoveAxis;
+use nd::{DataOwned, Ix2, RawData, RemoveAxis};
 
-impl<A, K> Linear<A, K>
+impl<A, K, S> Linear<A, K, Ix2, S>
 where
     K: ParamMode,
+    S: RawData<Elem = A>,
 {
     pub fn from_features(inputs: usize, outputs: usize) -> Self
     where
         A: Clone + Default,
+        S: DataOwned,
     {
         let config = Config::std(inputs, outputs);
-        let params = LinearParams::new(config.dim());
+        let params = ParamsBase::new(config.dim());
         Self { config, params }
     }
 }
 
-impl<A, K, D> Borrow<Config<K, D>> for Linear<A, K, D>
+impl<A, S, D, K> Borrow<Config<K, D>> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
     fn borrow(&self) -> &Config<K, D> {
         &self.config
     }
 }
 
-impl<A, K, D> Borrow<LinearParams<A, K, D>> for Linear<A, K, D>
+impl<A, S, D, K> Borrow<ParamsBase<S, D, K>> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
-    fn borrow(&self) -> &LinearParams<A, K, D> {
+    fn borrow(&self) -> &ParamsBase<S, D, K> {
         &self.params
     }
 }
 
-impl<A, K, D> BorrowMut<LinearParams<A, K, D>> for Linear<A, K, D>
+impl<A, S, D, K> BorrowMut<ParamsBase<S, D, K>> for Linear<A, K, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
-    fn borrow_mut(&mut self) -> &mut LinearParams<A, K, D> {
+    fn borrow_mut(&mut self) -> &mut ParamsBase<S, D, K> {
         &mut self.params
     }
 }
diff --git a/models/linear/src/model/layer.rs b/models/linear/src/model/layer.rs
index 486bb43d..9bb00f52 100644
--- a/models/linear/src/model/layer.rs
+++ b/models/linear/src/model/layer.rs
@@ -3,39 +3,56 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 use super::{Config, Layout};
-use crate::{Biased, LinearParams, ParamMode, Unbiased};
+use crate::{Biased, LinearParams, ParamMode, ParamsBase, Unbiased};
 use concision::prelude::{Predict, Result};
 use nd::prelude::*;
-use nd::RemoveAxis;
+use nd::{DataOwned, OwnedRepr, RawData, RemoveAxis};
 
 /// An implementation of a linear model.
 ///
 /// In an effort to streamline the api, the [Linear] model relies upon a [ParamMode] type ([Biased] or [Unbiased](crate::params::mode::Unbiased))
 /// which enables the model to automatically determine whether or not to include a bias term. Doing so allows the model to inherit several methods
 /// familar to the underlying [ndarray](https://docs.rs/ndarray) crate.
-pub struct Linear<A = f64, K = Biased, D = Ix2>
+pub struct Linear<A = f64, K = Biased, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
+    S: RawData<Elem = A>,
 {
     pub(crate) config: Config<K, D>,
-    pub(crate) params: LinearParams<A, K, D>,
+    pub(crate) params: ParamsBase<S, D, K>,
 }
 
-impl<A, K, D> Linear<A, K, D>
+impl<A, K> Linear<A, K, Ix2, OwnedRepr<A>>
+where
+    K: ParamMode,
+{
+    pub fn std(inputs: usize, outputs: usize) -> Self
+    where
+        A: Default,
+    {
+        let config = Config::<K, Ix2>::new().with_shape((inputs, outputs));
+        let params = ParamsBase::new(config.features());
+        Linear { config, params }
+    }
+}
+
+impl<A, S, D, K> Linear<A, K, D, S>
 where
     D: RemoveAxis,
     K: ParamMode,
+    S: RawData<Elem = A>,
 {
-    mbuilder!(new where A: Default);
-    mbuilder!(ones where A: Clone + num::One);
-    mbuilder!(zeros where A: Clone + num::Zero);
+    mbuilder!(new where A: Default, S: DataOwned);
+    mbuilder!(ones where A: Clone + num::One, S: DataOwned);
+    mbuilder!(zeros where A: Clone + num::Zero, S: DataOwned);
 
     pub fn from_config(config: Config<K, D>) -> Self
     where
         A: Clone + Default,
         K: ParamMode,
+        S: DataOwned,
     {
-        let params = LinearParams::new(config.dim());
+        let params = ParamsBase::new(config.dim());
         Self { config, params }
     }
 
@@ -43,13 +60,14 @@ where
     where
         A: Clone + Default,
         K: ParamMode,
+        S: DataOwned,
     {
         let config = Config::<K, D>::new().with_layout(layout);
-        let params = LinearParams::new(config.dim());
+        let params = ParamsBase::new(config.dim());
         Self { config, params }
     }
 
-    pub fn from_params(params: LinearParams<A, K, D>) -> Self {
+    pub fn from_params(params: ParamsBase<S, D, K>) -> Self {
         let config = Config::<K, D>::new().with_shape(params.raw_dim());
         Self { config, params }
     }
@@ -67,26 +85,27 @@ where
         &self.config
     }
 
-    pub fn weights(&self) -> &Array<A, D> {
+    pub fn weights(&self) -> &ArrayBase<S, D> {
         self.params.weights()
     }
 
-    pub fn weights_mut(&mut self) -> &mut Array<A, D> {
+    pub fn weights_mut(&mut self) -> &mut ArrayBase<S, D> {
         self.params.weights_mut()
     }
 
-    pub const fn params(&self) -> &LinearParams<A, K, D> {
+    pub const fn params(&self) -> &ParamsBase<S, D, K> {
         &self.params
     }
 
-    pub fn params_mut(&mut self) -> &mut LinearParams<A, K, D> {
+    pub fn params_mut(&mut self) -> &mut ParamsBase<S, D, K> {
         &mut self.params
     }
 
-    pub fn into_biased(self) -> Linear<A, Biased, D>
+    pub fn into_biased(self) -> Linear<A, Biased, D, S>
     where
         A: Default,
         K: 'static,
+        S: DataOwned,
     {
         Linear {
             config: self.config.into_biased(),
@@ -94,10 +113,11 @@ where
         }
     }
 
-    pub fn into_unbiased(self) -> Linear<A, Unbiased, D>
+    pub fn into_unbiased(self) -> Linear<A, Unbiased, D, S>
     where
         A: Default,
         K: 'static,
+        S: DataOwned,
     {
         Linear {
             config: self.config.into_unbiased(),
@@ -130,40 +150,44 @@ where
     concision::dimensional!(params());
 }
 
-impl<A, D> Linear<A, Biased, D>
+impl<A, S, D> Linear<A, Biased, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
     pub fn biased<Sh>(shape: Sh) -> Self
     where
         A: Default,
+        S: DataOwned,
         Sh: ShapeBuilder<Dim = D>,
     {
         let config = Config::<Biased, D>::new().with_shape(shape);
-        let params = LinearParams::biased(config.dim());
+        let params = ParamsBase::biased(config.dim());
         Linear { config, params }
     }
 
-    pub fn bias(&self) -> &Array<A, D::Smaller> {
+    pub fn bias(&self) -> &ArrayBase<S, D::Smaller> {
         self.params().bias()
     }
 
-    pub fn bias_mut(&mut self) -> &mut Array<A, D::Smaller> {
+    pub fn bias_mut(&mut self) -> &mut ArrayBase<S, D::Smaller> {
         self.params_mut().bias_mut()
     }
 }
 
-impl<A, D> Linear<A, Unbiased, D>
+impl<A, S, D> Linear<A, Unbiased, D, S>
 where
     D: RemoveAxis,
+    S: RawData<Elem = A>,
 {
     pub fn unbiased<Sh>(shape: Sh) -> Self
     where
         A: Default,
+        S: DataOwned,
         Sh: ShapeBuilder<Dim = D>,
     {
         let config = Config::<Unbiased, D>::new().with_shape(shape);
-        let params = LinearParams::unbiased(config.dim());
+        let params = ParamsBase::unbiased(config.dim());
         Linear { config, params }
     }
 }
diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs
index 1deebb6f..e80fdda9 100644
--- a/models/transformers/src/attention/head.rs
+++ b/models/transformers/src/attention/head.rs
@@ -39,6 +39,19 @@ where
     pub(crate) params: QkvBase<S, D>,
 }
 
+impl<A, S> AttentionHead<A, Ix2, S>
+where
+    S: RawData<Elem = A>,
+{
+    pub fn std(dm: usize, dk: usize) -> Self
+    where
+        A: Default,
+        S: DataOwned,
+    {
+        Self::from_params(QkvBase::new((dk, dm)))
+    }
+}
+
 impl<A, S, D> AttentionHead<A, D, S>
 where
     D: Dimension,
diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs
index 2fa53c3f..58c510c6 100644
--- a/models/transformers/src/attention/multi/config.rs
+++ b/models/transformers/src/attention/multi/config.rs
@@ -3,6 +3,10 @@
     Contrib: FL03 <jo3mccain@icloud.com>
 */
 
+pub(crate) fn dk(d_model: usize, heads: usize) -> usize {
+    d_model / heads
+}
+
 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct Config {
@@ -20,7 +24,7 @@ impl Config {
     }
 
     pub fn dk(&self) -> usize {
-        self.d_model() / self.heads()
+        dk(self.d_model(), self.heads())
     }
 
     pub fn heads(&self) -> usize {
diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs
index be427e4d..36a4051d 100644
--- a/models/transformers/src/attention/multi/multi_head.rs
+++ b/models/transformers/src/attention/multi/multi_head.rs
@@ -6,50 +6,72 @@ use super::Config;
 use crate::AttentionHead;
 use linear::{Biased, Linear};
 use nd::prelude::*;
+use nd::{DataOwned, OwnedRepr, RawData};
 
-#[derive(Default)]
-pub struct MultiHeadAttention<A = f64, D = Ix2>
+pub struct MultiHeadAttention<A = f64, D = Ix2, S = OwnedRepr<A>>
 where
     D: Dimension,
+    S: RawData<Elem = A>,
 {
     pub(crate) config: Config,
-    pub(crate) head: Option<AttentionHead<A, D>>,
-    pub(crate) linears: Vec<Linear<A, Biased, D>>,
+    pub(crate) head: AttentionHead<A, D, S>,
+    pub(crate) linears: Vec<Linear<A, Biased, D, S>>,
 }
 
-impl<A, D> MultiHeadAttention<A, D>
+impl<A, S, D> MultiHeadAttention<A, D, S>
 where
     D: Dimension,
+    S: RawData<Elem = A>,
 {
     pub const fn config(&self) -> &Config {
         &self.config
     }
 
-    pub fn head(&self) -> Option<&AttentionHead<A, D>> {
-        self.head.as_ref()
+    pub const fn head(&self) -> &AttentionHead<A, D, S> {
+        &self.head
     }
 
-    pub fn head_mut(&mut self) -> Option<&mut AttentionHead<A, D>> {
-        self.head.as_mut()
+    pub fn head_mut(&mut self) -> &mut AttentionHead<A, D, S> {
+        &mut self.head
     }
 
-    pub fn linears(&self) -> &[Linear<A, Biased, D>] {
+    pub fn linears(&self) -> &[Linear<A, Biased, D, S>] {
         &self.linears
     }
 }
 
-impl<A> MultiHeadAttention<A, Ix2> {
-    pub fn std(config: Config) -> Self
+impl<A, S> MultiHeadAttention<A, Ix2, S>
+where
+    S: RawData<Elem = A>,
+{
+    pub fn std(d_model: usize, heads: usize) -> Self
     where
         A: Clone + Default,
+        S: DataOwned,
     {
+        let config = Config::new().d_model(d_model).heads(heads).build();
         let linears = (0..4)
-            .map(|_| Linear::from_features(config.d_model(), config.d_model()))
+            .map(|_| Linear::from_features(d_model, d_model))
             .collect();
         Self {
-            head: None,
             config,
+            head: AttentionHead::std(d_model, config.dk()),
             linears,
         }
     }
 }
+
+impl<A, S, D> Default for MultiHeadAttention<A, D, S>
+where
+    A: Default,
+    D: Dimension,
+    S: DataOwned<Elem = A>,
+{
+    fn default() -> Self {
+        Self {
+            config: Config::default(),
+            head: AttentionHead::default(),
+            linears: Vec::new(),
+        }
+    }
+}

From b2a430fee49a944778548260a44d9826adb98e13 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Fri, 24 May 2024 11:20:54 -0500
Subject: [PATCH 18/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 data/src/kernel/mod.rs            |  0
 data/src/lib.rs                   |  2 +
 data/src/traits/data/container.rs | 25 ++++++++
 data/src/traits/data/repr.rs      | 15 +++++
 data/src/traits/mod.rs            | 18 +++++-
 data/src/traits/shape.rs          | 96 +++++++++++++++++++++++++++++++
 data/src/types/kernel.rs          |  6 ++
 data/src/types/mod.rs             | 11 ++++
 8 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 data/src/kernel/mod.rs
 create mode 100644 data/src/traits/data/container.rs
 create mode 100644 data/src/traits/data/repr.rs
 create mode 100644 data/src/traits/shape.rs
 create mode 100644 data/src/types/kernel.rs
 create mode 100644 data/src/types/mod.rs

diff --git a/data/src/kernel/mod.rs b/data/src/kernel/mod.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/data/src/lib.rs b/data/src/lib.rs
index 0186ca18..5f1d6ead 100644
--- a/data/src/lib.rs
+++ b/data/src/lib.rs
@@ -22,9 +22,11 @@ pub mod params;
 pub mod preproc;
 pub mod tensor;
 pub mod traits;
+pub mod types;
 
 pub mod prelude {
     pub use super::dataset::*;
     pub use super::params::prelude::*;
     pub use super::traits::prelude::*;
+    pub use super::types::prelude::*;
 }
diff --git a/data/src/traits/data/container.rs b/data/src/traits/data/container.rs
new file mode 100644
index 00000000..0d9e0044
--- /dev/null
+++ b/data/src/traits/data/container.rs
@@ -0,0 +1,25 @@
+/*
+    Appellation: container <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use crate::traits::{ContainerRepr, Dimensional};
+
+pub trait Container<T> {
+    type Data: ContainerRepr<Elem = T>;
+}
+
+/// This trait describes the basic operations for any n-dimensional container.
+pub trait NdContainer<A, D>: Dimensional<Dim = D> {
+    type Data: ContainerRepr<Elem = A>;
+
+    fn as_slice(&self) -> &[A];
+
+    fn as_mut_slice(&mut self) -> &mut [A];
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<S, T> Container<T> for Vec<S> {
+    type Data = Vec<T>;
+}
diff --git a/data/src/traits/data/repr.rs b/data/src/traits/data/repr.rs
new file mode 100644
index 00000000..3c583d95
--- /dev/null
+++ b/data/src/traits/data/repr.rs
@@ -0,0 +1,15 @@
+/*
+    Appellation: data <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+pub trait ContainerRepr {
+    type Elem;
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<T> ContainerRepr for Vec<T> {
+    type Elem = T;
+}
diff --git a/data/src/traits/mod.rs b/data/src/traits/mod.rs
index 83d994c3..cf2a290a 100644
--- a/data/src/traits/mod.rs
+++ b/data/src/traits/mod.rs
@@ -2,10 +2,26 @@
     Appellation: traits <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::prelude::*;
+pub use self::{data::*, records::*, shape::*};
 
 pub mod records;
+pub mod shape;
+
+#[doc(hidden)]
+pub mod data {
+    pub use self::{container::*, repr::*};
+
+    pub(crate) mod container;
+    pub(crate) mod repr;
+
+    pub(crate) mod prelude {
+        pub use super::container::*;
+        pub use super::repr::*;
+    }
+}
 
 pub(crate) mod prelude {
+    pub use super::data::prelude::*;
     pub use super::records::*;
+    pub use super::shape::*;
 }
diff --git a/data/src/traits/shape.rs b/data/src/traits/shape.rs
new file mode 100644
index 00000000..a8127e46
--- /dev/null
+++ b/data/src/traits/shape.rs
@@ -0,0 +1,96 @@
+/*
+    Appellation: shape <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::{ArrayBase, Dimension, RawData};
+
+pub trait IntoPattern {
+    type Pattern;
+
+    fn into_pattern(self) -> Self::Pattern;
+}
+
+/// [Dimensional] provides a common interface for containers to access their shape and dimension.
+pub trait Dimensional {
+    const RANK: Option<usize> = None;
+
+    type Dim: IntoPattern;
+
+    fn dim(&self) -> <Self::Dim as IntoPattern>::Pattern {
+        self.raw_dim().into_pattern()
+    }
+
+    fn is_scalar(&self) -> bool {
+        self.rank() == 0 || self.shape().iter().all(|x| *x == 1)
+    }
+
+    fn rank(&self) -> usize {
+        Self::RANK.unwrap_or_else(|| self.shape().len())
+    }
+
+    fn raw_dim(&self) -> Self::Dim;
+
+    fn size(&self) -> usize {
+        self.shape().iter().product()
+    }
+
+    fn shape(&self) -> &[usize];
+}
+
+/*
+ ******** implementations ********
+*/
+impl<D> IntoPattern for D
+where
+    D: Dimension,
+{
+    type Pattern = D::Pattern;
+
+    fn into_pattern(self) -> Self::Pattern {
+        Dimension::into_pattern(self)
+    }
+}
+
+// impl<D> Dimensional for D
+// where
+//     D: Dimension + IntoPattern,
+// {
+//     type Dim = D;
+
+//     fn dim(&self) -> D::Pattern {
+//         self.clone().into_pattern()
+//     }
+
+//     fn raw_dim(&self) -> D {
+//         self.clone()
+//     }
+
+//     fn shape(&self) -> &[usize] {
+//         D::slice(self)
+//     }
+// }
+
+impl<S, D> Dimensional for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData,
+{
+    const RANK: Option<usize> = D::NDIM;
+    type Dim = D;
+
+    fn dim(&self) -> D::Pattern {
+        ArrayBase::dim(self)
+    }
+
+    fn raw_dim(&self) -> D {
+        ArrayBase::raw_dim(self)
+    }
+
+    fn shape(&self) -> &[usize] {
+        ArrayBase::shape(self)
+    }
+
+    fn size(&self) -> usize {
+        ArrayBase::len(self)
+    }
+}
diff --git a/data/src/types/kernel.rs b/data/src/types/kernel.rs
new file mode 100644
index 00000000..248ad95f
--- /dev/null
+++ b/data/src/types/kernel.rs
@@ -0,0 +1,8 @@
+/*
+    Appellation: kernel <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+
+/// [Kernel] is a field-less unit struct; it currently carries no data or behavior.
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
+pub struct Kernel;
diff --git a/data/src/types/mod.rs b/data/src/types/mod.rs
new file mode 100644
index 00000000..b8ca6da5
--- /dev/null
+++ b/data/src/types/mod.rs
@@ -0,0 +1,11 @@
+/*
+    Appellation: types <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+pub use self::kernel::Kernel;
+
+pub mod kernel;
+
+pub(crate) mod prelude {
+    pub use super::kernel::Kernel;
+}

From cb2f72cd83cbf43a2f16c0c3969b72f838ea6508 Mon Sep 17 00:00:00 2001
From: Joe McCain III <jo3mccain@icloud.com>
Date: Fri, 24 May 2024 11:52:31 -0500
Subject: [PATCH 19/19] update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
---
 core/src/traits/arr/misc.rs       |  44 -----
 core/src/traits/arr/tensor.rs     | 317 ------------------------------
 core/src/traits/misc/container.rs |  33 ----
 core/src/traits/mod.rs            |  15 +-
 data/src/traits/build.rs          | 140 +++++++++++++
 data/src/traits/ext/ndarray.rs    |  45 +++++
 data/src/traits/ext/ndtensor.rs   |  52 +++++
 data/src/traits/ext/ndview.rs     | 157 +++++++++++++++
 data/src/traits/mod.rs            |  19 +-
 9 files changed, 418 insertions(+), 404 deletions(-)
 delete mode 100644 core/src/traits/arr/tensor.rs
 delete mode 100644 core/src/traits/misc/container.rs
 create mode 100644 data/src/traits/build.rs
 create mode 100644 data/src/traits/ext/ndarray.rs
 create mode 100644 data/src/traits/ext/ndtensor.rs
 create mode 100644 data/src/traits/ext/ndview.rs

diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs
index 51a5996d..40857596 100644
--- a/core/src/traits/arr/misc.rs
+++ b/core/src/traits/arr/misc.rs
@@ -5,30 +5,6 @@
 use nd::prelude::*;
 use nd::{DataMut, RawData};
 
-/// [Dimensional] provides a common interface for containers to access their shape and dimension.
-pub trait Dimensional<D> {
-    const RANK: Option<usize> = None;
-
-    type Pattern;
-
-    fn dim(&self) -> Self::Pattern;
-
-    fn is_scalar(&self) -> bool {
-        self.rank() == 0 || self.shape().iter().all(|x| *x == 1)
-    }
-
-    fn rank(&self) -> usize {
-        Self::RANK.unwrap_or(self.shape().len())
-    }
-
-    fn raw_dim(&self) -> D;
-
-    fn size(&self) -> usize {
-        self.shape().iter().product()
-    }
-
-    fn shape(&self) -> &[usize];
-}
 /// This trait is used to fill an array with a value based on a mask.
 /// The mask is a boolean array of the same shape as the array.
 pub trait MaskFill<A, D>
@@ -51,26 +27,6 @@ pub trait IsSquare {
 /*
  ******** implementations ********
 */
-impl<S, D> Dimensional<D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData,
-{
-    const RANK: Option<usize> = D::NDIM;
-    type Pattern = D::Pattern;
-
-    fn shape(&self) -> &[usize] {
-        ArrayBase::shape(self)
-    }
-
-    fn dim(&self) -> Self::Pattern {
-        ArrayBase::dim(self)
-    }
-
-    fn raw_dim(&self) -> D {
-        ArrayBase::raw_dim(self)
-    }
-}
 
 impl<A, S, D> MaskFill<A, D> for ArrayBase<S, D>
 where
diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs
deleted file mode 100644
index 12571e0f..00000000
--- a/core/src/traits/arr/tensor.rs
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
-    Appellation: generator <module>
-    Contrib: FL03 <jo3mccain@icloud.com>
-*/
-use super::Dimensional;
-use nd::iter::{Iter, IterMut};
-use nd::prelude::*;
-use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData};
-use num::{One, Zero};
-
-pub trait NdArray<A, D>
-where
-    D: Dimension,
-{
-    type Data: RawData<Elem = A>;
-
-    fn as_slice(&self) -> &[A];
-
-    fn as_mut_slice(&mut self) -> &mut [A];
-
-    fn iter(&self) -> Iter<'_, A, D>;
-
-    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
-
-    fn map<F>(&self, f: F) -> Self
-    where
-        F: FnMut(&A) -> A;
-
-    fn mapv<F>(&mut self, f: F)
-    where
-        A: Clone,
-        F: FnMut(A) -> A;
-}
-
-pub trait NdIter<A, D>
-where
-    D: Dimension,
-{
-    type Data: RawData<Elem = A>;
-
-    fn iter(&self) -> Iter<'_, A, D>;
-
-    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
-}
-
-/// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase)
-pub trait NdBuilder<A = f64, D = Ix2>
-where
-    D: Dimension,
-{
-    type Data: RawData<Elem = A>;
-
-    /// Create a new array with the given shape whose elements are set to the default value of the element type.
-    fn default<Sh>(shape: Sh) -> Self
-    where
-        A: Default,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-
-    fn fill<Sh>(shape: Sh, elem: A) -> Self
-    where
-        A: Clone,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-
-    fn ones<Sh>(shape: Sh) -> Self
-    where
-        A: Clone + One,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-
-    fn zeros<Sh>(shape: Sh) -> Self
-    where
-        A: Clone + Zero,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned;
-}
-
-pub trait NdBuilderExt<A = f64, D = Ix2>: NdBuilder<A, D>
-where
-    D: Dimension,
-    Self: Dimensional<D, Pattern = D::Pattern> + Sized,
-{
-    fn default_like<Sh>(&self) -> Self
-    where
-        A: Default,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::default(self.dim())
-    }
-
-    fn fill_like<Sh>(&self, elem: A) -> Self
-    where
-        A: Clone,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::fill(self.dim(), elem)
-    }
-
-    fn ones_like<Sh>(&self) -> Self
-    where
-        A: Clone + One,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::ones(self.dim())
-    }
-
-    fn zeros_like<Sh>(&self) -> Self
-    where
-        A: Clone + Zero,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        Self::zeros(self.dim())
-    }
-}
-
-pub trait AsOwned<S, D = Ix2>
-where
-    D: Dimension,
-    S: RawData,
-{
-    type Output;
-
-    fn into_owned(self) -> Self::Output
-    where
-        S: Data,
-        S::Elem: Clone;
-
-    fn to_owned(&self) -> Self::Output
-    where
-        S: Data,
-        S::Elem: Clone;
-}
-
-pub trait AsShared<S, D = Ix2>
-where
-    D: Dimension,
-    S: RawData,
-{
-    type Output;
-
-    fn into_shared(self) -> Self::Output
-    where
-        S: DataOwned,
-        S::Elem: Clone;
-
-    fn to_shared(&self) -> Self::Output
-    where
-        S: DataOwned,
-        S::Elem: Clone;
-}
-
-pub trait NdView<A = f64, S = OwnedRepr<A>, D = Ix2>: AsOwned<S, D> + AsShared<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    fn view(&self) -> ArrayView<'_, A, D>
-    where
-        A: Clone,
-        S: Data;
-
-    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
-    where
-        A: Clone,
-        S: DataMut;
-}
-
-pub trait View<A = f64, D = Ix2>
-where
-    D: Dimension,
-{
-    type Data: RawData<Elem = A>;
-    type Output;
-
-    fn view(&self) -> Self::Output
-    where
-        A: Clone,
-        Self::Data: Data;
-}
-pub trait ViewMut<A = f64, D = Ix2>: View<A, D>
-where
-    D: Dimension,
-{
-    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
-    where
-        A: Clone,
-        Self::Data: DataMut;
-}
-
-/*
- ************* Implementations *************
-*/
-impl<A, S, D> NdBuilder<A, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    type Data = S;
-
-    fn default<Sh>(shape: Sh) -> Self
-    where
-        A: Default,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        ArrayBase::default(shape)
-    }
-
-    fn fill<Sh>(shape: Sh, elem: A) -> Self
-    where
-        A: Clone,
-        S: DataOwned,
-        Sh: ShapeBuilder<Dim = D>,
-    {
-        ArrayBase::from_elem(shape, elem)
-    }
-
-    fn ones<Sh>(shape: Sh) -> Self
-    where
-        A: Clone + One,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        ArrayBase::ones(shape)
-    }
-
-    fn zeros<Sh>(shape: Sh) -> Self
-    where
-        A: Clone + Zero,
-        Sh: ShapeBuilder<Dim = D>,
-        Self::Data: DataOwned,
-    {
-        ArrayBase::zeros(shape)
-    }
-}
-
-impl<U, A, D> NdBuilderExt<A, D> for U
-where
-    U: Dimensional<D, Pattern = D::Pattern> + NdBuilder<A, D>,
-    D: Dimension,
-{
-}
-
-impl<A, S, D> AsOwned<S, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    type Output = Array<A, D>;
-
-    fn into_owned(self) -> Self::Output
-    where
-        A: Clone,
-        S: Data,
-    {
-        self.into_owned()
-    }
-
-    fn to_owned(&self) -> Self::Output
-    where
-        A: Clone,
-        S: Data,
-    {
-        self.to_owned()
-    }
-}
-
-impl<A, S, D> AsShared<S, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    type Output = ArcArray<A, D>;
-
-    fn into_shared(self) -> Self::Output
-    where
-        A: Clone,
-        S: DataOwned,
-    {
-        self.into_shared()
-    }
-
-    fn to_shared(&self) -> Self::Output
-    where
-        A: Clone,
-        S: DataOwned,
-    {
-        self.to_shared()
-    }
-}
-
-impl<A, S, D> NdView<A, S, D> for ArrayBase<S, D>
-where
-    D: Dimension,
-    S: RawData<Elem = A>,
-{
-    fn view(&self) -> ArrayView<'_, A, D>
-    where
-        A: Clone,
-        S: Data,
-    {
-        self.view()
-    }
-
-    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
-    where
-        A: Clone,
-        S: DataMut,
-    {
-        self.view_mut()
-    }
-}
diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs
deleted file mode 100644
index 5243fc79..00000000
--- a/core/src/traits/misc/container.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-    Appellation: container <module>
-    Contrib: FL03 <jo3mccain@icloud.com>
-*/
-use crate::traits::Dimensional;
-
-pub trait Container<T> {
-    type Data: ContainerData<Elem = T>;
-}
-
-pub trait ContainerData {
-    type Elem;
-}
-
-/// This trait describes the basic operations for any n-dimensional container.
-pub trait NdContainer<A = f64, D = nd::Ix2>: Dimensional<D> {
-    type Data: ContainerData<Elem = A>;
-
-    fn as_slice(&self) -> &[A];
-
-    fn as_mut_slice(&mut self) -> &mut [A];
-}
-
-/*
- ************* Implementations *************
-*/
-impl<T> ContainerData for Vec<T> {
-    type Elem = T;
-}
-
-impl<S, T> Container<T> for Vec<S> {
-    type Data = Vec<T>;
-}
diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs
index 5c48f8af..9dc12247 100644
--- a/core/src/traits/mod.rs
+++ b/core/src/traits/mod.rs
@@ -17,30 +17,27 @@ pub mod arr {
     mod misc;
     mod ops;
     mod reshape;
-    mod tensor;
 
     pub(crate) mod prelude {
         pub use super::create::*;
         pub use super::misc::*;
         pub use super::ops::*;
         pub use super::reshape::*;
-        pub use super::tensor::*;
     }
 }
 
 pub mod misc {
-    pub mod adjust;
-    #[doc(hidden)]
-    pub mod container;
+    pub use self::prelude::*;
+
+    pub(crate) mod adjust;
     #[doc(hidden)]
-    pub mod sequential;
+    pub(crate) mod sequential;
     #[doc(hidden)]
-    pub mod store;
-    pub mod toggle;
+    pub(crate) mod store;
+    pub(crate) mod toggle;
 
     pub(crate) mod prelude {
         pub use super::adjust::*;
-        pub use super::container::*;
         pub use super::sequential::*;
         pub use super::store::*;
         pub use super::toggle::*;
diff --git a/data/src/traits/build.rs b/data/src/traits/build.rs
new file mode 100644
index 00000000..7944014b
--- /dev/null
+++ b/data/src/traits/build.rs
@@ -0,0 +1,140 @@
+/*
+    Appellation: build <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use crate::traits::Dimensional;
+use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder};
+use num::{One, Zero};
+
+/// [NdBuilder] describes common creation routines for [ArrayBase]
+pub trait NdBuilder<A = f64, D = nd::Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+
+    /// Create a new array with the given shape whose elements are set to the default value of the element type.
+    fn default<Sh>(shape: Sh) -> Self
+    where
+        A: Default,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+
+    fn fill<Sh>(shape: Sh, elem: A) -> Self
+    where
+        A: Clone,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+
+    fn ones<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + One,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+
+    fn zeros<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + Zero,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned;
+}
+
+pub trait NdBuilderExt<A = f64, D = nd::Ix2>: NdBuilder<A, D> + Sized
+where
+    D: Dimension,
+{
+    fn dim(&self) -> D::Pattern;
+
+    fn default_like<Sh>(&self) -> Self
+    where
+        A: Default,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        Self::default(self.dim())
+    }
+
+    fn fill_like<Sh>(&self, elem: A) -> Self
+    where
+        A: Clone,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        Self::fill(self.dim(), elem)
+    }
+
+    fn ones_like<Sh>(&self) -> Self
+    where
+        A: Clone + One,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        Self::ones(self.dim())
+    }
+
+    fn zeros_like<Sh>(&self) -> Self
+    where
+        A: Clone + Zero,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        Self::zeros(self.dim())
+    }
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<A, S, D> NdBuilder<A, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Data = S;
+
+    fn default<Sh>(shape: Sh) -> Self
+    where
+        A: Default,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        ArrayBase::default(shape)
+    }
+
+    fn fill<Sh>(shape: Sh, elem: A) -> Self
+    where
+        A: Clone,
+        S: DataOwned,
+        Sh: ShapeBuilder<Dim = D>,
+    {
+        ArrayBase::from_elem(shape, elem)
+    }
+
+    fn ones<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + One,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        ArrayBase::ones(shape)
+    }
+
+    fn zeros<Sh>(shape: Sh) -> Self
+    where
+        A: Clone + Zero,
+        Sh: ShapeBuilder<Dim = D>,
+        Self::Data: DataOwned,
+    {
+        ArrayBase::zeros(shape)
+    }
+}
+
+impl<U, A, D> NdBuilderExt<A, D> for U
+where
+    U: Dimensional<Dim = D> + NdBuilder<A, D>,
+    D: Dimension,
+{
+    fn dim(&self) -> D::Pattern {
+        Dimensional::dim(self) // explicit path: never resolves back to `NdBuilderExt::dim`
+    }
+}
diff --git a/data/src/traits/ext/ndarray.rs b/data/src/traits/ext/ndarray.rs
new file mode 100644
index 00000000..6d3e6ed8
--- /dev/null
+++ b/data/src/traits/ext/ndarray.rs
@@ -0,0 +1,45 @@
+/*
+    Appellation: ndarray <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::iter::{Iter, IterMut};
+use nd::{Dimension, RawData};
+
+pub trait NdArray<A, D>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+
+    fn as_slice(&self) -> &[A];
+
+    fn as_mut_slice(&mut self) -> &mut [A];
+
+    fn iter(&self) -> Iter<'_, A, D>;
+
+    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
+
+    fn map<F>(&self, f: F) -> Self
+    where
+        F: FnMut(&A) -> A;
+
+    fn mapv<F>(&mut self, f: F)
+    where
+        A: Clone,
+        F: FnMut(A) -> A;
+}
+
+pub trait NdIter<A, D>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+
+    fn iter(&self) -> Iter<'_, A, D>;
+
+    fn iter_mut(&mut self) -> IterMut<'_, A, D>;
+}
+
+/*
+ ************* Implementations *************
+*/
diff --git a/data/src/traits/ext/ndtensor.rs b/data/src/traits/ext/ndtensor.rs
new file mode 100644
index 00000000..0edbd756
--- /dev/null
+++ b/data/src/traits/ext/ndtensor.rs
@@ -0,0 +1,62 @@
+/*
+    Appellation: ndtensor <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+use nd::{ArrayBase, Data, Dimension, RawData};
+use num::complex::ComplexFloat;
+use num::traits::Float;
+
+/// [Scalar] pairs an implementor with an associated [Float] type.
+pub trait Scalar {
+    /// The associated floating-point type.
+    type R: Float;
+}
+
+/// [NdTensor] declares operations for containers whose elements implement [ComplexFloat];
+/// the [ArrayBase] implementation below applies each one elementwise.
+pub trait NdTensor<A, D>
+where
+    A: ComplexFloat,
+    D: Dimension,
+{
+    /// The raw data representation; its element type is `A`.
+    type Data: RawData<Elem = A>;
+    /// The type returned by every operation of this trait.
+    type Output;
+
+    /// For [ArrayBase], maps [ComplexFloat::conj] over the elements into a new array.
+    fn conj(&self) -> Self::Output;
+
+    /// For [ArrayBase], maps [ComplexFloat::cos] over the elements into a new array.
+    fn cos(&self) -> Self::Output;
+
+    /// For [ArrayBase], maps [ComplexFloat::cosh] over the elements into a new array.
+    fn cosh(&self) -> Self::Output;
+}
+
+/*
+ ************* Implementations *************
+*/
+// No `Self: Clone` bound is required: `mapv` only borrows `self`, so array types that
+// are not `Clone` (e.g. mutable views) can use these methods as well.
+impl<A, S, D> NdTensor<A, D> for ArrayBase<S, D>
+where
+    A: ComplexFloat,
+    D: Dimension,
+    S: Data<Elem = A>,
+{
+    type Data = S;
+    type Output = nd::Array<A, D>;
+
+    fn conj(&self) -> Self::Output {
+        self.mapv(|x| x.conj())
+    }
+
+    fn cos(&self) -> Self::Output {
+        self.mapv(|x| x.cos())
+    }
+
+    fn cosh(&self) -> Self::Output {
+        self.mapv(|x| x.cosh())
+    }
+}
diff --git a/data/src/traits/ext/ndview.rs b/data/src/traits/ext/ndview.rs
new file mode 100644
index 00000000..56b88c3f
--- /dev/null
+++ b/data/src/traits/ext/ndview.rs
@@ -0,0 +1,157 @@
+/*
+    Appellation: ndview <module>
+    Contrib: FL03 <jo3mccain@icloud.com>
+*/
+/*
+    Traits for converting array-like containers into owned or shared arrays
+    and for borrowing immutable or mutable views of their data.
+*/
+use nd::prelude::*;
+use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData};
+
+pub trait AsOwned<A, S, D = Ix2>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output;
+
+    fn into_owned(self) -> Self::Output
+    where
+        A: Clone,
+        S: Data;
+
+    fn to_owned(&self) -> Self::Output
+    where
+        A: Clone,
+        S: Data;
+}
+
+pub trait AsShared<S, D = Ix2>
+where
+    D: Dimension,
+    S: RawData,
+{
+    type Output;
+
+    fn into_shared(self) -> Self::Output
+    where
+        S: DataOwned,
+        S::Elem: Clone;
+
+    fn to_shared(&self) -> Self::Output
+    where
+        S: DataOwned,
+        S::Elem: Clone;
+}
+
+pub trait NdView<A = f64, S = OwnedRepr<A>, D = Ix2>: AsOwned<A, S, D> + AsShared<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn view(&self) -> ArrayView<'_, A, D>
+    where
+        A: Clone,
+        S: Data;
+
+    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
+    where
+        A: Clone,
+        S: DataMut;
+}
+
+pub trait View<A = f64, D = Ix2>
+where
+    D: Dimension,
+{
+    type Data: RawData<Elem = A>;
+    type Output;
+
+    fn view(&self) -> Self::Output
+    where
+        A: Clone,
+        Self::Data: Data;
+}
+pub trait ViewMut<A = f64, D = Ix2>: View<A, D>
+where
+    D: Dimension,
+{
+    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
+    where
+        A: Clone,
+        Self::Data: DataMut;
+}
+
+/*
+ ************* Implementations *************
+*/
+impl<A, S, D> AsOwned<A, S, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output = Array<A, D>;
+
+    fn into_owned(self) -> Self::Output
+    where
+        A: Clone,
+        S: Data,
+    {
+        self.into_owned()
+    }
+
+    fn to_owned(&self) -> Self::Output
+    where
+        A: Clone,
+        S: Data,
+    {
+        self.to_owned()
+    }
+}
+
+impl<A, S, D> AsShared<S, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    type Output = ArcArray<A, D>;
+
+    fn into_shared(self) -> Self::Output
+    where
+        A: Clone,
+        S: DataOwned,
+    {
+        self.into_shared()
+    }
+
+    fn to_shared(&self) -> Self::Output
+    where
+        A: Clone,
+        S: DataOwned,
+    {
+        self.to_shared()
+    }
+}
+
+impl<A, S, D> NdView<A, S, D> for ArrayBase<S, D>
+where
+    D: Dimension,
+    S: RawData<Elem = A>,
+{
+    fn view(&self) -> ArrayView<'_, A, D>
+    where
+        A: Clone,
+        S: Data,
+    {
+        self.view()
+    }
+
+    fn view_mut(&mut self) -> ArrayViewMut<'_, A, D>
+    where
+        A: Clone,
+        S: DataMut,
+    {
+        self.view_mut()
+    }
+}
diff --git a/data/src/traits/mod.rs b/data/src/traits/mod.rs
index cf2a290a..14b24d38 100644
--- a/data/src/traits/mod.rs
+++ b/data/src/traits/mod.rs
@@ -2,7 +2,9 @@
     Appellation: traits <module>
     Contrib: FL03 <jo3mccain@icloud.com>
 */
-pub use self::{data::*, records::*, shape::*};
+pub use self::{data::*, ext::*, records::*, shape::*};
+
+pub mod build;
 
 pub mod records;
 pub mod shape;
@@ -20,8 +22,23 @@ pub mod data {
     }
 }
 
+pub mod ext {
+    pub use self::{ndarray::*, ndtensor::*, ndview::*};
+
+    pub(crate) mod ndarray;
+    pub(crate) mod ndtensor;
+    pub(crate) mod ndview;
+
+    pub(crate) mod prelude {
+        pub use super::ndarray::*;
+        pub use super::ndtensor::*;
+        pub use super::ndview::*;
+    }
+}
+
 pub(crate) mod prelude {
     pub use super::data::prelude::*;
+    pub use super::ext::prelude::*;
     pub use super::records::*;
     pub use super::shape::*;
 }