From 7a8a7871f73711aebffa2aeebcf9c6d36e3f2992 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Sun, 19 May 2024 13:44:39 -0500 Subject: [PATCH 01/19] update Signed-off-by: Joe McCain III --- models/transformers/src/ops/merge.rs | 26 ++++++-- models/transformers/src/ops/mod.rs | 89 ++++++++++++++++++++++------ models/transformers/src/ops/split.rs | 84 ++++++++++++++++++-------- models/transformers/tests/ops.rs | 78 +++++++++++++++++++----- 4 files changed, 217 insertions(+), 60 deletions(-) diff --git a/models/transformers/src/ops/merge.rs b/models/transformers/src/ops/merge.rs index 747cae82..2ca6296c 100644 --- a/models/transformers/src/ops/merge.rs +++ b/models/transformers/src/ops/merge.rs @@ -2,9 +2,15 @@ Appellation: merge Contrib: FL03 */ +use super::_merge_dim; use concision::NdResult; -use nd::prelude::*; -use nd::{Data, RemoveAxis}; +use nd::{Array, ArrayBase, Data, Dimension, RemoveAxis}; + +pub trait DimMerge { + type Output; + + fn merge(&self, tgt: usize) -> Self::Output; +} // #67: Optimize the Merge trait pub trait Merge { @@ -20,6 +26,19 @@ pub trait Merge { /* ************* Implementations ************* */ +impl DimMerge for D +where + D: RemoveAxis, + D::Smaller: Dimension, + D::Larger: Dimension, +{ + type Output = D::Smaller; + + fn merge(&self, tgt: usize) -> Self::Output { + _merge_dim(self, tgt) + } +} + impl Merge for ArrayBase where A: Clone, @@ -36,7 +55,6 @@ where } fn merge_along(&self, swap: usize) -> NdResult { - use ndarray::Order; - super::merger(self, swap, swap + 1, Order::RowMajor) + super::_merge(self, swap, swap + 1, false) } } diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs index 778e4abc..4af970b3 100644 --- a/models/transformers/src/ops/mod.rs +++ b/models/transformers/src/ops/mod.rs @@ -12,7 +12,18 @@ pub(crate) mod utils { use nd::prelude::*; use nd::{Data, Order, RemoveAxis}; - #[doc(hidden)] + pub(crate) fn order(row_major: bool) -> Order { + if row_major { + Order::RowMajor + } 
else { + Order::ColumnMajor + } + } + + #[deprecated( + since = "0.1.14", + note = "Please use the `Merge::merge` method instead" + )] pub fn merge( arr: &ArrayBase, src: usize, @@ -25,14 +36,28 @@ pub(crate) mod utils { D::Smaller: Dimension, ArrayBase: Clone, { - merger(arr, src, tgt, Order::RowMajor) + _merge(arr, src, tgt, false) + } + #[deprecated( + since = "0.1.14", + note = "Please use the `Split::Split` method instead" + )] + pub fn split(arr: &ArrayBase, h: usize) -> NdResult> + where + A: Clone, + D: Dimension, + E: RemoveAxis, + S: Data, + ArrayBase: Clone, + { + _split(arr, h, true) } - pub(crate) fn merger( + pub(crate) fn _merge( arr: &ArrayBase, src: usize, tgt: usize, - order: Order, + row_major: bool, ) -> NdResult> where A: Clone, @@ -41,14 +66,36 @@ pub(crate) mod utils { D::Smaller: Dimension, ArrayBase: Clone, { - let shape = merge_dims(arr.raw_dim(), src); + let shape = _merge_dim(&arr.raw_dim(), src); let mut head = arr.clone(); head.swap_axes(src, tgt); - head.to_shape((shape, order)).map(|x| x.to_owned()) + head.to_shape((shape, order(row_major))) + .map(|x| x.to_owned()) } - #[doc(hidden)] - pub fn merge_dims(dim: D, src: usize) -> D::Smaller + pub(crate) fn _split( + arr: &ArrayBase, + h: usize, + row_major: bool, + ) -> NdResult> + where + A: Clone, + D: Dimension, + E: RemoveAxis, + S: Data, + ArrayBase: Clone, + { + let src = if arr.ndim() >= 2 { arr.ndim() - 2 } else { 0 }; + let tgt = src + 1; + let shape: E = _split_dim(&arr.raw_dim(), h); + let mut head = arr.clone(); + + head.swap_axes(src, tgt); + head.to_shape((shape, order(row_major))) + .map(|x| x.to_owned()) + } + /// Creates the new dimension after merging two axes. 
+ pub(crate) fn _merge_dim(dim: &D, src: usize) -> D::Smaller where D: RemoveAxis, D::Smaller: Dimension, @@ -66,17 +113,25 @@ pub(crate) mod utils { new_dim } - #[doc(hidden)] - pub fn merge_batch(heads: &Array4) -> NdResult> + pub(crate) fn _split_dim(dim: &D::Smaller, h: usize) -> D where - T: Clone, + D: RemoveAxis, + D::Smaller: Dimension, { - let (batch, n, seq, query) = heads.dim(); - let mut tmp = heads.clone(); - // swap the head and sequence axes - tmp.swap_axes(1, 2); - // reshape the qkv matrix into a 2d array - tmp.into_shape((batch, seq, n * query)) + let rank = dim.ndim() + 1; + // create a new dimension with one less axis; initialized with zeros + let mut new_dim = D::zeros(rank); + // create a mutable vector from the slice + let mut shape = dim.slice().to_vec(); + // get and remove the last axis + let bx = shape.pop().unwrap() / h; + // extend the shape with the new axes + shape.push(h); + shape.push(bx); + shape.swap(rank - 2, rank - 3); + // copy the values into the new dimension + new_dim.slice_mut().copy_from_slice(&shape); + new_dim } pub fn split_heads(param: &Array2, h: usize) -> NdResult> diff --git a/models/transformers/src/ops/split.rs b/models/transformers/src/ops/split.rs index 3a182710..2a1f96c7 100644 --- a/models/transformers/src/ops/split.rs +++ b/models/transformers/src/ops/split.rs @@ -2,8 +2,7 @@ Appellation: split Contrib: FL03 */ -use ndarray::prelude::{Array2, Array3, Array4}; -use ndarray::ShapeError; +use ndarray::{Array, ArrayBase, Data, Dimension, RemoveAxis, ShapeError}; // pub fn split(param: &Array, heads: usize) -> Result, ShapeError> { // let mut dim = param.dim() @@ -15,36 +14,73 @@ use ndarray::ShapeError; // Ok(res) // } +pub trait DimSplit { + type Output; + + fn split(&self, h: usize) -> Self::Output; +} + pub trait Split { type Output; fn split(&self, heads: usize) -> Result; } -impl Split for Array2 { - type Output = Array3; - - fn split(&self, heads: usize) -> Result { - let (seq, model) = self.dim(); - let 
query = model / heads; - // reshape the qkv matrix into a 3d array - let mut res = self.clone().into_shape((seq, heads, query))?; - // swap the sequence and head axes - res.swap_axes(0, 1); - Ok(res) +/* + ************* Implementations ************* +*/ + +impl DimSplit for D +where + D: Dimension, + E: RemoveAxis, +{ + type Output = E; + + fn split(&self, h: usize) -> Self::Output { + super::utils::_split_dim(self, h) } } -impl Split for Array3 { - type Output = Array4; - - fn split(&self, heads: usize) -> Result { - let (batch, seq, model) = self.dim(); - let query = model / heads; - // reshape the qkv matrix into a 3d array - let mut res = self.clone().into_shape((batch, seq, heads, query))?; - // swap the sequence and head axes - res.swap_axes(1, 2); - Ok(res) +impl Split for ArrayBase +where + A: Clone, + D: Dimension, + E: RemoveAxis, + S: Data, + ArrayBase: Clone, +{ + type Output = Array; + + fn split(&self, h: usize) -> Result { + super::_split(self, h, false) } } + +// impl Split for Array2 { +// type Output = Array3; + +// fn split(&self, heads: usize) -> Result { +// let (seq, model) = self.dim(); +// let query = model / heads; +// // reshape the qkv matrix into a 3d array +// let mut res = self.clone().into_shape((seq, heads, query))?; +// // swap the sequence and head axes +// res.swap_axes(0, 1); +// Ok(res) +// } +// } + +// impl Split for Array3 { +// type Output = Array4; + +// fn split(&self, heads: usize) -> Result { +// let (batch, seq, model) = self.dim(); +// let query = model / heads; +// // reshape the qkv matrix into a 3d array +// let mut res = self.clone().into_shape((batch, seq, heads, query))?; +// // swap the sequence and head axes +// res.swap_axes(1, 2); +// Ok(res) +// } +// } diff --git a/models/transformers/tests/ops.rs b/models/transformers/tests/ops.rs index c39b8efa..8227d8b2 100644 --- a/models/transformers/tests/ops.rs +++ b/models/transformers/tests/ops.rs @@ -5,21 +5,54 @@ extern crate concision_core as concision; extern 
crate concision_transformers as transformers; -use concision::linarr; +use concision::prelude::{linarr, NdResult}; use ndarray::prelude::*; +use ndarray::Order; use transformers::ops::*; +fn order(row_major: bool) -> Order { + if row_major { + Order::RowMajor + } else { + Order::ColumnMajor + } +} + +fn merge3(heads: &Array3, row_major: bool) -> NdResult> +where + T: Clone, +{ + let (n, seq, query) = heads.dim(); + let mut tmp = heads.clone(); + // swap the head and sequence axes + tmp.swap_axes(0, 1); + // reshape the qkv matrix into a 2d array + tmp.to_shape(((seq, n * query), order(row_major))) + .map(|x| x.to_owned()) +} + +fn merge4(heads: &Array4, row_major: bool) -> NdResult> +where + T: Clone, +{ + let (batch, n, seq, query) = heads.dim(); + let mut tmp = heads.clone(); + // swap the head and sequence axes + tmp.swap_axes(1, 2); + // reshape the qkv matrix into a 2d array + tmp.to_shape(((batch, seq, n * query), order(row_major))) + .map(|x| x.to_owned()) +} + #[test] fn test_merge() { let shape = (3, 4, 5); let dout = (4, 15); let arr = linarr::(shape.clone()).unwrap(); let a = arr.clone().merge().unwrap(); - let b = merge(&arr, 0, 1).unwrap(); assert_eq!(a.dim(), dout); - assert_eq!(a.dim(), b.dim()); - assert_eq!(a, b); + assert_eq!(a, merge3(&arr, false).unwrap()); } #[test] @@ -28,25 +61,40 @@ fn test_merge_batch() { let dout = (2, 4, 15); let arr = linarr::(shape).unwrap(); let a = arr.merge().unwrap(); - let b = merge(&arr, 1, 2).unwrap(); assert_eq!(a.dim(), dout); - assert_eq!(a, b); + assert_eq!(a, merge4(&arr, false).unwrap()); } #[test] +fn test_split() { + let heads = 2; + let shape = (3, 4, 6); + let dout = (3, heads, 4, 3); + let arr = linarr::(shape).unwrap(); + let a = arr.split(heads).unwrap(); + + assert_eq!(a.dim(), dout); +} + +#[test] +#[ignore = "Needs to be fixed; currently fails when trying to recreate the original data."] fn reshape_ops() { - let dim_input: [usize; 3] = [2, 4, 6]; // (batch, seq, model) - let dim_split = [2, 2, 4, 
3]; // (batch, heads, seq, model) + let heads = 2; + let dim_input = (2, 4, 6); // (batch, seq, model) + let dim_split = (2, heads, 4, 3); // (batch, heads, seq, model) let data = linarr::(dim_input).unwrap(); - let a = split_batch(&data, 2).unwrap(); + let a = data.split(heads).unwrap(); // split_batch(&data, heads).unwrap(); let b = a.merge().unwrap(); // merge_batch(&a).unwrap(); - assert_eq!(a.shape(), &dim_split); - assert_eq!(b.shape(), &dim_input); - assert_eq!(a, data.split(2).unwrap()); - for (i, &j) in b.indexed_iter() { - assert_eq!(j, data[i]); - } + assert_eq!(a.dim(), dim_split); + assert_eq!(b.dim(), dim_input); + assert_eq!(b, data); + // for (i, &j) in data.split(heads).unwrap().indexed_iter() { + // assert_eq!(j, a[i]); + // } + // for (i, &j) in b.indexed_iter() { + // assert_eq!(j, data[i]); + // } } From 465eafa31506324a5f631cff2c5508bd56f5e5c2 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Sun, 19 May 2024 14:17:20 -0500 Subject: [PATCH 02/19] update Signed-off-by: Joe McCain III --- models/transformers/src/ops/merge.rs | 2 +- models/transformers/src/ops/mod.rs | 110 +++++---------------- models/transformers/src/ops/split.rs | 17 +--- models/transformers/tests/ops.rs | 141 ++++++++++++++++----------- 4 files changed, 114 insertions(+), 156 deletions(-) diff --git a/models/transformers/src/ops/merge.rs b/models/transformers/src/ops/merge.rs index 2ca6296c..c7e66e3e 100644 --- a/models/transformers/src/ops/merge.rs +++ b/models/transformers/src/ops/merge.rs @@ -55,6 +55,6 @@ where } fn merge_along(&self, swap: usize) -> NdResult { - super::_merge(self, swap, swap + 1, false) + super::_merge(self, swap, swap + 1, super::ORDER) } } diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs index 4af970b3..b177d89c 100644 --- a/models/transformers/src/ops/mod.rs +++ b/models/transformers/src/ops/mod.rs @@ -2,62 +2,29 @@ Appellation: ops Contrib: FL03 */ -pub use self::{merge::*, split::*, utils::*}; +pub use 
self::prelude::*; -pub(crate) mod merge; -pub(crate) mod split; +mod merge; +mod split; + +pub(crate) mod prelude { + pub use super::merge::*; + pub use super::split::*; + pub(crate) use super::utils::*; +} + +pub(crate) const ORDER: nd::Order = nd::Order::RowMajor; pub(crate) mod utils { use concision::NdResult; use nd::prelude::*; use nd::{Data, Order, RemoveAxis}; - - pub(crate) fn order(row_major: bool) -> Order { - if row_major { - Order::RowMajor - } else { - Order::ColumnMajor - } - } - - #[deprecated( - since = "0.1.14", - note = "Please use the `Merge::merge` method instead" - )] - pub fn merge( - arr: &ArrayBase, - src: usize, - tgt: usize, - ) -> NdResult> - where - A: Clone, - D: RemoveAxis, - S: Data, - D::Smaller: Dimension, - ArrayBase: Clone, - { - _merge(arr, src, tgt, false) - } - #[deprecated( - since = "0.1.14", - note = "Please use the `Split::Split` method instead" - )] - pub fn split(arr: &ArrayBase, h: usize) -> NdResult> - where - A: Clone, - D: Dimension, - E: RemoveAxis, - S: Data, - ArrayBase: Clone, - { - _split(arr, h, true) - } - + pub(crate) fn _merge( arr: &ArrayBase, src: usize, tgt: usize, - row_major: bool, + order: Order, ) -> NdResult> where A: Clone, @@ -69,14 +36,13 @@ pub(crate) mod utils { let shape = _merge_dim(&arr.raw_dim(), src); let mut head = arr.clone(); head.swap_axes(src, tgt); - head.to_shape((shape, order(row_major))) - .map(|x| x.to_owned()) + head.to_shape((shape, order)).map(|x| x.to_owned()) } pub(crate) fn _split( arr: &ArrayBase, h: usize, - row_major: bool, + order: Order, ) -> NdResult> where A: Clone, @@ -88,29 +54,27 @@ pub(crate) mod utils { let src = if arr.ndim() >= 2 { arr.ndim() - 2 } else { 0 }; let tgt = src + 1; let shape: E = _split_dim(&arr.raw_dim(), h); - let mut head = arr.clone(); - + let mut head = arr.to_shape((shape, order))?.to_owned(); head.swap_axes(src, tgt); - head.to_shape((shape, order(row_major))) - .map(|x| x.to_owned()) + Ok(head) } /// Creates the new dimension after merging 
two axes. - pub(crate) fn _merge_dim(dim: &D, src: usize) -> D::Smaller + pub(crate) fn _merge_dim(dim: &D, axis: usize) -> D::Smaller where D: RemoveAxis, D::Smaller: Dimension, { // create a new dimension with one less axis; initialized with zeros - let mut new_dim = ::Smaller::zeros(dim.ndim() - 1); + let mut dn = ::Smaller::zeros(dim.ndim() - 1); // create a mutable vector from the slice let mut shape = dim.slice().to_vec(); // multiply the last axis by the target - shape[new_dim.ndim()] *= shape[src]; + shape[dn.ndim()] *= shape[axis]; // remove the last dimension - shape.remove(src); + shape.remove(axis); - new_dim.slice_mut().copy_from_slice(&shape); - new_dim + dn.slice_mut().copy_from_slice(&shape); + dn } pub(crate) fn _split_dim(dim: &D::Smaller, h: usize) -> D @@ -128,35 +92,9 @@ pub(crate) mod utils { // extend the shape with the new axes shape.push(h); shape.push(bx); - shape.swap(rank - 2, rank - 3); + // shape.swap(rank - 2, rank - 3); // copy the values into the new dimension new_dim.slice_mut().copy_from_slice(&shape); new_dim } - - pub fn split_heads(param: &Array2, h: usize) -> NdResult> - where - T: Clone, - { - let dim = param.shape().last().unwrap() / h; - // reshape the qkv matrix into a 3d array - let mut res = param.clone().into_shape((param.shape()[0], h, dim))?; - // swap the sequence and head axes - res.swap_axes(0, 1); - Ok(res) - } - - pub fn split_batch(param: &Array3, h: usize) -> NdResult> - where - T: Clone, - { - let dim = param.shape().last().unwrap() / h; - // reshape the qkv matrix into a 3d array - let mut res = param - .clone() - .into_shape((param.shape()[0], param.shape()[1], h, dim))?; - // swap the sequence and head axes - res.swap_axes(1, 2); - Ok(res) - } } diff --git a/models/transformers/src/ops/split.rs b/models/transformers/src/ops/split.rs index 2a1f96c7..d98d861d 100644 --- a/models/transformers/src/ops/split.rs +++ b/models/transformers/src/ops/split.rs @@ -4,23 +4,14 @@ */ use ndarray::{Array, ArrayBase, Data, 
Dimension, RemoveAxis, ShapeError}; -// pub fn split(param: &Array, heads: usize) -> Result, ShapeError> { -// let mut dim = param.dim() -// let query = param.shape().last().unwrap() / heads; -// // reshape the qkv matrix into a 3d array -// let mut res = param.clone().into_shape((param.shape()[0], heads, query))?; -// // swap the sequence and head axes -// res.swap_axes(0, 1); -// Ok(res) -// } - +/// Split a dimension into two parts pub trait DimSplit { type Output; fn split(&self, h: usize) -> Self::Output; } -pub trait Split { +pub trait SplitHead { type Output; fn split(&self, heads: usize) -> Result; @@ -42,7 +33,7 @@ where } } -impl Split for ArrayBase +impl SplitHead for ArrayBase where A: Clone, D: Dimension, @@ -53,7 +44,7 @@ where type Output = Array; fn split(&self, h: usize) -> Result { - super::_split(self, h, false) + super::_split(self, h, super::ORDER) } } diff --git a/models/transformers/tests/ops.rs b/models/transformers/tests/ops.rs index 8227d8b2..687b50db 100644 --- a/models/transformers/tests/ops.rs +++ b/models/transformers/tests/ops.rs @@ -4,45 +4,14 @@ */ extern crate concision_core as concision; extern crate concision_transformers as transformers; +extern crate ndarray as nd; -use concision::prelude::{linarr, NdResult}; -use ndarray::prelude::*; -use ndarray::Order; +use concision::linarr; +use nd::prelude::*; use transformers::ops::*; -fn order(row_major: bool) -> Order { - if row_major { - Order::RowMajor - } else { - Order::ColumnMajor - } -} - -fn merge3(heads: &Array3, row_major: bool) -> NdResult> -where - T: Clone, -{ - let (n, seq, query) = heads.dim(); - let mut tmp = heads.clone(); - // swap the head and sequence axes - tmp.swap_axes(0, 1); - // reshape the qkv matrix into a 2d array - tmp.to_shape(((seq, n * query), order(row_major))) - .map(|x| x.to_owned()) -} - -fn merge4(heads: &Array4, row_major: bool) -> NdResult> -where - T: Clone, -{ - let (batch, n, seq, query) = heads.dim(); - let mut tmp = heads.clone(); - // swap 
the head and sequence axes - tmp.swap_axes(1, 2); - // reshape the qkv matrix into a 2d array - tmp.to_shape(((batch, seq, n * query), order(row_major))) - .map(|x| x.to_owned()) -} +pub const HEADS: usize = 2; +pub const ORDER: nd::Order = nd::Order::RowMajor; #[test] fn test_merge() { @@ -52,7 +21,7 @@ fn test_merge() { let a = arr.clone().merge().unwrap(); assert_eq!(a.dim(), dout); - assert_eq!(a, merge3(&arr, false).unwrap()); + assert_eq!(a, utils::merge3(&arr).unwrap()); } #[test] @@ -63,38 +32,98 @@ fn test_merge_batch() { let a = arr.merge().unwrap(); assert_eq!(a.dim(), dout); - assert_eq!(a, merge4(&arr, false).unwrap()); + assert_eq!(a, utils::merge4(&arr).unwrap()); } #[test] fn test_split() { + let heads = 2; + let shape = (4, 6); + let arr = linarr::(shape).unwrap(); + let a = arr.split(heads).unwrap(); + + assert_eq!(a.dim(), (heads, 4, 3)); + assert_eq!(a, utils::split_heads(&arr, heads).unwrap()); +} + +#[test] +fn test_split_batch() { let heads = 2; let shape = (3, 4, 6); - let dout = (3, heads, 4, 3); let arr = linarr::(shape).unwrap(); let a = arr.split(heads).unwrap(); - assert_eq!(a.dim(), dout); + assert_eq!(a.dim(), (3, heads, 4, 3)); + assert_eq!(a, utils::split_batch(&arr, heads).unwrap()); } #[test] -#[ignore = "Needs to be fixed; currently fails when trying to recreate the original data."] fn reshape_ops() { - let heads = 2; - let dim_input = (2, 4, 6); // (batch, seq, model) - let dim_split = (2, heads, 4, 3); // (batch, heads, seq, model) - let data = linarr::(dim_input).unwrap(); - - let a = data.split(heads).unwrap(); // split_batch(&data, heads).unwrap(); - let b = a.merge().unwrap(); // merge_batch(&a).unwrap(); + let shape = (2, 4, 6); + let data = linarr::(shape).unwrap(); - assert_eq!(a.dim(), dim_split); - assert_eq!(b.dim(), dim_input); + let a = data.split(HEADS).unwrap(); + assert_eq!(a.dim(), (2, HEADS, 4, 3)); + let b = a.merge().unwrap(); + assert_eq!(b.dim(), shape); + // verify that doing the ops consecutively is the 
identity assert_eq!(b, data); - // for (i, &j) in data.split(heads).unwrap().indexed_iter() { - // assert_eq!(j, a[i]); - // } - // for (i, &j) in b.indexed_iter() { - // assert_eq!(j, data[i]); - // } +} + +#[allow(dead_code)] +pub(crate) mod utils { + use concision::NdResult; + use ndarray::*; + + pub fn merge3(heads: &Array3) -> NdResult> + where + T: Clone, + { + let (n, seq, query) = heads.dim(); + let shape = (seq, n * query); + let mut tmp = heads.clone(); + // swap the head and sequence axes + tmp.swap_axes(0, 1); + // reshape the qkv matrix into a 2d array + tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned()) + } + + pub fn merge4(heads: &Array4) -> NdResult> + where + T: Clone, + { + let (batch, n, seq, query) = heads.dim(); + let shape = (batch, seq, n * query); + let mut tmp = heads.clone(); + // swap the head and sequence axes + tmp.swap_axes(1, 2); + // reshape the qkv matrix into a 2d array + tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned()) + } + + pub fn split_heads(param: &Array2, h: usize) -> NdResult> + where + T: Clone, + { + let dim = param.shape().last().unwrap() / h; + // reshape the qkv matrix into a 3d array + let mut res = param.clone().into_shape((param.shape()[0], h, dim))?; + // swap the sequence and head axes + res.swap_axes(0, 1); + Ok(res) + } + + pub fn split_batch(param: &Array3, h: usize) -> NdResult> + where + T: Clone, + { + let dim = param.shape().last().unwrap() / h; + // reshape the qkv matrix into a 3d array + let mut res = param + .clone() + .into_shape((param.shape()[0], param.shape()[1], h, dim))?; + // swap the sequence and head axes + res.swap_axes(1, 2); + Ok(res) + } } From 7ee384b3c789fb5c030f31118de31d63c0de092a Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Sun, 19 May 2024 14:43:22 -0500 Subject: [PATCH 03/19] update Signed-off-by: Joe McCain III --- core/src/traits/arr/misc.rs | 34 ++++++++++++++++++-- models/transformers/src/attention/head.rs | 15 +++++++++ 
models/transformers/src/attention/mod.rs | 6 ++++ models/transformers/src/impls/impl_linalg.rs | 6 ++-- models/transformers/src/ops/mod.rs | 2 +- models/transformers/src/params/mod.rs | 20 ++++++------ models/transformers/tests/attention.rs | 6 ++-- 7 files changed, 71 insertions(+), 18 deletions(-) diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs index 4cc76e4c..a38b28e1 100644 --- a/core/src/traits/arr/misc.rs +++ b/core/src/traits/arr/misc.rs @@ -2,8 +2,8 @@ Appellation: convert Contrib: FL03 */ -use nd::Axis; -use nd::{ArrayBase, Dimension, RawData}; +use nd::prelude::*; +use nd::{DataMut, RawData}; pub trait Dimensional { type Pattern; @@ -14,6 +14,16 @@ pub trait Dimensional { fn shape(&self) -> &[usize]; } +/// This trait is used to fill an array with a value based on a mask. +/// The mask is a boolean array of the same shape as the array. +pub trait MaskFill +where + D: Dimension, +{ + type Output; + + fn mask_fill(&self, mask: Array, value: A) -> Self::Output; +} pub trait IntoAxis { fn into_axis(self) -> Axis; @@ -46,6 +56,26 @@ where } } +impl MaskFill for ArrayBase +where + A: Clone, + D: Dimension, + S: DataMut, + Self: Clone, +{ + type Output = ArrayBase; + + fn mask_fill(&self, mask: Array, value: A) -> Self::Output { + let mut arr = self.clone(); + arr.zip_mut_with(&mask, |x, &m| { + if m { + *x = value.clone(); + } + }); + arr + } +} + impl IntoAxis for S where S: AsRef, diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index c5146a34..8bd27400 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -78,6 +78,21 @@ where ndbuilder!(zeros() where A: Clone + num::Zero, S: DataOwned); } +impl super::Attention for AttentionHead +where + A: ComplexFloat + ScalarOperand, + D: Dimension, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, +{ + type Output = Array; + + fn attention(&self) -> Self::Output 
{ + self.attention() + } +} + impl Borrow> for AttentionHead where D: Dimension, diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs index 80a264c7..443f8204 100644 --- a/models/transformers/src/attention/mod.rs +++ b/models/transformers/src/attention/mod.rs @@ -21,6 +21,12 @@ pub(crate) mod prelude { pub use super::utils::*; } +pub trait Attention { + type Output; + + fn attention(&self) -> Self::Output; +} + pub(crate) mod utils { use concision::func::activate::Softmax; use nd::linalg::Dot; diff --git a/models/transformers/src/impls/impl_linalg.rs b/models/transformers/src/impls/impl_linalg.rs index ce069afe..c2ab8812 100644 --- a/models/transformers/src/impls/impl_linalg.rs +++ b/models/transformers/src/impls/impl_linalg.rs @@ -2,7 +2,7 @@ Appellation: impl_linalg Contrib: FL03 */ -use crate::params::{Params, QkvBase}; +use crate::params::{Qkv, QkvBase}; use concision::Matmul; use nd::linalg::Dot; use nd::*; @@ -17,7 +17,7 @@ where T: Data, ArrayBase: Dot, Output = Array>, { - type Output = Params; + type Output = Qkv; fn matmul(&self, rhs: &QkvBase) -> Self::Output { QkvBase { @@ -38,7 +38,7 @@ where T: Data, ArrayBase: Dot, Output = Array>, { - type Output = Params; + type Output = Qkv; fn matmul(&self, rhs: &ArrayBase) -> Self::Output { QkvBase { diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs index b177d89c..6612af22 100644 --- a/models/transformers/src/ops/mod.rs +++ b/models/transformers/src/ops/mod.rs @@ -19,7 +19,7 @@ pub(crate) mod utils { use concision::NdResult; use nd::prelude::*; use nd::{Data, Order, RemoveAxis}; - + pub(crate) fn _merge( arr: &ArrayBase, src: usize, diff --git a/models/transformers/src/params/mod.rs b/models/transformers/src/params/mod.rs index 367f8b2a..ba79e10f 100644 --- a/models/transformers/src/params/mod.rs +++ b/models/transformers/src/params/mod.rs @@ -4,11 +4,12 @@ */ pub use self::{item::*, store::QkvBase}; -pub(crate) mod item; -pub(crate) 
mod store; +mod store; + +pub mod item; macro_rules! params_ty { - ($target:ident: [$($name:ident<$(&$lt:lifetime)?$repr:ident>),* $(,)?]) => { + ($target:ident {$($name:ident: $(&$lt:lifetime)? $repr:ident),* $(,)?}) => { $(params_ty!(@impl $target: $name<$(&$lt)? $repr>);)* }; (@impl $target:ident: $name:ident<$repr:ident>) => { @@ -20,16 +21,17 @@ macro_rules! params_ty { } params_ty!( - QkvBase: [ - Params, - ArcParams, - ParamsView<&'a ViewRepr>, - ] + QkvBase { + Qkv: OwnedRepr, + ArcQkv: OwnedArcRepr, + ViewQkv: &'a ViewRepr, + + } ); #[allow(unused_imports)] pub(crate) mod prelude { pub use super::item::{Entry, QKV}; pub use super::store::QkvBase; - pub use super::{ArcParams, Params}; + pub use super::{ArcQkv, Qkv, ViewQkv}; } diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs index db1efe2a..bd7f16a6 100644 --- a/models/transformers/tests/attention.rs +++ b/models/transformers/tests/attention.rs @@ -6,14 +6,14 @@ extern crate concision_core as concision; extern crate concision_transformers as transformers; use concision::{linarr, Matmul}; -use transformers::{AttentionHead, Params}; +use transformers::{AttentionHead, Qkv}; use ndarray::prelude::*; #[test] fn test_qkv() { let shape = (2048, 10); - let params = Params::::new(shape); + let params = Qkv::::new(shape); assert_eq!(params.q(), &Array::default(shape)); } @@ -23,7 +23,7 @@ fn test_qkv_matmul() { // generate some sample data let data = linarr(shape).unwrap(); // initialize the parameters - let params = Params::::ones(shape); + let params = Qkv::::ones(shape); // calculate the expected result let exp = Array2::::ones(shape).dot(&data.t()); // calculate the result From 465da83804359d0797c13d0c618963cc81ea8942 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Sun, 19 May 2024 14:59:55 -0500 Subject: [PATCH 04/19] update Signed-off-by: Joe McCain III --- core/src/math/mod.rs | 5 +++-- core/src/traits/arr/create.rs | 12 +++++------- core/src/traits/arr/misc.rs | 
4 ++-- core/tests/traits.rs | 14 ++++++++++++-- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs index bc9dc16f..16220e0f 100644 --- a/core/src/math/mod.rs +++ b/core/src/math/mod.rs @@ -4,8 +4,9 @@ */ //! # Mathematics //! -//! This module focuses on implementing various mathematical objects and operations that are -//! critical to the development of machine learning algorithms. +//! This module focuses on providing the mathematical foundation for the library. +//! Any defined operation is designed to extend the functionality of the basic primitives +//! as well as the `ndarray` crate. pub use self::traits::*; pub mod traits; diff --git a/core/src/traits/arr/create.rs b/core/src/traits/arr/create.rs index b99a5eaa..8c45d927 100644 --- a/core/src/traits/arr/create.rs +++ b/core/src/traits/arr/create.rs @@ -86,11 +86,9 @@ where } } -macro_rules! impl_like { +macro_rules! impl_ndlike { + ($name:ident::$method:ident.$call:ident: $($p:tt)*) => { - impl_like!(@impl $name::$method.$call: $($p)*); - }; - (@impl $name:ident::$method:ident.$call:ident: $($p:tt)*) => { impl $name for ArrayBase where A: $($p)*, @@ -106,6 +104,6 @@ macro_rules! 
impl_like { }; } -impl_like!(DefaultLike::default_like.default: Default); -impl_like!(OnesLike::ones_like.ones: Clone + num::One); -impl_like!(ZerosLike::zeros_like.zeros: Clone + num::Zero); +impl_ndlike!(DefaultLike::default_like.default: Default); +impl_ndlike!(OnesLike::ones_like.ones: Clone + num::One); +impl_ndlike!(ZerosLike::zeros_like.zeros: Clone + num::Zero); diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs index a38b28e1..0b3b04e2 100644 --- a/core/src/traits/arr/misc.rs +++ b/core/src/traits/arr/misc.rs @@ -22,7 +22,7 @@ where { type Output; - fn mask_fill(&self, mask: Array, value: A) -> Self::Output; + fn masked_fill(&self, mask: &Array, value: A) -> Self::Output; } pub trait IntoAxis { @@ -65,7 +65,7 @@ where { type Output = ArrayBase; - fn mask_fill(&self, mask: Array, value: A) -> Self::Output { + fn masked_fill(&self, mask: &Array, value: A) -> Self::Output { let mut arr = self.clone(); arr.zip_mut_with(&mask, |x, &m| { if m { diff --git a/core/tests/traits.rs b/core/tests/traits.rs index 1778fefd..ab7bd44d 100644 --- a/core/tests/traits.rs +++ b/core/tests/traits.rs @@ -4,8 +4,9 @@ */ extern crate concision_core as cnc; -use cnc::traits::{Affine, AsComplex, Matpow}; -use ndarray::prelude::{array, Array2}; +use cnc::linarr; +use cnc::traits::{Affine, AsComplex, MaskFill, Matpow}; +use ndarray::prelude::*; use num::Complex; #[test] @@ -16,6 +17,15 @@ fn test_affine() { assert_eq!(y, array![[-2.0, 2.0], [6.0, 10.0]]); } +#[test] +fn test_masked_fill() { + let shape = (2, 2); + let mask = array![[true, false], [false, true]]; + let arr = linarr::(shape).unwrap(); + let a = arr.masked_fill(&mask, 0.0); + assert_eq!(a, array![[0.0, 1.0], [2.0, 0.0]]); +} + #[test] fn test_as_complex() { let x = 1.0; From 72f79eb3d10dfcd7edf82ff2b073ecd53d354f9a Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Sun, 19 May 2024 15:12:07 -0500 Subject: [PATCH 05/19] update Signed-off-by: Joe McCain III --- core/src/traits/arr/tensor.rs | 12 
++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs index 22a00f99..06a99781 100644 --- a/core/src/traits/arr/tensor.rs +++ b/core/src/traits/arr/tensor.rs @@ -2,6 +2,7 @@ Appellation: generator Contrib: FL03 */ +use super::Dimensional; use nd::prelude::*; use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData}; use num::{One, Zero}; @@ -40,12 +41,10 @@ where Self::Data: DataOwned; } -pub trait NdBuilderExt: NdBuilder +pub trait NdBuilderExt: Dimensional + NdBuilder where D: Dimension, { - fn dim(&self) -> D::Pattern; - fn default_like(&self) -> Self::Store where A: Default, @@ -183,14 +182,11 @@ where } } -impl NdBuilderExt for ArrayBase +impl NdBuilderExt for U where + U: Dimensional + NdBuilder, D: Dimension, - S: RawData, { - fn dim(&self) -> D::Pattern { - ArrayBase::dim(self) - } } impl AsOwned for ArrayBase From 44f3966633909426ad0fcaf37edf5a684fc3a742 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Sun, 19 May 2024 15:26:10 -0500 Subject: [PATCH 06/19] update Signed-off-by: Joe McCain III --- core/src/traits/arr/misc.rs | 16 ++++++++++++++++ core/src/traits/arr/tensor.rs | 9 +++++++++ 2 files changed, 25 insertions(+) diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs index 0b3b04e2..51a5996d 100644 --- a/core/src/traits/arr/misc.rs +++ b/core/src/traits/arr/misc.rs @@ -5,13 +5,28 @@ use nd::prelude::*; use nd::{DataMut, RawData}; +/// [Dimensional] provides a common interface for containers to access their shape and dimension. 
pub trait Dimensional { + const RANK: Option = None; + type Pattern; fn dim(&self) -> Self::Pattern; + fn is_scalar(&self) -> bool { + self.rank() == 0 || self.shape().iter().all(|x| *x == 1) + } + + fn rank(&self) -> usize { + Self::RANK.unwrap_or(self.shape().len()) + } + fn raw_dim(&self) -> D; + fn size(&self) -> usize { + self.shape().iter().product() + } + fn shape(&self) -> &[usize]; } /// This trait is used to fill an array with a value based on a mask. @@ -41,6 +56,7 @@ where D: Dimension, S: RawData, { + const RANK: Option = D::NDIM; type Pattern = D::Pattern; fn shape(&self) -> &[usize] { diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs index 06a99781..41de9471 100644 --- a/core/src/traits/arr/tensor.rs +++ b/core/src/traits/arr/tensor.rs @@ -7,6 +7,15 @@ use nd::prelude::*; use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData}; use num::{One, Zero}; +/// This trait describes the basic operations for any n-dimensional container. +pub trait NdContainer: Dimensional { + type Data; + + fn as_slice(&self) -> &[A]; + + fn as_mut_slice(&mut self) -> &mut [A]; +} + /// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase) pub trait NdBuilder where From 4b67274ef9cb037d1dd3663113ea475d1bead0b0 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Mon, 20 May 2024 09:39:09 -0500 Subject: [PATCH 07/19] update Signed-off-by: Joe McCain III --- core/src/init/{gen => distr}/lecun.rs | 13 +-- core/src/init/distr/trunc.rs | 81 ++++++++++++++++++ core/src/init/distr/xavier.rs | 116 ++++++++++++++++++++++++++ core/src/init/initialize.rs | 28 ++++--- core/src/init/mod.rs | 8 +- core/src/math/mod.rs | 2 +- core/src/traits/arr/tensor.rs | 3 +- core/tests/random.rs | 28 ++++++- 8 files changed, 258 insertions(+), 21 deletions(-) rename core/src/init/{gen => distr}/lecun.rs (73%) create mode 100644 core/src/init/distr/trunc.rs create mode 100644 core/src/init/distr/xavier.rs diff --git a/core/src/init/gen/lecun.rs 
b/core/src/init/distr/lecun.rs similarity index 73% rename from core/src/init/gen/lecun.rs rename to core/src/init/distr/lecun.rs index b8cae16c..0c4763c5 100644 --- a/core/src/init/gen/lecun.rs +++ b/core/src/init/distr/lecun.rs @@ -1,10 +1,11 @@ /* - Appellation: lecun + Appellation: lecun Contrib: FL03 */ +use crate::init::distr::TruncatedNormal; use num::Float; use rand::Rng; -use rand_distr::{Distribution, Normal, NormalError, StandardNormal}; +use rand_distr::{Distribution, NormalError, StandardNormal}; /// [LecunNormal] is a truncated [normal](rand_distr::Normal) distribution centered at 0 /// with a standard deviation that is calculated as `σ = sqrt(1/n_in)` @@ -18,14 +19,14 @@ impl LecunNormal { pub fn new(n: usize) -> Self { Self { n } } - /// Create a [normal](rand_distr::Normal) [distribution](Distribution) centered at 0; + /// Create a [truncated normal](TruncatedNormal) [distribution](Distribution) centered at 0; /// See [Self::std_dev] for the standard deviation calculations. - pub fn distr(&self) -> Result, NormalError> + pub fn distr(&self) -> Result, NormalError> where F: Float, StandardNormal: Distribution, { - Normal::new(F::zero(), self.std_dev()) + TruncatedNormal::new(F::zero(), self.std_dev()) } /// Calculate the standard deviation (`σ`) of the distribution. 
/// This is done by computing the root of the reciprocal of the number of inputs @@ -48,6 +49,6 @@ where where R: Rng + ?Sized, { - self.distr().unwrap().sample(rng) + self.distr().expect("NormalError").sample(rng) } } diff --git a/core/src/init/distr/trunc.rs b/core/src/init/distr/trunc.rs new file mode 100644 index 00000000..fc94f0b9 --- /dev/null +++ b/core/src/init/distr/trunc.rs @@ -0,0 +1,81 @@ +/* + Appellation: trunc + Contrib: FL03 +*/ +use num::traits::Float; +use rand::Rng; +use rand_distr::{Distribution, Normal, NormalError, StandardNormal}; + +/// A truncated normal distribution is similar to a [normal](rand_distr::Normal) [distribution](rand_distr::Distribution), however, +/// any generated value over two standard deviations from the mean is discarded and re-generated. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TruncatedNormal +where + StandardNormal: Distribution, +{ + mean: F, + std: F, +} + +impl TruncatedNormal +where + F: Float, + StandardNormal: Distribution, +{ + /// Create a new truncated normal distribution with a given mean and standard deviation + pub fn new(mean: F, std: F) -> Result { + Ok(Self { mean, std }) + } + + pub(crate) fn boundary(&self) -> F { + self.mean() + self.std_dev() * F::from(2).unwrap() + } + + pub(crate) fn score(&self, x: F) -> F { + self.mean() - self.std_dev() * x + } + + pub fn distr(&self) -> Normal { + Normal::new(self.mean(), self.std_dev()).unwrap() + } + + pub fn mean(&self) -> F { + self.mean + } + + pub fn std_dev(&self) -> F { + self.std + } +} + +impl Distribution for TruncatedNormal +where + F: Float, + StandardNormal: Distribution, +{ + fn sample(&self, rng: &mut R) -> F + where + R: Rng + ?Sized, + { + let bnd = self.boundary(); + let mut x = self.score(rng.sample(StandardNormal)); + // if x is outside of the boundary, re-sample + while x < -bnd || x > bnd { + x = self.score(rng.sample(StandardNormal)); + } + x + } +} + +impl From> for TruncatedNormal +where + F: 
Float, + StandardNormal: Distribution, +{ + fn from(normal: Normal) -> Self { + Self { + mean: normal.mean(), + std: normal.std_dev(), + } + } +} diff --git a/core/src/init/distr/xavier.rs b/core/src/init/distr/xavier.rs new file mode 100644 index 00000000..37f43251 --- /dev/null +++ b/core/src/init/distr/xavier.rs @@ -0,0 +1,116 @@ +/* + Appellation: xavier + Contrib: FL03 +*/ +//! # Xavier +//! +//! Xavier initialization techniques were developed in 2010 by Xavier Glorot. +//! These methods are designed to initialize the weights of a neural network in a way that +//! prevents the vanishing and exploding gradient problems. The initialization technique +//! manifests into two distributions: [XavierNormal] and [XavierUniform]. +use num::Float; +use rand::Rng; +use rand_distr::uniform::{SampleUniform, Uniform}; +use rand_distr::{Distribution, Normal, NormalError, StandardNormal}; + +pub(crate) fn std_dev(inputs: usize, outputs: usize) -> F +where + F: Float, +{ + (F::from(2).unwrap() / F::from(inputs + outputs).unwrap()).sqrt() +} + +pub(crate) fn boundary(inputs: usize, outputs: usize) -> F +where + F: Float, +{ + (F::from(6).unwrap() / F::from(inputs + outputs).unwrap()).sqrt() +} +/// Normal Xavier initializers leverage a normal distribution with a mean of 0 and a standard deviation (`σ`) +/// computed by the formula: `σ = sqrt(2/(d_in + d_out))` +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct XavierNormal +where + F: Float, + StandardNormal: Distribution, +{ + std: F, +} + +impl XavierNormal +where + F: Float, + StandardNormal: Distribution, +{ + pub fn new(inputs: usize, outputs: usize) -> Self { + Self { + std: std_dev(inputs, outputs), + } + } + + pub fn distr(&self) -> Result, NormalError> { + Normal::new(F::zero(), self.std_dev()) + } + + pub fn std_dev(&self) -> F { + self.std + } +} + +impl Distribution for XavierNormal +where + F: Float, + StandardNormal: Distribution, +{ + fn sample(&self, rng: &mut R) -> F + where + R: 
Rng + ?Sized, + { + self.distr().unwrap().sample(rng) + } +} + +/// Uniform Xavier initializers use a uniform distribution to initialize the weights of a neural network +/// within a given range. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct XavierUniform +where + X: SampleUniform, +{ + boundary: X, +} + +impl XavierUniform +where + X: Float + SampleUniform, +{ + pub fn new(inputs: usize, outputs: usize) -> Self { + Self { + boundary: boundary(inputs, outputs), + } + } + + pub fn boundary(&self) -> X { + self.boundary + } + + pub fn distr(&self) -> Uniform + where + X: Float, + { + let bnd = self.boundary(); + Uniform::new(-bnd, bnd) + } +} + +impl Distribution for XavierUniform +where + X: Float + SampleUniform, +{ + fn sample(&self, rng: &mut R) -> X + where + R: Rng + ?Sized, + { + self.distr().sample(rng) + } +} diff --git a/core/src/init/initialize.rs b/core/src/init/initialize.rs index 91b41b13..4d2eb51d 100644 --- a/core/src/init/initialize.rs +++ b/core/src/init/initialize.rs @@ -2,16 +2,17 @@ Appellation: initialize Contrib: FL03 */ +use crate::init::distr::*; + use core::ops::Neg; use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder}; use ndrand::RandomExt; use num::complex::ComplexDistribution; use num::traits::Float; -use rand::{rngs, Rng, SeedableRng}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; use rand_distr::uniform::{SampleUniform, Uniform}; -use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, StandardNormal}; - -use super::LecunNormal; +use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, NormalError, StandardNormal}; /// This trait provides the base methods required for initializing an [ndarray](ndarray::ArrayBase) with random values. 
/// [Initialize] is similar to [RandomExt](ndarray_rand::RandomExt), however, it focuses on flexibility while implementing additional @@ -79,7 +80,7 @@ where Self::rand(shape, distr) } /// Given a shape, mean, and standard deviation generate a new object using the [Normal](rand_distr::Normal) distribution - fn normal(shape: Sh, mean: A, std: A) -> Result + fn normal(shape: Sh, mean: A, std: A) -> Result where A: Float, S: DataOwned, @@ -115,11 +116,18 @@ where Sh: ShapeBuilder, StandardNormal: Distribution, { - Self::rand_with( - shape, - StandardNormal, - &mut rngs::StdRng::seed_from_u64(seed), - ) + Self::rand_with(shape, StandardNormal, &mut StdRng::seed_from_u64(seed)) + } + /// Initialize the object using the [TruncatedNormal](crate::init::distr::TruncatedNormal) distribution + fn truncnorm(shape: Sh, mean: A, std: A) -> Result + where + A: Float, + S: DataOwned, + Sh: ShapeBuilder, + StandardNormal: Distribution, + { + let distr = TruncatedNormal::new(mean, std)?; + Ok(Self::rand(shape, distr)) } /// A [uniform](rand_distr::uniform::Uniform) generator with values between u(-dk, dk) fn uniform(shape: Sh, dk: A) -> Self diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs index 22ee1bac..2ebcd583 100644 --- a/core/src/init/mod.rs +++ b/core/src/init/mod.rs @@ -16,13 +16,17 @@ pub use self::prelude::*; pub(crate) mod initialize; pub(crate) mod utils; -pub mod gen { +pub mod distr { pub use self::prelude::*; pub mod lecun; + pub mod trunc; + pub mod xavier; pub(crate) mod prelude { pub use super::lecun::*; + pub use super::trunc::*; + pub use super::xavier::*; } } @@ -34,7 +38,7 @@ pub use rand; pub use rand_distr; pub(crate) mod prelude { - pub use super::gen::prelude::*; + pub use super::distr::prelude::*; pub use super::initialize::{Initialize, InitializeExt}; pub use super::utils::*; } diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs index 16220e0f..d8084442 100644 --- a/core/src/math/mod.rs +++ b/core/src/math/mod.rs @@ -6,7 +6,7 @@ //! //! 
This module focuses on providing the mathematical foundation for the library. //! Any defined operation is designed to extend the functionality of the basic primitives -//! as well as the `ndarray` crate. +//! as well as the `ndarray` crate. pub use self::traits::*; pub mod traits; diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs index 41de9471..9bd5d863 100644 --- a/core/src/traits/arr/tensor.rs +++ b/core/src/traits/arr/tensor.rs @@ -50,7 +50,8 @@ where Self::Data: DataOwned; } -pub trait NdBuilderExt: Dimensional + NdBuilder +pub trait NdBuilderExt: + Dimensional + NdBuilder where D: Dimension, { diff --git a/core/tests/random.rs b/core/tests/random.rs index daa76435..39d29e5d 100644 --- a/core/tests/random.rs +++ b/core/tests/random.rs @@ -4,11 +4,12 @@ */ extern crate concision_core as cnc; +use cnc::init::distr::LecunNormal; use cnc::init::InitializeExt; use ndarray::prelude::*; #[test] -fn test_stdnorm() { +fn test_init_ext() { let shape = [3, 3]; let seed = 0u64; let a = Array2::::stdnorm(shape); @@ -17,3 +18,28 @@ fn test_stdnorm() { assert_eq!(a.shape(), shape); assert_eq!(a.shape(), b.shape()); } + +#[test] +fn test_lecun_normal() { + let n = 3; + let shape = (3, 3); + + let distr = LecunNormal::new(n); + + let bnd = 2f64 * distr.std_dev::(); + + let arr = Array2::::lecun_normal(shape, n); + + assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd)); + + assert_eq!(arr.dim(), shape); +} + +#[test] +fn test_truncnorm() { + let (mean, std) = (0f64, 2f64); + let bnd = 2f64 * std; + let shape = (3, 3); + let arr = Array::truncnorm(shape, mean, std).unwrap(); + assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd)); +} From 6f8d88ee84f6c0cf060c930e5530026adb186786 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Mon, 20 May 2024 09:40:04 -0500 Subject: [PATCH 08/19] update Signed-off-by: Joe McCain III --- core/src/init/distr/xavier.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/init/distr/xavier.rs 
b/core/src/init/distr/xavier.rs index 37f43251..ecc1ee0c 100644 --- a/core/src/init/distr/xavier.rs +++ b/core/src/init/distr/xavier.rs @@ -8,6 +8,7 @@ //! These methods are designed to initialize the weights of a neural network in a way that //! prevents the vanishing and exploding gradient problems. The initialization technique //! manifests into two distributions: [XavierNormal] and [XavierUniform]. +// #76 use num::Float; use rand::Rng; use rand_distr::uniform::{SampleUniform, Uniform}; From 02080dc50921237bcbc03cf3fc035e75eade41a7 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Mon, 20 May 2024 11:11:53 -0500 Subject: [PATCH 09/19] update Signed-off-by: Joe McCain III --- core/Cargo.toml | 12 +++- core/src/func/mod.rs | 3 - core/src/lib.rs | 1 + core/src/{func => nn}/dropout.rs | 68 ++++++++++++---------- core/src/nn/mod.rs | 6 +- core/src/types/mask.rs | 10 ++++ core/src/types/mod.rs | 2 + core/tests/{func.rs => nn.rs} | 4 +- models/transformers/src/attention/head.rs | 34 +++++++++-- models/transformers/src/attention/mod.rs | 54 ++++++++++++++++- models/transformers/src/impls/impl_head.rs | 2 + models/transformers/src/params/store.rs | 2 + 12 files changed, 150 insertions(+), 48 deletions(-) rename core/src/{func => nn}/dropout.rs (56%) create mode 100644 core/src/types/mask.rs rename core/tests/{func.rs => nn.rs} (83%) diff --git a/core/Cargo.toml b/core/Cargo.toml index b0891b64..a4d06c91 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -103,14 +103,20 @@ crate-type = ["lib"] doctest = false test = true -[[test]] -name = "random" -required-features = ["rand"] + [[test]] name = "fft" required-features = ["approx"] +[[test]] +name = "nn" + +[[test]] +name = "random" +required-features = ["rand", "std"] + + [build-dependencies] [dependencies] diff --git a/core/src/func/mod.rs b/core/src/func/mod.rs index bb99ccba..96513d96 100644 --- a/core/src/func/mod.rs +++ b/core/src/func/mod.rs @@ -7,12 +7,9 @@ pub use self::prelude::*; #[macro_use] pub mod 
activate; -pub mod dropout; pub mod loss; pub(crate) mod prelude { pub use super::activate::prelude::*; - #[cfg(feature = "rand")] - pub use super::dropout::*; pub use super::loss::prelude::*; } diff --git a/core/src/lib.rs b/core/src/lib.rs index 5906aa6f..a09d48e1 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -34,6 +34,7 @@ pub mod types; pub mod utils; pub mod prelude { + #[allow(unused_imports)] pub(crate) use super::primitives::rust::*; pub use super::error::prelude::*; diff --git a/core/src/func/dropout.rs b/core/src/nn/dropout.rs similarity index 56% rename from core/src/func/dropout.rs rename to core/src/nn/dropout.rs index 00b24b13..19acdbc0 100644 --- a/core/src/func/dropout.rs +++ b/core/src/nn/dropout.rs @@ -2,14 +2,15 @@ Appellation: dropout Contrib: FL03 */ -#![cfg(feature = "rand")] +#![allow(unused_imports)] use crate::Forward; use nd::prelude::*; -use nd::{DataOwned, RemoveAxis, ScalarOperand}; -use ndrand::rand_distr::Bernoulli; -use ndrand::RandomExt; +use nd::{DataOwned, ScalarOperand}; +#[cfg(feature = "rand")] +use ndrand::{rand_distr::Bernoulli, RandomExt}; use num::traits::Num; +#[cfg(feature = "rand")] pub fn dropout(array: &ArrayBase, p: f64) -> Array where A: Num + ScalarOperand, @@ -27,44 +28,46 @@ where array * mask } -pub fn dropout_axis(array: &ArrayBase, _axis: Axis, p: f64) -> Array -where - A: Num + ScalarOperand, - D: RemoveAxis, - S: DataOwned, -{ - // Create a Bernoulli distribution for dropout - let distribution = Bernoulli::new(p).unwrap(); +/// [Dropout] randomly zeroizes elements with a given probability (`p`). +pub trait Dropout { + type Output; - // Create a mask of the same shape as the input array - let _mask: Array = Array::random(array.dim(), distribution); - - unimplemented!() + fn dropout(&self, p: f64) -> Self::Output; } -/// The [Dropout] layer is randomly zeroizes inputs with a given probability (`p`). +/// The [DropoutLayer] layer is randomly zeroizes inputs with a given probability (`p`). 
/// This regularization technique is often used to prevent overfitting. /// /// /// ### Config /// /// - (p) Probability of dropping an element -pub struct Dropout { - p: f64, +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct DropoutLayer { + pub(crate) p: f64, } -impl Dropout { - pub fn new(p: f64) -> Self { - Self { p } +/* + ************* Implementations ************* +*/ +#[cfg(feature = "rand")] +impl Dropout for ArrayBase +where + A: Num + ScalarOperand, + D: Dimension, + S: DataOwned, +{ + type Output = Array; + + fn dropout(&self, p: f64) -> Self::Output { + dropout(self, p) } +} - pub fn dropout(&self, array: &ArrayBase) -> Array - where - A: Num + ScalarOperand, - D: Dimension, - S: DataOwned, - { - dropout(array, self.p) +impl DropoutLayer { + pub fn new(p: f64) -> Self { + Self { p } } pub fn scale(&self) -> f64 { @@ -72,13 +75,14 @@ impl Dropout { } } -impl Default for Dropout { +impl Default for DropoutLayer { fn default() -> Self { Self::new(0.5) } } -impl Forward> for Dropout +#[cfg(feature = "rand")] +impl Forward> for DropoutLayer where A: Num + ScalarOperand, D: Dimension, @@ -87,6 +91,6 @@ where type Output = Array; fn forward(&self, input: &ArrayBase) -> Self::Output { - dropout(input, self.p) + input.dropout(self.p) } } diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs index c0eb1f81..d4c7fa48 100644 --- a/core/src/nn/mod.rs +++ b/core/src/nn/mod.rs @@ -2,13 +2,15 @@ Appellation: nn Contrib: FL03 */ -pub use self::{error::ModelError, model::prelude::*}; +pub use self::{dropout::*, error::ModelError, model::prelude::*}; +pub mod dropout; pub mod error; pub mod model; pub(crate) mod prelude { - pub use super::error::ModelError; + pub use super::dropout::*; + pub use super::error::*; pub use super::model::prelude::*; } diff --git a/core/src/types/mask.rs b/core/src/types/mask.rs new file mode 100644 index 00000000..87494557 --- /dev/null +++ 
b/core/src/types/mask.rs @@ -0,0 +1,10 @@ +/* + Appellation: mask + Contrib: FL03 +*/ +use nd::*; + +pub struct Mask(ArrayBase) +where + D: Dimension, + S: RawData; diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs index a639d5a4..e015605a 100644 --- a/core/src/types/mod.rs +++ b/core/src/types/mod.rs @@ -6,6 +6,7 @@ pub use self::prelude::*; #[cfg(feature = "std")] pub use self::std_types::*; +pub mod mask; pub mod propagate; pub type NdResult = core::result::Result; @@ -22,6 +23,7 @@ mod std_types { } pub(crate) mod prelude { + pub use super::mask::*; pub use super::propagate::Propagate; #[cfg(feature = "std")] pub use super::std_types::*; diff --git a/core/tests/func.rs b/core/tests/nn.rs similarity index 83% rename from core/tests/func.rs rename to core/tests/nn.rs index e1a5ccef..55b51198 100644 --- a/core/tests/func.rs +++ b/core/tests/nn.rs @@ -1,7 +1,7 @@ #![allow(unused_imports)] extern crate concision_core as concision; -use concision::func::Dropout; +use concision::nn::DropoutLayer; use concision::Forward; use ndarray::prelude::*; @@ -10,7 +10,7 @@ use ndarray::prelude::*; fn test_dropout() { let shape = (512, 2048); let arr = Array2::::ones(shape); - let dropout = Dropout::new(0.5); + let dropout = DropoutLayer::new(0.5); let out = dropout.forward(&arr); assert!(arr.iter().all(|&x| x == 1.0)); diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index 8bd27400..0fb67382 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -4,6 +4,8 @@ */ use crate::params::QkvBase; use concision::getters; +use concision::nn::DropoutLayer; + use core::borrow::{Borrow, BorrowMut}; use nd::linalg::Dot; use nd::*; @@ -15,7 +17,8 @@ where D: Dimension, S: RawData, { - pub(crate) mask: Option>, + pub(crate) dropout: Option, + pub(crate) mask: Option>, pub(crate) params: QkvBase, } @@ -25,7 +28,11 @@ where S: RawData, { pub fn from_params(params: QkvBase) -> Self { - 
Self { mask: None, params } + Self { + dropout: None, + mask: None, + params, + } } pub fn builder(shape: Sh, builder: F) -> Self @@ -44,7 +51,18 @@ where { Self::from_params(QkvBase::from_elem(shape, value)) } - + #[cfg(not(feature = "rand"))] + pub fn attention(&self) -> Array + where + A: ComplexFloat + ScalarOperand, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, + { + let (q, k, v) = self.qkv(); + super::_attention_no_dropout(q, k, v, self.mask()) + } + #[cfg(feature = "rand")] pub fn attention(&self) -> Array where A: ComplexFloat + ScalarOperand, @@ -53,7 +71,15 @@ where Array: Dot, Output = Array>, { let (q, k, v) = self.qkv(); - crate::attention::scaled_dot_product_attention(q, k, v) + super::_attention(q, k, v, self.mask(), self.dropout()) + } + + pub fn dropout(&self) -> Option<&DropoutLayer> { + self.dropout.as_ref() + } + /// Returns an immutable reference to the, optional, [Dropout] layer + pub fn mask(&self) -> Option<&Array> { + self.mask.as_ref() } /// Returns an immuable reference to the underlying parameters. 
pub const fn params(&self) -> &QkvBase { diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs index 443f8204..33dea032 100644 --- a/models/transformers/src/attention/mod.rs +++ b/models/transformers/src/attention/mod.rs @@ -28,7 +28,9 @@ pub trait Attention { } pub(crate) mod utils { - use concision::func::activate::Softmax; + use concision::func::Softmax; + use concision::nn::DropoutLayer; + use concision::MaskFill; use nd::linalg::Dot; use nd::prelude::{Array, ArrayBase, ArrayView, Axis, Dimension}; use nd::{Data, ScalarOperand}; @@ -46,6 +48,7 @@ pub(crate) mod utils { q: &ArrayBase, k: &ArrayBase, v: &ArrayBase, + mask: Option<&Array>, ) -> Array where A: ComplexFloat + ScalarOperand, @@ -54,7 +57,54 @@ pub(crate) mod utils { ArrayBase: for<'a> Dot, Output = Array>, Array: Dot, Output = Array>, { + _attention_no_dropout(q, k, v, mask) + } + + pub(crate) fn _attention_no_dropout( + q: &ArrayBase, + k: &ArrayBase, + v: &ArrayBase, + mask: Option<&Array>, + ) -> Array + where + A: ComplexFloat + ScalarOperand, + S: Data, + D: Dimension, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, + { + let dk = scale::(k.len_of(Axis(1))); + let mut z = q.dot(&k.t()) * dk; + if let Some(mask) = mask { + z = z.masked_fill(mask, A::zero()); + } + z.softmax().dot(&v) + } + #[cfg(feature = "rand")] + pub(crate) fn _attention( + q: &ArrayBase, + k: &ArrayBase, + v: &ArrayBase, + mask: Option<&Array>, + dropout: Option<&DropoutLayer>, + ) -> Array + where + A: ComplexFloat + ScalarOperand, + S: Data, + D: Dimension, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, + { + use concision::Forward; let dk = scale::(k.len_of(Axis(1))); - (q.dot(&k.t()) * dk).softmax().dot(&v) + let mut z = q.dot(&k.t()) * dk; + if let Some(mask) = mask { + z = z.masked_fill(mask, A::zero()); + } + z = z.softmax(); + if let Some(dropout) = dropout { + z = dropout.forward(&z); + } + z.dot(&v) } } diff --git 
a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs index fa22f80a..c8fc92da 100644 --- a/models/transformers/src/impls/impl_head.rs +++ b/models/transformers/src/impls/impl_head.rs @@ -15,6 +15,7 @@ where { fn clone(&self) -> Self { Self { + dropout: self.dropout.clone(), mask: self.mask.clone(), params: self.params.clone(), } @@ -26,6 +27,7 @@ where A: Copy, D: Copy + Dimension, S: Copy + RawDataClone, + Array: Copy, { } diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs index 90c13693..db99143a 100644 --- a/models/transformers/src/params/store.rs +++ b/models/transformers/src/params/store.rs @@ -6,6 +6,8 @@ use concision::{dimensional, getters}; use nd::*; use num::traits::{One, Zero}; +/// [QkvBase] is a container for the query, key, and value arrays used in the +/// attention mechanism of the transformer model. pub struct QkvBase, D = Ix2> where D: Dimension, From a7fac83aa066aec2fcb9599d0ea11dd64d3778c2 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Mon, 20 May 2024 12:58:22 -0500 Subject: [PATCH 10/19] update Signed-off-by: Joe McCain III --- .github/ISSUE_TEMPLATE/issue.md | 17 +++ .github/ISSUE_TEMPLATE/proposal.md | 11 +- .github/ISSUE_TEMPLATE/tracking.md | 17 +++ core/src/types/mask.rs | 116 +++++++++++++- core/src/types/mod.rs | 4 +- core/src/types/shape.rs | 166 +++++++++++++++++++++ models/transformers/src/attention/head.rs | 55 ++----- models/transformers/src/attention/mod.rs | 62 +++----- models/transformers/src/impls/impl_head.rs | 43 +++++- models/transformers/src/lib.rs | 1 - models/transformers/src/primitives.rs | 11 ++ 11 files changed, 415 insertions(+), 88 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/issue.md create mode 100644 .github/ISSUE_TEMPLATE/tracking.md create mode 100644 core/src/types/shape.rs diff --git a/.github/ISSUE_TEMPLATE/issue.md b/.github/ISSUE_TEMPLATE/issue.md new file mode 100644 index 00000000..925cfede --- /dev/null +++ 
b/.github/ISSUE_TEMPLATE/issue.md @@ -0,0 +1,17 @@ +--- +about: A generic issue template +assignees: + - FL03 +labels: [] +projects: ['@FL03/concision:features'] +name: Generic Issue +title: '' +--- + +**Describe the proposal or feature that this issue is tracking.** + +## Issues + +- [] + +## Pull Requests diff --git a/.github/ISSUE_TEMPLATE/proposal.md b/.github/ISSUE_TEMPLATE/proposal.md index d7bacdf8..d8370f22 100644 --- a/.github/ISSUE_TEMPLATE/proposal.md +++ b/.github/ISSUE_TEMPLATE/proposal.md @@ -1,15 +1,14 @@ --- -name: Improvement Proposal about: A formal proposal discussing any new features, changes, or improvements to the project. -title: 'CNC-0000:' -labels: ['proposal'] -projects: ['@FL03/concision:features', '@FL03/concision:roadmap'] assignees: - FL03 - +labels: ['proposal'] +name: Improvement Proposal +projects: ['@FL03/concision:features', '@FL03/concision:roadmap'] +title: 'CNC-0000:' --- ### Resources -- [Google](https://google.com) \ No newline at end of file +- [company](https://github.com/scattered-systems) diff --git a/.github/ISSUE_TEMPLATE/tracking.md b/.github/ISSUE_TEMPLATE/tracking.md new file mode 100644 index 00000000..0139c486 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/tracking.md @@ -0,0 +1,17 @@ +--- +about: Create a new tracking issue to track the progress of a proposal or feature. 
+assignees: + - FL03 +labels: ['tracking'] +projects: ['@FL03/concision:features'] +name: Tracking Issue +title: 'Tracking Issue:' +--- + +**Describe the proposal or feature that this issue is tracking.** + +## Issues + +- [] + +## Pull Requests diff --git a/core/src/types/mask.rs b/core/src/types/mask.rs index 87494557..9be85784 100644 --- a/core/src/types/mask.rs +++ b/core/src/types/mask.rs @@ -2,9 +2,123 @@ Appellation: mask Contrib: FL03 */ -use nd::*; + +use nd::prelude::*; +use nd::RawData; + +pub trait NdMask +where + D: Dimension, +{ + type Data: RawData; +} + + pub struct Mask(ArrayBase) where D: Dimension, S: RawData; + +impl Mask +where + D: Dimension, + S: RawData, +{ + pub fn new(data: ArrayBase) -> Self { + Self(data) + } +} + + +/* + ************* Implementations ************* +*/ +mod impls { + use super::*; + use core::borrow::{Borrow, BorrowMut}; + use core::ops::{Deref, DerefMut}; + + impl AsRef> for Mask + where + D: Dimension, + S: RawData, + { + fn as_ref(&self) -> &ArrayBase { + &self.0 + } + } + + impl AsMut> for Mask + where + D: Dimension, + S: RawData, + { + fn as_mut(&mut self) -> &mut ArrayBase { + &mut self.0 + } + } + + impl Borrow> for Mask + where + D: Dimension, + S: RawData, + { + fn borrow(&self) -> &ArrayBase { + &self.0 + } + } + + impl BorrowMut> for Mask + where + D: Dimension, + S: RawData, + { + fn borrow_mut(&mut self) -> &mut ArrayBase { + &mut self.0 + } + } + + impl Deref for Mask + where + D: Dimension, + S: RawData, + { + type Target = ArrayBase; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl DerefMut for Mask + where + D: Dimension, + S: RawData, + { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } + } + + impl From> for Mask + where + D: Dimension, + S: RawData, + { + fn from(mask: ArrayBase) -> Self { + Mask(mask) + } + } + + impl From> for ArrayBase + where + D: Dimension, + S: RawData, + { + fn from(mask: Mask) -> Self { + mask.0 + } + } + +} \ No newline at end of file diff 
--git a/core/src/types/mod.rs b/core/src/types/mod.rs index e015605a..75cddd52 100644 --- a/core/src/types/mod.rs +++ b/core/src/types/mod.rs @@ -8,6 +8,7 @@ pub use self::std_types::*; pub mod mask; pub mod propagate; +pub mod shape; pub type NdResult = core::result::Result; /// A type alias for a [Result](core::result::Result) with the crate's [Error](crate::error::Error) type. @@ -23,8 +24,9 @@ mod std_types { } pub(crate) mod prelude { - pub use super::mask::*; + pub use super::mask::Mask; pub use super::propagate::Propagate; + pub use super::shape::ModelShape; #[cfg(feature = "std")] pub use super::std_types::*; pub use super::{NdResult, Result}; diff --git a/core/src/types/shape.rs b/core/src/types/shape.rs new file mode 100644 index 00000000..37e75047 --- /dev/null +++ b/core/src/types/shape.rs @@ -0,0 +1,166 @@ +/* + Appellation: shape + Contrib: FL03 +*/ +use nd::prelude::{Ix1, Ix2}; +use nd::{Dimension, ErrorKind, IntoDimension, RemoveAxis, ShapeBuilder, ShapeError}; + +pub(crate) fn _from_dim(dim: D) -> Result +where + D: Dimension, +{ + if dim.ndim() == 1 { + Ok(Features::new(dim[0], 1)) + } else if dim.ndim() >= 2 { + Ok(Features::new(dim[1], dim[0])) + } else { + Err(ShapeError::from_kind(ErrorKind::IncompatibleShape)) + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct ModelShape { + pub(crate) features: Features, + pub(crate) network: usize, +} + +impl ModelShape { + pub fn new(model: usize, network: usize) -> Self { + let features = Features::from_network(model, network); + Self { features, network } + } + + pub fn from_features(features: Features) -> Self { + Self { + features, + network: features.size(), + } + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct Features { + pub(crate) inputs: usize, + 
pub(crate) outputs: usize, +} + +impl Features { + /// Create a new, unchecked [Features] instance. + /// + pub fn new(inputs: usize, outputs: usize) -> Self { + debug_assert_ne!(inputs, 0); + debug_assert_ne!(outputs, 0); + + Self { inputs, outputs } + } + /// Attempts to build a new [Features] instance from the given dimension ([`D`](Dimension)) + pub fn from_dimension(dim: D) -> Result + where + D: Dimension, + { + _from_dim(dim) + } + /// Builds a new instance from the given shape ([`Sh`](ShapeBuilder)); + /// Unlike [Features::from_dimension], this method requires the dimension (`D`) to + /// additionally implement the [RemoveAxis] trait + pub fn from_shape(shape: Sh) -> Self + where + D: RemoveAxis, + Sh: ShapeBuilder, + { + let dim = shape.into_shape().raw_dim().clone(); + _from_dim(dim).unwrap() + } + /// Creates a new instance given the model size (`inputs`, `d_model`) and total number of nodes within the network (`size`, `network`, `d_network`) + pub fn from_network(model: usize, network: usize) -> Self { + let outputs = network / model; + Self::new(model, outputs) + } + + pub const fn as_array(&self) -> [usize; 2] { + [self.outputs(), self.inputs()] + } + /// Creates a new two-tuple instance from the given dimensions; + pub const fn as_tuple(&self) -> (usize, usize) { + (self.outputs(), self.inputs()) + } + pub fn check_dim(&self, dim: D) -> bool + where + D: Dimension, + { + if dim.ndim() == 1 { + self.inputs() == dim[0] + } else if dim.ndim() >= 2 { + self.outputs() == dim[0] && self.inputs() == dim[1] + } else { + false + } + } + /// Forwards the [into_pattern](ndarray::Dimension::into_pattern) method from the [Dimension] trait + #[inline] + pub fn into_pattern(self) -> (usize, usize) { + self.into_dimension().into_pattern() + } + /// An aliased function that returns the number of input features + pub const fn d_model(&self) -> usize { + self.inputs() + } + /// Returns the number of input features + pub const fn inputs(&self) -> usize { + self.inputs 
+ } + /// Checks to see if the features speak to a so-called `unit`; + /// i.e. see if the number of output features is equal to 1. + pub fn is_unit(&self) -> bool { + self.outputs() == 1 + } + /// Returns the number of output features + pub const fn outputs(&self) -> usize { + self.outputs + } + /// Computes the total number of nodes in the network + pub fn size(&self) -> usize { + self.inputs() * self.outputs() + } + #[doc(hidden)] + pub fn uniform_scale(&self) -> f64 { + (self.inputs as f64).recip().sqrt() + } +} + +impl IntoDimension for Features { + type Dim = Ix2; + + fn into_dimension(self) -> Self::Dim { + (self.outputs, self.inputs).into_dimension() + } +} + +impl From for Features { + fn from(dim: Ix1) -> Self { + Self::new(1, dim[0]) + } +} + +impl From for Features { + fn from(dim: Ix2) -> Self { + Self::new(dim[1], dim[0]) + } +} + +impl From for Ix2 { + fn from(features: Features) -> Self { + features.into_dimension() + } +} + +impl PartialEq for Features +where + [usize; 2]: PartialEq, +{ + fn eq(&self, other: &U) -> bool { + self.as_array() == *other + } +} diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index 0fb67382..7e24804c 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -5,18 +5,21 @@ use crate::params::QkvBase; use concision::getters; use concision::nn::DropoutLayer; - -use core::borrow::{Borrow, BorrowMut}; use nd::linalg::Dot; use nd::*; use num::complex::ComplexFloat; // #68 +/// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in +/// [Attention is all you need](https://arxiv.org/abs/1706.03762). 
+/// +/// [DropoutLayer]: requires the `rand` feature pub struct AttentionHead> where D: Dimension, S: RawData, { + #[cfg(feature = "rand")] pub(crate) dropout: Option, pub(crate) mask: Option>, pub(crate) params: QkvBase, @@ -29,6 +32,7 @@ where { pub fn from_params(params: QkvBase) -> Self { Self { + #[cfg(feature = "rand")] dropout: None, mask: None, params, @@ -51,18 +55,7 @@ where { Self::from_params(QkvBase::from_elem(shape, value)) } - #[cfg(not(feature = "rand"))] - pub fn attention(&self) -> Array - where - A: ComplexFloat + ScalarOperand, - S: Data, - ArrayBase: for<'a> Dot, Output = Array>, - Array: Dot, Output = Array>, - { - let (q, k, v) = self.qkv(); - super::_attention_no_dropout(q, k, v, self.mask()) - } - #[cfg(feature = "rand")] + /// Computes the score using scaled dot-product attention. pub fn attention(&self) -> Array where A: ComplexFloat + ScalarOperand, @@ -73,10 +66,6 @@ where let (q, k, v) = self.qkv(); super::_attention(q, k, v, self.mask(), self.dropout()) } - - pub fn dropout(&self) -> Option<&DropoutLayer> { - self.dropout.as_ref() - } /// Returns an immutable reference to the, optional, [Dropout] layer pub fn mask(&self) -> Option<&Array> { self.mask.as_ref() @@ -104,37 +93,25 @@ where ndbuilder!(zeros() where A: Clone + num::Zero, S: DataOwned); } -impl super::Attention for AttentionHead -where - A: ComplexFloat + ScalarOperand, - D: Dimension, - S: Data, - ArrayBase: for<'a> Dot, Output = Array>, - Array: Dot, Output = Array>, -{ - type Output = Array; - - fn attention(&self) -> Self::Output { - self.attention() - } -} - -impl Borrow> for AttentionHead +#[cfg(feature = "rand")] +impl AttentionHead where D: Dimension, S: RawData, { - fn borrow(&self) -> &QkvBase { - self.params() + pub fn dropout(&self) -> Option<&DropoutLayer> { + self.dropout.as_ref() } } -impl BorrowMut> for AttentionHead +#[cfg(not(feature = "rand"))] +impl AttentionHead where D: Dimension, S: RawData, { - fn borrow_mut(&mut self) -> &mut QkvBase { - 
self.params_mut() + #[cfg(not(feature = "rand"))] + pub fn dropout(&self) -> Option<&DropoutLayer> { + None } } diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs index 33dea032..279793fe 100644 --- a/models/transformers/src/attention/mod.rs +++ b/models/transformers/src/attention/mod.rs @@ -8,6 +8,7 @@ //! Today, these mechanisms are found in several state-of-the-art models, such as //! the Transformer model, primarily due to its capabilities in natural language //! processing (NLP) domains +pub(crate) use self::_impl_methods::*; pub use self::head::AttentionHead; pub use self::utils::*; @@ -28,59 +29,36 @@ pub trait Attention { } pub(crate) mod utils { - use concision::func::Softmax; use concision::nn::DropoutLayer; - use concision::MaskFill; use nd::linalg::Dot; - use nd::prelude::{Array, ArrayBase, ArrayView, Axis, Dimension}; - use nd::{Data, ScalarOperand}; + use nd::prelude::*; use num::complex::ComplexFloat; - pub(crate) fn scale(dk: usize) -> A - where - A: ComplexFloat, - { - A::from(dk).unwrap().sqrt().recip() - } - /// A functional implementation of the scaled dot-product attention mechanism; pub fn scaled_dot_product_attention( q: &ArrayBase, k: &ArrayBase, v: &ArrayBase, mask: Option<&Array>, + dropout: Option<&DropoutLayer>, ) -> Array where - A: ComplexFloat + ScalarOperand, - S: Data, + A: ComplexFloat + nd::ScalarOperand, + S: nd::Data, D: Dimension, ArrayBase: for<'a> Dot, Output = Array>, Array: Dot, Output = Array>, { - _attention_no_dropout(q, k, v, mask) + super::_attention(q, k, v, mask, dropout) } +} + +mod _impl_methods { + use concision::prelude::{DropoutLayer, MaskFill, Softmax}; + use nd::linalg::Dot; + use nd::prelude::*; + use num::complex::ComplexFloat; - pub(crate) fn _attention_no_dropout( - q: &ArrayBase, - k: &ArrayBase, - v: &ArrayBase, - mask: Option<&Array>, - ) -> Array - where - A: ComplexFloat + ScalarOperand, - S: Data, - D: Dimension, - ArrayBase: for<'a> Dot, Output = Array>, 
- Array: Dot, Output = Array>, - { - let dk = scale::(k.len_of(Axis(1))); - let mut z = q.dot(&k.t()) * dk; - if let Some(mask) = mask { - z = z.masked_fill(mask, A::zero()); - } - z.softmax().dot(&v) - } - #[cfg(feature = "rand")] pub(crate) fn _attention( q: &ArrayBase, k: &ArrayBase, @@ -89,22 +67,30 @@ pub(crate) mod utils { dropout: Option<&DropoutLayer>, ) -> Array where - A: ComplexFloat + ScalarOperand, - S: Data, + A: ComplexFloat + nd::ScalarOperand, + S: nd::Data, D: Dimension, ArrayBase: for<'a> Dot, Output = Array>, Array: Dot, Output = Array>, { use concision::Forward; - let dk = scale::(k.len_of(Axis(1))); + let dk = scale::(k.len_of(nd::Axis(1))); let mut z = q.dot(&k.t()) * dk; if let Some(mask) = mask { z = z.masked_fill(mask, A::zero()); } z = z.softmax(); + #[cfg(feature = "rand")] if let Some(dropout) = dropout { z = dropout.forward(&z); } z.dot(&v) } + + pub(crate) fn scale(dk: usize) -> A + where + A: ComplexFloat, + { + A::from(dk).unwrap().sqrt().recip() + } } diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs index c8fc92da..9b8e61a7 100644 --- a/models/transformers/src/impls/impl_head.rs +++ b/models/transformers/src/impls/impl_head.rs @@ -2,10 +2,48 @@ Appellation: impl_head Contrib: FL03 */ -use crate::attention::AttentionHead; +use crate::attention::{Attention, AttentionHead}; use crate::params::QkvBase; +use core::borrow::{Borrow, BorrowMut}; +use nd::linalg::Dot; use nd::prelude::*; -use nd::{DataOwned, RawDataClone}; +use nd::{Data, DataOwned, RawData, RawDataClone, ScalarOperand}; +use num::complex::ComplexFloat; + +impl Attention for AttentionHead +where + A: ComplexFloat + ScalarOperand, + D: Dimension, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, +{ + type Output = Array; + + fn attention(&self) -> Self::Output { + self.attention() + } +} + +impl Borrow> for AttentionHead +where + D: Dimension, + S: RawData, +{ + fn borrow(&self) -> 
&QkvBase { + self.params() + } +} + +impl BorrowMut> for AttentionHead +where + D: Dimension, + S: RawData, +{ + fn borrow_mut(&mut self) -> &mut QkvBase { + self.params_mut() + } +} impl Clone for AttentionHead where @@ -15,6 +53,7 @@ where { fn clone(&self) -> Self { Self { + #[cfg(feature = "rand")] dropout: self.dropout.clone(), mask: self.mask.clone(), params: self.params.clone(), diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs index ed9cf63e..37c5c1ce 100644 --- a/models/transformers/src/lib.rs +++ b/models/transformers/src/lib.rs @@ -40,6 +40,5 @@ pub(crate) mod impls { pub mod prelude { pub use super::attention::prelude::*; - pub use super::primitives::*; pub use super::Transformer; } diff --git a/models/transformers/src/primitives.rs b/models/transformers/src/primitives.rs index 96db829b..5f5c1bcf 100644 --- a/models/transformers/src/primitives.rs +++ b/models/transformers/src/primitives.rs @@ -5,6 +5,17 @@ pub use self::consts::*; pub mod consts { + /// The default dimension of the model; i.e. the number of inputs + pub const D_MODEL: usize = 512; + /// The default size of the network; i.e. the number of neurons in the network + pub const D_NETWORK: usize = 2048; + + /// The default number of attention heads + pub const H: usize = 8; /// The default number of layers used for the encoder / decoder. 
pub const N: usize = 6; } + +pub fn outputs_from_ratio(model: usize, network: usize) -> usize { + network / model +} From e6bb98019ac878408097c880adb9488572b6fdc0 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Mon, 20 May 2024 14:55:27 -0500 Subject: [PATCH 11/19] update Signed-off-by: Joe McCain III --- concision/Cargo.toml | 9 +- concision/examples/transformer.rs | 39 ++++++++ concision/src/lib.rs | 6 +- core/src/init/initializer.rs | 25 ++++++ core/src/init/mod.rs | 2 + core/src/math/traits.rs | 39 +++++++- data/src/tensor/mod.rs | 2 +- data/src/tensor/ndtensor/traits.rs | 4 +- models/transformers/Cargo.toml | 9 +- models/transformers/src/attention/head.rs | 14 ++- models/transformers/src/attention/mod.rs | 12 ++- .../transformers/src/attention/multi/mod.rs | 4 + .../src/attention/multi/multi_head.rs | 14 ++- models/transformers/src/attention/score.rs | 89 +++++++++++++++++++ models/transformers/src/impls/impl_head.rs | 14 ++- models/transformers/src/impls/impl_init.rs | 61 +++++++++++++ models/transformers/src/macros.rs | 82 +++-------------- models/transformers/src/macros/params.rs | 51 +++++++++++ models/transformers/src/params/store.rs | 12 +-- models/transformers/tests/attention.rs | 34 ++----- models/transformers/tests/params.rs | 35 ++++++++ 21 files changed, 433 insertions(+), 124 deletions(-) create mode 100644 concision/examples/transformer.rs create mode 100644 core/src/init/initializer.rs create mode 100644 models/transformers/src/attention/score.rs create mode 100644 models/transformers/src/impls/impl_init.rs create mode 100644 models/transformers/src/macros/params.rs create mode 100644 models/transformers/tests/params.rs diff --git a/concision/Cargo.toml b/concision/Cargo.toml index e8ddbdfb..1cd286ac 100644 --- a/concision/Cargo.toml +++ b/concision/Cargo.toml @@ -45,7 +45,7 @@ models = [ "gnn", "kan", "linear", - "transformers", + "transformer", ] gnn = [ @@ -60,7 +60,7 @@ linear = [ "dep:concision-linear", ] -transformers = [ +transformer = 
[ "dep:concision-transformers", ] @@ -184,6 +184,10 @@ test = true name = "linear" required-features = ["linear", "rand", "serde", "tracing"] +[[example]] +name = "transformer" +required-features = ["transformer", "rand", "serde", "tracing"] + [build-dependencies] [dependencies.concision-core] @@ -229,6 +233,7 @@ version = "0.1.14" [dev-dependencies] anyhow = "1" +approx.workspace = true lazy_static.workspace = true ndarray.workspace = true num = { features = ["rand", "serde"], version = "0.4" } diff --git a/concision/examples/transformer.rs b/concision/examples/transformer.rs new file mode 100644 index 00000000..c1a36fc7 --- /dev/null +++ b/concision/examples/transformer.rs @@ -0,0 +1,39 @@ +/* + Appellation: transformer + Contrib: FL03 +*/ +extern crate concision as cnc; + +use approx::AbsDiffEq; +use cnc::transformer::AttentionHead; +use cnc::prelude::Result; +use ndarray::Array2; + +fn tracing() { + use tracing::Level; + use tracing_subscriber::fmt::time; + + tracing_subscriber::fmt() + .compact() + .with_ansi(true) + .with_max_level(Level::DEBUG) + .with_target(false) + .with_timer(time::uptime()) + .init(); +} + +fn main() -> Result<()> { + tracing(); + tracing::info!("Starting up the transformer model example..."); + + let shape = (3, 3); + let head = AttentionHead::::ones(shape); + let score = head.attention(); + assert!(score.attention().abs_diff_eq(&Array2::from_elem(shape, 1f64/3f64), 1e-6)); + println!("{:?}", score); + + + + + Ok(()) +} diff --git a/concision/src/lib.rs b/concision/src/lib.rs index ced1df1d..ae868246 100644 --- a/concision/src/lib.rs +++ b/concision/src/lib.rs @@ -27,9 +27,9 @@ pub use concision_kan as kan; pub use concision_linear as linear; #[cfg(feature = "macros")] pub use concision_macros::*; -#[cfg(feature = "transformers")] +#[cfg(feature = "transformer")] #[doc(inline)] -pub use concision_transformers as transformers; +pub use concision_transformers as transformer; pub mod prelude { pub use concision_core::prelude::*; @@ -45,4 
+45,6 @@ pub mod prelude { pub use concision_linear::prelude::*; #[cfg(feature = "macros")] pub use concision_macros::*; + #[cfg(feature = "transformer")] + pub use concision_transformers::prelude::*; } diff --git a/core/src/init/initializer.rs b/core/src/init/initializer.rs new file mode 100644 index 00000000..75c20579 --- /dev/null +++ b/core/src/init/initializer.rs @@ -0,0 +1,25 @@ +/* + Appellation: initializer + Contrib: FL03 +*/ +use super::Initialize; +use core::marker::PhantomData; +use nd::prelude::*; +use nd::DataOwned; +use rand_distr::{Distribution, StandardNormal}; + +pub struct InitializerBase where D: Dimension, Dst: Clone + Distribution { + pub(crate) dim: D, + pub(crate) distr: Dst, + pub(crate) _dtype: PhantomData, +} + +impl InitializerBase where D: Dimension, Dst: Clone + Distribution { + pub fn new(dim: D, distr: Dst) -> Self { + Self { dim, distr, _dtype: PhantomData:: } + } + + pub fn init(self) -> ArrayBase where S: DataOwned { + ArrayBase::rand(self.dim, self.distr) + } +} diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs index 2ebcd583..f2bb4731 100644 --- a/core/src/init/mod.rs +++ b/core/src/init/mod.rs @@ -16,6 +16,8 @@ pub use self::prelude::*; pub(crate) mod initialize; pub(crate) mod utils; +pub mod initializer; + pub mod distr { pub use self::prelude::*; diff --git a/core/src/math/traits.rs b/core/src/math/traits.rs index d71d433d..78e67337 100644 --- a/core/src/math/traits.rs +++ b/core/src/math/traits.rs @@ -2,9 +2,46 @@ Appellation: traits Contrib: FL03 */ +use core::iter::Sum; use nd::{Array, ArrayBase, Data, Dimension}; use num::complex::{Complex, ComplexFloat}; -use num::traits::Signed; +use num::traits::{FromPrimitive, Num, Signed}; + +pub trait IterStats where T: FromPrimitive { + type Output; + + fn elems(&self) -> T; + + fn mean(&self) -> Self::Output; + + fn std(&self) -> Self::Output where T: ComplexFloat; + + fn var(&self) -> Self::Output where T: ComplexFloat; +} + +impl IterStats for I where I: Clone + 
ExactSizeIterator, T: Clone + FromPrimitive + Num + Sum { + type Output = T; + + fn elems(&self) -> T { + T::from_usize(self.len()).unwrap() + } + + fn mean(&self) -> Self::Output { + self.clone().sum::() / self.elems() + } + + fn std(&self) -> Self::Output where T: ComplexFloat { + let mean = self.mean(); + let sum = self.clone().map(|x| (x - mean).powi(2)).sum::(); + (sum / self.elems()).sqrt() + } + + fn var(&self) -> Self::Output where T: ComplexFloat { + let mean = self.mean(); + let sum = self.clone().map(|x| (x - mean).powi(2)).sum::(); + sum / self.elems() + } +} unary!( Abs::abs(self), diff --git a/data/src/tensor/mod.rs b/data/src/tensor/mod.rs index 17945f2f..7d31345f 100644 --- a/data/src/tensor/mod.rs +++ b/data/src/tensor/mod.rs @@ -2,6 +2,6 @@ Appellation: tensor Contrib: FL03 */ -pub use self::ndtensor::NdTensor; +pub use self::ndtensor::NdContainer; pub mod ndtensor; diff --git a/data/src/tensor/ndtensor/traits.rs b/data/src/tensor/ndtensor/traits.rs index c55c3afe..67af23b1 100644 --- a/data/src/tensor/ndtensor/traits.rs +++ b/data/src/tensor/ndtensor/traits.rs @@ -11,7 +11,7 @@ pub trait TensorData { fn as_mut_slice(&mut self) -> &mut [Self::Elem]; } -pub trait NdTensor { +pub trait NdContainer { const RANK: Option = None; type Data: TensorData; @@ -28,6 +28,8 @@ pub trait NdTensor { fn shape(&self) -> &[usize]; } + + /* ******** implementations ******** */ diff --git a/models/transformers/Cargo.toml b/models/transformers/Cargo.toml index 00bf9fb7..d0558100 100644 --- a/models/transformers/Cargo.toml +++ b/models/transformers/Cargo.toml @@ -95,6 +95,10 @@ crate-type = ["lib"] doctest = true test = true +[[test]] +name = "attention" +required-features = ["approx", "rand"] + [build-dependencies] [dependencies] @@ -128,8 +132,9 @@ version = "1" optional = true version = "0.1" -[dev-dependencies.lazy_static] -workspace = true +[dev-dependencies] +approx.workspace = true +lazy_static.workspace = true [package.metadata.docs.rs] all-features = true diff 
--git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index 7e24804c..cc42c531 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -2,6 +2,7 @@ Appellation: head Contrib: FL03 */ +use super::{_attention, Score}; use crate::params::QkvBase; use concision::getters; use concision::nn::DropoutLayer; @@ -9,11 +10,16 @@ use nd::linalg::Dot; use nd::*; use num::complex::ComplexFloat; + + // #68 /// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in /// [Attention is all you need](https://arxiv.org/abs/1706.03762). /// -/// [DropoutLayer]: requires the `rand` feature +/// ### Fields +/// +/// [dropout](DropoutLayer): requires the `rand` feature +/// pub struct AttentionHead> where D: Dimension, @@ -55,8 +61,8 @@ where { Self::from_params(QkvBase::from_elem(shape, value)) } - /// Computes the score using scaled dot-product attention. - pub fn attention(&self) -> Array + /// Computes the [Score] using scaled dot-product attention. + pub fn attention(&self) -> Score where A: ComplexFloat + ScalarOperand, S: Data, @@ -64,7 +70,7 @@ where Array: Dot, Output = Array>, { let (q, k, v) = self.qkv(); - super::_attention(q, k, v, self.mask(), self.dropout()) + _attention(q, k, v, self.mask(), self.dropout()) } /// Returns an immutable reference to the, optional, [Dropout] layer pub fn mask(&self) -> Option<&Array> { diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs index 279793fe..a500b5f5 100644 --- a/models/transformers/src/attention/mod.rs +++ b/models/transformers/src/attention/mod.rs @@ -10,15 +10,19 @@ //! 
processing (NLP) domains pub(crate) use self::_impl_methods::*; pub use self::head::AttentionHead; +pub use self::score::Score; pub use self::utils::*; pub(crate) mod head; +pub(crate) mod score; // #69: Multi-Head Attention implementation pub mod multi; pub(crate) mod prelude { pub use super::head::AttentionHead; + pub use super::multi::prelude::*; + pub use super::score::Score; pub use super::utils::*; } @@ -29,6 +33,7 @@ pub trait Attention { } pub(crate) mod utils { + use super::Score; use concision::nn::DropoutLayer; use nd::linalg::Dot; use nd::prelude::*; @@ -41,7 +46,7 @@ pub(crate) mod utils { v: &ArrayBase, mask: Option<&Array>, dropout: Option<&DropoutLayer>, - ) -> Array + ) -> Score where A: ComplexFloat + nd::ScalarOperand, S: nd::Data, @@ -54,6 +59,7 @@ pub(crate) mod utils { } mod _impl_methods { + use super::Score; use concision::prelude::{DropoutLayer, MaskFill, Softmax}; use nd::linalg::Dot; use nd::prelude::*; @@ -65,7 +71,7 @@ mod _impl_methods { v: &ArrayBase, mask: Option<&Array>, dropout: Option<&DropoutLayer>, - ) -> Array + ) -> Score where A: ComplexFloat + nd::ScalarOperand, S: nd::Data, @@ -84,7 +90,7 @@ mod _impl_methods { if let Some(dropout) = dropout { z = dropout.forward(&z); } - z.dot(&v) + (z.dot(&v), z).into() } pub(crate) fn scale(dk: usize) -> A diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs index 014e29b9..67592c2d 100644 --- a/models/transformers/src/attention/multi/mod.rs +++ b/models/transformers/src/attention/multi/mod.rs @@ -8,3 +8,7 @@ pub use self::multi_head::*; pub(crate) mod multi_head; + +pub(crate) mod prelude { + pub use super::multi_head::MultiHeadAttention; +} \ No newline at end of file diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs index ad36fe45..e59061ae 100644 --- a/models/transformers/src/attention/multi/multi_head.rs +++ 
b/models/transformers/src/attention/multi/multi_head.rs @@ -3,4 +3,16 @@ Contrib: FL03 */ -pub struct MultiHeadAttention; +pub struct Config { + pub heads: usize, +} + +pub struct MultiHeadAttention { + pub(crate) config: Config, +} + +impl MultiHeadAttention { + pub const fn config(&self) -> &Config { + &self.config + } +} diff --git a/models/transformers/src/attention/score.rs b/models/transformers/src/attention/score.rs new file mode 100644 index 00000000..c2548040 --- /dev/null +++ b/models/transformers/src/attention/score.rs @@ -0,0 +1,89 @@ +/* + Appellation: score + Contrib: FL03 +*/ +use core::fmt; +use nd::{Array, Dimension}; + +/// [Score] is created as a result of invoking an attention mechanism; +/// +/// - attention: the actual result; returns the dot product of the score with the value tensor +/// - score: the attention score tensor +#[derive(Clone, Eq, Hash, PartialEq)] +pub struct Score where D: Dimension { + pub(crate) attention: Array, + pub(crate) score: Array, +} + +impl Score +where + D: Dimension, +{ + pub(crate) fn new(attention: Array, score: Array) -> Self { + Self { attention, score } + } + /// Consumes the instance and returns the attention tensor. + pub fn into_attention(self) -> Array { + self.attention + } + /// Consumes the container and returns the score tensor. + pub fn into_score(self) -> Array { + self.score + } + + /// Retrieve the attention tensor. 
+ pub fn attention(&self) -> &Array { + &self.attention + } + /// Retrieve the score tensor + pub fn score(&self) -> &Array { + &self.score + } +} + +impl Copy for Score where A: Copy, D: Copy + Dimension, Array: Copy { + +} + +impl fmt::Debug for Score +where + A: fmt::Debug, + D: Dimension, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Score") + .field("attention", &self.attention) + .field("score", &self.score) + .finish() + } +} + +impl fmt::Display for Score +where + A: fmt::Display, + D: Dimension, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "({}, {})", self.attention, self.score) + } +} + + + +impl From<(Array, Array)> for Score +where + D: Dimension, +{ + fn from((attention, score): (Array, Array)) -> Self { + Self::new(attention, score) + } +} + +impl From> for (Array, Array) +where + D: Dimension, +{ + fn from(score: Score) -> Self { + (score.attention, score.score) + } +} \ No newline at end of file diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs index 9b8e61a7..cbb381af 100644 --- a/models/transformers/src/impls/impl_head.rs +++ b/models/transformers/src/impls/impl_head.rs @@ -2,7 +2,7 @@ Appellation: impl_head Contrib: FL03 */ -use crate::attention::{Attention, AttentionHead}; +use crate::attention::{Attention, AttentionHead, Score}; use crate::params::QkvBase; use core::borrow::{Borrow, BorrowMut}; use nd::linalg::Dot; @@ -18,7 +18,7 @@ where ArrayBase: for<'a> Dot, Output = Array>, Array: Dot, Output = Array>, { - type Output = Array; + type Output = Score; fn attention(&self) -> Self::Output { self.attention() @@ -80,3 +80,13 @@ where Self::from_params(QkvBase::default()) } } + +impl From> for AttentionHead +where + D: Dimension, + S: RawData, +{ + fn from(params: QkvBase) -> Self { + Self::from_params(params) + } +} \ No newline at end of file diff --git a/models/transformers/src/impls/impl_init.rs 
b/models/transformers/src/impls/impl_init.rs new file mode 100644 index 00000000..cfde2c15 --- /dev/null +++ b/models/transformers/src/impls/impl_init.rs @@ -0,0 +1,61 @@ +#![cfg(feature = "rand")] + +use crate::QkvBase; +use concision::Initialize; +use concision::init::rand::Rng; +use concision::init::rand_distr::{Distribution, StandardNormal}; +use concision::init::rand_distr::uniform::SampleUniform; +use nd::{ArrayBase, DataOwned, Dimension, ShapeBuilder}; + +impl Initialize for QkvBase where + D: RemoveAxis, + S: DataOwned, + StandardNormal: Distribution, +{ + type Data = S; + + fn rand(shape: Sh, distr: Dstr) -> Self + where + Sh: ShapeBuilder, + Dstr: Clone + Distribution, + { + let dim = shape.into_shape().raw_dim().clone(); + Self { + q: ArrayBase::rand(dim.clone(), distr.clone()), + k: ArrayBase::rand(dim.clone(), distr.clone()), + v: ArrayBase::rand(dim, distr) + } + } + + fn rand_with(shape: Sh, distr: Ds, rng: &mut R) -> Self + where + R: Rng + ?Sized, + Ds: Clone + Distribution, + Sh: ShapeBuilder, + { + let dim = shape.into_shape().raw_dim().clone(); + Self { + q: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut rng), + k: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut rng), + v: ArrayBase::rand_with(dim, distr, &mut rng) + } + } + + fn init_rand(self, distr: Ds) -> Self + where + Ds: Clone + Distribution, + Self: Sized, + { + Self::rand(self.dim(), distr) + } + + fn init_rand_with(self, distr: Ds, rng: &mut R) -> Self + where + R: Rng + ?Sized, + Ds: Clone + Distribution, + { + Self::rand_with(self.dim(), distr, rng) + } +} + + diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs index fd05142e..ff070c13 100644 --- a/models/transformers/src/macros.rs +++ b/models/transformers/src/macros.rs @@ -3,81 +3,19 @@ Contrib: FL03 */ +#[macro_use] +mod params; + macro_rules! 
ndbuilder { - ($method:ident$(::$call:ident)?() where $($rest:tt)*) => { - ndbuilder!(@impl $method$(::$call)?() where $($rest)*); + ($method:ident$(::$call:ident)?() $($where:tt)*) => { + ndbuilder!(@impl $method$(::$call)?() $($where)*); }; - (@impl $method:ident() where $($rest:tt)*) => { - ndbuilder!(@impl $method::$method() where $($rest)*); + (@impl $method:ident() $($where:tt)*) => { + ndbuilder!(@impl $method::$method() $($where)*); }; - (@impl $method:ident::$call:ident() where $($rest:tt)*) => { - pub fn $method>(shape: Sh) -> Self where $($rest)* { + (@impl $method:ident::$call:ident() $($where:tt)*) => { + pub fn $method>(shape: Sh) -> Self $($where)* { Self::builder(shape, ndarray::ArrayBase::$call) } }; -} - -// # TODO: -macro_rules! ndview { - ($method:ident::$($rest:tt)*) => { - ndview!(@impl $method.$method::$($rest)*); - }; - ($method:ident.$call:ident::$($rest:tt)*) => { - ndview!(@impl $method.$call::$($rest)*); - }; - (@impl $method:ident.$call:ident::<$view:ident>(self) where $($rest:tt)*) => { - pub fn $method(self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } - }; - (@impl $method:ident.$call:ident::<$view:ident>(mut self) where $($rest:tt)*) => { - pub fn $method(mut self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } - }; - (@impl $method:ident.$call:ident::<$view:ident>(&self) where $($rest:tt)*) => { - pub fn $method(&self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } - }; - (@impl $method:ident.$call:ident::<$view:ident>(&mut self) where $($rest:tt)*) => { - pub fn $method(&mut self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } - }; - (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) where $($rest:tt)*) => { - pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } 
- }; - (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) where $($rest:tt)*) => { - pub fn $method(&mut self) -> $crate::params::QkvBase<$view<&'_ mut A>, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } - }; - (@apply $call:ident($self:expr)) => { - $crate::params::QkvBase { - q: $self.q.$call(), - k: $self.k.$call(), - v: $self.v.$call(), - } - }; -} +} \ No newline at end of file diff --git a/models/transformers/src/macros/params.rs b/models/transformers/src/macros/params.rs new file mode 100644 index 00000000..c8bdcd52 --- /dev/null +++ b/models/transformers/src/macros/params.rs @@ -0,0 +1,51 @@ +/* + Appellation: params + Contrib: FL03 +*/ + + +macro_rules! qkv_view { + ($method:ident$(.$call:ident)?::$($rest:tt)*) => { + qkv_view!(@impl $method$(.$call)?::$($rest)*); + }; + (@impl $method:ident::$($rest:tt)*) => { + qkv_view!(@impl $method.$method::$($rest)*); + }; + (@impl $method:ident.$call:ident::<$view:ident>(self) $($rest:tt)*) => { + pub fn $method(self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<$view:ident>(mut self) $($rest:tt)*) => { + pub fn $method(mut self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<$view:ident>(&self) $($rest:tt)*) => { + pub fn $method(&self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<$view:ident>(&mut self) $($rest:tt)*) => { + pub fn $method(&mut self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) $($rest:tt)*) => { + pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) $($rest:tt)*) => { + pub fn $method(&mut self) -> 
$crate::params::QkvBase<$view<&'_ mut A>, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@apply $call:ident($self:expr)) => { + $crate::params::QkvBase { + q: $self.q.$call(), + k: $self.k.$call(), + v: $self.v.$call(), + } + }; +} diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs index db99143a..d62fb7cb 100644 --- a/models/transformers/src/params/store.rs +++ b/models/transformers/src/params/store.rs @@ -74,12 +74,12 @@ where dimensional!(q()); - ndview!(into_owned::(self) where A: Clone, S: Data); - ndview!(to_owned::(&self) where A: Clone, S: Data); + qkv_view!(into_owned::(self) where A: Clone, S: Data); + qkv_view!(to_owned::(&self) where A: Clone, S: Data); - ndview!(into_shared::(self) where A: Clone, S: DataOwned); - ndview!(to_shared::(&self) where A: Clone, S: DataShared); + qkv_view!(into_shared::(self) where A: Clone, S: DataOwned); + qkv_view!(to_shared::(&self) where A: Clone, S: DataShared); - ndview!(view::<'a, ViewRepr>(&self) where S: Data); - ndview!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut); + qkv_view!(view::<'a, ViewRepr>(&self) where S: Data); + qkv_view!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut); } diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs index bd7f16a6..eb527659 100644 --- a/models/transformers/tests/attention.rs +++ b/models/transformers/tests/attention.rs @@ -5,39 +5,19 @@ extern crate concision_core as concision; extern crate concision_transformers as transformers; -use concision::{linarr, Matmul}; -use transformers::{AttentionHead, Qkv}; +use approx::AbsDiffEq; +use transformers::AttentionHead; use ndarray::prelude::*; #[test] -fn test_qkv() { - let shape = (2048, 10); - let params = Qkv::::new(shape); - assert_eq!(params.q(), &Array::default(shape)); -} -#[test] -fn test_qkv_matmul() { - let shape = (2048, 10); - // generate some sample data - let data = linarr(shape).unwrap(); - // initialize the 
parameters - let params = Qkv::::ones(shape); - // calculate the expected result - let exp = Array2::::ones(shape).dot(&data.t()); - // calculate the result - let res = params.matmul(&data.t()); - // compare the results - assert_eq!(res.q(), &exp); - assert_eq!(res.k(), &exp); - assert_eq!(res.v(), &exp); -} - -#[test] -fn test_attention_head() { - let shape = (30, 3); +fn attention_head() { + let shape = (3, 3); let head = AttentionHead::::ones(shape); assert_eq!(head.q(), &Array::ones(shape)); + let exp = Array2::from_elem(shape, 1f64/3f64); + let score = head.attention(); + assert!(score.attention().abs_diff_eq(&exp, 1e-6)); } diff --git a/models/transformers/tests/params.rs b/models/transformers/tests/params.rs new file mode 100644 index 00000000..18656be8 --- /dev/null +++ b/models/transformers/tests/params.rs @@ -0,0 +1,35 @@ +/* + Appellation: params + Contrib: FL03 +*/ +extern crate concision_core as concision; +extern crate concision_transformers as transformers; + +use concision::{linarr, Matmul}; +use transformers::Qkv; + +use ndarray::prelude::*; + +#[test] +fn test_qkv() { + let shape = (2048, 10); + let params = Qkv::::new(shape); + assert_eq!(params.q(), &Array::default(shape)); +} + +#[test] +fn test_qkv_matmul() { + let shape = (2048, 10); + // generate some sample data + let data = linarr(shape).unwrap(); + // initialize the parameters + let params = Qkv::::ones(shape); + // calculate the expected result + let exp = Array2::::ones(shape).dot(&data.t()); + // calculate the result + let res = params.matmul(&data.t()); + // compare the results + assert_eq!(res.q(), &exp); + assert_eq!(res.k(), &exp); + assert_eq!(res.v(), &exp); +} From e0d07eab75ab47d662fb31ea7cf39ba46154997b Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Mon, 20 May 2024 15:18:03 -0500 Subject: [PATCH 12/19] update Signed-off-by: Joe McCain III --- concision/examples/transformer.rs | 9 ++-- core/src/func/activate/nl.rs | 45 ++++++++++++------- core/src/init/initializer.rs | 23 
++++++++-- core/src/math/traits.rs | 29 +++++++++--- core/src/types/mask.rs | 8 +--- data/src/tensor/ndtensor/traits.rs | 2 - models/transformers/src/attention/head.rs | 8 ++-- .../transformers/src/attention/multi/mod.rs | 2 +- models/transformers/src/attention/score.rs | 19 +++++--- models/transformers/src/impls/impl_head.rs | 2 +- models/transformers/src/macros.rs | 2 +- models/transformers/src/macros/params.rs | 1 - models/transformers/tests/attention.rs | 3 +- 13 files changed, 96 insertions(+), 57 deletions(-) diff --git a/concision/examples/transformer.rs b/concision/examples/transformer.rs index c1a36fc7..cc24879b 100644 --- a/concision/examples/transformer.rs +++ b/concision/examples/transformer.rs @@ -5,8 +5,8 @@ extern crate concision as cnc; use approx::AbsDiffEq; -use cnc::transformer::AttentionHead; use cnc::prelude::Result; +use cnc::transformer::AttentionHead; use ndarray::Array2; fn tracing() { @@ -29,11 +29,10 @@ fn main() -> Result<()> { let shape = (3, 3); let head = AttentionHead::::ones(shape); let score = head.attention(); - assert!(score.attention().abs_diff_eq(&Array2::from_elem(shape, 1f64/3f64), 1e-6)); + assert!(score + .attention() + .abs_diff_eq(&Array2::from_elem(shape, 1f64 / 3f64), 1e-6)); println!("{:?}", score); - - - Ok(()) } diff --git a/core/src/func/activate/nl.rs b/core/src/func/activate/nl.rs index 694145c7..1e861170 100644 --- a/core/src/func/activate/nl.rs +++ b/core/src/func/activate/nl.rs @@ -7,7 +7,7 @@ use ndarray::*; use num::complex::{Complex, ComplexFloat}; use num::traits::Zero; -pub fn relu(args: T) -> T +fn _relu(args: T) -> T where T: PartialOrd + Zero, { @@ -17,23 +17,33 @@ where T::zero() } -pub fn sigmoid(args: T) -> T +fn _sigmoid(args: T) -> T where T: ComplexFloat, { (T::one() + args.neg().exp()).recip() } -pub fn softmax(args: &ArrayBase) -> Array +fn _softmax(args: &ArrayBase) -> Array where A: ComplexFloat + ScalarOperand, D: Dimension, S: Data, { - args.exp() / args.exp().sum() + let e = args.exp(); + 
&e / e.sum() } -pub fn tanh(args: T) -> T +// fn __softmax(args: &I) -> I +// where +// I: Clone + core::ops::Div + Exp, T: Exp + core::iter::Sum , +// for<'a> I: IntoIterator, +// { +// let e = args.exp(); +// e.clone() / e.into_iter().sum::() +// } + +fn _tanh(args: T) -> T where T: ComplexFloat, { @@ -64,22 +74,25 @@ macro_rules! nonlinear { nonlinear!(@arr $rho::$call); }; (@impl $rho:ident::$call:ident<$T:ty>) => { - impl $rho for $T { - type Output = $T; + paste::paste! { + impl $rho for $T { + type Output = $T; - fn $call(self) -> Self::Output { - $call(self) + fn $call(self) -> Self::Output { + [<_ $call>](self) + } } - } - impl<'a> $rho for &'a $T { - type Output = $T; + impl<'a> $rho for &'a $T { + type Output = $T; - fn $call(self) -> Self::Output { - $call(*self) + fn $call(self) -> Self::Output { + [<_ $call>](*self) + } } } + }; (@arr $name:ident::$call:ident) => { impl $name for ArrayBase @@ -150,7 +163,7 @@ where type Output = Array; fn softmax(self) -> Self::Output { - softmax(&self) + _softmax(&self) } } @@ -163,6 +176,6 @@ where type Output = Array; fn softmax(self) -> Self::Output { - softmax(self) + _softmax(self) } } diff --git a/core/src/init/initializer.rs b/core/src/init/initializer.rs index 75c20579..2de38df9 100644 --- a/core/src/init/initializer.rs +++ b/core/src/init/initializer.rs @@ -8,18 +8,33 @@ use nd::prelude::*; use nd::DataOwned; use rand_distr::{Distribution, StandardNormal}; -pub struct InitializerBase where D: Dimension, Dst: Clone + Distribution { +pub struct InitializerBase +where + D: Dimension, + Dst: Clone + Distribution, +{ pub(crate) dim: D, pub(crate) distr: Dst, pub(crate) _dtype: PhantomData, } -impl InitializerBase where D: Dimension, Dst: Clone + Distribution { +impl InitializerBase +where + D: Dimension, + Dst: Clone + Distribution, +{ pub fn new(dim: D, distr: Dst) -> Self { - Self { dim, distr, _dtype: PhantomData:: } + Self { + dim, + distr, + _dtype: PhantomData::, + } } - pub fn init(self) -> ArrayBase where 
S: DataOwned { + pub fn init(self) -> ArrayBase + where + S: DataOwned, + { ArrayBase::rand(self.dim, self.distr) } } diff --git a/core/src/math/traits.rs b/core/src/math/traits.rs index 78e67337..51e3c1d6 100644 --- a/core/src/math/traits.rs +++ b/core/src/math/traits.rs @@ -7,19 +7,30 @@ use nd::{Array, ArrayBase, Data, Dimension}; use num::complex::{Complex, ComplexFloat}; use num::traits::{FromPrimitive, Num, Signed}; -pub trait IterStats where T: FromPrimitive { +pub trait IterStats +where + T: FromPrimitive, +{ type Output; fn elems(&self) -> T; fn mean(&self) -> Self::Output; - fn std(&self) -> Self::Output where T: ComplexFloat; + fn std(&self) -> Self::Output + where + T: ComplexFloat; - fn var(&self) -> Self::Output where T: ComplexFloat; + fn var(&self) -> Self::Output + where + T: ComplexFloat; } -impl IterStats for I where I: Clone + ExactSizeIterator, T: Clone + FromPrimitive + Num + Sum { +impl IterStats for I +where + I: Clone + ExactSizeIterator, + T: Clone + FromPrimitive + Num + Sum, +{ type Output = T; fn elems(&self) -> T { @@ -30,13 +41,19 @@ impl IterStats for I where I: Clone + ExactSizeIterator, T: C self.clone().sum::() / self.elems() } - fn std(&self) -> Self::Output where T: ComplexFloat { + fn std(&self) -> Self::Output + where + T: ComplexFloat, + { let mean = self.mean(); let sum = self.clone().map(|x| (x - mean).powi(2)).sum::(); (sum / self.elems()).sqrt() } - fn var(&self) -> Self::Output where T: ComplexFloat { + fn var(&self) -> Self::Output + where + T: ComplexFloat, + { let mean = self.mean(); let sum = self.clone().map(|x| (x - mean).powi(2)).sum::(); sum / self.elems() diff --git a/core/src/types/mask.rs b/core/src/types/mask.rs index 9be85784..ab587861 100644 --- a/core/src/types/mask.rs +++ b/core/src/types/mask.rs @@ -13,8 +13,6 @@ where type Data: RawData; } - - pub struct Mask(ArrayBase) where D: Dimension, @@ -30,9 +28,8 @@ where } } - /* - ************* Implementations ************* + ************* Implementations 
************* */ mod impls { use super::*; @@ -120,5 +117,4 @@ mod impls { mask.0 } } - -} \ No newline at end of file +} diff --git a/data/src/tensor/ndtensor/traits.rs b/data/src/tensor/ndtensor/traits.rs index 67af23b1..b125c25b 100644 --- a/data/src/tensor/ndtensor/traits.rs +++ b/data/src/tensor/ndtensor/traits.rs @@ -28,8 +28,6 @@ pub trait NdContainer { fn shape(&self) -> &[usize]; } - - /* ******** implementations ******** */ diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index cc42c531..14887a77 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -2,7 +2,7 @@ Appellation: head Contrib: FL03 */ -use super::{_attention, Score}; +use super::{Score, _attention}; use crate::params::QkvBase; use concision::getters; use concision::nn::DropoutLayer; @@ -10,16 +10,14 @@ use nd::linalg::Dot; use nd::*; use num::complex::ComplexFloat; - - // #68 /// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in /// [Attention is all you need](https://arxiv.org/abs/1706.03762). 
/// /// ### Fields -/// +/// /// [dropout](DropoutLayer): requires the `rand` feature -/// +/// pub struct AttentionHead> where D: Dimension, diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs index 67592c2d..0b33a303 100644 --- a/models/transformers/src/attention/multi/mod.rs +++ b/models/transformers/src/attention/multi/mod.rs @@ -11,4 +11,4 @@ pub(crate) mod multi_head; pub(crate) mod prelude { pub use super::multi_head::MultiHeadAttention; -} \ No newline at end of file +} diff --git a/models/transformers/src/attention/score.rs b/models/transformers/src/attention/score.rs index c2548040..3e1df96e 100644 --- a/models/transformers/src/attention/score.rs +++ b/models/transformers/src/attention/score.rs @@ -6,11 +6,14 @@ use core::fmt; use nd::{Array, Dimension}; /// [Score] is a created as a result of invoking an attention mechanism; -/// +/// /// - attention: the actual result; returns the dot product of the score with the value tensor /// - score: the attention score tensor #[derive(Clone, Eq, Hash, PartialEq)] -pub struct Score where D: Dimension { +pub struct Score +where + D: Dimension, +{ pub(crate) attention: Array, pub(crate) score: Array, } @@ -41,8 +44,12 @@ where } } -impl Copy for Score where A: Copy, D: Copy + Dimension, Array: Copy { - +impl Copy for Score +where + A: Copy, + D: Copy + Dimension, + Array: Copy, +{ } impl fmt::Debug for Score @@ -68,8 +75,6 @@ where } } - - impl From<(Array, Array)> for Score where D: Dimension, @@ -86,4 +91,4 @@ where fn from(score: Score) -> Self { (score.attention, score.score) } -} \ No newline at end of file +} diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs index cbb381af..4160975d 100644 --- a/models/transformers/src/impls/impl_head.rs +++ b/models/transformers/src/impls/impl_head.rs @@ -89,4 +89,4 @@ where fn from(params: QkvBase) -> Self { Self::from_params(params) } -} \ No newline at end of file +} 
diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs index ff070c13..7763caa9 100644 --- a/models/transformers/src/macros.rs +++ b/models/transformers/src/macros.rs @@ -18,4 +18,4 @@ macro_rules! ndbuilder { Self::builder(shape, ndarray::ArrayBase::$call) } }; -} \ No newline at end of file +} diff --git a/models/transformers/src/macros/params.rs b/models/transformers/src/macros/params.rs index c8bdcd52..f7e12e32 100644 --- a/models/transformers/src/macros/params.rs +++ b/models/transformers/src/macros/params.rs @@ -3,7 +3,6 @@ Contrib: FL03 */ - macro_rules! qkv_view { ($method:ident$(.$call:ident)?::$($rest:tt)*) => { qkv_view!(@impl $method$(.$call)?::$($rest)*); diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs index eb527659..6bc023af 100644 --- a/models/transformers/tests/attention.rs +++ b/models/transformers/tests/attention.rs @@ -11,13 +11,12 @@ use transformers::AttentionHead; use ndarray::prelude::*; #[test] - fn attention_head() { let shape = (3, 3); let head = AttentionHead::::ones(shape); assert_eq!(head.q(), &Array::ones(shape)); - let exp = Array2::from_elem(shape, 1f64/3f64); + let exp = Array2::from_elem(shape, 1f64 / 3f64); let score = head.attention(); assert!(score.attention().abs_diff_eq(&exp, 1e-6)); } From 207072a4794e64eab2ecb51e5627be7d57443e96 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Fri, 24 May 2024 03:17:55 -0500 Subject: [PATCH 13/19] update Signed-off-by: Joe McCain III --- core/src/func/activate/nl.rs | 4 +- models/linear/src/norm/layer/mod.rs | 9 ++- models/linear/src/norm/layer/model.rs | 12 +-- models/transformers/Cargo.toml | 1 - models/transformers/src/attention/head.rs | 15 ++++ .../src/attention/multi/config.rs | 49 ++++++++++++ .../transformers/src/attention/multi/mod.rs | 4 +- .../src/attention/multi/multi_head.rs | 6 +- .../transformers/src/codec/encoder/layer.rs | 17 ++++- models/transformers/src/codec/mod.rs | 7 ++ 
models/transformers/src/lib.rs | 1 + models/transformers/src/macros.rs | 64 ++++++++++++++++ models/transformers/src/model/mod.rs | 6 ++ models/transformers/src/model/sublayer.rs | 74 +++++++++++++++++++ models/transformers/src/primitives.rs | 5 +- 15 files changed, 254 insertions(+), 20 deletions(-) create mode 100644 models/transformers/src/attention/multi/config.rs create mode 100644 models/transformers/src/model/mod.rs create mode 100644 models/transformers/src/model/sublayer.rs diff --git a/core/src/func/activate/nl.rs b/core/src/func/activate/nl.rs index 1e861170..d9e70fed 100644 --- a/core/src/func/activate/nl.rs +++ b/core/src/func/activate/nl.rs @@ -34,8 +34,8 @@ where &e / e.sum() } -// fn __softmax(args: &I) -> I -// where +// fn __softmax(args: &I) -> I +// where // I: Clone + core::ops::Div + Exp, T: Exp + core::iter::Sum , // for<'a> I: IntoIterator, // { diff --git a/models/linear/src/norm/layer/mod.rs b/models/linear/src/norm/layer/mod.rs index 6b54d6e8..28254dc1 100644 --- a/models/linear/src/norm/layer/mod.rs +++ b/models/linear/src/norm/layer/mod.rs @@ -19,13 +19,15 @@ pub(crate) mod prelude { } pub(crate) mod utils { - use nd::{Array, Axis, Dimension, RemoveAxis}; + use nd::prelude::*; + use nd::{Data, RemoveAxis}; use num::traits::{Float, FromPrimitive}; - pub(crate) fn layer_norm(x: &Array, eps: f64) -> Array + pub(crate) fn layer_norm(x: &ArrayBase, eps: f64) -> Array where A: Float + FromPrimitive, D: Dimension, + S: Data, { let mean = x.mean().unwrap(); let denom = { @@ -36,10 +38,11 @@ pub(crate) mod utils { x.mapv(|xi| (xi - mean) / denom) } - pub(crate) fn layer_norm_axis(x: &Array, axis: Axis, eps: f64) -> Array + pub(crate) fn layer_norm_axis(x: &ArrayBase, axis: Axis, eps: f64) -> Array where A: Float + FromPrimitive, D: RemoveAxis, + S: Data, { let eps = A::from(eps).unwrap(); let mean = x.mean_axis(axis).unwrap(); diff --git a/models/linear/src/norm/layer/model.rs b/models/linear/src/norm/layer/model.rs index e5dc6b67..1cca2419 
100644 --- a/models/linear/src/norm/layer/model.rs +++ b/models/linear/src/norm/layer/model.rs @@ -6,7 +6,7 @@ use super::Config; use crate::{Biased, LinearParams, ParamMode, Unbiased}; use concision::Forward; use nd::prelude::*; -use nd::RemoveAxis; +use nd::{Data, RemoveAxis}; use num::traits::{Float, FromPrimitive, One, Zero}; // #62 @@ -139,14 +139,15 @@ where } } -impl Forward> for LayerNorm +impl Forward> for LayerNorm where A: Float + FromPrimitive, D: RemoveAxis, + S: Data, { type Output = Array; - fn forward(&self, x: &Array) -> Self::Output { + fn forward(&self, x: &ArrayBase) -> Self::Output { let norm = if let Some(axis) = self.config().axis() { super::layer_norm_axis(x, *axis, self.eps()) } else { @@ -156,14 +157,15 @@ where } } -impl Forward> for LayerNorm +impl Forward> for LayerNorm where A: Float + FromPrimitive, D: RemoveAxis, + S: Data, { type Output = Array; - fn forward(&self, x: &Array) -> Self::Output { + fn forward(&self, x: &ArrayBase) -> Self::Output { let norm = if let Some(axis) = self.config().axis() { super::layer_norm_axis(x, *axis, self.eps()) } else { diff --git a/models/transformers/Cargo.toml b/models/transformers/Cargo.toml index d0558100..7dc36953 100644 --- a/models/transformers/Cargo.toml +++ b/models/transformers/Cargo.toml @@ -133,7 +133,6 @@ optional = true version = "0.1" [dev-dependencies] -approx.workspace = true lazy_static.workspace = true [package.metadata.docs.rs] diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index 14887a77..80da457b 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -59,6 +59,21 @@ where { Self::from_params(QkvBase::from_elem(shape, value)) } + /// Setup the [AttentionHead] with a [DropoutLayer] + #[cfg(feature = "rand")] + pub fn with_dropout(self, dropout: DropoutLayer) -> Self { + Self { + dropout: Some(dropout), + ..self + } + } + /// Setup the [AttentionHead] with a mask + pub fn 
with_mask(self, mask: Array) -> Self { + Self { + mask: Some(mask), + ..self + } + } /// Computes the [Score] using scaled dot-product attention. pub fn attention(&self) -> Score where diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs new file mode 100644 index 00000000..4fa5302c --- /dev/null +++ b/models/transformers/src/attention/multi/config.rs @@ -0,0 +1,49 @@ +/* + Appellation: config + Contrib: FL03 +*/ + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct Config { + pub heads: usize, +} + +impl Config { + pub fn new() -> ConfigBuilder { + ConfigBuilder::new() + } + pub fn heads(&self) -> usize { + self.heads + } +} + +impl Default for Config { + fn default() -> Self { + Self { + heads: crate::HEADS, + } + } +} + +#[derive(Default)] +pub struct ConfigBuilder { + heads: Option, +} + +impl ConfigBuilder { + pub fn new() -> Self { + Self { heads: None } + } + + pub fn heads(mut self, heads: usize) -> Self { + self.heads = Some(heads); + self + } + + pub fn build(&self) -> Config { + Config { + heads: self.heads.unwrap_or(crate::HEADS), + } + } +} diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs index 0b33a303..e101f032 100644 --- a/models/transformers/src/attention/multi/mod.rs +++ b/models/transformers/src/attention/multi/mod.rs @@ -5,10 +5,12 @@ //! # Multi-Head Attention //! //! 
-pub use self::multi_head::*; +pub use self::{config::Config, multi_head::*}; +pub(crate) mod config; pub(crate) mod multi_head; pub(crate) mod prelude { + pub use super::config::Config as MultiHeadAttentionConfig; pub use super::multi_head::MultiHeadAttention; } diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs index e59061ae..47b2a8e2 100644 --- a/models/transformers/src/attention/multi/multi_head.rs +++ b/models/transformers/src/attention/multi/multi_head.rs @@ -2,11 +2,9 @@ Appellation: multi_head Contrib: FL03 */ +use super::Config; -pub struct Config { - pub heads: usize, -} - +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct MultiHeadAttention { pub(crate) config: Config, } diff --git a/models/transformers/src/codec/encoder/layer.rs b/models/transformers/src/codec/encoder/layer.rs index 10821bd3..5c00ebcf 100644 --- a/models/transformers/src/codec/encoder/layer.rs +++ b/models/transformers/src/codec/encoder/layer.rs @@ -2,12 +2,25 @@ Appellation: layer Contrib: FL03 */ +use crate::attention::multi::MultiHeadAttention; #[derive(Default)] -pub struct EncoderLayer {} +pub struct EncoderLayer { + pub(crate) attention: MultiHeadAttention, +} impl EncoderLayer { pub fn new() -> Self { - Self {} + let attention = MultiHeadAttention::default(); + + Self { attention } + } + /// Returns an immutable reference to the multi-head, self-attention layer. + pub fn attention(&self) -> &MultiHeadAttention { + &self.attention + } + /// Returns a mutable reference to the multi-head, self-attention layer. + pub fn attention_mut(&mut self) -> &mut MultiHeadAttention { + &mut self.attention } } diff --git a/models/transformers/src/codec/mod.rs b/models/transformers/src/codec/mod.rs index 3a7e3f77..52e34740 100644 --- a/models/transformers/src/codec/mod.rs +++ b/models/transformers/src/codec/mod.rs @@ -2,6 +2,13 @@ Appellation: codec Contrib: FL03 */ +//! # Codec +//! 
+//! The `codec` module implements the [Decoder] and [Encoder] layers of the [Transformer](crate::Transformer) model. +//! Each layer has two sublayers, namely: +//! - multi-head, self-attention layer +//! - fully-connected, piecewise feed-forward network. +//! pub use self::{decoder::Decoder, encoder::Encoder, model::*}; pub(crate) mod model; diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs index 37c5c1ce..9e512f24 100644 --- a/models/transformers/src/lib.rs +++ b/models/transformers/src/lib.rs @@ -29,6 +29,7 @@ pub(crate) mod transformer; pub mod attention; pub mod codec; +pub mod model; pub mod ops; pub mod params; diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs index 7763caa9..e25dafae 100644 --- a/models/transformers/src/macros.rs +++ b/models/transformers/src/macros.rs @@ -19,3 +19,67 @@ macro_rules! ndbuilder { } }; } + +#[allow(unused_macros)] +macro_rules! cbuilder { + (@impl derive: [$($D:ident),* $(,)?], $name:ident {$($vis:vis $field:ident: $type:ty),*}) => { + #[derive(Clone, Debug, PartialEq, $($D),*)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + pub struct $name { + $($vis $field: $type),* + } + impl $name { + paste::paste! 
{ + pub fn new() -> [<$name Builder>] { + [<$name Builder>]::new() + } + } + + $( + pub fn $field(mut self, $field: $type) -> Self { + self.$field = $field; + self + } + )* + } + }; + (@builder derive: [$($D:ident),* $(,)?], $name:ident {$($field:ident: $type:ty),*}) => { + pub struct $name { + $(pub(crate) $field: $type),* + } + + impl $name { + pub fn new() -> Self { + Self { + $($field: None),* + } + } + + $( + pub fn $field(mut self, $field: $type) -> Self { + self.$field = Some($field); + self + } + )* + + pub fn build(&self) -> Config { + Config { + $($field: self.$field.unwrap_or_else(|| crate::$field),)* + } + } + } + + impl Default for $name { + fn default() -> Self { + Self::new() + } + } + }; +} + +/// This macro helps create a stack of identical sublayers. +/// +#[allow(unused_macros)] +macro_rules! sublayer { + (@impl heads: $heads:expr) => {}; +} diff --git a/models/transformers/src/model/mod.rs b/models/transformers/src/model/mod.rs new file mode 100644 index 00000000..ac227da3 --- /dev/null +++ b/models/transformers/src/model/mod.rs @@ -0,0 +1,6 @@ +/* + Appellation: model + Contrib: FL03 +*/ + +pub mod sublayer; diff --git a/models/transformers/src/model/sublayer.rs b/models/transformers/src/model/sublayer.rs new file mode 100644 index 00000000..a1a5fbe7 --- /dev/null +++ b/models/transformers/src/model/sublayer.rs @@ -0,0 +1,74 @@ +/* + Appellation: sublayer + Contrib: FL03 +*/ +#![cfg(feature = "rand")] +use concision::nn::DropoutLayer; +use concision::Forward; +use linear::{Biased, LayerNorm, ParamMode, Unbiased}; +use nd::prelude::*; +use nd::{DataOwned, RemoveAxis, ScalarOperand}; +use num::traits::{Float, FromPrimitive}; + +/// A residual connection followed by a [layer norm](LayerNorm) +/// [Transformer](crate::Transformer) +pub struct Sublayer +where + D: Dimension, +{ + pub(crate) dropout: DropoutLayer, + pub(crate) norm: LayerNorm, +} + +impl Sublayer +where + D: RemoveAxis, +{ + pub fn new(shape: Sh, dropout: f64) -> Self + where + A: 
Default, + K: ParamMode, + Sh: ShapeBuilder, + { + Self { + dropout: DropoutLayer::new(dropout), + norm: LayerNorm::new(shape), + } + } + + pub fn dropout(&self) -> &DropoutLayer { + &self.dropout + } + + pub fn norm(&self) -> &LayerNorm { + &self.norm + } +} + +impl Forward> for Sublayer +where + A: Float + FromPrimitive + ScalarOperand, + D: RemoveAxis, + S: DataOwned, +{ + type Output = Array; + + fn forward(&self, input: &ArrayBase) -> Self::Output { + let normal = self.norm().forward(input); + input + self.dropout().forward(&normal) + } +} + +impl Forward> for Sublayer +where + A: Float + FromPrimitive + ScalarOperand, + D: RemoveAxis, + S: DataOwned, +{ + type Output = Array; + + fn forward(&self, input: &ArrayBase) -> Self::Output { + let normal = self.norm().forward(input); + input + self.dropout().forward(&normal) + } +} diff --git a/models/transformers/src/primitives.rs b/models/transformers/src/primitives.rs index 5f5c1bcf..3b30e7aa 100644 --- a/models/transformers/src/primitives.rs +++ b/models/transformers/src/primitives.rs @@ -9,9 +9,10 @@ pub mod consts { pub const D_MODEL: usize = 512; /// The default size of the network; i.e. the number of neurons in the network pub const D_NETWORK: usize = 2048; - + /// The default dimension of the key and query vectors + pub const DK: usize = D_MODEL / HEADS; /// The default number of attention heads - pub const H: usize = 8; + pub const HEADS: usize = 8; /// The default number of layers used for the encoder / decoder. 
pub const N: usize = 6; } From 4dbc5e439b5e24a6f8af6ce2edfa14d6334aee17 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Fri, 24 May 2024 06:22:17 -0500 Subject: [PATCH 14/19] update Signed-off-by: Joe McCain III --- core/src/macros/builder.rs | 6 ++-- core/src/nn/mod.rs | 1 + core/src/nn/model.rs | 2 ++ core/src/nn/model/repo.rs | 10 ++++++ core/src/traits/mod.rs | 4 +-- core/src/traits/{misc => }/setup.rs | 0 models/transformers/src/attention/head.rs | 2 +- .../src/attention/multi/config.rs | 36 +++++++++---------- .../src/attention/multi/multi_head.rs | 34 ++++++++++++++++-- models/transformers/src/params/store.rs | 10 ++++++ 10 files changed, 76 insertions(+), 29 deletions(-) create mode 100644 core/src/nn/model/repo.rs rename core/src/traits/{misc => }/setup.rs (100%) diff --git a/core/src/macros/builder.rs b/core/src/macros/builder.rs index 8fba06d2..b840fea6 100644 --- a/core/src/macros/builder.rs +++ b/core/src/macros/builder.rs @@ -6,10 +6,10 @@ #[macro_export] macro_rules! builder { ($(#[derive($($d:ident),+)])?$name:ident::<$inner:ty> {$($k:ident: $v:ty),* $(,)?}) => { - builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); + $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); }; ($(#[derive($($d:ident),+)])? $name:ident($inner:ty) {$($k:ident: $v:ty),* $(,)?}) => { - builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); + $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); }; (@loop builder: $name:ident, derive: [$($d:ident),* $(,)?], inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => { @@ -18,7 +18,7 @@ macro_rules! 
builder { inner: $inner, } - builder!(@impl builder: $name, inner: $inner {$($k: $v),*}); + $crate::builder!(@impl builder: $name, inner: $inner {$($k: $v),*}); }; (@impl builder: $name:ident, inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => { impl $name { diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs index d4c7fa48..0b8f8707 100644 --- a/core/src/nn/mod.rs +++ b/core/src/nn/mod.rs @@ -8,6 +8,7 @@ pub mod dropout; pub mod error; pub mod model; + pub(crate) mod prelude { pub use super::dropout::*; pub use super::error::*; diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs index 8991d08e..d989cb9b 100644 --- a/core/src/nn/model.rs +++ b/core/src/nn/model.rs @@ -6,6 +6,8 @@ pub use self::module::*; pub mod config; pub mod module; +#[doc(hidden)] +pub mod repo; pub(crate) mod prelude { pub use super::config::*; diff --git a/core/src/nn/model/repo.rs b/core/src/nn/model/repo.rs new file mode 100644 index 00000000..bb0c8b6c --- /dev/null +++ b/core/src/nn/model/repo.rs @@ -0,0 +1,10 @@ +/* + Appellation: repo + Contrib: FL03 +*/ +#![allow(unused)] + +pub struct ModelRepo { + pub name: String, + pub(crate) store: String, +} \ No newline at end of file diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs index b6aa6b21..f7a13549 100644 --- a/core/src/traits/mod.rs +++ b/core/src/traits/mod.rs @@ -7,6 +7,7 @@ pub use self::prelude::*; pub mod num; pub mod ops; pub mod predict; +pub mod setup; pub mod train; pub mod arr { @@ -29,14 +30,12 @@ pub mod misc { pub mod adjust; #[doc(hidden)] pub mod container; - pub mod setup; pub mod store; pub mod toggle; pub(crate) mod prelude { pub use super::adjust::*; pub use super::container::*; - pub use super::setup::*; pub use super::store::*; pub use super::toggle::*; } @@ -48,5 +47,6 @@ pub(crate) mod prelude { pub use super::num::*; pub use super::ops::*; pub use super::predict::*; + pub use super::setup::*; pub use super::train::*; } diff --git a/core/src/traits/misc/setup.rs b/core/src/traits/setup.rs 
similarity index 100% rename from core/src/traits/misc/setup.rs rename to core/src/traits/setup.rs diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index 80da457b..fe9a34d8 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -85,7 +85,7 @@ where let (q, k, v) = self.qkv(); _attention(q, k, v, self.mask(), self.dropout()) } - /// Returns an immutable reference to the, optional, [Dropout] layer + /// Returns an immutable reference to the, optional, mask. pub fn mask(&self) -> Option<&Array> { self.mask.as_ref() } diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs index 4fa5302c..6b5c70c9 100644 --- a/models/transformers/src/attention/multi/config.rs +++ b/models/transformers/src/attention/multi/config.rs @@ -6,6 +6,7 @@ #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Config { + pub d_model: usize, pub heads: usize, } @@ -13,6 +14,15 @@ impl Config { pub fn new() -> ConfigBuilder { ConfigBuilder::new() } + + pub fn d_model(&self) -> usize { + self.d_model + } + + pub fn dk(&self) -> usize { + self.d_model() / self.heads() + } + pub fn heads(&self) -> usize { self.heads } @@ -21,29 +31,15 @@ impl Config { impl Default for Config { fn default() -> Self { Self { + d_model: crate::D_MODEL, heads: crate::HEADS, } } } -#[derive(Default)] -pub struct ConfigBuilder { - heads: Option, -} - -impl ConfigBuilder { - pub fn new() -> Self { - Self { heads: None } +concision::builder! 
{ + ConfigBuilder(Config) { + d_model: usize, + heads: usize, } - - pub fn heads(mut self, heads: usize) -> Self { - self.heads = Some(heads); - self - } - - pub fn build(&self) -> Config { - Config { - heads: self.heads.unwrap_or(crate::HEADS), - } - } -} +} \ No newline at end of file diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs index 47b2a8e2..ea37ffee 100644 --- a/models/transformers/src/attention/multi/multi_head.rs +++ b/models/transformers/src/attention/multi/multi_head.rs @@ -3,14 +3,42 @@ Contrib: FL03 */ use super::Config; +use crate::AttentionHead; +use linear::{Biased, Linear}; +use nd::prelude::*; -#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct MultiHeadAttention { + +#[derive(Default)] +pub struct MultiHeadAttention where D: Dimension, { + pub(crate) attention: Option>, pub(crate) config: Config, + pub(crate) linears: Vec>, } -impl MultiHeadAttention { +impl MultiHeadAttention where D: Dimension, { + + pub fn head(&self) -> Option<&AttentionHead> { + self.attention.as_ref() + } + pub const fn config(&self) -> &Config { &self.config } + + pub fn linears(&self) -> &[Linear] { + &self.linears + } } + +impl MultiHeadAttention { + pub fn std(config: Config) -> Self where A: Clone + Default { + let linears = (0..4) + .map(|_| Linear::from_features(config.d_model(), config.d_model())) + .collect(); + Self { + attention: None, + config, + linears + } + } +} \ No newline at end of file diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs index d62fb7cb..87208081 100644 --- a/models/transformers/src/params/store.rs +++ b/models/transformers/src/params/store.rs @@ -57,6 +57,16 @@ where (self.q.view(), self.k.view(), self.v.view()) } + // pub fn attention(&self, mask: Option>) -> Array + // where + // A: ComplexFloat, + // S: Data, + // { + // let (q, k, v) = self.as_qkv(); + // let (q, k, v) = 
_attention(q, k, v, mask); + // q.dot(&v) + // } + /// Consumes the store and returns a three-tuple consisting of the query, key, and value arrays respectively. pub fn into_qkv(self) -> (ArrayBase, ArrayBase, ArrayBase) { (self.q, self.k, self.v) From ba80a2dc1650bf9cc488038f3bdfa9d8c4e2a46a Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Fri, 24 May 2024 08:44:06 -0500 Subject: [PATCH 15/19] update Signed-off-by: Joe McCain III --- core/src/init/mod.rs | 8 +- core/src/init/{initialize.rs => traits.rs} | 14 ++++ core/src/init/utils.rs | 35 +++++---- core/src/macros/builder.rs | 3 - core/src/{types => nn/mask}/mask.rs | 20 +++-- core/src/nn/mask/mod.rs | 29 +++++++ core/src/nn/mod.rs | 13 +++- core/src/nn/model/repo.rs | 2 +- core/src/traits/arr/reshape.rs | 40 ++++++++++ core/src/traits/arr/tensor.rs | 77 +++++++++++++++---- core/src/traits/misc/container.rs | 14 +++- core/src/traits/mod.rs | 10 ++- core/src/types/mod.rs | 2 - core/tests/traits.rs | 15 +++- models/linear/src/mlp/model.rs | 10 +++ models/transformers/src/attention/head.rs | 61 ++++++++++----- .../src/attention/multi/config.rs | 2 +- .../src/attention/multi/multi_head.rs | 35 ++++++--- models/transformers/src/codec/model.rs | 12 +-- models/transformers/src/impls/impl_init.rs | 5 +- models/transformers/src/impls/impl_params.rs | 35 ++++++--- models/transformers/src/lib.rs | 2 +- models/transformers/src/params/store.rs | 55 ++++++++++--- 23 files changed, 380 insertions(+), 119 deletions(-) rename core/src/init/{initialize.rs => traits.rs} (95%) rename core/src/{types => nn/mask}/mask.rs (89%) create mode 100644 core/src/nn/mask/mod.rs create mode 100644 core/src/traits/arr/reshape.rs create mode 100644 models/linear/src/mlp/model.rs diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs index f2bb4731..7f5bc4b4 100644 --- a/core/src/init/mod.rs +++ b/core/src/init/mod.rs @@ -11,9 +11,11 @@ //! better suited for machine-learning workloads. 
#![cfg(feature = "rand")] -pub use self::prelude::*; +pub use self::distr::prelude::*; +pub use self::traits::*; +pub use self::utils::*; -pub(crate) mod initialize; +pub(crate) mod traits; pub(crate) mod utils; pub mod initializer; @@ -41,6 +43,6 @@ pub use rand_distr; pub(crate) mod prelude { pub use super::distr::prelude::*; - pub use super::initialize::{Initialize, InitializeExt}; + pub use super::traits::{Initialize, InitializeExt}; pub use super::utils::*; } diff --git a/core/src/init/initialize.rs b/core/src/init/traits.rs similarity index 95% rename from core/src/init/initialize.rs rename to core/src/init/traits.rs index 4d2eb51d..a01ca7d9 100644 --- a/core/src/init/initialize.rs +++ b/core/src/init/traits.rs @@ -139,6 +139,20 @@ where { Self::rand(shape, Uniform::new(dk.clone().neg(), dk)) } + + fn uniform_from_seed(shape: Sh, start: A, stop: A, key: u64) -> Self + where + A: SampleUniform, + S: DataOwned, + Sh: ShapeBuilder, + ::Sampler: Clone, + { + Self::rand_with( + shape, + Uniform::new(start, stop), + &mut StdRng::seed_from_u64(key), + ) + } /// Generate a random array with values between u(-a, a) where a is the reciprocal of the value at the given axis fn uniform_along(shape: Sh, axis: usize) -> Self where diff --git a/core/src/init/utils.rs b/core/src/init/utils.rs index 3994589c..dacb3df2 100644 --- a/core/src/init/utils.rs +++ b/core/src/init/utils.rs @@ -22,23 +22,7 @@ where let distr = ComplexDistribution::::new(A::one(), A::one()); ArrayBase::random(shape, distr) } -/// Creates a random array from a uniform distribution using a given key -pub fn seeded_uniform( - key: u64, - start: T, - stop: T, - shape: impl IntoDimension, -) -> Array -where - D: Dimension, - T: SampleUniform, -{ - Array::random_using( - shape, - Uniform::new(start, stop), - &mut rngs::StdRng::seed_from_u64(key), - ) -} + /// Given a shape, generate a random array using the StandardNormal distribution pub fn stdnorm(shape: Sh) -> ArrayBase where @@ -59,3 +43,20 @@ where { 
ArrayBase::random_using(shape, StandardNormal, &mut StdRng::seed_from_u64(seed)) } +/// Creates a random array from a uniform distribution using a given key +pub fn uniform_from_seed( + key: u64, + start: T, + stop: T, + shape: impl IntoDimension, +) -> Array +where + D: Dimension, + T: SampleUniform, +{ + Array::random_using( + shape, + Uniform::new(start, stop), + &mut rngs::StdRng::seed_from_u64(key), + ) +} diff --git a/core/src/macros/builder.rs b/core/src/macros/builder.rs index b840fea6..6e545c9f 100644 --- a/core/src/macros/builder.rs +++ b/core/src/macros/builder.rs @@ -5,9 +5,6 @@ #[macro_export] macro_rules! builder { - ($(#[derive($($d:ident),+)])?$name:ident::<$inner:ty> {$($k:ident: $v:ty),* $(,)?}) => { - $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); - }; ($(#[derive($($d:ident),+)])? $name:ident($inner:ty) {$($k:ident: $v:ty),* $(,)?}) => { $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); }; diff --git a/core/src/types/mask.rs b/core/src/nn/mask/mask.rs similarity index 89% rename from core/src/types/mask.rs rename to core/src/nn/mask/mask.rs index ab587861..263cf016 100644 --- a/core/src/types/mask.rs +++ b/core/src/nn/mask/mask.rs @@ -2,18 +2,10 @@ Appellation: mask Contrib: FL03 */ - use nd::prelude::*; -use nd::RawData; - -pub trait NdMask -where - D: Dimension, -{ - type Data: RawData; -} +use nd::{OwnedRepr, RawData}; -pub struct Mask(ArrayBase) +pub struct Mask, D = Ix2>(ArrayBase) where D: Dimension, S: RawData; @@ -32,9 +24,10 @@ where ************* Implementations ************* */ mod impls { - use super::*; + use super::Mask; use core::borrow::{Borrow, BorrowMut}; use core::ops::{Deref, DerefMut}; + use nd::{ArrayBase, Dimension, RawData}; impl AsRef> for Mask where @@ -97,6 +90,11 @@ mod impls { &mut self.0 } } +} + +mod impl_from { + use super::Mask; + use nd::{ArrayBase, Dimension, RawData}; impl From> for Mask where diff --git 
a/core/src/nn/mask/mod.rs b/core/src/nn/mask/mod.rs new file mode 100644 index 00000000..5a3aaa2b --- /dev/null +++ b/core/src/nn/mask/mod.rs @@ -0,0 +1,29 @@ +/* + Appellation: mask + Contrib: FL03 +*/ +pub use self::mask::*; + +pub(crate) mod mask; + +pub(crate) mod prelude { + pub use super::mask::Mask; + pub use super::NdMask; +} + +use nd::{ArrayBase, Dimension, Ix2, RawData}; + +pub trait NdMask +where + D: Dimension, +{ + type Data: RawData; +} + +impl NdMask for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Data = S; +} diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs index 0b8f8707..172b7e7d 100644 --- a/core/src/nn/mod.rs +++ b/core/src/nn/mod.rs @@ -2,22 +2,29 @@ Appellation: nn Contrib: FL03 */ +#[cfg(any(feature = "alloc", feature = "std"))] +pub use self::types::*; pub use self::{dropout::*, error::ModelError, model::prelude::*}; pub mod dropout; pub mod error; +pub mod mask; pub mod model; - pub(crate) mod prelude { pub use super::dropout::*; pub use super::error::*; + pub use super::mask::prelude::*; pub use super::model::prelude::*; } #[cfg(any(feature = "alloc", feature = "std"))] -pub type ForwardDyn, O = T> = - crate::rust::Box>; +mod types { + use crate::rust::Box; + use nd::prelude::Array2; + + pub type ForwardDyn, O = T> = Box>; +} #[cfg(test)] mod tests {} diff --git a/core/src/nn/model/repo.rs b/core/src/nn/model/repo.rs index bb0c8b6c..affd401a 100644 --- a/core/src/nn/model/repo.rs +++ b/core/src/nn/model/repo.rs @@ -7,4 +7,4 @@ pub struct ModelRepo { pub name: String, pub(crate) store: String, -} \ No newline at end of file +} diff --git a/core/src/traits/arr/reshape.rs b/core/src/traits/arr/reshape.rs new file mode 100644 index 00000000..7079f130 --- /dev/null +++ b/core/src/traits/arr/reshape.rs @@ -0,0 +1,40 @@ +/* + Appellation: reshape [traits::arr] + Contrib: FL03 +*/ +use nd::prelude::*; +use nd::{RawData, RawDataClone}; + +pub trait Unsqueeze { + type Output; + + fn unsqueeze(self, axis: usize) -> Self::Output; +} 
+ +/* + ************* Implementations ************* +*/ + +impl Unsqueeze for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Output = ArrayBase; + + fn unsqueeze(self, axis: usize) -> Self::Output { + self.insert_axis(Axis(axis)) + } +} + +impl<'a, A, S, D> Unsqueeze for &'a ArrayBase +where + D: Dimension, + S: RawDataClone, +{ + type Output = ArrayBase; + + fn unsqueeze(self, axis: usize) -> Self::Output { + self.clone().insert_axis(Axis(axis)) + } +} diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs index 9bd5d863..12571e0f 100644 --- a/core/src/traits/arr/tensor.rs +++ b/core/src/traits/arr/tensor.rs @@ -3,17 +3,44 @@ Contrib: FL03 */ use super::Dimensional; +use nd::iter::{Iter, IterMut}; use nd::prelude::*; use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData}; use num::{One, Zero}; -/// This trait describes the basic operations for any n-dimensional container. -pub trait NdContainer: Dimensional { - type Data; +pub trait NdArray +where + D: Dimension, +{ + type Data: RawData; fn as_slice(&self) -> &[A]; fn as_mut_slice(&mut self) -> &mut [A]; + + fn iter(&self) -> Iter<'_, A, D>; + + fn iter_mut(&mut self) -> IterMut<'_, A, D>; + + fn map(&self, f: F) -> Self + where + F: FnMut(&A) -> A; + + fn mapv(&mut self, f: F) + where + A: Clone, + F: FnMut(A) -> A; +} + +pub trait NdIter +where + D: Dimension, +{ + type Data: RawData; + + fn iter(&self) -> Iter<'_, A, D>; + + fn iter_mut(&mut self) -> IterMut<'_, A, D>; } /// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase) @@ -22,40 +49,39 @@ where D: Dimension, { type Data: RawData; - type Store; /// Create a new array with the given shape whose elements are set to the default value of the element type. 
- fn default(shape: Sh) -> Self::Store + fn default(shape: Sh) -> Self where A: Default, Sh: ShapeBuilder, Self::Data: DataOwned; - fn fill(shape: Sh, elem: A) -> Self::Store + fn fill(shape: Sh, elem: A) -> Self where A: Clone, Sh: ShapeBuilder, Self::Data: DataOwned; - fn ones(shape: Sh) -> Self::Store + fn ones(shape: Sh) -> Self where A: Clone + One, Sh: ShapeBuilder, Self::Data: DataOwned; - fn zeros(shape: Sh) -> Self::Store + fn zeros(shape: Sh) -> Self where A: Clone + Zero, Sh: ShapeBuilder, Self::Data: DataOwned; } -pub trait NdBuilderExt: - Dimensional + NdBuilder +pub trait NdBuilderExt: NdBuilder where D: Dimension, + Self: Dimensional + Sized, { - fn default_like(&self) -> Self::Store + fn default_like(&self) -> Self where A: Default, Sh: ShapeBuilder, @@ -64,7 +90,7 @@ where Self::default(self.dim()) } - fn fill_like(&self, elem: A) -> Self::Store + fn fill_like(&self, elem: A) -> Self where A: Clone, Sh: ShapeBuilder, @@ -73,7 +99,7 @@ where Self::fill(self.dim(), elem) } - fn ones_like(&self) -> Self::Store + fn ones_like(&self) -> Self where A: Clone + One, Sh: ShapeBuilder, @@ -82,7 +108,7 @@ where Self::ones(self.dim()) } - fn zeros_like(&self) -> Self::Store + fn zeros_like(&self) -> Self where A: Clone + Zero, Sh: ShapeBuilder, @@ -144,6 +170,28 @@ where S: DataMut; } +pub trait View +where + D: Dimension, +{ + type Data: RawData; + type Output; + + fn view(&self) -> Self::Output + where + A: Clone, + Self::Data: Data; +} +pub trait ViewMut: View +where + D: Dimension, +{ + fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> + where + A: Clone, + Self::Data: DataMut; +} + /* ************* Implementations ************* */ @@ -153,7 +201,6 @@ where S: RawData, { type Data = S; - type Store = ArrayBase; fn default(shape: Sh) -> Self where diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs index 842d96db..a295d90f 100644 --- a/core/src/traits/misc/container.rs +++ b/core/src/traits/misc/container.rs @@ -2,11 +2,21 @@ 
Appellation: container Contrib: FL03 */ +use crate::traits::Dimensional; pub trait Container { - type Data: Data; + type Data: Data; } pub trait Data { - type Item; + type Elem; +} + +/// This trait describes the basic operations for any n-dimensional container. +pub trait NdContainer: Dimensional { + type Data: Data; + + fn as_slice(&self) -> &[A]; + + fn as_mut_slice(&mut self) -> &mut [A]; } diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs index f7a13549..2ae4d47a 100644 --- a/core/src/traits/mod.rs +++ b/core/src/traits/mod.rs @@ -13,15 +13,17 @@ pub mod train; pub mod arr { pub use self::prelude::*; - pub(crate) mod create; - pub(crate) mod misc; - pub(crate) mod ops; - pub(crate) mod tensor; + mod create; + mod misc; + mod ops; + mod reshape; + mod tensor; pub(crate) mod prelude { pub use super::create::*; pub use super::misc::*; pub use super::ops::*; + pub use super::reshape::*; pub use super::tensor::*; } } diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs index 75cddd52..d6347e49 100644 --- a/core/src/types/mod.rs +++ b/core/src/types/mod.rs @@ -6,7 +6,6 @@ pub use self::prelude::*; #[cfg(feature = "std")] pub use self::std_types::*; -pub mod mask; pub mod propagate; pub mod shape; @@ -24,7 +23,6 @@ mod std_types { } pub(crate) mod prelude { - pub use super::mask::Mask; pub use super::propagate::Propagate; pub use super::shape::ModelShape; #[cfg(feature = "std")] diff --git a/core/tests/traits.rs b/core/tests/traits.rs index ab7bd44d..b1038f94 100644 --- a/core/tests/traits.rs +++ b/core/tests/traits.rs @@ -5,12 +5,12 @@ extern crate concision_core as cnc; use cnc::linarr; -use cnc::traits::{Affine, AsComplex, MaskFill, Matpow}; use ndarray::prelude::*; use num::Complex; #[test] fn test_affine() { + use cnc::traits::Affine; let x = array![[0.0, 1.0], [2.0, 3.0]]; let y = x.affine(4.0, -2.0); @@ -19,6 +19,7 @@ fn test_affine() { #[test] fn test_masked_fill() { + use cnc::traits::MaskFill; let shape = (2, 2); let mask = array![[true, 
false], [false, true]]; let arr = linarr::(shape).unwrap(); @@ -28,6 +29,7 @@ fn test_masked_fill() { #[test] fn test_as_complex() { + use cnc::traits::AsComplex; let x = 1.0; let y = x.as_re(); assert_eq!(y, Complex::new(1.0, 0.0)); @@ -35,8 +37,19 @@ fn test_as_complex() { #[test] fn test_matrix_power() { + use cnc::traits::Matpow; let x = array![[1.0, 2.0], [3.0, 4.0]]; assert_eq!(x.pow(0), Array2::::eye(2)); assert_eq!(x.pow(1), x); assert_eq!(x.pow(2), x.dot(&x)); } + +#[test] +fn test_unsqueeze() { + use cnc::traits::Unsqueeze; + let arr = array![1, 2, 3, 4]; + let a = arr.clone().unsqueeze(0); + assert_eq!(a.dim(), (1, 4)); + let b = arr.unsqueeze(1); + assert_eq!(b.dim(), (4, 1)); +} diff --git a/models/linear/src/mlp/model.rs b/models/linear/src/mlp/model.rs new file mode 100644 index 00000000..4128cb88 --- /dev/null +++ b/models/linear/src/mlp/model.rs @@ -0,0 +1,10 @@ +/* + Appellation: model + Contrib: FL03 +*/ + +pub struct Mlp { + input: I, + hidden: H, + output: O, +} \ No newline at end of file diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index fe9a34d8..1deebb6f 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -12,12 +12,22 @@ use num::complex::ComplexFloat; // #68 /// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in -/// [Attention is all you need](https://arxiv.org/abs/1706.03762). +/// [Attention is all you need](https://arxiv.org/abs/1706.03762). The structure is designed to +/// be flexible, relying upon the n-dimensional [QkvBase] to store the query, key, and value tensors. +/// More so, the head may be configured with an optional dropout and/or masking layers. /// -/// ### Fields +/// ### Dropout /// -/// [dropout](DropoutLayer): requires the `rand` feature +/// The [DropoutLayer] is an optional layer applied after the softmax function is applied to the +/// score. 
The layer is used to prevent overfitting by randomly setting a fraction of the input +/// units to zero at each update during training time. /// +/// ### Masking +/// +/// After computing the dot-product of the query and key tensors, an optional mask may be applied to +/// the attention score. The mask is used to prevent the model from attending to certain parts of the +/// input sequence. For example, in the case of a language model, the mask may be used to prevent the +/// model from attending to the padding tokens. pub struct AttentionHead> where D: Dimension, @@ -59,21 +69,6 @@ where { Self::from_params(QkvBase::from_elem(shape, value)) } - /// Setup the [AttentionHead] with a [DropoutLayer] - #[cfg(feature = "rand")] - pub fn with_dropout(self, dropout: DropoutLayer) -> Self { - Self { - dropout: Some(dropout), - ..self - } - } - /// Setup the [AttentionHead] with a mask - pub fn with_mask(self, mask: Array) -> Self { - Self { - mask: Some(mask), - ..self - } - } /// Computes the [Score] using scaled dot-product attention. 
pub fn attention(&self) -> Score where @@ -105,6 +100,30 @@ where pub fn into_qkv(self) -> (ArrayBase, ArrayBase, ArrayBase) { self.params.into_qkv() } + /// Sets the dropout layer for the [AttentionHead] + #[cfg(feature = "rand")] + pub fn set_dropout(&mut self, dropout: Option) { + self.dropout = dropout; + } + /// Sets the mask for the [AttentionHead] + pub fn set_mask(&mut self, mask: Option>) { + self.mask = mask; + } + /// Configure the [AttentionHead] with a [DropoutLayer] + #[cfg(feature = "rand")] + pub fn with_dropout(self, dropout: DropoutLayer) -> Self { + Self { + dropout: Some(dropout), + ..self + } + } + /// Consume and store a mask for the [AttentionHead] + pub fn with_mask(self, mask: Array) -> Self { + Self { + mask: Some(mask), + ..self + } + } getters!(params::<[q, k, v]> => ArrayBase); ndbuilder!(new::default() where A: Default, S: DataOwned); @@ -118,6 +137,9 @@ where D: Dimension, S: RawData, { + /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer. + /// With the `rand` feature flag disabled, the dropout layer is + /// unavailable and returns `None`. pub fn dropout(&self) -> Option<&DropoutLayer> { self.dropout.as_ref() } @@ -129,6 +151,9 @@ where D: Dimension, S: RawData, { + /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer. + /// With the `rand` feature flag disabled, the dropout layer is + /// unavailable and returns `None`. #[cfg(not(feature = "rand"))] pub fn dropout(&self) -> Option<&DropoutLayer> { None diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs index 6b5c70c9..2fa53c3f 100644 --- a/models/transformers/src/attention/multi/config.rs +++ b/models/transformers/src/attention/multi/config.rs @@ -42,4 +42,4 @@ concision::builder! 
{ d_model: usize, heads: usize, } -} \ No newline at end of file +} diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs index ea37ffee..be427e4d 100644 --- a/models/transformers/src/attention/multi/multi_head.rs +++ b/models/transformers/src/attention/multi/multi_head.rs @@ -7,22 +7,30 @@ use crate::AttentionHead; use linear::{Biased, Linear}; use nd::prelude::*; - #[derive(Default)] -pub struct MultiHeadAttention where D: Dimension, { - pub(crate) attention: Option>, +pub struct MultiHeadAttention +where + D: Dimension, +{ pub(crate) config: Config, + pub(crate) head: Option>, pub(crate) linears: Vec>, } -impl MultiHeadAttention where D: Dimension, { +impl MultiHeadAttention +where + D: Dimension, +{ + pub const fn config(&self) -> &Config { + &self.config + } pub fn head(&self) -> Option<&AttentionHead> { - self.attention.as_ref() + self.head.as_ref() } - - pub const fn config(&self) -> &Config { - &self.config + + pub fn head_mut(&mut self) -> Option<&mut AttentionHead> { + self.head.as_mut() } pub fn linears(&self) -> &[Linear] { @@ -31,14 +39,17 @@ impl MultiHeadAttention where D: Dimension, { } impl MultiHeadAttention { - pub fn std(config: Config) -> Self where A: Clone + Default { + pub fn std(config: Config) -> Self + where + A: Clone + Default, + { let linears = (0..4) .map(|_| Linear::from_features(config.d_model(), config.d_model())) .collect(); Self { - attention: None, + head: None, config, - linears + linears, } } -} \ No newline at end of file +} diff --git a/models/transformers/src/codec/model.rs b/models/transformers/src/codec/model.rs index 494c0a0e..470938a5 100644 --- a/models/transformers/src/codec/model.rs +++ b/models/transformers/src/codec/model.rs @@ -24,11 +24,13 @@ impl Codec { ); } -builder!(CodecBuilder:: { - ctx: Context, - decoder: Decoder, - encoder: Encoder, -}); +builder! 
{ + CodecBuilder(Codec) { + ctx: Context, + decoder: Decoder, + encoder: Encoder, + } +} #[derive(Default)] pub struct Generator { diff --git a/models/transformers/src/impls/impl_init.rs b/models/transformers/src/impls/impl_init.rs index cfde2c15..1ed7effd 100644 --- a/models/transformers/src/impls/impl_init.rs +++ b/models/transformers/src/impls/impl_init.rs @@ -1,5 +1,8 @@ +/* + Appellation: init + Contrib: FL03 +*/ #![cfg(feature = "rand")] - use crate::QkvBase; use concision::Initialize; use concision::init::rand::Rng; diff --git a/models/transformers/src/impls/impl_params.rs b/models/transformers/src/impls/impl_params.rs index 9736c1b0..2ea7dec4 100644 --- a/models/transformers/src/impls/impl_params.rs +++ b/models/transformers/src/impls/impl_params.rs @@ -6,10 +6,12 @@ use crate::params::QkvBase; use nd::prelude::*; use nd::{Data, DataOwned, RawDataClone}; -impl Clone for QkvBase +pub(crate) type ThreeTuple = (A, B, C); + +impl Clone for QkvBase where D: Dimension, - S: RawDataClone, + S: RawDataClone, { fn clone(&self) -> Self { Self { @@ -20,18 +22,18 @@ where } } -impl Copy for QkvBase +impl Copy for QkvBase where D: Copy + Dimension, - S: Copy + RawDataClone, + S: Copy + RawDataClone, { } -impl Default for QkvBase +impl Default for QkvBase where + A: Default, D: Dimension, - S: DataOwned, - S::Elem: Default, + S: DataOwned, { fn default() -> Self { Self { @@ -49,7 +51,7 @@ where S: Data, { fn eq(&self, other: &Self) -> bool { - self.q == *other.q() && self.k == *other.k() && self.v == *other.v() + self.q() == other.q() && self.k() == other.k() && self.v() == other.v() } } @@ -64,6 +66,21 @@ where ArrayBase: PartialEq>, { fn eq(&self, other: &ArrayBase) -> bool { - self.q == *other && self.k == *other && self.v == *other + self.q() == other && self.k() == other && self.v() == other + } +} + +impl PartialEq>> for QkvBase +where + A: PartialEq, + B: PartialEq, + D: Dimension, + S: Data, + S2: Data, + D2: Dimension, + ArrayBase: PartialEq>, +{ + fn eq(&self, 
(q, k, v): &ThreeTuple>) -> bool { + self.q() == q && self.k() == k && self.v() == v } } diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs index 9e512f24..89cc41f1 100644 --- a/models/transformers/src/lib.rs +++ b/models/transformers/src/lib.rs @@ -17,7 +17,7 @@ extern crate concision_core as concision; extern crate concision_linear as linear; extern crate ndarray as nd; -pub use self::attention::AttentionHead; +pub use self::attention::{scaled_dot_product_attention, AttentionHead}; pub use self::params::*; pub use self::primitives::*; pub use self::transformer::Transformer; diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs index 87208081..f59ee6eb 100644 --- a/models/transformers/src/params/store.rs +++ b/models/transformers/src/params/store.rs @@ -2,8 +2,12 @@ Appellation: params Contrib: FL03 */ +use crate::attention::{Score, _attention}; +use concision::nn::DropoutLayer; use concision::{dimensional, getters}; +use nd::linalg::Dot; use nd::*; +use num::complex::ComplexFloat; use num::traits::{One, Zero}; /// [QkvBase] is a container for the query, key, and value arrays used in the @@ -57,16 +61,6 @@ where (self.q.view(), self.k.view(), self.v.view()) } - // pub fn attention(&self, mask: Option>) -> Array - // where - // A: ComplexFloat, - // S: Data, - // { - // let (q, k, v) = self.as_qkv(); - // let (q, k, v) = _attention(q, k, v, mask); - // q.dot(&v) - // } - /// Consumes the store and returns a three-tuple consisting of the query, key, and value arrays respectively. pub fn into_qkv(self) -> (ArrayBase, ArrayBase, ArrayBase) { (self.q, self.k, self.v) @@ -93,3 +87,44 @@ where qkv_view!(view::<'a, ViewRepr>(&self) where S: Data); qkv_view!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut); } + +#[cfg(not(feature = "rand"))] +impl QkvBase +where + D: Dimension, + S: RawData, + A: Clone, +{ + /// Computes the [Score] using scaled dot-product attention. 
+ pub fn attention(&self, dropout: Option, mask: Option<&Array>) -> Score + where + A: ComplexFloat + ScalarOperand, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, + { + let (q, k, v) = self.qkv(); + _attention(q, k, v, mask, None) + } +} + +#[cfg(feature = "rand")] +impl QkvBase +where + D: Dimension, + S: RawData, + A: Clone, +{ + /// Computes the [Score] using scaled dot-product attention. + pub fn attention(&self, dropout: Option, mask: Option<&Array>) -> Score + where + A: ComplexFloat + ScalarOperand, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, + { + let dropout = dropout.map(DropoutLayer::new); + let (q, k, v) = self.qkv(); + _attention(q, k, v, mask, dropout.as_ref()) + } +} From ead13404d3cc23d6c4f08c338529a5c03ed6ac10 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Fri, 24 May 2024 09:23:37 -0500 Subject: [PATCH 16/19] update Signed-off-by: Joe McCain III --- core/Cargo.toml | 13 ++-- core/src/nn/mask/mask.rs | 122 +++++++++++++++++++++++++++++- core/src/nn/model.rs | 29 +++++++ core/tests/{random.rs => init.rs} | 0 4 files changed, 153 insertions(+), 11 deletions(-) rename core/tests/{random.rs => init.rs} (100%) diff --git a/core/Cargo.toml b/core/Cargo.toml index a4d06c91..c8f85779 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -110,15 +110,17 @@ name = "fft" required-features = ["approx"] [[test]] -name = "nn" - -[[test]] -name = "random" +name = "init" required-features = ["rand", "std"] +[[test]] +name = "nn" [build-dependencies] +[dev-dependencies] +lazy_static.workspace = true + [dependencies] ndarray.workspace = true num.workspace = true @@ -160,9 +162,6 @@ default-features = false features = ["v5", "v8"] version = "1" -[dev-dependencies] -lazy_static = "1" - [package.metadata.docs.rs] all-features = true rustc-args = ["--cfg", "docsrs"] diff --git a/core/src/nn/mask/mask.rs b/core/src/nn/mask/mask.rs index 263cf016..083fc0b9 100644 --- 
a/core/src/nn/mask/mask.rs +++ b/core/src/nn/mask/mask.rs @@ -2,8 +2,9 @@ Appellation: mask Contrib: FL03 */ +use nd::iter::{Iter, IterMut}; use nd::prelude::*; -use nd::{OwnedRepr, RawData}; +use nd::{Data, DataMut, OwnedRepr, RawData, RawDataClone}; pub struct Mask, D = Ix2>(ArrayBase) where @@ -15,9 +16,98 @@ where D: Dimension, S: RawData, { - pub fn new(data: ArrayBase) -> Self { + pub fn from_arr(data: ArrayBase) -> Self { Self(data) } + + pub fn apply(&mut self, data: &ArrayBase, fill: A) -> ArrayBase + where + A: Clone, + S: Data, + T: DataMut + RawDataClone, + { + let mut res = data.clone(); + res.zip_mut_with(self.as_mut(), |x, &m| { + if m { + *x = fill.clone(); + } + }); + res + } + + pub fn mask_inplace<'a, A, T, F>(&mut self, data: &'a mut ArrayBase, fill: A) -> &'a mut ArrayBase + where + A: Clone, + S: Data, + T: DataMut, + { + data.zip_mut_with(&mut self.0, |x, &m| { + if m { + *x = fill.clone(); + } + }); + data + } + + pub fn as_slice(&self) -> &[bool] + where + S: Data, + { + self.get().as_slice().unwrap() + } + + pub fn as_mut_slice(&mut self) -> &mut [bool] + where + S: DataMut, + { + self.get_mut().as_slice_mut().unwrap() + } + + pub fn dim(&self) -> D::Pattern { + self.get().dim() + } + + pub fn iter(&self) -> Iter<'_, bool, D> + where + S: Data, + { + self.get().iter() + } + + pub fn iter_mut(&mut self) -> IterMut<'_, bool, D> + where + S: DataMut, + { + self.get_mut().iter_mut() + } + + pub fn get(&self) -> &ArrayBase { + &self.0 + } + + pub fn get_mut(&mut self) -> &mut ArrayBase { + &mut self.0 + } + + pub fn into_inner(self) -> ArrayBase { + self.0 + } + + pub fn ndim(&self) -> usize { + self.get().ndim() + } + + pub fn raw_dim(&self) -> D { + self.get().raw_dim() + } + + pub fn set(&mut self, data: ArrayBase) { + self.0 = data; + } + + pub fn shape(&self) -> D { + self.get().raw_dim() + } } /* @@ -26,8 +116,8 @@ where mod impls { use super::Mask; use core::borrow::{Borrow, BorrowMut}; - use core::ops::{Deref, DerefMut}; - use 
nd::{ArrayBase, Dimension, RawData}; + use core::ops::{Deref, DerefMut, Index, IndexMut}; + use nd::{ArrayBase, Data, DataMut, Dimension, NdIndex, RawData}; impl AsRef> for Mask where @@ -90,6 +180,30 @@ mod impls { &mut self.0 } } + + impl Index for Mask + where + D: Dimension, + I: NdIndex, + S: Data, + { + type Output = as Index>::Output; + + fn index(&self, index: I) -> &Self::Output { + &self.0[index] + } + } + + impl IndexMut for Mask + where + D: Dimension, + I: NdIndex, + S: DataMut, + { + fn index_mut(&mut self, index: I) -> &mut Self::Output { + &mut self.0[index] + } + } } mod impl_from { diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs index d989cb9b..47d7f485 100644 --- a/core/src/nn/model.rs +++ b/core/src/nn/model.rs @@ -28,3 +28,32 @@ where fn context(&self) -> Self::Ctx; } + +/// This trait describes any neural networks or models that +/// adhears to the deep netural network architecture. +/// This design considers a single input and output layer, while +/// allowing for any number of hidden layers to be persisted. 
+/// +/// The `HIDDEN` constant is used to specify the number of hidden layers +/// and is used to compute the total number of layers (HIDDEN + 2) +pub trait DeepNeuralNetwork: Forward { + const HIDDEN: Option = None; + + type Input: Forward; + type Hidden: Forward; // The type of `hidden` layers; all hidden layers implement the same activation function + type Out: Forward; + + fn input(&self) -> &Self::Input; + + fn hidden(&self) -> &[Self::Hidden]; + + fn output(&self) -> &Self::Out; + + fn nlayers(&self) -> usize { + self.nhidden() + 2 + } + + fn nhidden(&self) -> usize { + Self::HIDDEN.unwrap_or_else(|| self.hidden().len()) + } +} \ No newline at end of file diff --git a/core/tests/random.rs b/core/tests/init.rs similarity index 100% rename from core/tests/random.rs rename to core/tests/init.rs From 417beb793f0e096212fd3823c43766597808bced Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Fri, 24 May 2024 10:51:47 -0500 Subject: [PATCH 17/19] update Signed-off-by: Joe McCain III --- core/src/math/arith.rs | 68 ++++++++ core/src/math/mod.rs | 3 + core/src/math/stats/mod.rs | 13 ++ core/src/math/stats/summary.rs | 151 ++++++++++++++++++ core/src/math/traits.rs | 56 +------ core/src/nn/mask/mask.rs | 6 +- core/src/nn/model.rs | 8 +- core/src/traits/misc/container.rs | 17 +- core/src/traits/misc/sequential.rs | 63 ++++++++ core/src/traits/mod.rs | 4 + models/linear/src/impls/impl_rand.rs | 58 +++---- models/linear/src/impls/model/impl_linear.rs | 23 +-- models/linear/src/model/layer.rs | 70 +++++--- models/transformers/src/attention/head.rs | 13 ++ .../src/attention/multi/config.rs | 6 +- .../src/attention/multi/multi_head.rs | 50 ++++-- 16 files changed, 471 insertions(+), 138 deletions(-) create mode 100644 core/src/math/arith.rs create mode 100644 core/src/math/stats/mod.rs create mode 100644 core/src/math/stats/summary.rs create mode 100644 core/src/traits/misc/sequential.rs diff --git a/core/src/math/arith.rs b/core/src/math/arith.rs new file mode 100644 
index 00000000..04c88c1a --- /dev/null +++ b/core/src/math/arith.rs @@ -0,0 +1,68 @@ +/* + Appellation: arith + Contrib: FL03 +*/ +use num::integer::Roots; +use num::traits::FromPrimitive; + +pub trait Root { + type Output; + + fn nth_root(&self, n: u32) -> Self::Output; + + fn sqrt(&self) -> Self::Output { + self.nth_root(2) + } + + fn cbrt(&self) -> Self::Output { + self.nth_root(3) + } +} + +macro_rules! impl_root { + (float $($T:ty),* $(,)?) => { + $( + impl_root!(@float $T); + )* + }; + ($($T:ty),* $(,)?) => { + $( + impl_root!(@impl $T); + )* + }; + + (@impl $T:ty) => { + impl Root for $T { + type Output = $T; + + fn nth_root(&self, n: u32) -> Self::Output { + Roots::nth_root(self, n) + } + } + }; + (@float $T:ty) => { + impl Root for $T { + type Output = $T; + + fn nth_root(&self, n: u32) -> Self::Output { + self.powf(<$T>::from_u32(n).unwrap().recip()) + } + } + }; +} + +impl_root!(float f32, f64); +impl_root! { + i8, + i16, + i32, + i64, + i128, + isize, + u8, + u16, + u32, + u64, + u128, + usize, +} diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs index d8084442..da193f09 100644 --- a/core/src/math/mod.rs +++ b/core/src/math/mod.rs @@ -9,8 +9,11 @@ //! as well as the `ndarray` crate. pub use self::traits::*; +pub mod arith; +pub mod stats; pub mod traits; pub(crate) mod prelude { + pub use super::stats::prelude::*; pub use super::traits::*; } diff --git a/core/src/math/stats/mod.rs b/core/src/math/stats/mod.rs new file mode 100644 index 00000000..7a0a3892 --- /dev/null +++ b/core/src/math/stats/mod.rs @@ -0,0 +1,13 @@ +/* + Appellation: stats + Contrib: FL03 +*/ +//! # Statistics +//! 
+pub use self::summary::*; + +mod summary; + +pub(crate) mod prelude { + pub use super::summary::*; +} diff --git a/core/src/math/stats/summary.rs b/core/src/math/stats/summary.rs new file mode 100644 index 00000000..35b5821d --- /dev/null +++ b/core/src/math/stats/summary.rs @@ -0,0 +1,151 @@ +/* + Appellation: summary + Contrib: FL03 +*/ +use crate::math::arith::Root; +use core::iter::{Product, Sum}; +use nd::{ArrayBase, Data, Dimension}; +use num::traits::{FromPrimitive, Num, NumOps, Pow}; + +/// This trait describes the fundamental methods of summary statistics. +/// These include the mean, standard deviation, variance, and more. +pub trait SummaryStatistics +where + Self::Item: FromPrimitive, + Self::Output: NumOps, +{ + type Item; + type Output; + + fn elems(&self) -> Self::Item { + Self::Item::from_usize(self.len()).unwrap() + } + + fn len(&self) -> usize; + + fn mean(&self) -> Self::Output { + self.sum() / self.elems() + } + + fn product(&self) -> Self::Output; + + fn sum(&self) -> Self::Output; + + fn std(&self) -> Self::Output; + + fn var(&self) -> Self::Output; +} + +/* + ************* Implementations ************* +*/ +impl<'a, T, I> SummaryStatistics for &'a I +where + I: Clone + ExactSizeIterator, + T: Copy + FromPrimitive + Num + Pow + Product + Root + Sum, +{ + type Item = T; + type Output = T; + + fn len(&self) -> usize { + ExactSizeIterator::len(*self) + } + + fn product(&self) -> Self::Output { + (*self).clone().product() + } + + fn sum(&self) -> Self::Output { + (*self).clone().sum() + } + + fn std(&self) -> Self::Output { + let mean = self.mean(); + let sum = (*self).clone().map(|x| (x - mean).pow(2)).sum::(); + (sum / self.elems()).sqrt() + } + + fn var(&self) -> Self::Output { + let mean = self.mean(); + let sum = (*self).clone().map(|x| (x - mean).pow(2)).sum::(); + sum / self.elems() + } +} + +macro_rules! impl_summary { + ($($T:ty),* $(,)?) 
=> { + $( + impl_summary!(@impl $T); + )* + }; + (@impl $T:ty) => { + + impl SummaryStatistics for $T + where + T: Copy + FromPrimitive + Num + Pow + Product + Root + Sum, + { + type Item = T; + type Output = T; + + fn len(&self) -> usize { + self.len() + } + + fn product(&self) -> Self::Output { + self.iter().copied().product::() + } + + fn sum(&self) -> Self::Output { + self.iter().copied().sum::() + } + + fn std(&self) -> Self::Output { + let mean = self.mean(); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + (sum / self.elems()).sqrt() + } + + fn var(&self) -> Self::Output { + let mean = self.mean(); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + sum / self.elems() + } + } + }; +} + +impl_summary!(Vec, [T]); + +impl SummaryStatistics for ArrayBase +where + A: Copy + FromPrimitive + Num + Pow + Product + Root + Sum, + D: Dimension, + S: Data, +{ + type Item = A; + type Output = A; + + fn len(&self) -> usize { + self.len() + } + + fn product(&self) -> Self::Output { + self.iter().copied().product::() + } + + fn sum(&self) -> Self::Output { + self.iter().copied().sum::() + } + + fn std(&self) -> Self::Output { + let mean = self.mean().unwrap_or_else(A::zero); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + (sum / self.elems()).sqrt() + } + + fn var(&self) -> Self::Output { + let mean = self.mean().unwrap_or_else(A::zero); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + sum / self.elems() + } +} diff --git a/core/src/math/traits.rs b/core/src/math/traits.rs index 51e3c1d6..d71d433d 100644 --- a/core/src/math/traits.rs +++ b/core/src/math/traits.rs @@ -2,63 +2,9 @@ Appellation: traits Contrib: FL03 */ -use core::iter::Sum; use nd::{Array, ArrayBase, Data, Dimension}; use num::complex::{Complex, ComplexFloat}; -use num::traits::{FromPrimitive, Num, Signed}; - -pub trait IterStats -where - T: FromPrimitive, -{ - type Output; - - fn elems(&self) -> T; - - fn mean(&self) -> 
Self::Output; - - fn std(&self) -> Self::Output - where - T: ComplexFloat; - - fn var(&self) -> Self::Output - where - T: ComplexFloat; -} - -impl IterStats for I -where - I: Clone + ExactSizeIterator, - T: Clone + FromPrimitive + Num + Sum, -{ - type Output = T; - - fn elems(&self) -> T { - T::from_usize(self.len()).unwrap() - } - - fn mean(&self) -> Self::Output { - self.clone().sum::() / self.elems() - } - - fn std(&self) -> Self::Output - where - T: ComplexFloat, - { - let mean = self.mean(); - let sum = self.clone().map(|x| (x - mean).powi(2)).sum::(); - (sum / self.elems()).sqrt() - } - - fn var(&self) -> Self::Output - where - T: ComplexFloat, - { - let mean = self.mean(); - let sum = self.clone().map(|x| (x - mean).powi(2)).sum::(); - sum / self.elems() - } -} +use num::traits::Signed; unary!( Abs::abs(self), diff --git a/core/src/nn/mask/mask.rs b/core/src/nn/mask/mask.rs index 083fc0b9..94da711a 100644 --- a/core/src/nn/mask/mask.rs +++ b/core/src/nn/mask/mask.rs @@ -35,7 +35,11 @@ where res } - pub fn mask_inplace<'a, A, T, F>(&mut self, data: &'a mut ArrayBase, fill: A) -> &'a mut ArrayBase + pub fn mask_inplace<'a, A, T, F>( + &mut self, + data: &'a mut ArrayBase, + fill: A, + ) -> &'a mut ArrayBase where A: Clone, S: Data, diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs index 47d7f485..316d03e7 100644 --- a/core/src/nn/model.rs +++ b/core/src/nn/model.rs @@ -29,11 +29,11 @@ where fn context(&self) -> Self::Ctx; } -/// This trait describes any neural networks or models that +/// This trait describes any neural networks or models that /// adhears to the deep netural network architecture. /// This design considers a single input and output layer, while /// allowing for any number of hidden layers to be persisted. 
-/// +/// /// The `HIDDEN` constant is used to specify the number of hidden layers /// and is used to compute the total number of layers (HIDDEN + 2) pub trait DeepNeuralNetwork: Forward { @@ -49,11 +49,11 @@ pub trait DeepNeuralNetwork: Forward { fn output(&self) -> &Self::Out; - fn nlayers(&self) -> usize { + fn nlayers(&self) -> usize { self.nhidden() + 2 } fn nhidden(&self) -> usize { Self::HIDDEN.unwrap_or_else(|| self.hidden().len()) } -} \ No newline at end of file +} diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs index a295d90f..5243fc79 100644 --- a/core/src/traits/misc/container.rs +++ b/core/src/traits/misc/container.rs @@ -5,18 +5,29 @@ use crate::traits::Dimensional; pub trait Container { - type Data: Data; + type Data: ContainerData; } -pub trait Data { +pub trait ContainerData { type Elem; } /// This trait describes the basic operations for any n-dimensional container. pub trait NdContainer: Dimensional { - type Data: Data; + type Data: ContainerData; fn as_slice(&self) -> &[A]; fn as_mut_slice(&mut self) -> &mut [A]; } + +/* + ************* Implementations ************* +*/ +impl ContainerData for Vec { + type Elem = T; +} + +impl Container for Vec { + type Data = Vec; +} diff --git a/core/src/traits/misc/sequential.rs b/core/src/traits/misc/sequential.rs new file mode 100644 index 00000000..8e92192e --- /dev/null +++ b/core/src/traits/misc/sequential.rs @@ -0,0 +1,63 @@ +/* + Appellation: sequential [traits::misc] + Contrib: FL03 +*/ +use num::traits::FromPrimitive; + +/// A trait for sequential data structures; +/// This trait is implemented for iterators that have a known length. 
+pub trait Sequence { + const LENGTH: Option = None; + + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn elems(&self) -> T + where + T: FromPrimitive, + { + T::from_usize(self.len()).unwrap() + } +} + +pub trait SequenceIter { + type Item; + + fn len(&self) -> usize; +} +/* + ************* Implementations ************* +*/ +impl SequenceIter for I +where + I: ExactSizeIterator, +{ + type Item = T; + + fn len(&self) -> usize { + self.len() + } +} + +impl Sequence for Vec { + fn len(&self) -> usize { + self.len() + } +} + +impl Sequence for [T] { + fn len(&self) -> usize { + self.len() + } +} + +impl Sequence for [T; N] { + const LENGTH: Option = Some(N); + + fn len(&self) -> usize { + N + } +} diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs index 2ae4d47a..5c48f8af 100644 --- a/core/src/traits/mod.rs +++ b/core/src/traits/mod.rs @@ -32,12 +32,16 @@ pub mod misc { pub mod adjust; #[doc(hidden)] pub mod container; + #[doc(hidden)] + pub mod sequential; + #[doc(hidden)] pub mod store; pub mod toggle; pub(crate) mod prelude { pub use super::adjust::*; pub use super::container::*; + pub use super::sequential::*; pub use super::store::*; pub use super::toggle::*; } diff --git a/models/linear/src/impls/impl_rand.rs b/models/linear/src/impls/impl_rand.rs index f2e602e1..28bb0126 100644 --- a/models/linear/src/impls/impl_rand.rs +++ b/models/linear/src/impls/impl_rand.rs @@ -4,7 +4,7 @@ */ #![cfg(feature = "rand")] -use crate::params::{ParamMode, ParamsBase}; +use crate::params::{LinearParams, ParamMode, ParamsBase}; use crate::{bias_dim, Linear}; use concision::init::rand::Rng; use concision::init::rand_distr::{uniform::SampleUniform, Distribution, StandardNormal}; @@ -12,30 +12,32 @@ use concision::{Initialize, InitializeExt}; use nd::*; use num::Float; -impl Linear +impl Linear where A: Clone + Float, D: RemoveAxis, K: ParamMode, + S: DataOwned, StandardNormal: Distribution, { - pub fn uniform(self) -> Self + pub fn 
uniform(self) -> Linear> where A: SampleUniform, ::Sampler: Clone, { - Self { + Linear { + config: self.config, params: self.params.uniform(), - ..self } } } -impl crate::LinearParams +impl ParamsBase where A: Clone + Float + SampleUniform, D: RemoveAxis, K: ParamMode, + S: RawData, StandardNormal: Distribution, ::Sampler: Clone, { @@ -48,42 +50,42 @@ where self.dk().sqrt() } - pub fn uniform(self) -> Self { + pub fn uniform(self) -> LinearParams + where + S: DataOwned, + { let dk = self.dk_sqrt(); self.uniform_between(-dk, dk) } - pub fn uniform_between(self, low: A, high: A) -> Self { - if self.is_biased() && !self.bias.is_some() { + pub fn uniform_between(self, low: A, high: A) -> LinearParams + where + S: DataOwned, + { + let weight = Array::uniform_between(self.raw_dim(), low, high); + let bias = if self.is_biased() && !self.bias.is_some() { let b_dim = bias_dim(self.raw_dim()); - Self { - bias: Some(Array::uniform_between(b_dim, low, high)), - weight: Array::uniform_between(self.raw_dim(), low, high), - _mode: self._mode, - } + Some(Array::uniform_between(b_dim, low, high)) } else if !self.is_biased() && self.bias.is_some() { - Self { - bias: None, - weight: Array::uniform_between(self.raw_dim(), low, high), - _mode: self._mode, - } + None } else { - Self { - bias: self - .bias - .as_ref() - .map(|b| Array::uniform_between(b.raw_dim(), low, high)), - weight: Array::uniform_between(self.raw_dim(), low, high), - _mode: self._mode, - } + self.bias + .as_ref() + .map(|b| Array::uniform_between(b.raw_dim(), low, high)) + }; + LinearParams { + weight, + bias, + _mode: core::marker::PhantomData::, } } } -impl Initialize for Linear +impl Initialize for Linear where D: RemoveAxis, K: ParamMode, + S: DataOwned, StandardNormal: Distribution, { type Data = OwnedRepr; diff --git a/models/linear/src/impls/model/impl_linear.rs b/models/linear/src/impls/model/impl_linear.rs index 03c97f82..49ee85ba 100644 --- a/models/linear/src/impls/model/impl_linear.rs +++ 
b/models/linear/src/impls/model/impl_linear.rs @@ -2,47 +2,52 @@ Appellation: impl_linear Contrib: FL03 */ -use crate::{Config, Linear, LinearParams, ParamMode}; +use crate::{Config, Linear, ParamMode, ParamsBase}; use core::borrow::{Borrow, BorrowMut}; -use nd::RemoveAxis; +use nd::{DataOwned, Ix2, RawData, RemoveAxis}; -impl Linear +impl Linear where K: ParamMode, + S: RawData, { pub fn from_features(inputs: usize, outputs: usize) -> Self where A: Clone + Default, + S: DataOwned, { let config = Config::std(inputs, outputs); - let params = LinearParams::new(config.dim()); + let params = ParamsBase::new(config.dim()); Self { config, params } } } -impl Borrow> for Linear +impl Borrow> for Linear where D: RemoveAxis, + S: RawData, { fn borrow(&self) -> &Config { &self.config } } -impl Borrow> for Linear +impl Borrow> for Linear where D: RemoveAxis, + S: RawData, { - fn borrow(&self) -> &LinearParams { + fn borrow(&self) -> &ParamsBase { &self.params } } -impl BorrowMut> for Linear +impl BorrowMut> for Linear where D: RemoveAxis, + S: RawData, { - fn borrow_mut(&mut self) -> &mut LinearParams { + fn borrow_mut(&mut self) -> &mut ParamsBase { &mut self.params } } diff --git a/models/linear/src/model/layer.rs b/models/linear/src/model/layer.rs index 486bb43d..9bb00f52 100644 --- a/models/linear/src/model/layer.rs +++ b/models/linear/src/model/layer.rs @@ -3,39 +3,56 @@ Contrib: FL03 */ use super::{Config, Layout}; -use crate::{Biased, LinearParams, ParamMode, Unbiased}; +use crate::{Biased, LinearParams, ParamMode, ParamsBase, Unbiased}; use concision::prelude::{Predict, Result}; use nd::prelude::*; -use nd::RemoveAxis; +use nd::{DataOwned, OwnedRepr, RawData, RemoveAxis}; /// An implementation of a linear model. /// /// In an effort to streamline the api, the [Linear] model relies upon a [ParamMode] type ([Biased] or [Unbiased](crate::params::mode::Unbiased)) /// which enables the model to automatically determine whether or not to include a bias term. 
Doing so allows the model to inherit several methods /// familar to the underlying [ndarray](https://docs.rs/ndarray) crate. -pub struct Linear +pub struct Linear> where D: Dimension, + S: RawData, { pub(crate) config: Config, - pub(crate) params: LinearParams, + pub(crate) params: ParamsBase, } -impl Linear +impl Linear> +where + K: ParamMode, +{ + pub fn std(inputs: usize, outputs: usize) -> Self + where + A: Default, + { + let config = Config::::new().with_shape((inputs, outputs)); + let params = ParamsBase::new(config.features()); + Linear { config, params } + } +} + +impl Linear where D: RemoveAxis, K: ParamMode, + S: RawData, { - mbuilder!(new where A: Default); - mbuilder!(ones where A: Clone + num::One); - mbuilder!(zeros where A: Clone + num::Zero); + mbuilder!(new where A: Default, S: DataOwned); + mbuilder!(ones where A: Clone + num::One, S: DataOwned); + mbuilder!(zeros where A: Clone + num::Zero, S: DataOwned); pub fn from_config(config: Config) -> Self where A: Clone + Default, K: ParamMode, + S: DataOwned, { - let params = LinearParams::new(config.dim()); + let params = ParamsBase::new(config.dim()); Self { config, params } } @@ -43,13 +60,14 @@ where where A: Clone + Default, K: ParamMode, + S: DataOwned, { let config = Config::::new().with_layout(layout); - let params = LinearParams::new(config.dim()); + let params = ParamsBase::new(config.dim()); Self { config, params } } - pub fn from_params(params: LinearParams) -> Self { + pub fn from_params(params: ParamsBase) -> Self { let config = Config::::new().with_shape(params.raw_dim()); Self { config, params } } @@ -67,26 +85,27 @@ where &self.config } - pub fn weights(&self) -> &Array { + pub fn weights(&self) -> &ArrayBase { self.params.weights() } - pub fn weights_mut(&mut self) -> &mut Array { + pub fn weights_mut(&mut self) -> &mut ArrayBase { self.params.weights_mut() } - pub const fn params(&self) -> &LinearParams { + pub const fn params(&self) -> &ParamsBase { &self.params } - pub fn 
params_mut(&mut self) -> &mut LinearParams { + pub fn params_mut(&mut self) -> &mut ParamsBase { &mut self.params } - pub fn into_biased(self) -> Linear + pub fn into_biased(self) -> Linear where A: Default, K: 'static, + S: DataOwned, { Linear { config: self.config.into_biased(), @@ -94,10 +113,11 @@ where } } - pub fn into_unbiased(self) -> Linear + pub fn into_unbiased(self) -> Linear where A: Default, K: 'static, + S: DataOwned, { Linear { config: self.config.into_unbiased(), @@ -130,40 +150,44 @@ where concision::dimensional!(params()); } -impl Linear +impl Linear where D: RemoveAxis, + S: RawData, { pub fn biased(shape: Sh) -> Self where A: Default, + S: DataOwned, Sh: ShapeBuilder, { let config = Config::::new().with_shape(shape); - let params = LinearParams::biased(config.dim()); + let params = ParamsBase::biased(config.dim()); Linear { config, params } } - pub fn bias(&self) -> &Array { + pub fn bias(&self) -> &ArrayBase { self.params().bias() } - pub fn bias_mut(&mut self) -> &mut Array { + pub fn bias_mut(&mut self) -> &mut ArrayBase { self.params_mut().bias_mut() } } -impl Linear +impl Linear where D: RemoveAxis, + S: RawData, { pub fn unbiased(shape: Sh) -> Self where A: Default, + S: DataOwned, Sh: ShapeBuilder, { let config = Config::::new().with_shape(shape); - let params = LinearParams::unbiased(config.dim()); + let params = ParamsBase::unbiased(config.dim()); Linear { config, params } } } diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index 1deebb6f..e80fdda9 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -39,6 +39,19 @@ where pub(crate) params: QkvBase, } +impl AttentionHead +where + S: RawData, +{ + pub fn std(dm: usize, dk: usize) -> Self + where + A: Default, + S: DataOwned, + { + Self::from_params(QkvBase::new((dk, dm))) + } +} + impl AttentionHead where D: Dimension, diff --git a/models/transformers/src/attention/multi/config.rs 
b/models/transformers/src/attention/multi/config.rs index 2fa53c3f..58c510c6 100644 --- a/models/transformers/src/attention/multi/config.rs +++ b/models/transformers/src/attention/multi/config.rs @@ -3,6 +3,10 @@ Contrib: FL03 */ +pub(crate) fn dk(d_model: usize, heads: usize) -> usize { + d_model / heads +} + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Config { @@ -20,7 +24,7 @@ impl Config { } pub fn dk(&self) -> usize { - self.d_model() / self.heads() + dk(self.d_model(), self.heads()) } pub fn heads(&self) -> usize { diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs index be427e4d..36a4051d 100644 --- a/models/transformers/src/attention/multi/multi_head.rs +++ b/models/transformers/src/attention/multi/multi_head.rs @@ -6,50 +6,72 @@ use super::Config; use crate::AttentionHead; use linear::{Biased, Linear}; use nd::prelude::*; +use nd::{DataOwned, OwnedRepr, RawData}; -#[derive(Default)] -pub struct MultiHeadAttention +pub struct MultiHeadAttention> where D: Dimension, + S: RawData, { pub(crate) config: Config, - pub(crate) head: Option>, - pub(crate) linears: Vec>, + pub(crate) head: AttentionHead, + pub(crate) linears: Vec>, } -impl MultiHeadAttention +impl MultiHeadAttention where D: Dimension, + S: RawData, { pub const fn config(&self) -> &Config { &self.config } - pub fn head(&self) -> Option<&AttentionHead> { - self.head.as_ref() + pub const fn head(&self) -> &AttentionHead { + &self.head } - pub fn head_mut(&mut self) -> Option<&mut AttentionHead> { - self.head.as_mut() + pub fn head_mut(&mut self) -> &mut AttentionHead { + &mut self.head } - pub fn linears(&self) -> &[Linear] { + pub fn linears(&self) -> &[Linear] { &self.linears } } -impl MultiHeadAttention { - pub fn std(config: Config) -> Self +impl MultiHeadAttention +where + S: RawData, +{ + pub fn std(d_model: usize, 
heads: usize) -> Self where A: Clone + Default, + S: DataOwned, { + let config = Config::new().d_model(d_model).heads(heads).build(); let linears = (0..4) - .map(|_| Linear::from_features(config.d_model(), config.d_model())) + .map(|_| Linear::from_features(d_model, d_model)) .collect(); Self { - head: None, config, + head: AttentionHead::std(d_model, config.dk()), linears, } } } + +impl Default for MultiHeadAttention +where + A: Default, + D: Dimension, + S: DataOwned, +{ + fn default() -> Self { + Self { + config: Config::default(), + head: AttentionHead::default(), + linears: Vec::new(), + } + } +} From b2a430fee49a944778548260a44d9826adb98e13 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Fri, 24 May 2024 11:20:54 -0500 Subject: [PATCH 18/19] update Signed-off-by: Joe McCain III --- data/src/kernel/mod.rs | 0 data/src/lib.rs | 2 + data/src/traits/data/container.rs | 25 ++++++++ data/src/traits/data/repr.rs | 15 +++++ data/src/traits/mod.rs | 18 +++++- data/src/traits/shape.rs | 96 +++++++++++++++++++++++++++++++ data/src/types/kernel.rs | 6 ++ data/src/types/mod.rs | 11 ++++ 8 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 data/src/kernel/mod.rs create mode 100644 data/src/traits/data/container.rs create mode 100644 data/src/traits/data/repr.rs create mode 100644 data/src/traits/shape.rs create mode 100644 data/src/types/kernel.rs create mode 100644 data/src/types/mod.rs diff --git a/data/src/kernel/mod.rs b/data/src/kernel/mod.rs new file mode 100644 index 00000000..e69de29b diff --git a/data/src/lib.rs b/data/src/lib.rs index 0186ca18..5f1d6ead 100644 --- a/data/src/lib.rs +++ b/data/src/lib.rs @@ -22,9 +22,11 @@ pub mod params; pub mod preproc; pub mod tensor; pub mod traits; +pub mod types; pub mod prelude { pub use super::dataset::*; pub use super::params::prelude::*; pub use super::traits::prelude::*; + pub use super::types::prelude::*; } diff --git a/data/src/traits/data/container.rs b/data/src/traits/data/container.rs new file 
mode 100644 index 00000000..0d9e0044 --- /dev/null +++ b/data/src/traits/data/container.rs @@ -0,0 +1,25 @@ +/* + Appellation: container + Contrib: FL03 +*/ +use crate::traits::{ContainerRepr, Dimensional}; + +pub trait Container { + type Data: ContainerRepr; +} + +/// This trait describes the basic operations for any n-dimensional container. +pub trait NdContainer: Dimensional { + type Data: ContainerRepr; + + fn as_slice(&self) -> &[A]; + + fn as_mut_slice(&mut self) -> &mut [A]; +} + +/* + ************* Implementations ************* +*/ +impl Container for Vec { + type Data = Vec; +} diff --git a/data/src/traits/data/repr.rs b/data/src/traits/data/repr.rs new file mode 100644 index 00000000..3c583d95 --- /dev/null +++ b/data/src/traits/data/repr.rs @@ -0,0 +1,15 @@ +/* + Appellation: data + Contrib: FL03 +*/ + +pub trait ContainerRepr { + type Elem; +} + +/* + ************* Implementations ************* +*/ +impl ContainerRepr for Vec { + type Elem = T; +} diff --git a/data/src/traits/mod.rs b/data/src/traits/mod.rs index 83d994c3..cf2a290a 100644 --- a/data/src/traits/mod.rs +++ b/data/src/traits/mod.rs @@ -2,10 +2,26 @@ Appellation: traits Contrib: FL03 */ -pub use self::prelude::*; +pub use self::{data::*, records::*, shape::*}; pub mod records; +pub mod shape; + +#[doc(hidden)] +pub mod data { + pub use self::{container::*, repr::*}; + + pub(crate) mod container; + pub(crate) mod repr; + + pub(crate) mod prelude { + pub use super::container::*; + pub use super::repr::*; + } +} pub(crate) mod prelude { + pub use super::data::prelude::*; pub use super::records::*; + pub use super::shape::*; } diff --git a/data/src/traits/shape.rs b/data/src/traits/shape.rs new file mode 100644 index 00000000..a8127e46 --- /dev/null +++ b/data/src/traits/shape.rs @@ -0,0 +1,96 @@ +/* + Appellation: shape + Contrib: FL03 +*/ +use nd::{ArrayBase, Dimension, RawData}; + +pub trait IntoPattern { + type Pattern; + + fn into_pattern(self) -> Self::Pattern; +} + +/// [Dimensional] 
provides a common interface for containers to access their shape and dimension. +pub trait Dimensional { + const RANK: Option = None; + + type Dim: IntoPattern; + + fn dim(&self) -> ::Pattern { + self.raw_dim().into_pattern() + } + + fn is_scalar(&self) -> bool { + self.rank() == 0 || self.shape().iter().all(|x| *x == 1) + } + + fn rank(&self) -> usize { + Self::RANK.unwrap_or(self.shape().len()) + } + + fn raw_dim(&self) -> Self::Dim; + + fn size(&self) -> usize { + self.shape().iter().product() + } + + fn shape(&self) -> &[usize]; +} + +/* + ******** implementations ******** +*/ +impl IntoPattern for D +where + D: Dimension, +{ + type Pattern = D::Pattern; + + fn into_pattern(self) -> Self::Pattern { + Dimension::into_pattern(self) + } +} + +// impl Dimensional for D +// where +// D: Dimension + IntoPattern, +// { +// type Dim = D; + +// fn dim(&self) -> D::Pattern { +// self.clone().into_pattern() +// } + +// fn raw_dim(&self) -> D { +// self.clone() +// } + +// fn shape(&self) -> &[usize] { +// D::slice(self) +// } +// } + +impl Dimensional for ArrayBase +where + D: Dimension, + S: RawData, +{ + const RANK: Option = D::NDIM; + type Dim = D; + + fn dim(&self) -> D::Pattern { + ArrayBase::dim(self) + } + + fn raw_dim(&self) -> D { + ArrayBase::raw_dim(self) + } + + fn shape(&self) -> &[usize] { + ArrayBase::shape(self) + } + + fn size(&self) -> usize { + ArrayBase::len(self) + } +} diff --git a/data/src/types/kernel.rs b/data/src/types/kernel.rs new file mode 100644 index 00000000..248ad95f --- /dev/null +++ b/data/src/types/kernel.rs @@ -0,0 +1,6 @@ +/* + Appellation: kernel + Contrib: FL03 +*/ + +pub struct Kernel; diff --git a/data/src/types/mod.rs b/data/src/types/mod.rs new file mode 100644 index 00000000..b8ca6da5 --- /dev/null +++ b/data/src/types/mod.rs @@ -0,0 +1,11 @@ +/* + Appellation: types + Contrib: FL03 +*/ +pub use self::kernel::Kernel; + +pub mod kernel; + +pub(crate) mod prelude { + pub use super::kernel::Kernel; +} From 
cb2f72cd83cbf43a2f16c0c3969b72f838ea6508 Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Fri, 24 May 2024 11:52:31 -0500 Subject: [PATCH 19/19] update Signed-off-by: Joe McCain III --- core/src/traits/arr/misc.rs | 44 ----- core/src/traits/arr/tensor.rs | 317 ------------------------------ core/src/traits/misc/container.rs | 33 ---- core/src/traits/mod.rs | 15 +- data/src/traits/build.rs | 140 +++++++++++++ data/src/traits/ext/ndarray.rs | 45 +++++ data/src/traits/ext/ndtensor.rs | 52 +++++ data/src/traits/ext/ndview.rs | 157 +++++++++++++++ data/src/traits/mod.rs | 19 +- 9 files changed, 418 insertions(+), 404 deletions(-) delete mode 100644 core/src/traits/arr/tensor.rs delete mode 100644 core/src/traits/misc/container.rs create mode 100644 data/src/traits/build.rs create mode 100644 data/src/traits/ext/ndarray.rs create mode 100644 data/src/traits/ext/ndtensor.rs create mode 100644 data/src/traits/ext/ndview.rs diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs index 51a5996d..40857596 100644 --- a/core/src/traits/arr/misc.rs +++ b/core/src/traits/arr/misc.rs @@ -5,30 +5,6 @@ use nd::prelude::*; use nd::{DataMut, RawData}; -/// [Dimensional] provides a common interface for containers to access their shape and dimension. -pub trait Dimensional { - const RANK: Option = None; - - type Pattern; - - fn dim(&self) -> Self::Pattern; - - fn is_scalar(&self) -> bool { - self.rank() == 0 || self.shape().iter().all(|x| *x == 1) - } - - fn rank(&self) -> usize { - Self::RANK.unwrap_or(self.shape().len()) - } - - fn raw_dim(&self) -> D; - - fn size(&self) -> usize { - self.shape().iter().product() - } - - fn shape(&self) -> &[usize]; -} /// This trait is used to fill an array with a value based on a mask. /// The mask is a boolean array of the same shape as the array. 
pub trait MaskFill @@ -51,26 +27,6 @@ pub trait IsSquare { /* ******** implementations ******** */ -impl Dimensional for ArrayBase -where - D: Dimension, - S: RawData, -{ - const RANK: Option = D::NDIM; - type Pattern = D::Pattern; - - fn shape(&self) -> &[usize] { - ArrayBase::shape(self) - } - - fn dim(&self) -> Self::Pattern { - ArrayBase::dim(self) - } - - fn raw_dim(&self) -> D { - ArrayBase::raw_dim(self) - } -} impl MaskFill for ArrayBase where diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs deleted file mode 100644 index 12571e0f..00000000 --- a/core/src/traits/arr/tensor.rs +++ /dev/null @@ -1,317 +0,0 @@ -/* - Appellation: generator - Contrib: FL03 -*/ -use super::Dimensional; -use nd::iter::{Iter, IterMut}; -use nd::prelude::*; -use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData}; -use num::{One, Zero}; - -pub trait NdArray -where - D: Dimension, -{ - type Data: RawData; - - fn as_slice(&self) -> &[A]; - - fn as_mut_slice(&mut self) -> &mut [A]; - - fn iter(&self) -> Iter<'_, A, D>; - - fn iter_mut(&mut self) -> IterMut<'_, A, D>; - - fn map(&self, f: F) -> Self - where - F: FnMut(&A) -> A; - - fn mapv(&mut self, f: F) - where - A: Clone, - F: FnMut(A) -> A; -} - -pub trait NdIter -where - D: Dimension, -{ - type Data: RawData; - - fn iter(&self) -> Iter<'_, A, D>; - - fn iter_mut(&mut self) -> IterMut<'_, A, D>; -} - -/// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase) -pub trait NdBuilder -where - D: Dimension, -{ - type Data: RawData; - - /// Create a new array with the given shape whose elements are set to the default value of the element type. 
- fn default(shape: Sh) -> Self - where - A: Default, - Sh: ShapeBuilder, - Self::Data: DataOwned; - - fn fill(shape: Sh, elem: A) -> Self - where - A: Clone, - Sh: ShapeBuilder, - Self::Data: DataOwned; - - fn ones(shape: Sh) -> Self - where - A: Clone + One, - Sh: ShapeBuilder, - Self::Data: DataOwned; - - fn zeros(shape: Sh) -> Self - where - A: Clone + Zero, - Sh: ShapeBuilder, - Self::Data: DataOwned; -} - -pub trait NdBuilderExt: NdBuilder -where - D: Dimension, - Self: Dimensional + Sized, -{ - fn default_like(&self) -> Self - where - A: Default, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::default(self.dim()) - } - - fn fill_like(&self, elem: A) -> Self - where - A: Clone, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::fill(self.dim(), elem) - } - - fn ones_like(&self) -> Self - where - A: Clone + One, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::ones(self.dim()) - } - - fn zeros_like(&self) -> Self - where - A: Clone + Zero, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::zeros(self.dim()) - } -} - -pub trait AsOwned -where - D: Dimension, - S: RawData, -{ - type Output; - - fn into_owned(self) -> Self::Output - where - S: Data, - S::Elem: Clone; - - fn to_owned(&self) -> Self::Output - where - S: Data, - S::Elem: Clone; -} - -pub trait AsShared -where - D: Dimension, - S: RawData, -{ - type Output; - - fn into_shared(self) -> Self::Output - where - S: DataOwned, - S::Elem: Clone; - - fn to_shared(&self) -> Self::Output - where - S: DataOwned, - S::Elem: Clone; -} - -pub trait NdView, D = Ix2>: AsOwned + AsShared -where - D: Dimension, - S: RawData, -{ - fn view(&self) -> ArrayView<'_, A, D> - where - A: Clone, - S: Data; - - fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> - where - A: Clone, - S: DataMut; -} - -pub trait View -where - D: Dimension, -{ - type Data: RawData; - type Output; - - fn view(&self) -> Self::Output - where - A: Clone, - Self::Data: Data; -} -pub trait ViewMut: View -where - D: Dimension, 
-{ - fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> - where - A: Clone, - Self::Data: DataMut; -} - -/* - ************* Implementations ************* -*/ -impl NdBuilder for ArrayBase -where - D: Dimension, - S: RawData, -{ - type Data = S; - - fn default(shape: Sh) -> Self - where - A: Default, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - ArrayBase::default(shape) - } - - fn fill(shape: Sh, elem: A) -> Self - where - A: Clone, - S: DataOwned, - Sh: ShapeBuilder, - { - ArrayBase::from_elem(shape, elem) - } - - fn ones(shape: Sh) -> Self - where - A: Clone + One, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - ArrayBase::ones(shape) - } - - fn zeros(shape: Sh) -> Self - where - A: Clone + Zero, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - ArrayBase::zeros(shape) - } -} - -impl NdBuilderExt for U -where - U: Dimensional + NdBuilder, - D: Dimension, -{ -} - -impl AsOwned for ArrayBase -where - D: Dimension, - S: RawData, -{ - type Output = Array; - - fn into_owned(self) -> Self::Output - where - A: Clone, - S: Data, - { - self.into_owned() - } - - fn to_owned(&self) -> Self::Output - where - A: Clone, - S: Data, - { - self.to_owned() - } -} - -impl AsShared for ArrayBase -where - D: Dimension, - S: RawData, -{ - type Output = ArcArray; - - fn into_shared(self) -> Self::Output - where - A: Clone, - S: DataOwned, - { - self.into_shared() - } - - fn to_shared(&self) -> Self::Output - where - A: Clone, - S: DataOwned, - { - self.to_shared() - } -} - -impl NdView for ArrayBase -where - D: Dimension, - S: RawData, -{ - fn view(&self) -> ArrayView<'_, A, D> - where - A: Clone, - S: Data, - { - self.view() - } - - fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> - where - A: Clone, - S: DataMut, - { - self.view_mut() - } -} diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs deleted file mode 100644 index 5243fc79..00000000 --- a/core/src/traits/misc/container.rs +++ /dev/null @@ -1,33 +0,0 @@ -/* - Appellation: container - 
Contrib: FL03 -*/ -use crate::traits::Dimensional; - -pub trait Container { - type Data: ContainerData; -} - -pub trait ContainerData { - type Elem; -} - -/// This trait describes the basic operations for any n-dimensional container. -pub trait NdContainer: Dimensional { - type Data: ContainerData; - - fn as_slice(&self) -> &[A]; - - fn as_mut_slice(&mut self) -> &mut [A]; -} - -/* - ************* Implementations ************* -*/ -impl ContainerData for Vec { - type Elem = T; -} - -impl Container for Vec { - type Data = Vec; -} diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs index 5c48f8af..9dc12247 100644 --- a/core/src/traits/mod.rs +++ b/core/src/traits/mod.rs @@ -17,30 +17,27 @@ pub mod arr { mod misc; mod ops; mod reshape; - mod tensor; pub(crate) mod prelude { pub use super::create::*; pub use super::misc::*; pub use super::ops::*; pub use super::reshape::*; - pub use super::tensor::*; } } pub mod misc { - pub mod adjust; - #[doc(hidden)] - pub mod container; + pub use self::prelude::*; + + pub(crate) mod adjust; #[doc(hidden)] - pub mod sequential; + pub(crate) mod sequential; #[doc(hidden)] - pub mod store; - pub mod toggle; + pub(crate) mod store; + pub(crate) mod toggle; pub(crate) mod prelude { pub use super::adjust::*; - pub use super::container::*; pub use super::sequential::*; pub use super::store::*; pub use super::toggle::*; diff --git a/data/src/traits/build.rs b/data/src/traits/build.rs new file mode 100644 index 00000000..7944014b --- /dev/null +++ b/data/src/traits/build.rs @@ -0,0 +1,140 @@ +/* + Appellation: ndarray + Contrib: FL03 +*/ +use crate::traits::Dimensional; +use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder}; +use num::{One, Zero}; + +/// [NdBuilder] describes common creation routines for [ArrayBase] +pub trait NdBuilder +where + D: Dimension, +{ + type Data: RawData; + + /// Create a new array with the given shape whose elements are set to the default value of the element type. 
+ fn default(shape: Sh) -> Self + where + A: Default, + Sh: ShapeBuilder, + Self::Data: DataOwned; + + fn fill(shape: Sh, elem: A) -> Self + where + A: Clone, + Sh: ShapeBuilder, + Self::Data: DataOwned; + + fn ones(shape: Sh) -> Self + where + A: Clone + One, + Sh: ShapeBuilder, + Self::Data: DataOwned; + + fn zeros(shape: Sh) -> Self + where + A: Clone + Zero, + Sh: ShapeBuilder, + Self::Data: DataOwned; +} + +pub trait NdBuilderExt: NdBuilder + Sized +where + D: Dimension, +{ + fn dim(&self) -> D::Pattern; + + fn default_like(&self) -> Self + where + A: Default, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::default(self.dim()) + } + + fn fill_like(&self, elem: A) -> Self + where + A: Clone, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::fill(self.dim(), elem) + } + + fn ones_like(&self) -> Self + where + A: Clone + One, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::ones(self.dim()) + } + + fn zeros_like(&self) -> Self + where + A: Clone + Zero, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::zeros(self.dim()) + } +} + +/* + ************* Implementations ************* +*/ +impl NdBuilder for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Data = S; + + fn default(shape: Sh) -> Self + where + A: Default, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + ArrayBase::default(shape) + } + + fn fill(shape: Sh, elem: A) -> Self + where + A: Clone, + S: DataOwned, + Sh: ShapeBuilder, + { + ArrayBase::from_elem(shape, elem) + } + + fn ones(shape: Sh) -> Self + where + A: Clone + One, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + ArrayBase::ones(shape) + } + + fn zeros(shape: Sh) -> Self + where + A: Clone + Zero, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + ArrayBase::zeros(shape) + } +} + +impl NdBuilderExt for U +where + U: Dimensional + NdBuilder, + D: Dimension, +{ + fn dim(&self) -> D::Pattern { + self.dim() + } +} diff --git a/data/src/traits/ext/ndarray.rs b/data/src/traits/ext/ndarray.rs new file mode 
100644 index 00000000..6d3e6ed8 --- /dev/null +++ b/data/src/traits/ext/ndarray.rs @@ -0,0 +1,45 @@ +/* + Appellation: ndarray + Contrib: FL03 +*/ +use nd::iter::{Iter, IterMut}; +use nd::{Dimension, RawData}; + +pub trait NdArray +where + D: Dimension, +{ + type Data: RawData; + + fn as_slice(&self) -> &[A]; + + fn as_mut_slice(&mut self) -> &mut [A]; + + fn iter(&self) -> Iter<'_, A, D>; + + fn iter_mut(&mut self) -> IterMut<'_, A, D>; + + fn map(&self, f: F) -> Self + where + F: FnMut(&A) -> A; + + fn mapv(&mut self, f: F) + where + A: Clone, + F: FnMut(A) -> A; +} + +pub trait NdIter +where + D: Dimension, +{ + type Data: RawData; + + fn iter(&self) -> Iter<'_, A, D>; + + fn iter_mut(&mut self) -> IterMut<'_, A, D>; +} + +/* + ************* Implementations ************* +*/ diff --git a/data/src/traits/ext/ndtensor.rs b/data/src/traits/ext/ndtensor.rs new file mode 100644 index 00000000..0edbd756 --- /dev/null +++ b/data/src/traits/ext/ndtensor.rs @@ -0,0 +1,52 @@ +/* + Appellation: ndtensor + Contrib: FL03 +*/ +use nd::{ArrayBase, Data, Dimension, RawData}; +use num::complex::ComplexFloat; +use num::traits::Float; + +pub trait Scalar { + type R: Float; +} + +pub trait NdTensor +where + A: ComplexFloat, + D: Dimension, +{ + type Data: RawData; + type Output; + + fn conj(&self) -> Self::Output; + + fn cos(&self) -> Self::Output; + + fn cosh(&self) -> Self::Output; +} + +/* + ************* Implementations ************* +*/ +impl NdTensor for ArrayBase +where + A: ComplexFloat, + D: Dimension, + S: Data, + Self: Clone, +{ + type Data = S; + type Output = nd::Array; + + fn conj(&self) -> Self::Output { + self.mapv(|x| x.conj()) + } + + fn cos(&self) -> Self::Output { + self.mapv(|x| x.cos()) + } + + fn cosh(&self) -> Self::Output { + self.mapv(|x| x.cosh()) + } +} diff --git a/data/src/traits/ext/ndview.rs b/data/src/traits/ext/ndview.rs new file mode 100644 index 00000000..56b88c3f --- /dev/null +++ b/data/src/traits/ext/ndview.rs @@ -0,0 +1,157 @@ +/* + 
Appellation: ndview + Contrib: FL03 +*/ +/* + Appellation: ndarray + Contrib: FL03 +*/ +use nd::prelude::*; +use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData}; + +pub trait AsOwned +where + D: Dimension, + S: RawData, +{ + type Output; + + fn into_owned(self) -> Self::Output + where + A: Clone, + S: Data; + + fn to_owned(&self) -> Self::Output + where + A: Clone, + S: Data; +} + +pub trait AsShared +where + D: Dimension, + S: RawData, +{ + type Output; + + fn into_shared(self) -> Self::Output + where + S: DataOwned, + S::Elem: Clone; + + fn to_shared(&self) -> Self::Output + where + S: DataOwned, + S::Elem: Clone; +} + +pub trait NdView, D = Ix2>: AsOwned + AsShared +where + D: Dimension, + S: RawData, +{ + fn view(&self) -> ArrayView<'_, A, D> + where + A: Clone, + S: Data; + + fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> + where + A: Clone, + S: DataMut; +} + +pub trait View +where + D: Dimension, +{ + type Data: RawData; + type Output; + + fn view(&self) -> Self::Output + where + A: Clone, + Self::Data: Data; +} +pub trait ViewMut: View +where + D: Dimension, +{ + fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> + where + A: Clone, + Self::Data: DataMut; +} + +/* + ************* Implementations ************* +*/ +impl AsOwned for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Output = Array; + + fn into_owned(self) -> Self::Output + where + A: Clone, + S: Data, + { + self.into_owned() + } + + fn to_owned(&self) -> Self::Output + where + A: Clone, + S: Data, + { + self.to_owned() + } +} + +impl AsShared for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Output = ArcArray; + + fn into_shared(self) -> Self::Output + where + A: Clone, + S: DataOwned, + { + self.into_shared() + } + + fn to_shared(&self) -> Self::Output + where + A: Clone, + S: DataOwned, + { + self.to_shared() + } +} + +impl NdView for ArrayBase +where + D: Dimension, + S: RawData, +{ + fn view(&self) -> ArrayView<'_, A, D> + where + A: Clone, + S: Data, + { + self.view() 
+ } + + fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> + where + A: Clone, + S: DataMut, + { + self.view_mut() + } +} diff --git a/data/src/traits/mod.rs b/data/src/traits/mod.rs index cf2a290a..14b24d38 100644 --- a/data/src/traits/mod.rs +++ b/data/src/traits/mod.rs @@ -2,7 +2,9 @@ Appellation: traits Contrib: FL03 */ -pub use self::{data::*, records::*, shape::*}; +pub use self::{data::*, ext::*, records::*, shape::*}; + +pub mod build; pub mod records; pub mod shape; @@ -20,8 +22,23 @@ pub mod data { } } +pub mod ext { + pub use self::{ndarray::*, ndtensor::*, ndview::*}; + + pub(crate) mod ndarray; + pub(crate) mod ndtensor; + pub(crate) mod ndview; + + pub(crate) mod prelude { + pub use super::ndarray::*; + pub use super::ndtensor::*; + pub use super::ndview::*; + } +} + pub(crate) mod prelude { pub use super::data::prelude::*; + pub use super::ext::prelude::*; pub use super::records::*; pub use super::shape::*; }