diff --git a/.github/ISSUE_TEMPLATE/issue.md b/.github/ISSUE_TEMPLATE/issue.md new file mode 100644 index 00000000..925cfede --- /dev/null +++ b/.github/ISSUE_TEMPLATE/issue.md @@ -0,0 +1,17 @@ +--- +about: A generic issue template +assignees: + - FL03 +labels: [] +projects: ['@FL03/concision:features'] +name: Generic Issue +title: '' +--- + +**Describe the proposal or feature that this issue is tracking.** + +## Issues + +- [] + +## Pull Requests diff --git a/.github/ISSUE_TEMPLATE/proposal.md b/.github/ISSUE_TEMPLATE/proposal.md index d7bacdf8..d8370f22 100644 --- a/.github/ISSUE_TEMPLATE/proposal.md +++ b/.github/ISSUE_TEMPLATE/proposal.md @@ -1,15 +1,14 @@ --- -name: Improvement Proposal about: A formal proposal discussing any new features, changes, or improvements to the project. -title: 'CNC-0000:' -labels: ['proposal'] -projects: ['@FL03/concision:features', '@FL03/concision:roadmap'] assignees: - FL03 - +labels: ['proposal'] +name: Improvement Proposal +projects: ['@FL03/concision:features', '@FL03/concision:roadmap'] +title: 'CNC-0000:' --- ### Resources -- [Google](https://google.com) \ No newline at end of file +- [company](https://github.com/scattered-systems) diff --git a/.github/ISSUE_TEMPLATE/tracking.md b/.github/ISSUE_TEMPLATE/tracking.md new file mode 100644 index 00000000..0139c486 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/tracking.md @@ -0,0 +1,17 @@ +--- +about: Create a new tracking issue to track the progress of a proposal or feature. +assignees: + - FL03 +labels: ['tracking'] +projects: ['@FL03/concision:features'] +name: Tracking Issue +title: 'Tracking Issue:' +--- + +**Describe the proposal or feature that this issue is tracking.** + +## Issues + +- [] + +## Pull Requests diff --git a/concision/Cargo.toml b/concision/Cargo.toml index e8ddbdfb..1cd286ac 100644 --- a/concision/Cargo.toml +++ b/concision/Cargo.toml @@ -45,7 +45,7 @@ models = [ "gnn", "kan", "linear", - "transformers", + "transformer", ] gnn = [ @@ -60,7 +60,7 @@ linear = [ "dep:concision-linear", ] -transformers = [ +transformer = [ "dep:concision-transformers", ] @@ -184,6 +184,10 @@ test = true name = "linear" required-features = ["linear", "rand", "serde", "tracing"] +[[example]] +name = "transformer" +required-features = ["transformer", "rand", "serde", "tracing"] + [build-dependencies] [dependencies.concision-core] @@ -229,6 +233,7 @@ version = "0.1.14" [dev-dependencies] anyhow = "1" +approx.workspace = true lazy_static.workspace = true ndarray.workspace = true num = { features = ["rand", "serde"], version = "0.4" } diff --git a/concision/examples/transformer.rs b/concision/examples/transformer.rs new file mode 100644 index 00000000..cc24879b --- /dev/null +++ b/concision/examples/transformer.rs @@ -0,0 +1,38 @@ +/* + Appellation: transformer + Contrib: FL03 +*/ +extern crate concision as cnc; + +use approx::AbsDiffEq; +use cnc::prelude::Result; +use cnc::transformer::AttentionHead; +use ndarray::Array2; + +fn tracing() { + use tracing::Level; + use tracing_subscriber::fmt::time; + + tracing_subscriber::fmt() + .compact() + .with_ansi(true) + .with_max_level(Level::DEBUG) + .with_target(false) + .with_timer(time::uptime()) + .init(); +} + +fn main() -> Result<()> { + tracing(); + tracing::info!("Starting up the transformer model example..."); + + let shape = (3, 3); + let head = AttentionHead::::ones(shape); + let score = head.attention(); + assert!(score + .attention() + .abs_diff_eq(&Array2::from_elem(shape, 1f64 / 3f64), 1e-6)); + println!("{:?}", score); + + Ok(()) +} diff --git a/concision/src/lib.rs b/concision/src/lib.rs index ced1df1d..ae868246 100644 --- a/concision/src/lib.rs +++ b/concision/src/lib.rs @@ -27,9 +27,9 @@ pub use concision_kan as kan; pub use concision_linear as linear; #[cfg(feature = "macros")] pub use concision_macros::*; -#[cfg(feature = "transformers")] +#[cfg(feature = "transformer")] #[doc(inline)] -pub use concision_transformers as transformers; +pub use concision_transformers as transformer; pub mod prelude { pub use concision_core::prelude::*; @@ -45,4 +45,6 @@ pub mod prelude { pub use concision_linear::prelude::*; #[cfg(feature = "macros")] pub use concision_macros::*; + #[cfg(feature = "transformer")] + pub use concision_transformers::prelude::*; } diff --git a/core/Cargo.toml b/core/Cargo.toml index b0891b64..c8f85779 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -103,16 +103,24 @@ crate-type = ["lib"] doctest = false test = true -[[test]] -name = "random" -required-features = ["rand"] + [[test]] name = "fft" required-features = ["approx"] +[[test]] +name = "init" +required-features = ["rand", "std"] + +[[test]] +name = "nn" + [build-dependencies] +[dev-dependencies] +lazy_static.workspace = true + [dependencies] ndarray.workspace = true num.workspace = true @@ -154,9 +162,6 @@ default-features = false features = ["v5", "v8"] version = "1" -[dev-dependencies] -lazy_static = "1" - [package.metadata.docs.rs] all-features = true rustc-args = ["--cfg", "docsrs"] diff --git a/core/src/func/activate/nl.rs b/core/src/func/activate/nl.rs index 694145c7..d9e70fed 100644 --- a/core/src/func/activate/nl.rs +++ b/core/src/func/activate/nl.rs @@ -7,7 +7,7 @@ use ndarray::*; use num::complex::{Complex, ComplexFloat}; use num::traits::Zero; -pub fn relu(args: T) -> T +fn _relu(args: T) -> T where T: PartialOrd + Zero, { @@ -17,23 +17,33 @@ where T::zero() } -pub fn sigmoid(args: T) -> T +fn _sigmoid(args: T) -> T where T: ComplexFloat, { (T::one() + args.neg().exp()).recip() } -pub fn softmax(args: &ArrayBase) -> Array +fn _softmax(args: &ArrayBase) -> Array where A: ComplexFloat + ScalarOperand, D: Dimension, S: Data, { - args.exp() / args.exp().sum() + let e = args.exp(); + &e / e.sum() } -pub fn tanh(args: T) -> T +// fn __softmax(args: &I) -> I +// where +// I: Clone + core::ops::Div + Exp, T: Exp + core::iter::Sum , +// for<'a> I: IntoIterator, +// { +// let e = args.exp(); +// e.clone() / e.into_iter().sum::() +// } + +fn _tanh(args: T) -> T where T: ComplexFloat, { @@ -64,22 +74,25 @@ macro_rules! nonlinear { nonlinear!(@arr $rho::$call); }; (@impl $rho:ident::$call:ident<$T:ty>) => { - impl $rho for $T { - type Output = $T; + paste::paste! { + impl $rho for $T { + type Output = $T; - fn $call(self) -> Self::Output { - $call(self) + fn $call(self) -> Self::Output { + [<_ $call>](self) + } } - } - impl<'a> $rho for &'a $T { - type Output = $T; + impl<'a> $rho for &'a $T { + type Output = $T; - fn $call(self) -> Self::Output { - $call(*self) + fn $call(self) -> Self::Output { + [<_ $call>](*self) + } } } + }; (@arr $name:ident::$call:ident) => { impl $name for ArrayBase @@ -150,7 +163,7 @@ where type Output = Array; fn softmax(self) -> Self::Output { - softmax(&self) + _softmax(&self) } } @@ -163,6 +176,6 @@ where type Output = Array; fn softmax(self) -> Self::Output { - softmax(self) + _softmax(self) } } diff --git a/core/src/func/mod.rs b/core/src/func/mod.rs index bb99ccba..96513d96 100644 --- a/core/src/func/mod.rs +++ b/core/src/func/mod.rs @@ -7,12 +7,9 @@ pub use self::prelude::*; #[macro_use] pub mod activate; -pub mod dropout; pub mod loss; pub(crate) mod prelude { pub use super::activate::prelude::*; - #[cfg(feature = "rand")] - pub use super::dropout::*; pub use super::loss::prelude::*; } diff --git a/core/src/init/gen/lecun.rs b/core/src/init/distr/lecun.rs similarity index 73% rename from core/src/init/gen/lecun.rs rename to core/src/init/distr/lecun.rs index b8cae16c..0c4763c5 100644 --- a/core/src/init/gen/lecun.rs +++ b/core/src/init/distr/lecun.rs @@ -1,10 +1,11 @@ /* - Appellation: lecun + Appellation: lecun Contrib: FL03 */ +use crate::init::distr::TruncatedNormal; use num::Float; use rand::Rng; -use rand_distr::{Distribution, Normal, NormalError, StandardNormal}; +use rand_distr::{Distribution, NormalError, StandardNormal}; /// [LecunNormal] is a truncated [normal](rand_distr::Normal) distribution centered at 0 /// with a standard deviation that is calculated as `σ = sqrt(1/n_in)` @@ -18,14 +19,14 @@ impl LecunNormal { pub fn new(n: usize) -> Self { Self { n } } - /// Create a [normal](rand_distr::Normal) [distribution](Distribution) centered at 0; + /// Create a [truncated normal](TruncatedNormal) [distribution](Distribution) centered at 0; /// See [Self::std_dev] for the standard deviation calculations. - pub fn distr(&self) -> Result, NormalError> + pub fn distr(&self) -> Result, NormalError> where F: Float, StandardNormal: Distribution, { - Normal::new(F::zero(), self.std_dev()) + TruncatedNormal::new(F::zero(), self.std_dev()) } /// Calculate the standard deviation (`σ`) of the distribution. /// This is done by computing the root of the reciprocal of the number of inputs @@ -48,6 +49,6 @@ where where R: Rng + ?Sized, { - self.distr().unwrap().sample(rng) + self.distr().expect("NormalError").sample(rng) } } diff --git a/core/src/init/distr/trunc.rs b/core/src/init/distr/trunc.rs new file mode 100644 index 00000000..fc94f0b9 --- /dev/null +++ b/core/src/init/distr/trunc.rs @@ -0,0 +1,81 @@ +/* + Appellation: trunc + Contrib: FL03 +*/ +use num::traits::Float; +use rand::Rng; +use rand_distr::{Distribution, Normal, NormalError, StandardNormal}; + +/// A truncated normal distribution is similar to a [normal](rand_distr::Normal) [distribution](rand_distr::Distribution), however, +/// any generated value over two standard deviations from the mean is discarded and re-generated. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TruncatedNormal +where + StandardNormal: Distribution, +{ + mean: F, + std: F, +} + +impl TruncatedNormal +where + F: Float, + StandardNormal: Distribution, +{ + /// Create a new truncated normal distribution with a given mean and standard deviation + pub fn new(mean: F, std: F) -> Result { + Ok(Self { mean, std }) + } + + pub(crate) fn boundary(&self) -> F { + self.mean() + self.std_dev() * F::from(2).unwrap() + } + + pub(crate) fn score(&self, x: F) -> F { + self.mean() - self.std_dev() * x + } + + pub fn distr(&self) -> Normal { + Normal::new(self.mean(), self.std_dev()).unwrap() + } + + pub fn mean(&self) -> F { + self.mean + } + + pub fn std_dev(&self) -> F { + self.std + } +} + +impl Distribution for TruncatedNormal +where + F: Float, + StandardNormal: Distribution, +{ + fn sample(&self, rng: &mut R) -> F + where + R: Rng + ?Sized, + { + let bnd = self.boundary(); + let mut x = self.score(rng.sample(StandardNormal)); + // if x is outside of the boundary, re-sample + while x < -bnd || x > bnd { + x = self.score(rng.sample(StandardNormal)); + } + x + } +} + +impl From> for TruncatedNormal +where + F: Float, + StandardNormal: Distribution, +{ + fn from(normal: Normal) -> Self { + Self { + mean: normal.mean(), + std: normal.std_dev(), + } + } +} diff --git a/core/src/init/distr/xavier.rs b/core/src/init/distr/xavier.rs new file mode 100644 index 00000000..ecc1ee0c --- /dev/null +++ b/core/src/init/distr/xavier.rs @@ -0,0 +1,117 @@ +/* + Appellation: xavier + Contrib: FL03 +*/ +//! # Xavier +//! +//! Xavier initialization techniques were developed in 2010 by Xavier Glorot. +//! These methods are designed to initialize the weights of a neural network in a way that +//! prevents the vanishing and exploding gradient problems. The initialization technique +//! manifests into two distributions: [XavierNormal] and [XavierUniform]. +// #76 +use num::Float; +use rand::Rng; +use rand_distr::uniform::{SampleUniform, Uniform}; +use rand_distr::{Distribution, Normal, NormalError, StandardNormal}; + +pub(crate) fn std_dev(inputs: usize, outputs: usize) -> F +where + F: Float, +{ + (F::from(2).unwrap() / F::from(inputs + outputs).unwrap()).sqrt() +} + +pub(crate) fn boundary(inputs: usize, outputs: usize) -> F +where + F: Float, +{ + (F::from(6).unwrap() / F::from(inputs + outputs).unwrap()).sqrt() +} +/// Normal Xavier initializers leverage a normal distribution with a mean of 0 and a standard deviation (`σ`) +/// computed by the formula: `σ = sqrt(2/(d_in + d_out))` +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct XavierNormal +where + F: Float, + StandardNormal: Distribution, +{ + std: F, +} + +impl XavierNormal +where + F: Float, + StandardNormal: Distribution, +{ + pub fn new(inputs: usize, outputs: usize) -> Self { + Self { + std: std_dev(inputs, outputs), + } + } + + pub fn distr(&self) -> Result, NormalError> { + Normal::new(F::zero(), self.std_dev()) + } + + pub fn std_dev(&self) -> F { + self.std + } +} + +impl Distribution for XavierNormal +where + F: Float, + StandardNormal: Distribution, +{ + fn sample(&self, rng: &mut R) -> F + where + R: Rng + ?Sized, + { + self.distr().unwrap().sample(rng) + } +} + +/// Uniform Xavier initializers use a uniform distribution to initialize the weights of a neural network +/// within a given range. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct XavierUniform +where + X: SampleUniform, +{ + boundary: X, +} + +impl XavierUniform +where + X: Float + SampleUniform, +{ + pub fn new(inputs: usize, outputs: usize) -> Self { + Self { + boundary: boundary(inputs, outputs), + } + } + + pub fn boundary(&self) -> X { + self.boundary + } + + pub fn distr(&self) -> Uniform + where + X: Float, + { + let bnd = self.boundary(); + Uniform::new(-bnd, bnd) + } +} + +impl Distribution for XavierUniform +where + X: Float + SampleUniform, +{ + fn sample(&self, rng: &mut R) -> X + where + R: Rng + ?Sized, + { + self.distr().sample(rng) + } +} diff --git a/core/src/init/initializer.rs b/core/src/init/initializer.rs new file mode 100644 index 00000000..2de38df9 --- /dev/null +++ b/core/src/init/initializer.rs @@ -0,0 +1,40 @@ +/* + Appellation: initializer + Contrib: FL03 +*/ +use super::Initialize; +use core::marker::PhantomData; +use nd::prelude::*; +use nd::DataOwned; +use rand_distr::{Distribution, StandardNormal}; + +pub struct InitializerBase +where + D: Dimension, + Dst: Clone + Distribution, +{ + pub(crate) dim: D, + pub(crate) distr: Dst, + pub(crate) _dtype: PhantomData, +} + +impl InitializerBase +where + D: Dimension, + Dst: Clone + Distribution, +{ + pub fn new(dim: D, distr: Dst) -> Self { + Self { + dim, + distr, + _dtype: PhantomData::, + } + } + + pub fn init(self) -> ArrayBase + where + S: DataOwned, + { + ArrayBase::rand(self.dim, self.distr) + } +} diff --git a/core/src/init/mod.rs b/core/src/init/mod.rs index 22ee1bac..7f5bc4b4 100644 --- a/core/src/init/mod.rs +++ b/core/src/init/mod.rs @@ -11,18 +11,26 @@ //! better suited for machine-learning workloads. #![cfg(feature = "rand")] -pub use self::prelude::*; +pub use self::distr::prelude::*; +pub use self::traits::*; +pub use self::utils::*; -pub(crate) mod initialize; +pub(crate) mod traits; pub(crate) mod utils; -pub mod gen { +pub mod initializer; + +pub mod distr { pub use self::prelude::*; pub mod lecun; + pub mod trunc; + pub mod xavier; pub(crate) mod prelude { pub use super::lecun::*; + pub use super::trunc::*; + pub use super::xavier::*; } } @@ -34,7 +42,7 @@ pub use rand; pub use rand_distr; pub(crate) mod prelude { - pub use super::gen::prelude::*; - pub use super::initialize::{Initialize, InitializeExt}; + pub use super::distr::prelude::*; + pub use super::traits::{Initialize, InitializeExt}; pub use super::utils::*; } diff --git a/core/src/init/initialize.rs b/core/src/init/traits.rs similarity index 87% rename from core/src/init/initialize.rs rename to core/src/init/traits.rs index 91b41b13..a01ca7d9 100644 --- a/core/src/init/initialize.rs +++ b/core/src/init/traits.rs @@ -2,16 +2,17 @@ Appellation: initialize Contrib: FL03 */ +use crate::init::distr::*; + use core::ops::Neg; use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder}; use ndrand::RandomExt; use num::complex::ComplexDistribution; use num::traits::Float; -use rand::{rngs, Rng, SeedableRng}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; use rand_distr::uniform::{SampleUniform, Uniform}; -use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, StandardNormal}; - -use super::LecunNormal; +use rand_distr::{Bernoulli, BernoulliError, Distribution, Normal, NormalError, StandardNormal}; /// This trait provides the base methods required for initializing an [ndarray](ndarray::ArrayBase) with random values. /// [Initialize] is similar to [RandomExt](ndarray_rand::RandomExt), however, it focuses on flexibility while implementing additional @@ -79,7 +80,7 @@ where Self::rand(shape, distr) } /// Given a shape, mean, and standard deviation generate a new object using the [Normal](rand_distr::Normal) distribution - fn normal(shape: Sh, mean: A, std: A) -> Result + fn normal(shape: Sh, mean: A, std: A) -> Result where A: Float, S: DataOwned, @@ -115,11 +116,18 @@ where Sh: ShapeBuilder, StandardNormal: Distribution, { - Self::rand_with( - shape, - StandardNormal, - &mut rngs::StdRng::seed_from_u64(seed), - ) + Self::rand_with(shape, StandardNormal, &mut StdRng::seed_from_u64(seed)) + } + /// Initialize the object using the [TruncatedNormal](crate::init::distr::TruncatedNormal) distribution + fn truncnorm(shape: Sh, mean: A, std: A) -> Result + where + A: Float, + S: DataOwned, + Sh: ShapeBuilder, + StandardNormal: Distribution, + { + let distr = TruncatedNormal::new(mean, std)?; + Ok(Self::rand(shape, distr)) } /// A [uniform](rand_distr::uniform::Uniform) generator with values between u(-dk, dk) fn uniform(shape: Sh, dk: A) -> Self @@ -131,6 +139,20 @@ where { Self::rand(shape, Uniform::new(dk.clone().neg(), dk)) } + + fn uniform_from_seed(shape: Sh, start: A, stop: A, key: u64) -> Self + where + A: SampleUniform, + S: DataOwned, + Sh: ShapeBuilder, + ::Sampler: Clone, + { + Self::rand_with( + shape, + Uniform::new(start, stop), + &mut StdRng::seed_from_u64(key), + ) + } /// Generate a random array with values between u(-a, a) where a is the reciprocal of the value at the given axis fn uniform_along(shape: Sh, axis: usize) -> Self where diff --git a/core/src/init/utils.rs b/core/src/init/utils.rs index 3994589c..dacb3df2 100644 --- a/core/src/init/utils.rs +++ b/core/src/init/utils.rs @@ -22,23 +22,7 @@ where let distr = ComplexDistribution::::new(A::one(), A::one()); ArrayBase::random(shape, distr) } -/// Creates a random array from a uniform distribution using a given key -pub fn seeded_uniform( - key: u64, - start: T, - stop: T, - shape: impl IntoDimension, -) -> Array -where - D: Dimension, - T: SampleUniform, -{ - Array::random_using( - shape, - Uniform::new(start, stop), - &mut rngs::StdRng::seed_from_u64(key), - ) -} + /// Given a shape, generate a random array using the StandardNormal distribution pub fn stdnorm(shape: Sh) -> ArrayBase where @@ -59,3 +43,20 @@ where { ArrayBase::random_using(shape, StandardNormal, &mut StdRng::seed_from_u64(seed)) } +/// Creates a random array from a uniform distribution using a given key +pub fn uniform_from_seed( + key: u64, + start: T, + stop: T, + shape: impl IntoDimension, +) -> Array +where + D: Dimension, + T: SampleUniform, +{ + Array::random_using( + shape, + Uniform::new(start, stop), + &mut rngs::StdRng::seed_from_u64(key), + ) +} diff --git a/core/src/lib.rs b/core/src/lib.rs index 5906aa6f..a09d48e1 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -34,6 +34,7 @@ pub mod types; pub mod utils; pub mod prelude { + #[allow(unused_imports)] pub(crate) use super::primitives::rust::*; pub use super::error::prelude::*; diff --git a/core/src/macros/builder.rs b/core/src/macros/builder.rs index 8fba06d2..6e545c9f 100644 --- a/core/src/macros/builder.rs +++ b/core/src/macros/builder.rs @@ -5,11 +5,8 @@ #[macro_export] macro_rules! builder { - ($(#[derive($($d:ident),+)])?$name:ident::<$inner:ty> {$($k:ident: $v:ty),* $(,)?}) => { - builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); - }; ($(#[derive($($d:ident),+)])? $name:ident($inner:ty) {$($k:ident: $v:ty),* $(,)?}) => { - builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); + $crate::builder!(@loop builder: $name, derive: [$($($d),+)?], inner: $inner {$($k: $v),*}); }; (@loop builder: $name:ident, derive: [$($d:ident),* $(,)?], inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => { @@ -18,7 +15,7 @@ macro_rules! builder { inner: $inner, } - builder!(@impl builder: $name, inner: $inner {$($k: $v),*}); + $crate::builder!(@impl builder: $name, inner: $inner {$($k: $v),*}); }; (@impl builder: $name:ident, inner: $inner:ty {$($k:ident: $v:ty),* $(,)?}) => { impl $name { diff --git a/core/src/math/arith.rs b/core/src/math/arith.rs new file mode 100644 index 00000000..04c88c1a --- /dev/null +++ b/core/src/math/arith.rs @@ -0,0 +1,68 @@ +/* + Appellation: arith + Contrib: FL03 +*/ +use num::integer::Roots; +use num::traits::FromPrimitive; + +pub trait Root { + type Output; + + fn nth_root(&self, n: u32) -> Self::Output; + + fn sqrt(&self) -> Self::Output { + self.nth_root(2) + } + + fn cbrt(&self) -> Self::Output { + self.nth_root(3) + } +} + +macro_rules! impl_root { + (float $($T:ty),* $(,)?) => { + $( + impl_root!(@float $T); + )* + }; + ($($T:ty),* $(,)?) => { + $( + impl_root!(@impl $T); + )* + }; + + (@impl $T:ty) => { + impl Root for $T { + type Output = $T; + + fn nth_root(&self, n: u32) -> Self::Output { + Roots::nth_root(self, n) + } + } + }; + (@float $T:ty) => { + impl Root for $T { + type Output = $T; + + fn nth_root(&self, n: u32) -> Self::Output { + self.powf(<$T>::from_u32(n).unwrap().recip()) + } + } + }; +} + +impl_root!(float f32, f64); +impl_root! { + i8, + i16, + i32, + i64, + i128, + isize, + u8, + u16, + u32, + u64, + u128, + usize, +} diff --git a/core/src/math/mod.rs b/core/src/math/mod.rs index bc9dc16f..da193f09 100644 --- a/core/src/math/mod.rs +++ b/core/src/math/mod.rs @@ -4,12 +4,16 @@ */ //! # Mathematics //! -//! This module focuses on implementing various mathematical objects and operations that are -//! critical to the development of machine learning algorithms. +//! This module focuses on providing the mathematical foundation for the library. +//! Any defined operation is designed to extend the functionality of the basic primitives +//! as well as the `ndarray` crate. pub use self::traits::*; +pub mod arith; +pub mod stats; pub mod traits; pub(crate) mod prelude { + pub use super::stats::prelude::*; pub use super::traits::*; } diff --git a/core/src/math/stats/mod.rs b/core/src/math/stats/mod.rs new file mode 100644 index 00000000..7a0a3892 --- /dev/null +++ b/core/src/math/stats/mod.rs @@ -0,0 +1,13 @@ +/* + Appellation: stats + Contrib: FL03 +*/ +//! # Statistics +//! +pub use self::summary::*; + +mod summary; + +pub(crate) mod prelude { + pub use super::summary::*; +} diff --git a/core/src/math/stats/summary.rs b/core/src/math/stats/summary.rs new file mode 100644 index 00000000..35b5821d --- /dev/null +++ b/core/src/math/stats/summary.rs @@ -0,0 +1,151 @@ +/* + Appellation: summary + Contrib: FL03 +*/ +use crate::math::arith::Root; +use core::iter::{Product, Sum}; +use nd::{ArrayBase, Data, Dimension}; +use num::traits::{FromPrimitive, Num, NumOps, Pow}; + +/// This trait describes the fundamental methods of summary statistics. +/// These include the mean, standard deviation, variance, and more. +pub trait SummaryStatistics +where + Self::Item: FromPrimitive, + Self::Output: NumOps, +{ + type Item; + type Output; + + fn elems(&self) -> Self::Item { + Self::Item::from_usize(self.len()).unwrap() + } + + fn len(&self) -> usize; + + fn mean(&self) -> Self::Output { + self.sum() / self.elems() + } + + fn product(&self) -> Self::Output; + + fn sum(&self) -> Self::Output; + + fn std(&self) -> Self::Output; + + fn var(&self) -> Self::Output; +} + +/* + ************* Implementations ************* +*/ +impl<'a, T, I> SummaryStatistics for &'a I +where + I: Clone + ExactSizeIterator, + T: Copy + FromPrimitive + Num + Pow + Product + Root + Sum, +{ + type Item = T; + type Output = T; + + fn len(&self) -> usize { + ExactSizeIterator::len(*self) + } + + fn product(&self) -> Self::Output { + (*self).clone().product() + } + + fn sum(&self) -> Self::Output { + (*self).clone().sum() + } + + fn std(&self) -> Self::Output { + let mean = self.mean(); + let sum = (*self).clone().map(|x| (x - mean).pow(2)).sum::(); + (sum / self.elems()).sqrt() + } + + fn var(&self) -> Self::Output { + let mean = self.mean(); + let sum = (*self).clone().map(|x| (x - mean).pow(2)).sum::(); + sum / self.elems() + } +} + +macro_rules! impl_summary { + ($($T:ty),* $(,)?) => { + $( + impl_summary!(@impl $T); + )* + }; + (@impl $T:ty) => { + + impl SummaryStatistics for $T + where + T: Copy + FromPrimitive + Num + Pow + Product + Root + Sum, + { + type Item = T; + type Output = T; + + fn len(&self) -> usize { + self.len() + } + + fn product(&self) -> Self::Output { + self.iter().copied().product::() + } + + fn sum(&self) -> Self::Output { + self.iter().copied().sum::() + } + + fn std(&self) -> Self::Output { + let mean = self.mean(); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + (sum / self.elems()).sqrt() + } + + fn var(&self) -> Self::Output { + let mean = self.mean(); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + sum / self.elems() + } + } + }; +} + +impl_summary!(Vec, [T]); + +impl SummaryStatistics for ArrayBase +where + A: Copy + FromPrimitive + Num + Pow + Product + Root + Sum, + D: Dimension, + S: Data, +{ + type Item = A; + type Output = A; + + fn len(&self) -> usize { + self.len() + } + + fn product(&self) -> Self::Output { + self.iter().copied().product::() + } + + fn sum(&self) -> Self::Output { + self.iter().copied().sum::() + } + + fn std(&self) -> Self::Output { + let mean = self.mean().unwrap_or_else(A::zero); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + (sum / self.elems()).sqrt() + } + + fn var(&self) -> Self::Output { + let mean = self.mean().unwrap_or_else(A::zero); + let sum = self.iter().copied().map(|x| (x - mean).pow(2)).sum::(); + sum / self.elems() + } +} diff --git a/core/src/func/dropout.rs b/core/src/nn/dropout.rs similarity index 56% rename from core/src/func/dropout.rs rename to core/src/nn/dropout.rs index 00b24b13..19acdbc0 100644 --- a/core/src/func/dropout.rs +++ b/core/src/nn/dropout.rs @@ -2,14 +2,15 @@ Appellation: dropout Contrib: FL03 */ -#![cfg(feature = "rand")] +#![allow(unused_imports)] use crate::Forward; use nd::prelude::*; -use nd::{DataOwned, RemoveAxis, ScalarOperand}; -use ndrand::rand_distr::Bernoulli; -use ndrand::RandomExt; +use nd::{DataOwned, ScalarOperand}; +#[cfg(feature = "rand")] +use ndrand::{rand_distr::Bernoulli, RandomExt}; use num::traits::Num; +#[cfg(feature = "rand")] pub fn dropout(array: &ArrayBase, p: f64) -> Array where A: Num + ScalarOperand, @@ -27,44 +28,46 @@ where array * mask } -pub fn dropout_axis(array: &ArrayBase, _axis: Axis, p: f64) -> Array -where - A: Num + ScalarOperand, - D: RemoveAxis, - S: DataOwned, -{ - // Create a Bernoulli distribution for dropout - let distribution = Bernoulli::new(p).unwrap(); +/// [Dropout] randomly zeroizes elements with a given probability (`p`). +pub trait Dropout { + type Output; - // Create a mask of the same shape as the input array - let _mask: Array = Array::random(array.dim(), distribution); - - unimplemented!() + fn dropout(&self, p: f64) -> Self::Output; } -/// The [Dropout] layer is randomly zeroizes inputs with a given probability (`p`). +/// The [DropoutLayer] layer is randomly zeroizes inputs with a given probability (`p`). /// This regularization technique is often used to prevent overfitting. /// /// /// ### Config /// /// - (p) Probability of dropping an element -pub struct Dropout { - p: f64, +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct DropoutLayer { + pub(crate) p: f64, } -impl Dropout { - pub fn new(p: f64) -> Self { - Self { p } +/* + ************* Implementations ************* +*/ +#[cfg(feature = "rand")] +impl Dropout for ArrayBase +where + A: Num + ScalarOperand, + D: Dimension, + S: DataOwned, +{ + type Output = Array; + + fn dropout(&self, p: f64) -> Self::Output { + dropout(self, p) } +} - pub fn dropout(&self, array: &ArrayBase) -> Array - where - A: Num + ScalarOperand, - D: Dimension, - S: DataOwned, - { - dropout(array, self.p) +impl DropoutLayer { + pub fn new(p: f64) -> Self { + Self { p } } pub fn scale(&self) -> f64 { @@ -72,13 +75,14 @@ impl Dropout { } } -impl Default for Dropout { +impl Default for DropoutLayer { fn default() -> Self { Self::new(0.5) } } -impl Forward> for Dropout +#[cfg(feature = "rand")] +impl Forward> for DropoutLayer where A: Num + ScalarOperand, D: Dimension, @@ -87,6 +91,6 @@ where type Output = Array; fn forward(&self, input: &ArrayBase) -> Self::Output { - dropout(input, self.p) + input.dropout(self.p) } } diff --git a/core/src/nn/mask/mask.rs b/core/src/nn/mask/mask.rs new file mode 100644 index 00000000..94da711a --- /dev/null +++ b/core/src/nn/mask/mask.rs @@ -0,0 +1,236 @@ +/* + Appellation: mask + Contrib: FL03 +*/ +use nd::iter::{Iter, IterMut}; +use nd::prelude::*; +use nd::{Data, DataMut, OwnedRepr, RawData, RawDataClone}; + +pub struct Mask, D = Ix2>(ArrayBase) +where + D: Dimension, + S: RawData; + +impl Mask +where + D: Dimension, + S: RawData, +{ + pub fn from_arr(data: ArrayBase) -> Self { + Self(data) + } + + pub fn apply(&mut self, data: &ArrayBase, fill: A) -> ArrayBase + where + A: Clone, + S: Data, + T: DataMut + RawDataClone, + { + let mut res = data.clone(); + res.zip_mut_with(self.as_mut(), |x, &m| { + if m { + *x = fill.clone(); + } + }); + res + } + + pub fn mask_inplace<'a, A, T, F>( + &mut self, + data: &'a mut ArrayBase, + fill: A, + ) -> &'a mut ArrayBase + where + A: Clone, + S: Data, + T: DataMut, + { + data.zip_mut_with(&mut self.0, |x, &m| { + if m { + *x = fill.clone(); + } + }); + data + } + + pub fn as_slice(&self) -> &[bool] + where + S: Data, + { + self.get().as_slice().unwrap() + } + + pub fn as_mut_slice(&mut self) -> &mut [bool] + where + S: DataMut, + { + self.get_mut().as_slice_mut().unwrap() + } + + pub fn dim(&self) -> D::Pattern { + self.get().dim() + } + + pub fn iter(&self) -> Iter<'_, bool, D> + where + S: Data, + { + self.get().iter() + } + + pub fn iter_mut(&mut self) -> IterMut<'_, bool, D> + where + S: DataMut, + { + self.get_mut().iter_mut() + } + + pub fn get(&self) -> &ArrayBase { + &self.0 + } + + pub fn get_mut(&mut self) -> &mut ArrayBase { + &mut self.0 + } + + pub fn into_inner(self) -> ArrayBase { + self.0 + } + + pub fn ndim(&self) -> usize { + self.get().ndim() + } + + pub fn raw_dim(&self) -> D { + self.get().raw_dim() + } + + pub fn set(&mut self, data: ArrayBase) { + self.0 = data; + } + + pub fn shape(&self) -> D { + self.get().raw_dim() + } +} + +/* + ************* Implementations ************* +*/ +mod impls { + use super::Mask; + use core::borrow::{Borrow, BorrowMut}; + use core::ops::{Deref, DerefMut, Index, IndexMut}; + use nd::{ArrayBase, Data, DataMut, Dimension, NdIndex, RawData}; + + impl AsRef> for Mask + where + D: Dimension, + S: RawData, + { + fn as_ref(&self) -> &ArrayBase { + &self.0 + } + } + + impl AsMut> for Mask + where + D: Dimension, + S: RawData, + { + fn as_mut(&mut self) -> &mut ArrayBase { + &mut self.0 + } + } + + impl Borrow> for Mask + where + D: Dimension, + S: RawData, + { + fn borrow(&self) -> &ArrayBase { + &self.0 + } + } + + impl BorrowMut> for Mask + where + D: Dimension, + S: RawData, + { + fn borrow_mut(&mut self) -> &mut ArrayBase { + &mut self.0 + } + } + + impl Deref for Mask + where + D: Dimension, + S: RawData, + { + type Target = ArrayBase; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl DerefMut for Mask + where + D: Dimension, + S: RawData, + { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } + } + + impl Index for Mask + where + D: Dimension, + I: NdIndex, + S: Data, + { + type Output = as Index>::Output; + + fn index(&self, index: I) -> &Self::Output { + &self.0[index] + } + } + + impl IndexMut for Mask + where + D: Dimension, + I: NdIndex, + S: DataMut, + { + fn index_mut(&mut self, index: I) -> &mut Self::Output { + &mut self.0[index] + } + } +} + +mod impl_from { + use super::Mask; + use nd::{ArrayBase, Dimension, RawData}; + + impl From> for Mask + where + D: Dimension, + S: RawData, + { + fn from(mask: ArrayBase) -> Self { + Mask(mask) + } + } + + impl From> for ArrayBase + where + D: Dimension, + S: RawData, + { + fn from(mask: Mask) -> Self { + mask.0 + } + } +} diff --git a/core/src/nn/mask/mod.rs b/core/src/nn/mask/mod.rs new file mode 100644 index 00000000..5a3aaa2b --- /dev/null +++ b/core/src/nn/mask/mod.rs @@ -0,0 +1,29 @@ +/* + Appellation: mask + Contrib: FL03 +*/ +pub use self::mask::*; + +pub(crate) mod mask; + +pub(crate) mod prelude { + pub use super::mask::Mask; + pub use super::NdMask; +} + +use nd::{ArrayBase, Dimension, Ix2, RawData}; + +pub trait NdMask +where + D: Dimension, +{ + type Data: RawData; +} + +impl NdMask for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Data = S; +} diff --git a/core/src/nn/mod.rs b/core/src/nn/mod.rs index c0eb1f81..172b7e7d 100644 --- a/core/src/nn/mod.rs +++ b/core/src/nn/mod.rs @@ -2,19 +2,29 @@ Appellation: nn Contrib: FL03 */ -pub use self::{error::ModelError, model::prelude::*}; +#[cfg(any(feature = "alloc", feature = "std"))] +pub use self::types::*; +pub use self::{dropout::*, error::ModelError, model::prelude::*}; +pub mod dropout; pub mod error; +pub mod mask; pub mod model; pub(crate) mod prelude { - pub use super::error::ModelError; + pub use super::dropout::*; + pub use super::error::*; + pub use super::mask::prelude::*; pub use super::model::prelude::*; } #[cfg(any(feature = "alloc", feature = "std"))] -pub type ForwardDyn, O = T> = - crate::rust::Box>; +mod types { + use crate::rust::Box; + use nd::prelude::Array2; + + pub type ForwardDyn, O = T> = Box>; +} #[cfg(test)] mod tests {} diff --git a/core/src/nn/model.rs b/core/src/nn/model.rs index 8991d08e..316d03e7 100644 --- a/core/src/nn/model.rs +++ b/core/src/nn/model.rs @@ -6,6 +6,8 @@ pub use self::module::*; pub mod config; pub mod module; +#[doc(hidden)] +pub mod repo; pub(crate) mod prelude { pub use super::config::*; @@ -26,3 +28,32 @@ where fn context(&self) -> Self::Ctx; } + +/// This trait describes any neural networks or models that +/// adhears to the deep netural network architecture. +/// This design considers a single input and output layer, while +/// allowing for any number of hidden layers to be persisted. +/// +/// The `HIDDEN` constant is used to specify the number of hidden layers +/// and is used to compute the total number of layers (HIDDEN + 2) +pub trait DeepNeuralNetwork: Forward { + const HIDDEN: Option = None; + + type Input: Forward; + type Hidden: Forward; // The type of `hidden` layers; all hidden layers implement the same activation function + type Out: Forward; + + fn input(&self) -> &Self::Input; + + fn hidden(&self) -> &[Self::Hidden]; + + fn output(&self) -> &Self::Out; + + fn nlayers(&self) -> usize { + self.nhidden() + 2 + } + + fn nhidden(&self) -> usize { + Self::HIDDEN.unwrap_or_else(|| self.hidden().len()) + } +} diff --git a/core/src/nn/model/repo.rs b/core/src/nn/model/repo.rs new file mode 100644 index 00000000..affd401a --- /dev/null +++ b/core/src/nn/model/repo.rs @@ -0,0 +1,10 @@ +/* + Appellation: repo + Contrib: FL03 +*/ +#![allow(unused)] + +pub struct ModelRepo { + pub name: String, + pub(crate) store: String, +} diff --git a/core/src/traits/arr/create.rs b/core/src/traits/arr/create.rs index b99a5eaa..8c45d927 100644 --- a/core/src/traits/arr/create.rs +++ b/core/src/traits/arr/create.rs @@ -86,11 +86,9 @@ where } } -macro_rules! impl_like { +macro_rules! impl_ndlike { + ($name:ident::$method:ident.$call:ident: $($p:tt)*) => { - impl_like!(@impl $name::$method.$call: $($p)*); - }; - (@impl $name:ident::$method:ident.$call:ident: $($p:tt)*) => { impl $name for ArrayBase where A: $($p)*, @@ -106,6 +104,6 @@ macro_rules! impl_like { }; } -impl_like!(DefaultLike::default_like.default: Default); -impl_like!(OnesLike::ones_like.ones: Clone + num::One); -impl_like!(ZerosLike::zeros_like.zeros: Clone + num::Zero); +impl_ndlike!(DefaultLike::default_like.default: Default); +impl_ndlike!(OnesLike::ones_like.ones: Clone + num::One); +impl_ndlike!(ZerosLike::zeros_like.zeros: Clone + num::Zero); diff --git a/core/src/traits/arr/misc.rs b/core/src/traits/arr/misc.rs index 4cc76e4c..40857596 100644 --- a/core/src/traits/arr/misc.rs +++ b/core/src/traits/arr/misc.rs @@ -2,17 +2,18 @@ Appellation: convert Contrib: FL03 */ -use nd::Axis; -use nd::{ArrayBase, Dimension, RawData}; +use nd::prelude::*; +use nd::{DataMut, RawData}; -pub trait Dimensional { - type Pattern; - - fn dim(&self) -> Self::Pattern; - - fn raw_dim(&self) -> D; +/// This trait is used to fill an array with a value based on a mask. +/// The mask is a boolean array of the same shape as the array. +pub trait MaskFill +where + D: Dimension, +{ + type Output; - fn shape(&self) -> &[usize]; + fn masked_fill(&self, mask: &Array, value: A) -> Self::Output; } pub trait IntoAxis { @@ -26,23 +27,24 @@ pub trait IsSquare { /* ******** implementations ******** */ -impl Dimensional for ArrayBase + +impl MaskFill for ArrayBase where + A: Clone, D: Dimension, - S: RawData, + S: DataMut, + Self: Clone, { - type Pattern = D::Pattern; - - fn shape(&self) -> &[usize] { - ArrayBase::shape(self) - } - - fn dim(&self) -> Self::Pattern { - ArrayBase::dim(self) - } + type Output = ArrayBase; - fn raw_dim(&self) -> D { - ArrayBase::raw_dim(self) + fn masked_fill(&self, mask: &Array, value: A) -> Self::Output { + let mut arr = self.clone(); + arr.zip_mut_with(&mask, |x, &m| { + if m { + *x = value.clone(); + } + }); + arr } } diff --git a/core/src/traits/arr/reshape.rs b/core/src/traits/arr/reshape.rs new file mode 100644 index 00000000..7079f130 --- /dev/null +++ b/core/src/traits/arr/reshape.rs @@ -0,0 +1,40 @@ +/* + Appellation: reshape [traits::arr] + Contrib: FL03 +*/ +use nd::prelude::*; +use nd::{RawData, RawDataClone}; + +pub trait Unsqueeze { + type Output; + + fn unsqueeze(self, axis: usize) -> Self::Output; +} + +/* + ************* Implementations ************* +*/ + +impl Unsqueeze for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Output = ArrayBase; + + fn unsqueeze(self, axis: usize) -> Self::Output { + self.insert_axis(Axis(axis)) + } +} + +impl<'a, A, S, D> Unsqueeze for &'a ArrayBase +where + D: Dimension, + S: RawDataClone, +{ + type Output = ArrayBase; + + fn unsqueeze(self, axis: usize) -> Self::Output { + self.clone().insert_axis(Axis(axis)) + } +} diff --git a/core/src/traits/arr/tensor.rs b/core/src/traits/arr/tensor.rs deleted file mode 100644 index 22a00f99..00000000 --- a/core/src/traits/arr/tensor.rs +++ /dev/null @@ -1,264 +0,0 @@ -/* - Appellation: generator - Contrib: FL03 -*/ -use nd::prelude::*; -use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData}; -use num::{One, Zero}; - -/// [NdBuilder] describes common creation routines for [ArrayBase](ndarray::ArrayBase) -pub trait NdBuilder -where - D: Dimension, -{ - type Data: RawData; - type Store; - - /// Create a new array with the given shape whose elements are set to the default value of the element type. - fn default(shape: Sh) -> Self::Store - where - A: Default, - Sh: ShapeBuilder, - Self::Data: DataOwned; - - fn fill(shape: Sh, elem: A) -> Self::Store - where - A: Clone, - Sh: ShapeBuilder, - Self::Data: DataOwned; - - fn ones(shape: Sh) -> Self::Store - where - A: Clone + One, - Sh: ShapeBuilder, - Self::Data: DataOwned; - - fn zeros(shape: Sh) -> Self::Store - where - A: Clone + Zero, - Sh: ShapeBuilder, - Self::Data: DataOwned; -} - -pub trait NdBuilderExt: NdBuilder -where - D: Dimension, -{ - fn dim(&self) -> D::Pattern; - - fn default_like(&self) -> Self::Store - where - A: Default, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::default(self.dim()) - } - - fn fill_like(&self, elem: A) -> Self::Store - where - A: Clone, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::fill(self.dim(), elem) - } - - fn ones_like(&self) -> Self::Store - where - A: Clone + One, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::ones(self.dim()) - } - - fn zeros_like(&self) -> Self::Store - where - A: Clone + Zero, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - Self::zeros(self.dim()) - } -} - -pub trait AsOwned -where - D: Dimension, - S: RawData, -{ - type Output; - - fn into_owned(self) -> Self::Output - where - S: Data, - S::Elem: Clone; - - fn to_owned(&self) -> Self::Output - where - S: Data, - S::Elem: Clone; -} - -pub trait AsShared -where - D: Dimension, - S: RawData, -{ - type Output; - - fn into_shared(self) -> Self::Output - where - S: DataOwned, - S::Elem: Clone; - - fn to_shared(&self) -> Self::Output - where - S: DataOwned, - S::Elem: Clone; -} - -pub trait NdView, D = Ix2>: AsOwned + AsShared -where - D: Dimension, - S: RawData, -{ - fn view(&self) -> ArrayView<'_, A, D> - where - A: Clone, - S: Data; - - fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> - where - A: Clone, - S: DataMut; -} - -/* - ************* Implementations ************* -*/ -impl NdBuilder for ArrayBase -where - D: Dimension, - S: RawData, -{ - type Data = S; - type Store = ArrayBase; - - fn default(shape: Sh) -> Self - where - A: Default, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - ArrayBase::default(shape) - } - - fn fill(shape: Sh, elem: A) -> Self - where - A: Clone, - S: DataOwned, - Sh: ShapeBuilder, - { - ArrayBase::from_elem(shape, elem) - } - - fn ones(shape: Sh) -> Self - where - A: Clone + One, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - ArrayBase::ones(shape) - } - - fn zeros(shape: Sh) -> Self - where - A: Clone + Zero, - Sh: ShapeBuilder, - Self::Data: DataOwned, - { - ArrayBase::zeros(shape) - } -} - -impl NdBuilderExt for ArrayBase -where - D: Dimension, - S: RawData, -{ - fn dim(&self) -> D::Pattern { - ArrayBase::dim(self) - } -} - -impl AsOwned for ArrayBase -where - D: Dimension, - S: RawData, -{ - type Output = Array; - - fn into_owned(self) -> Self::Output - where - A: Clone, - S: Data, - { - self.into_owned() - } - - fn to_owned(&self) -> Self::Output - where - A: Clone, - S: Data, - { - self.to_owned() - } -} - -impl AsShared for ArrayBase -where - D: Dimension, - S: RawData, -{ - type Output = ArcArray; - - fn into_shared(self) -> Self::Output - where - A: Clone, - S: DataOwned, - { - self.into_shared() - } - - fn to_shared(&self) -> Self::Output - where - A: Clone, - S: DataOwned, - { - self.to_shared() - } -} - -impl NdView for ArrayBase -where - D: Dimension, - S: RawData, -{ - fn view(&self) -> ArrayView<'_, A, D> - where - A: Clone, - S: Data, - { - self.view() - } - - fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> - where - A: Clone, - S: DataMut, - { - self.view_mut() - } -} diff --git a/core/src/traits/misc/container.rs b/core/src/traits/misc/container.rs deleted file mode 100644 index 842d96db..00000000 --- a/core/src/traits/misc/container.rs +++ /dev/null @@ -1,12 +0,0 @@ -/* - Appellation: container - Contrib: FL03 -*/ - -pub trait Container { - type Data: Data; -} - -pub trait Data { - type Item; -} diff --git a/core/src/traits/misc/sequential.rs b/core/src/traits/misc/sequential.rs new file mode 100644 index 00000000..8e92192e --- /dev/null +++ b/core/src/traits/misc/sequential.rs @@ -0,0 +1,63 @@ +/* + Appellation: sequential [traits::misc] + Contrib: FL03 +*/ +use num::traits::FromPrimitive; + +/// A trait for sequential data structures; +/// This trait is implemented for iterators that have a known length. +pub trait Sequence { + const LENGTH: Option = None; + + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn elems(&self) -> T + where + T: FromPrimitive, + { + T::from_usize(self.len()).unwrap() + } +} + +pub trait SequenceIter { + type Item; + + fn len(&self) -> usize; +} +/* + ************* Implementations ************* +*/ +impl SequenceIter for I +where + I: ExactSizeIterator, +{ + type Item = T; + + fn len(&self) -> usize { + self.len() + } +} + +impl Sequence for Vec { + fn len(&self) -> usize { + self.len() + } +} + +impl Sequence for [T] { + fn len(&self) -> usize { + self.len() + } +} + +impl Sequence for [T; N] { + const LENGTH: Option = Some(N); + + fn len(&self) -> usize { + N + } +} diff --git a/core/src/traits/mod.rs b/core/src/traits/mod.rs index b6aa6b21..9dc12247 100644 --- a/core/src/traits/mod.rs +++ b/core/src/traits/mod.rs @@ -7,36 +7,38 @@ pub use self::prelude::*; pub mod num; pub mod ops; pub mod predict; +pub mod setup; pub mod train; pub mod arr { pub use self::prelude::*; - pub(crate) mod create; - pub(crate) mod misc; - pub(crate) mod ops; - pub(crate) mod tensor; + mod create; + mod misc; + mod ops; + mod reshape; pub(crate) mod prelude { pub use super::create::*; pub use super::misc::*; pub use super::ops::*; - pub use super::tensor::*; + pub use super::reshape::*; } } pub mod misc { - pub mod adjust; + pub use self::prelude::*; + + pub(crate) mod adjust; + #[doc(hidden)] + pub(crate) mod sequential; #[doc(hidden)] - pub mod container; - pub mod setup; - pub mod store; - pub mod toggle; + pub(crate) mod store; + pub(crate) mod toggle; pub(crate) mod prelude { pub use super::adjust::*; - pub use super::container::*; - pub use super::setup::*; + pub use super::sequential::*; pub use super::store::*; pub use super::toggle::*; } @@ -48,5 +50,6 @@ pub(crate) mod prelude { pub use super::num::*; pub use super::ops::*; pub use super::predict::*; + pub use super::setup::*; pub use super::train::*; } diff --git a/core/src/traits/misc/setup.rs b/core/src/traits/setup.rs similarity index 100% rename from core/src/traits/misc/setup.rs rename to core/src/traits/setup.rs diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs index a639d5a4..d6347e49 100644 --- a/core/src/types/mod.rs +++ b/core/src/types/mod.rs @@ -7,6 +7,7 @@ pub use self::prelude::*; pub use self::std_types::*; pub mod propagate; +pub mod shape; pub type NdResult = core::result::Result; /// A type alias for a [Result](core::result::Result) with the crate's [Error](crate::error::Error) type. @@ -23,6 +24,7 @@ mod std_types { pub(crate) mod prelude { pub use super::propagate::Propagate; + pub use super::shape::ModelShape; #[cfg(feature = "std")] pub use super::std_types::*; pub use super::{NdResult, Result}; diff --git a/core/src/types/shape.rs b/core/src/types/shape.rs new file mode 100644 index 00000000..37e75047 --- /dev/null +++ b/core/src/types/shape.rs @@ -0,0 +1,166 @@ +/* + Appellation: shape + Contrib: FL03 +*/ +use nd::prelude::{Ix1, Ix2}; +use nd::{Dimension, ErrorKind, IntoDimension, RemoveAxis, ShapeBuilder, ShapeError}; + +pub(crate) fn _from_dim(dim: D) -> Result +where + D: Dimension, +{ + if dim.ndim() == 1 { + Ok(Features::new(dim[0], 1)) + } else if dim.ndim() >= 2 { + Ok(Features::new(dim[1], dim[0])) + } else { + Err(ShapeError::from_kind(ErrorKind::IncompatibleShape)) + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct ModelShape { + pub(crate) features: Features, + pub(crate) network: usize, +} + +impl ModelShape { + pub fn new(model: usize, network: usize) -> Self { + let features = Features::from_network(model, network); + Self { features, network } + } + + pub fn from_features(features: Features) -> Self { + Self { + features, + network: features.size(), + } + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct Features { + pub(crate) inputs: usize, + pub(crate) outputs: usize, +} + +impl Features { + /// Create a new, unchecked [Features] instance. + /// + pub fn new(inputs: usize, outputs: usize) -> Self { + debug_assert_ne!(inputs, 0); + debug_assert_ne!(outputs, 0); + + Self { inputs, outputs } + } + /// Attempts to build a new [Features] instance from the given dimension ([`D`](Dimension)) + pub fn from_dimension(dim: D) -> Result + where + D: Dimension, + { + _from_dim(dim) + } + /// Builds a new instance from the given shape ([`Sh`](ShapeBuilder)); + /// Unlike [Features::from_dimension], this method requires the dimension (`D`) to + /// additionally implement the [RemoveAxis] trait + pub fn from_shape(shape: Sh) -> Self + where + D: RemoveAxis, + Sh: ShapeBuilder, + { + let dim = shape.into_shape().raw_dim().clone(); + _from_dim(dim).unwrap() + } + /// Creates a new instance given the model size (`inputs`, `d_model`) and total number of nodes within the network (`size`, `network`, `d_network`) + pub fn from_network(model: usize, network: usize) -> Self { + let outputs = network / model; + Self::new(model, outputs) + } + + pub const fn as_array(&self) -> [usize; 2] { + [self.outputs(), self.inputs()] + } + /// Creates a new two-tuple instance from the given dimensions; + pub const fn as_tuple(&self) -> (usize, usize) { + (self.outputs(), self.inputs()) + } + pub fn check_dim(&self, dim: D) -> bool + where + D: Dimension, + { + if dim.ndim() == 1 { + self.inputs() == dim[0] + } else if dim.ndim() >= 2 { + self.outputs() == dim[0] && self.inputs() == dim[1] + } else { + false + } + } + /// Forwards the [into_pattern](ndarray::Dimension::into_pattern) method from the [Dimension] trait + #[inline] + pub fn into_pattern(self) -> (usize, usize) { + self.into_dimension().into_pattern() + } + /// An aliased function that returns the number of input features + pub const fn d_model(&self) -> usize { + self.inputs() + } + /// Returns the number of input features + pub const fn inputs(&self) -> usize { + self.inputs + } + /// Checks to see if the features speak to a so-called `unit`; + /// i.e. see if the number of output features is equal to 1. + pub fn is_unit(&self) -> bool { + self.outputs() == 1 + } + /// Returns the number of output features + pub const fn outputs(&self) -> usize { + self.outputs + } + /// Computes the total number of nodes in the network + pub fn size(&self) -> usize { + self.inputs() * self.outputs() + } + #[doc(hidden)] + pub fn uniform_scale(&self) -> f64 { + (self.inputs as f64).recip().sqrt() + } +} + +impl IntoDimension for Features { + type Dim = Ix2; + + fn into_dimension(self) -> Self::Dim { + (self.outputs, self.inputs).into_dimension() + } +} + +impl From for Features { + fn from(dim: Ix1) -> Self { + Self::new(1, dim[0]) + } +} + +impl From for Features { + fn from(dim: Ix2) -> Self { + Self::new(dim[1], dim[0]) + } +} + +impl From for Ix2 { + fn from(features: Features) -> Self { + features.into_dimension() + } +} + +impl PartialEq for Features +where + [usize; 2]: PartialEq, +{ + fn eq(&self, other: &U) -> bool { + self.as_array() == *other + } +} diff --git a/core/tests/init.rs b/core/tests/init.rs new file mode 100644 index 00000000..39d29e5d --- /dev/null +++ b/core/tests/init.rs @@ -0,0 +1,45 @@ +/* + Appellation: random + Contrib: FL03 +*/ +extern crate concision_core as cnc; + +use cnc::init::distr::LecunNormal; +use cnc::init::InitializeExt; +use ndarray::prelude::*; + +#[test] +fn test_init_ext() { + let shape = [3, 3]; + let seed = 0u64; + let a = Array2::::stdnorm(shape); + let b = Array2::::stdnorm_from_seed(shape, seed); + + assert_eq!(a.shape(), shape); + assert_eq!(a.shape(), b.shape()); +} + +#[test] +fn test_lecun_normal() { + let n = 3; + let shape = (3, 3); + + let distr = LecunNormal::new(n); + + let bnd = 2f64 * distr.std_dev::(); + + let arr = Array2::::lecun_normal(shape, n); + + assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd)); + + assert_eq!(arr.dim(), shape); +} + +#[test] +fn test_truncnorm() { + let (mean, std) = (0f64, 2f64); + let bnd = 2f64 * std; + let shape = (3, 3); + let arr = Array::truncnorm(shape, mean, std).unwrap(); + assert!(arr.iter().all(|&x| x >= -bnd && x <= bnd)); +} diff --git a/core/tests/func.rs b/core/tests/nn.rs similarity index 83% rename from core/tests/func.rs rename to core/tests/nn.rs index e1a5ccef..55b51198 100644 --- a/core/tests/func.rs +++ b/core/tests/nn.rs @@ -1,7 +1,7 @@ #![allow(unused_imports)] extern crate concision_core as concision; -use concision::func::Dropout; +use concision::nn::DropoutLayer; use concision::Forward; use ndarray::prelude::*; @@ -10,7 +10,7 @@ use ndarray::prelude::*; fn test_dropout() { let shape = (512, 2048); let arr = Array2::::ones(shape); - let dropout = Dropout::new(0.5); + let dropout = DropoutLayer::new(0.5); let out = dropout.forward(&arr); assert!(arr.iter().all(|&x| x == 1.0)); diff --git a/core/tests/random.rs b/core/tests/random.rs deleted file mode 100644 index daa76435..00000000 --- a/core/tests/random.rs +++ /dev/null @@ -1,19 +0,0 @@ -/* - Appellation: random - Contrib: FL03 -*/ -extern crate concision_core as cnc; - -use cnc::init::InitializeExt; -use ndarray::prelude::*; - -#[test] -fn test_stdnorm() { - let shape = [3, 3]; - let seed = 0u64; - let a = Array2::::stdnorm(shape); - let b = Array2::::stdnorm_from_seed(shape, seed); - - assert_eq!(a.shape(), shape); - assert_eq!(a.shape(), b.shape()); -} diff --git a/core/tests/traits.rs b/core/tests/traits.rs index 1778fefd..b1038f94 100644 --- a/core/tests/traits.rs +++ b/core/tests/traits.rs @@ -4,20 +4,32 @@ */ extern crate concision_core as cnc; -use cnc::traits::{Affine, AsComplex, Matpow}; -use ndarray::prelude::{array, Array2}; +use cnc::linarr; +use ndarray::prelude::*; use num::Complex; #[test] fn test_affine() { + use cnc::traits::Affine; let x = array![[0.0, 1.0], [2.0, 3.0]]; let y = x.affine(4.0, -2.0); assert_eq!(y, array![[-2.0, 2.0], [6.0, 10.0]]); } +#[test] +fn test_masked_fill() { + use cnc::traits::MaskFill; + let shape = (2, 2); + let mask = array![[true, false], [false, true]]; + let arr = linarr::(shape).unwrap(); + let a = arr.masked_fill(&mask, 0.0); + assert_eq!(a, array![[0.0, 1.0], [2.0, 0.0]]); +} + #[test] fn test_as_complex() { + use cnc::traits::AsComplex; let x = 1.0; let y = x.as_re(); assert_eq!(y, Complex::new(1.0, 0.0)); @@ -25,8 +37,19 @@ fn test_as_complex() { #[test] fn test_matrix_power() { + use cnc::traits::Matpow; let x = array![[1.0, 2.0], [3.0, 4.0]]; assert_eq!(x.pow(0), Array2::::eye(2)); assert_eq!(x.pow(1), x); assert_eq!(x.pow(2), x.dot(&x)); } + +#[test] +fn test_unsqueeze() { + use cnc::traits::Unsqueeze; + let arr = array![1, 2, 3, 4]; + let a = arr.clone().unsqueeze(0); + assert_eq!(a.dim(), (1, 4)); + let b = arr.unsqueeze(1); + assert_eq!(b.dim(), (4, 1)); +} diff --git a/data/src/kernel/mod.rs b/data/src/kernel/mod.rs new file mode 100644 index 00000000..e69de29b diff --git a/data/src/lib.rs b/data/src/lib.rs index 0186ca18..5f1d6ead 100644 --- a/data/src/lib.rs +++ b/data/src/lib.rs @@ -22,9 +22,11 @@ pub mod params; pub mod preproc; pub mod tensor; pub mod traits; +pub mod types; pub mod prelude { pub use super::dataset::*; pub use super::params::prelude::*; pub use super::traits::prelude::*; + pub use super::types::prelude::*; } diff --git a/data/src/tensor/mod.rs b/data/src/tensor/mod.rs index 17945f2f..7d31345f 100644 --- a/data/src/tensor/mod.rs +++ b/data/src/tensor/mod.rs @@ -2,6 +2,6 @@ Appellation: tensor Contrib: FL03 */ -pub use self::ndtensor::NdTensor; +pub use self::ndtensor::NdContainer; pub mod ndtensor; diff --git a/data/src/tensor/ndtensor/traits.rs b/data/src/tensor/ndtensor/traits.rs index c55c3afe..b125c25b 100644 --- a/data/src/tensor/ndtensor/traits.rs +++ b/data/src/tensor/ndtensor/traits.rs @@ -11,7 +11,7 @@ pub trait TensorData { fn as_mut_slice(&mut self) -> &mut [Self::Elem]; } -pub trait NdTensor { +pub trait NdContainer { const RANK: Option = None; type Data: TensorData; diff --git a/data/src/traits/build.rs b/data/src/traits/build.rs new file mode 100644 index 00000000..7944014b --- /dev/null +++ b/data/src/traits/build.rs @@ -0,0 +1,140 @@ +/* + Appellation: ndarray + Contrib: FL03 +*/ +use crate::traits::Dimensional; +use nd::{ArrayBase, DataOwned, Dimension, RawData, ShapeBuilder}; +use num::{One, Zero}; + +/// [NdBuilder] describes common creation routines for [ArrayBase] +pub trait NdBuilder +where + D: Dimension, +{ + type Data: RawData; + + /// Create a new array with the given shape whose elements are set to the default value of the element type. + fn default(shape: Sh) -> Self + where + A: Default, + Sh: ShapeBuilder, + Self::Data: DataOwned; + + fn fill(shape: Sh, elem: A) -> Self + where + A: Clone, + Sh: ShapeBuilder, + Self::Data: DataOwned; + + fn ones(shape: Sh) -> Self + where + A: Clone + One, + Sh: ShapeBuilder, + Self::Data: DataOwned; + + fn zeros(shape: Sh) -> Self + where + A: Clone + Zero, + Sh: ShapeBuilder, + Self::Data: DataOwned; +} + +pub trait NdBuilderExt: NdBuilder + Sized +where + D: Dimension, +{ + fn dim(&self) -> D::Pattern; + + fn default_like(&self) -> Self + where + A: Default, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::default(self.dim()) + } + + fn fill_like(&self, elem: A) -> Self + where + A: Clone, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::fill(self.dim(), elem) + } + + fn ones_like(&self) -> Self + where + A: Clone + One, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::ones(self.dim()) + } + + fn zeros_like(&self) -> Self + where + A: Clone + Zero, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + Self::zeros(self.dim()) + } +} + +/* + ************* Implementations ************* +*/ +impl NdBuilder for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Data = S; + + fn default(shape: Sh) -> Self + where + A: Default, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + ArrayBase::default(shape) + } + + fn fill(shape: Sh, elem: A) -> Self + where + A: Clone, + S: DataOwned, + Sh: ShapeBuilder, + { + ArrayBase::from_elem(shape, elem) + } + + fn ones(shape: Sh) -> Self + where + A: Clone + One, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + ArrayBase::ones(shape) + } + + fn zeros(shape: Sh) -> Self + where + A: Clone + Zero, + Sh: ShapeBuilder, + Self::Data: DataOwned, + { + ArrayBase::zeros(shape) + } +} + +impl NdBuilderExt for U +where + U: Dimensional + NdBuilder, + D: Dimension, +{ + fn dim(&self) -> D::Pattern { + self.dim() + } +} diff --git a/data/src/traits/data/container.rs b/data/src/traits/data/container.rs new file mode 100644 index 00000000..0d9e0044 --- /dev/null +++ b/data/src/traits/data/container.rs @@ -0,0 +1,25 @@ +/* + Appellation: container + Contrib: FL03 +*/ +use crate::traits::{ContainerRepr, Dimensional}; + +pub trait Container { + type Data: ContainerRepr; +} + +/// This trait describes the basic operations for any n-dimensional container. +pub trait NdContainer: Dimensional { + type Data: ContainerRepr; + + fn as_slice(&self) -> &[A]; + + fn as_mut_slice(&mut self) -> &mut [A]; +} + +/* + ************* Implementations ************* +*/ +impl Container for Vec { + type Data = Vec; +} diff --git a/data/src/traits/data/repr.rs b/data/src/traits/data/repr.rs new file mode 100644 index 00000000..3c583d95 --- /dev/null +++ b/data/src/traits/data/repr.rs @@ -0,0 +1,15 @@ +/* + Appellation: data + Contrib: FL03 +*/ + +pub trait ContainerRepr { + type Elem; +} + +/* + ************* Implementations ************* +*/ +impl ContainerRepr for Vec { + type Elem = T; +} diff --git a/data/src/traits/ext/ndarray.rs b/data/src/traits/ext/ndarray.rs new file mode 100644 index 00000000..6d3e6ed8 --- /dev/null +++ b/data/src/traits/ext/ndarray.rs @@ -0,0 +1,45 @@ +/* + Appellation: ndarray + Contrib: FL03 +*/ +use nd::iter::{Iter, IterMut}; +use nd::{Dimension, RawData}; + +pub trait NdArray +where + D: Dimension, +{ + type Data: RawData; + + fn as_slice(&self) -> &[A]; + + fn as_mut_slice(&mut self) -> &mut [A]; + + fn iter(&self) -> Iter<'_, A, D>; + + fn iter_mut(&mut self) -> IterMut<'_, A, D>; + + fn map(&self, f: F) -> Self + where + F: FnMut(&A) -> A; + + fn mapv(&mut self, f: F) + where + A: Clone, + F: FnMut(A) -> A; +} + +pub trait NdIter +where + D: Dimension, +{ + type Data: RawData; + + fn iter(&self) -> Iter<'_, A, D>; + + fn iter_mut(&mut self) -> IterMut<'_, A, D>; +} + +/* + ************* Implementations ************* +*/ diff --git a/data/src/traits/ext/ndtensor.rs b/data/src/traits/ext/ndtensor.rs new file mode 100644 index 00000000..0edbd756 --- /dev/null +++ b/data/src/traits/ext/ndtensor.rs @@ -0,0 +1,52 @@ +/* + Appellation: ndtensor + Contrib: FL03 +*/ +use nd::{ArrayBase, Data, Dimension, RawData}; +use num::complex::ComplexFloat; +use num::traits::Float; + +pub trait Scalar { + type R: Float; +} + +pub trait NdTensor +where + A: ComplexFloat, + D: Dimension, +{ + type Data: RawData; + type Output; + + fn conj(&self) -> Self::Output; + + fn cos(&self) -> Self::Output; + + fn cosh(&self) -> Self::Output; +} + +/* + ************* Implementations ************* +*/ +impl NdTensor for ArrayBase +where + A: ComplexFloat, + D: Dimension, + S: Data, + Self: Clone, +{ + type Data = S; + type Output = nd::Array; + + fn conj(&self) -> Self::Output { + self.mapv(|x| x.conj()) + } + + fn cos(&self) -> Self::Output { + self.mapv(|x| x.cos()) + } + + fn cosh(&self) -> Self::Output { + self.mapv(|x| x.cosh()) + } +} diff --git a/data/src/traits/ext/ndview.rs b/data/src/traits/ext/ndview.rs new file mode 100644 index 00000000..56b88c3f --- /dev/null +++ b/data/src/traits/ext/ndview.rs @@ -0,0 +1,157 @@ +/* + Appellation: ndview + Contrib: FL03 +*/ +/* + Appellation: ndarray + Contrib: FL03 +*/ +use nd::prelude::*; +use nd::{Data, DataMut, DataOwned, OwnedRepr, RawData}; + +pub trait AsOwned +where + D: Dimension, + S: RawData, +{ + type Output; + + fn into_owned(self) -> Self::Output + where + A: Clone, + S: Data; + + fn to_owned(&self) -> Self::Output + where + A: Clone, + S: Data; +} + +pub trait AsShared +where + D: Dimension, + S: RawData, +{ + type Output; + + fn into_shared(self) -> Self::Output + where + S: DataOwned, + S::Elem: Clone; + + fn to_shared(&self) -> Self::Output + where + S: DataOwned, + S::Elem: Clone; +} + +pub trait NdView, D = Ix2>: AsOwned + AsShared +where + D: Dimension, + S: RawData, +{ + fn view(&self) -> ArrayView<'_, A, D> + where + A: Clone, + S: Data; + + fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> + where + A: Clone, + S: DataMut; +} + +pub trait View +where + D: Dimension, +{ + type Data: RawData; + type Output; + + fn view(&self) -> Self::Output + where + A: Clone, + Self::Data: Data; +} +pub trait ViewMut: View +where + D: Dimension, +{ + fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> + where + A: Clone, + Self::Data: DataMut; +} + +/* + ************* Implementations ************* +*/ +impl AsOwned for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Output = Array; + + fn into_owned(self) -> Self::Output + where + A: Clone, + S: Data, + { + self.into_owned() + } + + fn to_owned(&self) -> Self::Output + where + A: Clone, + S: Data, + { + self.to_owned() + } +} + +impl AsShared for ArrayBase +where + D: Dimension, + S: RawData, +{ + type Output = ArcArray; + + fn into_shared(self) -> Self::Output + where + A: Clone, + S: DataOwned, + { + self.into_shared() + } + + fn to_shared(&self) -> Self::Output + where + A: Clone, + S: DataOwned, + { + self.to_shared() + } +} + +impl NdView for ArrayBase +where + D: Dimension, + S: RawData, +{ + fn view(&self) -> ArrayView<'_, A, D> + where + A: Clone, + S: Data, + { + self.view() + } + + fn view_mut(&mut self) -> ArrayViewMut<'_, A, D> + where + A: Clone, + S: DataMut, + { + self.view_mut() + } +} diff --git a/data/src/traits/mod.rs b/data/src/traits/mod.rs index 83d994c3..14b24d38 100644 --- a/data/src/traits/mod.rs +++ b/data/src/traits/mod.rs @@ -2,10 +2,43 @@ Appellation: traits Contrib: FL03 */ -pub use self::prelude::*; +pub use self::{data::*, ext::*, records::*, shape::*}; + +pub mod build; pub mod records; +pub mod shape; + +#[doc(hidden)] +pub mod data { + pub use self::{container::*, repr::*}; + + pub(crate) mod container; + pub(crate) mod repr; + + pub(crate) mod prelude { + pub use super::container::*; + pub use super::repr::*; + } +} + +pub mod ext { + pub use self::{ndarray::*, ndtensor::*, ndview::*}; + + pub(crate) mod ndarray; + pub(crate) mod ndtensor; + pub(crate) mod ndview; + + pub(crate) mod prelude { + pub use super::ndarray::*; + pub use super::ndtensor::*; + pub use super::ndview::*; + } +} pub(crate) mod prelude { + pub use super::data::prelude::*; + pub use super::ext::prelude::*; pub use super::records::*; + pub use super::shape::*; } diff --git a/data/src/traits/shape.rs b/data/src/traits/shape.rs new file mode 100644 index 00000000..a8127e46 --- /dev/null +++ b/data/src/traits/shape.rs @@ -0,0 +1,96 @@ +/* + Appellation: shape + Contrib: FL03 +*/ +use nd::{ArrayBase, Dimension, RawData}; + +pub trait IntoPattern { + type Pattern; + + fn into_pattern(self) -> Self::Pattern; +} + +/// [Dimensional] provides a common interface for containers to access their shape and dimension. +pub trait Dimensional { + const RANK: Option = None; + + type Dim: IntoPattern; + + fn dim(&self) -> ::Pattern { + self.raw_dim().into_pattern() + } + + fn is_scalar(&self) -> bool { + self.rank() == 0 || self.shape().iter().all(|x| *x == 1) + } + + fn rank(&self) -> usize { + Self::RANK.unwrap_or(self.shape().len()) + } + + fn raw_dim(&self) -> Self::Dim; + + fn size(&self) -> usize { + self.shape().iter().product() + } + + fn shape(&self) -> &[usize]; +} + +/* + ******** implementations ******** +*/ +impl IntoPattern for D +where + D: Dimension, +{ + type Pattern = D::Pattern; + + fn into_pattern(self) -> Self::Pattern { + Dimension::into_pattern(self) + } +} + +// impl Dimensional for D +// where +// D: Dimension + IntoPattern, +// { +// type Dim = D; + +// fn dim(&self) -> D::Pattern { +// self.clone().into_pattern() +// } + +// fn raw_dim(&self) -> D { +// self.clone() +// } + +// fn shape(&self) -> &[usize] { +// D::slice(self) +// } +// } + +impl Dimensional for ArrayBase +where + D: Dimension, + S: RawData, +{ + const RANK: Option = D::NDIM; + type Dim = D; + + fn dim(&self) -> D::Pattern { + ArrayBase::dim(self) + } + + fn raw_dim(&self) -> D { + ArrayBase::raw_dim(self) + } + + fn shape(&self) -> &[usize] { + ArrayBase::shape(self) + } + + fn size(&self) -> usize { + ArrayBase::len(self) + } +} diff --git a/data/src/types/kernel.rs b/data/src/types/kernel.rs new file mode 100644 index 00000000..248ad95f --- /dev/null +++ b/data/src/types/kernel.rs @@ -0,0 +1,6 @@ +/* + Appellation: kernel + Contrib: FL03 +*/ + +pub struct Kernel; diff --git a/data/src/types/mod.rs b/data/src/types/mod.rs new file mode 100644 index 00000000..b8ca6da5 --- /dev/null +++ b/data/src/types/mod.rs @@ -0,0 +1,11 @@ +/* + Appellation: types + Contrib: FL03 +*/ +pub use self::kernel::Kernel; + +pub mod kernel; + +pub(crate) mod prelude { + pub use super::kernel::Kernel; +} diff --git a/models/linear/src/impls/impl_rand.rs b/models/linear/src/impls/impl_rand.rs index f2e602e1..28bb0126 100644 --- a/models/linear/src/impls/impl_rand.rs +++ b/models/linear/src/impls/impl_rand.rs @@ -4,7 +4,7 @@ */ #![cfg(feature = "rand")] -use crate::params::{ParamMode, ParamsBase}; +use crate::params::{LinearParams, ParamMode, ParamsBase}; use crate::{bias_dim, Linear}; use concision::init::rand::Rng; use concision::init::rand_distr::{uniform::SampleUniform, Distribution, StandardNormal}; @@ -12,30 +12,32 @@ use concision::{Initialize, InitializeExt}; use nd::*; use num::Float; -impl Linear +impl Linear where A: Clone + Float, D: RemoveAxis, K: ParamMode, + S: DataOwned, StandardNormal: Distribution, { - pub fn uniform(self) -> Self + pub fn uniform(self) -> Linear> where A: SampleUniform, ::Sampler: Clone, { - Self { + Linear { + config: self.config, params: self.params.uniform(), - ..self } } } -impl crate::LinearParams +impl ParamsBase where A: Clone + Float + SampleUniform, D: RemoveAxis, K: ParamMode, + S: RawData, StandardNormal: Distribution, ::Sampler: Clone, { @@ -48,42 +50,42 @@ where self.dk().sqrt() } - pub fn uniform(self) -> Self { + pub fn uniform(self) -> LinearParams + where + S: DataOwned, + { let dk = self.dk_sqrt(); self.uniform_between(-dk, dk) } - pub fn uniform_between(self, low: A, high: A) -> Self { - if self.is_biased() && !self.bias.is_some() { + pub fn uniform_between(self, low: A, high: A) -> LinearParams + where + S: DataOwned, + { + let weight = Array::uniform_between(self.raw_dim(), low, high); + let bias = if self.is_biased() && !self.bias.is_some() { let b_dim = bias_dim(self.raw_dim()); - Self { - bias: Some(Array::uniform_between(b_dim, low, high)), - weight: Array::uniform_between(self.raw_dim(), low, high), - _mode: self._mode, - } + Some(Array::uniform_between(b_dim, low, high)) } else if !self.is_biased() && self.bias.is_some() { - Self { - bias: None, - weight: Array::uniform_between(self.raw_dim(), low, high), - _mode: self._mode, - } + None } else { - Self { - bias: self - .bias - .as_ref() - .map(|b| Array::uniform_between(b.raw_dim(), low, high)), - weight: Array::uniform_between(self.raw_dim(), low, high), - _mode: self._mode, - } + self.bias + .as_ref() + .map(|b| Array::uniform_between(b.raw_dim(), low, high)) + }; + LinearParams { + weight, + bias, + _mode: core::marker::PhantomData::, } } } -impl Initialize for Linear +impl Initialize for Linear where D: RemoveAxis, K: ParamMode, + S: DataOwned, StandardNormal: Distribution, { type Data = OwnedRepr; diff --git a/models/linear/src/impls/model/impl_linear.rs b/models/linear/src/impls/model/impl_linear.rs index 03c97f82..49ee85ba 100644 --- a/models/linear/src/impls/model/impl_linear.rs +++ b/models/linear/src/impls/model/impl_linear.rs @@ -2,47 +2,52 @@ Appellation: impl_linear Contrib: FL03 */ -use crate::{Config, Linear, LinearParams, ParamMode}; +use crate::{Config, Linear, ParamMode, ParamsBase}; use core::borrow::{Borrow, BorrowMut}; -use nd::RemoveAxis; +use nd::{DataOwned, Ix2, RawData, RemoveAxis}; -impl Linear +impl Linear where K: ParamMode, + S: RawData, { pub fn from_features(inputs: usize, outputs: usize) -> Self where A: Clone + Default, + S: DataOwned, { let config = Config::std(inputs, outputs); - let params = LinearParams::new(config.dim()); + let params = ParamsBase::new(config.dim()); Self { config, params } } } -impl Borrow> for Linear +impl Borrow> for Linear where D: RemoveAxis, + S: RawData, { fn borrow(&self) -> &Config { &self.config } } -impl Borrow> for Linear +impl Borrow> for Linear where D: RemoveAxis, + S: RawData, { - fn borrow(&self) -> &LinearParams { + fn borrow(&self) -> &ParamsBase { &self.params } } -impl BorrowMut> for Linear +impl BorrowMut> for Linear where D: RemoveAxis, + S: RawData, { - fn borrow_mut(&mut self) -> &mut LinearParams { + fn borrow_mut(&mut self) -> &mut ParamsBase { &mut self.params } } diff --git a/models/linear/src/mlp/model.rs b/models/linear/src/mlp/model.rs new file mode 100644 index 00000000..4128cb88 --- /dev/null +++ b/models/linear/src/mlp/model.rs @@ -0,0 +1,10 @@ +/* + Appellation: model + Contrib: FL03 +*/ + +pub struct Mlp { + input: I, + hidden: H, + output: O, +} \ No newline at end of file diff --git a/models/linear/src/model/layer.rs b/models/linear/src/model/layer.rs index 486bb43d..9bb00f52 100644 --- a/models/linear/src/model/layer.rs +++ b/models/linear/src/model/layer.rs @@ -3,39 +3,56 @@ Contrib: FL03 */ use super::{Config, Layout}; -use crate::{Biased, LinearParams, ParamMode, Unbiased}; +use crate::{Biased, LinearParams, ParamMode, ParamsBase, Unbiased}; use concision::prelude::{Predict, Result}; use nd::prelude::*; -use nd::RemoveAxis; +use nd::{DataOwned, OwnedRepr, RawData, RemoveAxis}; /// An implementation of a linear model. /// /// In an effort to streamline the api, the [Linear] model relies upon a [ParamMode] type ([Biased] or [Unbiased](crate::params::mode::Unbiased)) /// which enables the model to automatically determine whether or not to include a bias term. Doing so allows the model to inherit several methods /// familar to the underlying [ndarray](https://docs.rs/ndarray) crate. -pub struct Linear +pub struct Linear> where D: Dimension, + S: RawData, { pub(crate) config: Config, - pub(crate) params: LinearParams, + pub(crate) params: ParamsBase, } -impl Linear +impl Linear> +where + K: ParamMode, +{ + pub fn std(inputs: usize, outputs: usize) -> Self + where + A: Default, + { + let config = Config::::new().with_shape((inputs, outputs)); + let params = ParamsBase::new(config.features()); + Linear { config, params } + } +} + +impl Linear where D: RemoveAxis, K: ParamMode, + S: RawData, { - mbuilder!(new where A: Default); - mbuilder!(ones where A: Clone + num::One); - mbuilder!(zeros where A: Clone + num::Zero); + mbuilder!(new where A: Default, S: DataOwned); + mbuilder!(ones where A: Clone + num::One, S: DataOwned); + mbuilder!(zeros where A: Clone + num::Zero, S: DataOwned); pub fn from_config(config: Config) -> Self where A: Clone + Default, K: ParamMode, + S: DataOwned, { - let params = LinearParams::new(config.dim()); + let params = ParamsBase::new(config.dim()); Self { config, params } } @@ -43,13 +60,14 @@ where where A: Clone + Default, K: ParamMode, + S: DataOwned, { let config = Config::::new().with_layout(layout); - let params = LinearParams::new(config.dim()); + let params = ParamsBase::new(config.dim()); Self { config, params } } - pub fn from_params(params: LinearParams) -> Self { + pub fn from_params(params: ParamsBase) -> Self { let config = Config::::new().with_shape(params.raw_dim()); Self { config, params } } @@ -67,26 +85,27 @@ where &self.config } - pub fn weights(&self) -> &Array { + pub fn weights(&self) -> &ArrayBase { self.params.weights() } - pub fn weights_mut(&mut self) -> &mut Array { + pub fn weights_mut(&mut self) -> &mut ArrayBase { self.params.weights_mut() } - pub const fn params(&self) -> &LinearParams { + pub const fn params(&self) -> &ParamsBase { &self.params } - pub fn params_mut(&mut self) -> &mut LinearParams { + pub fn params_mut(&mut self) -> &mut ParamsBase { &mut self.params } - pub fn into_biased(self) -> Linear + pub fn into_biased(self) -> Linear where A: Default, K: 'static, + S: DataOwned, { Linear { config: self.config.into_biased(), @@ -94,10 +113,11 @@ where } } - pub fn into_unbiased(self) -> Linear + pub fn into_unbiased(self) -> Linear where A: Default, K: 'static, + S: DataOwned, { Linear { config: self.config.into_unbiased(), @@ -130,40 +150,44 @@ where concision::dimensional!(params()); } -impl Linear +impl Linear where D: RemoveAxis, + S: RawData, { pub fn biased(shape: Sh) -> Self where A: Default, + S: DataOwned, Sh: ShapeBuilder, { let config = Config::::new().with_shape(shape); - let params = LinearParams::biased(config.dim()); + let params = ParamsBase::biased(config.dim()); Linear { config, params } } - pub fn bias(&self) -> &Array { + pub fn bias(&self) -> &ArrayBase { self.params().bias() } - pub fn bias_mut(&mut self) -> &mut Array { + pub fn bias_mut(&mut self) -> &mut ArrayBase { self.params_mut().bias_mut() } } -impl Linear +impl Linear where D: RemoveAxis, + S: RawData, { pub fn unbiased(shape: Sh) -> Self where A: Default, + S: DataOwned, Sh: ShapeBuilder, { let config = Config::::new().with_shape(shape); - let params = LinearParams::unbiased(config.dim()); + let params = ParamsBase::unbiased(config.dim()); Linear { config, params } } } diff --git a/models/linear/src/norm/layer/mod.rs b/models/linear/src/norm/layer/mod.rs index 6b54d6e8..28254dc1 100644 --- a/models/linear/src/norm/layer/mod.rs +++ b/models/linear/src/norm/layer/mod.rs @@ -19,13 +19,15 @@ pub(crate) mod prelude { } pub(crate) mod utils { - use nd::{Array, Axis, Dimension, RemoveAxis}; + use nd::prelude::*; + use nd::{Data, RemoveAxis}; use num::traits::{Float, FromPrimitive}; - pub(crate) fn layer_norm(x: &Array, eps: f64) -> Array + pub(crate) fn layer_norm(x: &ArrayBase, eps: f64) -> Array where A: Float + FromPrimitive, D: Dimension, + S: Data, { let mean = x.mean().unwrap(); let denom = { @@ -36,10 +38,11 @@ pub(crate) mod utils { x.mapv(|xi| (xi - mean) / denom) } - pub(crate) fn layer_norm_axis(x: &Array, axis: Axis, eps: f64) -> Array + pub(crate) fn layer_norm_axis(x: &ArrayBase, axis: Axis, eps: f64) -> Array where A: Float + FromPrimitive, D: RemoveAxis, + S: Data, { let eps = A::from(eps).unwrap(); let mean = x.mean_axis(axis).unwrap(); diff --git a/models/linear/src/norm/layer/model.rs b/models/linear/src/norm/layer/model.rs index e5dc6b67..1cca2419 100644 --- a/models/linear/src/norm/layer/model.rs +++ b/models/linear/src/norm/layer/model.rs @@ -6,7 +6,7 @@ use super::Config; use crate::{Biased, LinearParams, ParamMode, Unbiased}; use concision::Forward; use nd::prelude::*; -use nd::RemoveAxis; +use nd::{Data, RemoveAxis}; use num::traits::{Float, FromPrimitive, One, Zero}; // #62 @@ -139,14 +139,15 @@ where } } -impl Forward> for LayerNorm +impl Forward> for LayerNorm where A: Float + FromPrimitive, D: RemoveAxis, + S: Data, { type Output = Array; - fn forward(&self, x: &Array) -> Self::Output { + fn forward(&self, x: &ArrayBase) -> Self::Output { let norm = if let Some(axis) = self.config().axis() { super::layer_norm_axis(x, *axis, self.eps()) } else { @@ -156,14 +157,15 @@ where } } -impl Forward> for LayerNorm +impl Forward> for LayerNorm where A: Float + FromPrimitive, D: RemoveAxis, + S: Data, { type Output = Array; - fn forward(&self, x: &Array) -> Self::Output { + fn forward(&self, x: &ArrayBase) -> Self::Output { let norm = if let Some(axis) = self.config().axis() { super::layer_norm_axis(x, *axis, self.eps()) } else { diff --git a/models/transformers/Cargo.toml b/models/transformers/Cargo.toml index 00bf9fb7..7dc36953 100644 --- a/models/transformers/Cargo.toml +++ b/models/transformers/Cargo.toml @@ -95,6 +95,10 @@ crate-type = ["lib"] doctest = true test = true +[[test]] +name = "attention" +required-features = ["approx", "rand"] + [build-dependencies] [dependencies] @@ -128,8 +132,8 @@ version = "1" optional = true version = "0.1" -[dev-dependencies.lazy_static] -workspace = true +[dev-dependencies] +lazy_static.workspace = true [package.metadata.docs.rs] all-features = true diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index c5146a34..e80fdda9 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -2,30 +2,68 @@ Appellation: head Contrib: FL03 */ +use super::{Score, _attention}; use crate::params::QkvBase; use concision::getters; -use core::borrow::{Borrow, BorrowMut}; +use concision::nn::DropoutLayer; use nd::linalg::Dot; use nd::*; use num::complex::ComplexFloat; // #68 +/// [AttentionHead] implements the scaled dot-product attention mechanism formally defined in +/// [Attention is all you need](https://arxiv.org/abs/1706.03762). The structure is designed to +/// be flexible, relying upon the n-dimensional [QkvBase] to store the query, key, and value tensors. +/// More so, the head may be configured with an optional dropout and/or masking layers. +/// +/// ### Dropout +/// +/// The [DropoutLayer] is an optional layer applied after the softmax function is applied to the +/// score. The layer is used to prevent overfitting by randomly setting a fraction of the input +/// units to zero at each update during training time. +/// +/// ### Masking +/// +/// After computing the dot-product of the query and key tensors, an optional mask may be applied to +/// the attention score. The mask is used to prevent the model from attending to certain parts of the +/// input sequence. For example, in the case of a language model, the mask may be used to prevent the +/// model from attending to the padding tokens. pub struct AttentionHead> where D: Dimension, S: RawData, { - pub(crate) mask: Option>, + #[cfg(feature = "rand")] + pub(crate) dropout: Option, + pub(crate) mask: Option>, pub(crate) params: QkvBase, } +impl AttentionHead +where + S: RawData, +{ + pub fn std(dm: usize, dk: usize) -> Self + where + A: Default, + S: DataOwned, + { + Self::from_params(QkvBase::new((dk, dm))) + } +} + impl AttentionHead where D: Dimension, S: RawData, { pub fn from_params(params: QkvBase) -> Self { - Self { mask: None, params } + Self { + #[cfg(feature = "rand")] + dropout: None, + mask: None, + params, + } } pub fn builder(shape: Sh, builder: F) -> Self @@ -44,8 +82,8 @@ where { Self::from_params(QkvBase::from_elem(shape, value)) } - - pub fn attention(&self) -> Array + /// Computes the [Score] using scaled dot-product attention. + pub fn attention(&self) -> Score where A: ComplexFloat + ScalarOperand, S: Data, @@ -53,7 +91,11 @@ where Array: Dot, Output = Array>, { let (q, k, v) = self.qkv(); - crate::attention::scaled_dot_product_attention(q, k, v) + _attention(q, k, v, self.mask(), self.dropout()) + } + /// Returns an immutable reference to the, optional, mask. + pub fn mask(&self) -> Option<&Array> { + self.mask.as_ref() } /// Returns an immuable reference to the underlying parameters. pub const fn params(&self) -> &QkvBase { @@ -71,6 +113,30 @@ where pub fn into_qkv(self) -> (ArrayBase, ArrayBase, ArrayBase) { self.params.into_qkv() } + /// Sets the dropout layer for the [AttentionHead] + #[cfg(feature = "rand")] + pub fn set_dropout(&mut self, dropout: Option) { + self.dropout = dropout; + } + /// Sets the mask for the [AttentionHead] + pub fn set_mask(&mut self, mask: Option>) { + self.mask = mask; + } + /// Configure the [AttentionHead] with a [DropoutLayer] + #[cfg(feature = "rand")] + pub fn with_dropout(self, dropout: DropoutLayer) -> Self { + Self { + dropout: Some(dropout), + ..self + } + } + /// Consume and store a mask for the [AttentionHead] + pub fn with_mask(self, mask: Array) -> Self { + Self { + mask: Some(mask), + ..self + } + } getters!(params::<[q, k, v]> => ArrayBase); ndbuilder!(new::default() where A: Default, S: DataOwned); @@ -78,22 +144,31 @@ where ndbuilder!(zeros() where A: Clone + num::Zero, S: DataOwned); } -impl Borrow> for AttentionHead +#[cfg(feature = "rand")] +impl AttentionHead where D: Dimension, S: RawData, { - fn borrow(&self) -> &QkvBase { - self.params() + /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer. + /// With the `rand` feature flag disabled, the dropout layer is + /// unavailable and returns `None`. + pub fn dropout(&self) -> Option<&DropoutLayer> { + self.dropout.as_ref() } } -impl BorrowMut> for AttentionHead +#[cfg(not(feature = "rand"))] +impl AttentionHead where D: Dimension, S: RawData, { - fn borrow_mut(&mut self) -> &mut QkvBase { - self.params_mut() + /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer. + /// With the `rand` feature flag disabled, the dropout layer is + /// unavailable and returns `None`. + #[cfg(not(feature = "rand"))] + pub fn dropout(&self) -> Option<&DropoutLayer> { + None } } diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs index 80a264c7..a500b5f5 100644 --- a/models/transformers/src/attention/mod.rs +++ b/models/transformers/src/attention/mod.rs @@ -8,47 +8,95 @@ //! Today, these mechanisms are found in several state-of-the-art models, such as //! the Transformer model, primarily due to its capabilities in natural language //! processing (NLP) domains +pub(crate) use self::_impl_methods::*; pub use self::head::AttentionHead; +pub use self::score::Score; pub use self::utils::*; pub(crate) mod head; +pub(crate) mod score; // #69: Multi-Head Attention implementation pub mod multi; pub(crate) mod prelude { pub use super::head::AttentionHead; + pub use super::multi::prelude::*; + pub use super::score::Score; pub use super::utils::*; } +pub trait Attention { + type Output; + + fn attention(&self) -> Self::Output; +} + pub(crate) mod utils { - use concision::func::activate::Softmax; + use super::Score; + use concision::nn::DropoutLayer; use nd::linalg::Dot; - use nd::prelude::{Array, ArrayBase, ArrayView, Axis, Dimension}; - use nd::{Data, ScalarOperand}; + use nd::prelude::*; use num::complex::ComplexFloat; - pub(crate) fn scale(dk: usize) -> A + /// A functional implementation of the scaled dot-product attention mechanism; + pub fn scaled_dot_product_attention( + q: &ArrayBase, + k: &ArrayBase, + v: &ArrayBase, + mask: Option<&Array>, + dropout: Option<&DropoutLayer>, + ) -> Score where - A: ComplexFloat, + A: ComplexFloat + nd::ScalarOperand, + S: nd::Data, + D: Dimension, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, { - A::from(dk).unwrap().sqrt().recip() + super::_attention(q, k, v, mask, dropout) } +} - /// A functional implementation of the scaled dot-product attention mechanism; - pub fn scaled_dot_product_attention( +mod _impl_methods { + use super::Score; + use concision::prelude::{DropoutLayer, MaskFill, Softmax}; + use nd::linalg::Dot; + use nd::prelude::*; + use num::complex::ComplexFloat; + + pub(crate) fn _attention( q: &ArrayBase, k: &ArrayBase, v: &ArrayBase, - ) -> Array + mask: Option<&Array>, + dropout: Option<&DropoutLayer>, + ) -> Score where - A: ComplexFloat + ScalarOperand, - S: Data, + A: ComplexFloat + nd::ScalarOperand, + S: nd::Data, D: Dimension, ArrayBase: for<'a> Dot, Output = Array>, Array: Dot, Output = Array>, { - let dk = scale::(k.len_of(Axis(1))); - (q.dot(&k.t()) * dk).softmax().dot(&v) + use concision::Forward; + let dk = scale::(k.len_of(nd::Axis(1))); + let mut z = q.dot(&k.t()) * dk; + if let Some(mask) = mask { + z = z.masked_fill(mask, A::zero()); + } + z = z.softmax(); + #[cfg(feature = "rand")] + if let Some(dropout) = dropout { + z = dropout.forward(&z); + } + (z.dot(&v), z).into() + } + + pub(crate) fn scale(dk: usize) -> A + where + A: ComplexFloat, + { + A::from(dk).unwrap().sqrt().recip() } } diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/multi/config.rs new file mode 100644 index 00000000..58c510c6 --- /dev/null +++ b/models/transformers/src/attention/multi/config.rs @@ -0,0 +1,49 @@ +/* + Appellation: config + Contrib: FL03 +*/ + +pub(crate) fn dk(d_model: usize, heads: usize) -> usize { + d_model / heads +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct Config { + pub d_model: usize, + pub heads: usize, +} + +impl Config { + pub fn new() -> ConfigBuilder { + ConfigBuilder::new() + } + + pub fn d_model(&self) -> usize { + self.d_model + } + + pub fn dk(&self) -> usize { + dk(self.d_model(), self.heads()) + } + + pub fn heads(&self) -> usize { + self.heads + } +} + +impl Default for Config { + fn default() -> Self { + Self { + d_model: crate::D_MODEL, + heads: crate::HEADS, + } + } +} + +concision::builder! { + ConfigBuilder(Config) { + d_model: usize, + heads: usize, + } +} diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs index 014e29b9..e101f032 100644 --- a/models/transformers/src/attention/multi/mod.rs +++ b/models/transformers/src/attention/multi/mod.rs @@ -5,6 +5,12 @@ //! # Multi-Head Attention //! //! -pub use self::multi_head::*; +pub use self::{config::Config, multi_head::*}; +pub(crate) mod config; pub(crate) mod multi_head; + +pub(crate) mod prelude { + pub use super::config::Config as MultiHeadAttentionConfig; + pub use super::multi_head::MultiHeadAttention; +} diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs index ad36fe45..36a4051d 100644 --- a/models/transformers/src/attention/multi/multi_head.rs +++ b/models/transformers/src/attention/multi/multi_head.rs @@ -2,5 +2,76 @@ Appellation: multi_head Contrib: FL03 */ +use super::Config; +use crate::AttentionHead; +use linear::{Biased, Linear}; +use nd::prelude::*; +use nd::{DataOwned, OwnedRepr, RawData}; -pub struct MultiHeadAttention; +pub struct MultiHeadAttention> +where + D: Dimension, + S: RawData, +{ + pub(crate) config: Config, + pub(crate) head: AttentionHead, + pub(crate) linears: Vec>, +} + +impl MultiHeadAttention +where + D: Dimension, + S: RawData, +{ + pub const fn config(&self) -> &Config { + &self.config + } + + pub const fn head(&self) -> &AttentionHead { + &self.head + } + + pub fn head_mut(&mut self) -> &mut AttentionHead { + &mut self.head + } + + pub fn linears(&self) -> &[Linear] { + &self.linears + } +} + +impl MultiHeadAttention +where + S: RawData, +{ + pub fn std(d_model: usize, heads: usize) -> Self + where + A: Clone + Default, + S: DataOwned, + { + let config = Config::new().d_model(d_model).heads(heads).build(); + let linears = (0..4) + .map(|_| Linear::from_features(d_model, d_model)) + .collect(); + Self { + config, + head: AttentionHead::std(d_model, config.dk()), + linears, + } + } +} + +impl Default for MultiHeadAttention +where + A: Default, + D: Dimension, + S: DataOwned, +{ + fn default() -> Self { + Self { + config: Config::default(), + head: AttentionHead::default(), + linears: Vec::new(), + } + } +} diff --git a/models/transformers/src/attention/score.rs b/models/transformers/src/attention/score.rs new file mode 100644 index 00000000..3e1df96e --- /dev/null +++ b/models/transformers/src/attention/score.rs @@ -0,0 +1,94 @@ +/* + Appellation: score + Contrib: FL03 +*/ +use core::fmt; +use nd::{Array, Dimension}; + +/// [Score] is a created as a result of invoking an attention mechanism; +/// +/// - attention: the actual result; returns the dot product of the score with the value tensor +/// - score: the attention score tensor +#[derive(Clone, Eq, Hash, PartialEq)] +pub struct Score +where + D: Dimension, +{ + pub(crate) attention: Array, + pub(crate) score: Array, +} + +impl Score +where + D: Dimension, +{ + pub(crate) fn new(attention: Array, score: Array) -> Self { + Self { attention, score } + } + /// Consumes the instance and returns the attention tensor. + pub fn into_attention(self) -> Array { + self.attention + } + /// Consumes the container and returns the score tensor. + pub fn into_score(self) -> Array { + self.score + } + + /// Retrieve the attention tensor. + pub fn attention(&self) -> &Array { + &self.attention + } + /// Retrieve the score tensor + pub fn score(&self) -> &Array { + &self.score + } +} + +impl Copy for Score +where + A: Copy, + D: Copy + Dimension, + Array: Copy, +{ +} + +impl fmt::Debug for Score +where + A: fmt::Debug, + D: Dimension, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Score") + .field("attention", &self.attention) + .field("score", &self.score) + .finish() + } +} + +impl fmt::Display for Score +where + A: fmt::Display, + D: Dimension, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "({}, {})", self.attention, self.score) + } +} + +impl From<(Array, Array)> for Score +where + D: Dimension, +{ + fn from((attention, score): (Array, Array)) -> Self { + Self::new(attention, score) + } +} + +impl From> for (Array, Array) +where + D: Dimension, +{ + fn from(score: Score) -> Self { + (score.attention, score.score) + } +} diff --git a/models/transformers/src/codec/encoder/layer.rs b/models/transformers/src/codec/encoder/layer.rs index 10821bd3..5c00ebcf 100644 --- a/models/transformers/src/codec/encoder/layer.rs +++ b/models/transformers/src/codec/encoder/layer.rs @@ -2,12 +2,25 @@ Appellation: layer Contrib: FL03 */ +use crate::attention::multi::MultiHeadAttention; #[derive(Default)] -pub struct EncoderLayer {} +pub struct EncoderLayer { + pub(crate) attention: MultiHeadAttention, +} impl EncoderLayer { pub fn new() -> Self { - Self {} + let attention = MultiHeadAttention::default(); + + Self { attention } + } + /// Returns an immutable reference to the multi-head, self-attention layer. + pub fn attention(&self) -> &MultiHeadAttention { + &self.attention + } + /// Returns a mutable reference to the multi-head, self-attention layer. + pub fn attention_mut(&mut self) -> &mut MultiHeadAttention { + &mut self.attention } } diff --git a/models/transformers/src/codec/mod.rs b/models/transformers/src/codec/mod.rs index 3a7e3f77..52e34740 100644 --- a/models/transformers/src/codec/mod.rs +++ b/models/transformers/src/codec/mod.rs @@ -2,6 +2,13 @@ Appellation: codec Contrib: FL03 */ +//! # Codec +//! +//! The `codec` module implements the [Decoder] and [Encoder] layers of the [Transformer](crate::Transformer) model. +//! Each layer has two sublayers, namely: +//! - multi-head, self-attention layer +//! - fully-connected, piecewise feed-forward network. +//! pub use self::{decoder::Decoder, encoder::Encoder, model::*}; pub(crate) mod model; diff --git a/models/transformers/src/codec/model.rs b/models/transformers/src/codec/model.rs index 494c0a0e..470938a5 100644 --- a/models/transformers/src/codec/model.rs +++ b/models/transformers/src/codec/model.rs @@ -24,11 +24,13 @@ impl Codec { ); } -builder!(CodecBuilder:: { - ctx: Context, - decoder: Decoder, - encoder: Encoder, -}); +builder! { + CodecBuilder(Codec) { + ctx: Context, + decoder: Decoder, + encoder: Encoder, + } +} #[derive(Default)] pub struct Generator { diff --git a/models/transformers/src/impls/impl_head.rs b/models/transformers/src/impls/impl_head.rs index fa22f80a..4160975d 100644 --- a/models/transformers/src/impls/impl_head.rs +++ b/models/transformers/src/impls/impl_head.rs @@ -2,10 +2,48 @@ Appellation: impl_head Contrib: FL03 */ -use crate::attention::AttentionHead; +use crate::attention::{Attention, AttentionHead, Score}; use crate::params::QkvBase; +use core::borrow::{Borrow, BorrowMut}; +use nd::linalg::Dot; use nd::prelude::*; -use nd::{DataOwned, RawDataClone}; +use nd::{Data, DataOwned, RawData, RawDataClone, ScalarOperand}; +use num::complex::ComplexFloat; + +impl Attention for AttentionHead +where + A: ComplexFloat + ScalarOperand, + D: Dimension, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, +{ + type Output = Score; + + fn attention(&self) -> Self::Output { + self.attention() + } +} + +impl Borrow> for AttentionHead +where + D: Dimension, + S: RawData, +{ + fn borrow(&self) -> &QkvBase { + self.params() + } +} + +impl BorrowMut> for AttentionHead +where + D: Dimension, + S: RawData, +{ + fn borrow_mut(&mut self) -> &mut QkvBase { + self.params_mut() + } +} impl Clone for AttentionHead where @@ -15,6 +53,8 @@ where { fn clone(&self) -> Self { Self { + #[cfg(feature = "rand")] + dropout: self.dropout.clone(), mask: self.mask.clone(), params: self.params.clone(), } @@ -26,6 +66,7 @@ where A: Copy, D: Copy + Dimension, S: Copy + RawDataClone, + Array: Copy, { } @@ -39,3 +80,13 @@ where Self::from_params(QkvBase::default()) } } + +impl From> for AttentionHead +where + D: Dimension, + S: RawData, +{ + fn from(params: QkvBase) -> Self { + Self::from_params(params) + } +} diff --git a/models/transformers/src/impls/impl_init.rs b/models/transformers/src/impls/impl_init.rs new file mode 100644 index 00000000..1ed7effd --- /dev/null +++ b/models/transformers/src/impls/impl_init.rs @@ -0,0 +1,64 @@ +/* + Appellation: init + Contrib: FL03 +*/ +#![cfg(feature = "rand")] +use crate::QkvBase; +use concision::Initialize; +use concision::init::rand::Rng; +use concision::init::rand_distr::{Distribution, StandardNormal}; +use concision::init::rand_distr::uniform::SampleUniform; +use nd::{ArrayBase, DataOwned, Dimension, ShapeBuilder}; + +impl Initialize for QkvBase where + D: RemoveAxis, + S: DataOwned, + StandardNormal: Distribution, +{ + type Data = S; + + fn rand(shape: Sh, distr: Dstr) -> Self + where + Sh: ShapeBuilder, + Dstr: Clone + Distribution, + { + let dim = shape.into_shape().raw_dim().clone(); + Self { + q: ArrayBase::rand(dim.clone(), distr.clone()), + k: ArrayBase::rand(dim.clone(), distr.clone()), + v: ArrayBase::rand(dim, distr) + } + } + + fn rand_with(shape: Sh, distr: Ds, rng: &mut R) -> Self + where + R: Rng + ?Sized, + Ds: Clone + Distribution, + Sh: ShapeBuilder, + { + let dim = shape.into_shape().raw_dim().clone(); + Self { + q: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut rng), + k: ArrayBase::rand_with(dim.clone(), distr.clone(), &mut rng), + v: ArrayBase::rand_with(dim, distr, &mut rng) + } + } + + fn init_rand(self, distr: Ds) -> Self + where + Ds: Clone + Distribution, + Self: Sized, + { + Self::rand(self.dim(), distr) + } + + fn init_rand_with(self, distr: Ds, rng: &mut R) -> Self + where + R: Rng + ?Sized, + Ds: Clone + Distribution, + { + Self::rand_with(self.dim(), distr, rng) + } +} + + diff --git a/models/transformers/src/impls/impl_linalg.rs b/models/transformers/src/impls/impl_linalg.rs index ce069afe..c2ab8812 100644 --- a/models/transformers/src/impls/impl_linalg.rs +++ b/models/transformers/src/impls/impl_linalg.rs @@ -2,7 +2,7 @@ Appellation: impl_linalg Contrib: FL03 */ -use crate::params::{Params, QkvBase}; +use crate::params::{Qkv, QkvBase}; use concision::Matmul; use nd::linalg::Dot; use nd::*; @@ -17,7 +17,7 @@ where T: Data, ArrayBase: Dot, Output = Array>, { - type Output = Params; + type Output = Qkv; fn matmul(&self, rhs: &QkvBase) -> Self::Output { QkvBase { @@ -38,7 +38,7 @@ where T: Data, ArrayBase: Dot, Output = Array>, { - type Output = Params; + type Output = Qkv; fn matmul(&self, rhs: &ArrayBase) -> Self::Output { QkvBase { diff --git a/models/transformers/src/impls/impl_params.rs b/models/transformers/src/impls/impl_params.rs index 9736c1b0..2ea7dec4 100644 --- a/models/transformers/src/impls/impl_params.rs +++ b/models/transformers/src/impls/impl_params.rs @@ -6,10 +6,12 @@ use crate::params::QkvBase; use nd::prelude::*; use nd::{Data, DataOwned, RawDataClone}; -impl Clone for QkvBase +pub(crate) type ThreeTuple = (A, B, C); + +impl Clone for QkvBase where D: Dimension, - S: RawDataClone, + S: RawDataClone, { fn clone(&self) -> Self { Self { @@ -20,18 +22,18 @@ where } } -impl Copy for QkvBase +impl Copy for QkvBase where D: Copy + Dimension, - S: Copy + RawDataClone, + S: Copy + RawDataClone, { } -impl Default for QkvBase +impl Default for QkvBase where + A: Default, D: Dimension, - S: DataOwned, - S::Elem: Default, + S: DataOwned, { fn default() -> Self { Self { @@ -49,7 +51,7 @@ where S: Data, { fn eq(&self, other: &Self) -> bool { - self.q == *other.q() && self.k == *other.k() && self.v == *other.v() + self.q() == other.q() && self.k() == other.k() && self.v() == other.v() } } @@ -64,6 +66,21 @@ where ArrayBase: PartialEq>, { fn eq(&self, other: &ArrayBase) -> bool { - self.q == *other && self.k == *other && self.v == *other + self.q() == other && self.k() == other && self.v() == other + } +} + +impl PartialEq>> for QkvBase +where + A: PartialEq, + B: PartialEq, + D: Dimension, + S: Data, + S2: Data, + D2: Dimension, + ArrayBase: PartialEq>, +{ + fn eq(&self, (q, k, v): &ThreeTuple>) -> bool { + self.q() == q && self.k() == k && self.v() == v } } diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs index ed9cf63e..89cc41f1 100644 --- a/models/transformers/src/lib.rs +++ b/models/transformers/src/lib.rs @@ -17,7 +17,7 @@ extern crate concision_core as concision; extern crate concision_linear as linear; extern crate ndarray as nd; -pub use self::attention::AttentionHead; +pub use self::attention::{scaled_dot_product_attention, AttentionHead}; pub use self::params::*; pub use self::primitives::*; pub use self::transformer::Transformer; @@ -29,6 +29,7 @@ pub(crate) mod transformer; pub mod attention; pub mod codec; +pub mod model; pub mod ops; pub mod params; @@ -40,6 +41,5 @@ pub(crate) mod impls { pub mod prelude { pub use super::attention::prelude::*; - pub use super::primitives::*; pub use super::Transformer; } diff --git a/models/transformers/src/macros.rs b/models/transformers/src/macros.rs index fd05142e..e25dafae 100644 --- a/models/transformers/src/macros.rs +++ b/models/transformers/src/macros.rs @@ -3,81 +3,83 @@ Contrib: FL03 */ +#[macro_use] +mod params; + macro_rules! ndbuilder { - ($method:ident$(::$call:ident)?() where $($rest:tt)*) => { - ndbuilder!(@impl $method$(::$call)?() where $($rest)*); + ($method:ident$(::$call:ident)?() $($where:tt)*) => { + ndbuilder!(@impl $method$(::$call)?() $($where)*); }; - (@impl $method:ident() where $($rest:tt)*) => { - ndbuilder!(@impl $method::$method() where $($rest)*); + (@impl $method:ident() $($where:tt)*) => { + ndbuilder!(@impl $method::$method() $($where)*); }; - (@impl $method:ident::$call:ident() where $($rest:tt)*) => { - pub fn $method>(shape: Sh) -> Self where $($rest)* { + (@impl $method:ident::$call:ident() $($where:tt)*) => { + pub fn $method>(shape: Sh) -> Self $($where)* { Self::builder(shape, ndarray::ArrayBase::$call) } }; } -// # TODO: -macro_rules! ndview { - ($method:ident::$($rest:tt)*) => { - ndview!(@impl $method.$method::$($rest)*); - }; - ($method:ident.$call:ident::$($rest:tt)*) => { - ndview!(@impl $method.$call::$($rest)*); - }; - (@impl $method:ident.$call:ident::<$view:ident>(self) where $($rest:tt)*) => { - pub fn $method(self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } - }; - (@impl $method:ident.$call:ident::<$view:ident>(mut self) where $($rest:tt)*) => { - pub fn $method(mut self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) +#[allow(unused_macros)] +macro_rules! cbuilder { + (@impl derive: [$($D:ident),* $(,)?], $name:ident {$($vis:vis $field:ident: $type:ty),*}) => { + #[derive(Clone, Debug, PartialEq, $($D),*)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + pub struct $name { + $($vis $field: $type),* } - }; - (@impl $method:ident.$call:ident::<$view:ident>(&self) where $($rest:tt)*) => { - pub fn $method(&self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) - } - }; - (@impl $method:ident.$call:ident::<$view:ident>(&mut self) where $($rest:tt)*) => { - pub fn $method(&mut self) -> $crate::params::QkvBase<$view, D> - where - $($rest)* - { - ndview!(@apply $call(self)) + impl $name { + paste::paste! { + pub fn new() -> [<$name Builder>] { + [<$name Builder>]::new() + } + } + + $( + pub fn $field(mut self, $field: $type) -> Self { + self.$field = $field; + self + } + )* } }; - (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) where $($rest:tt)*) => { - pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D> - where - $($rest)* - { - ndview!(@apply $call(self)) + (@builder derive: [$($D:ident),* $(,)?], $name:ident {$($field:ident: $type:ty),*}) => { + pub struct $name { + $(pub(crate) $field: $type),* } - }; - (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) where $($rest:tt)*) => { - pub fn $method(&mut self) -> $crate::params::QkvBase<$view<&'_ mut A>, D> - where - $($rest)* - { - ndview!(@apply $call(self)) + + impl $name { + pub fn new() -> Self { + Self { + $($field: None),* + } + } + + $( + pub fn $field(mut self, $field: $type) -> Self { + self.$field = Some($field); + self + } + )* + + pub fn build(&self) -> Config { + Config { + $($field: self.$field.unwrap_or_else(|| crate::$field),)* + } + } } - }; - (@apply $call:ident($self:expr)) => { - $crate::params::QkvBase { - q: $self.q.$call(), - k: $self.k.$call(), - v: $self.v.$call(), + + impl Default for $name { + fn default() -> Self { + Self::new() + } } }; } + +/// This macro helps create a stack of identical sublayers. +/// +#[allow(unused_macros)] +macro_rules! sublayer { + (@impl heads: $heads:expr) => {}; +} diff --git a/models/transformers/src/macros/params.rs b/models/transformers/src/macros/params.rs new file mode 100644 index 00000000..f7e12e32 --- /dev/null +++ b/models/transformers/src/macros/params.rs @@ -0,0 +1,50 @@ +/* + Appellation: params + Contrib: FL03 +*/ + +macro_rules! qkv_view { + ($method:ident$(.$call:ident)?::$($rest:tt)*) => { + qkv_view!(@impl $method$(.$call)?::$($rest)*); + }; + (@impl $method:ident::$($rest:tt)*) => { + qkv_view!(@impl $method.$method::$($rest)*); + }; + (@impl $method:ident.$call:ident::<$view:ident>(self) $($rest:tt)*) => { + pub fn $method(self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<$view:ident>(mut self) $($rest:tt)*) => { + pub fn $method(mut self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<$view:ident>(&self) $($rest:tt)*) => { + pub fn $method(&self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<$view:ident>(&mut self) $($rest:tt)*) => { + pub fn $method(&mut self) -> $crate::params::QkvBase<$view, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<'a, $view:ident>(&self) $($rest:tt)*) => { + pub fn $method(&self) -> $crate::params::QkvBase<$view<&'_ A>, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@impl $method:ident.$call:ident::<'a, $view:ident>(&mut self) $($rest:tt)*) => { + pub fn $method(&mut self) -> $crate::params::QkvBase<$view<&'_ mut A>, D> $($rest)* { + qkv_view!(@apply $call(self)) + } + }; + (@apply $call:ident($self:expr)) => { + $crate::params::QkvBase { + q: $self.q.$call(), + k: $self.k.$call(), + v: $self.v.$call(), + } + }; +} diff --git a/models/transformers/src/model/mod.rs b/models/transformers/src/model/mod.rs new file mode 100644 index 00000000..ac227da3 --- /dev/null +++ b/models/transformers/src/model/mod.rs @@ -0,0 +1,6 @@ +/* + Appellation: model + Contrib: FL03 +*/ + +pub mod sublayer; diff --git a/models/transformers/src/model/sublayer.rs b/models/transformers/src/model/sublayer.rs new file mode 100644 index 00000000..a1a5fbe7 --- /dev/null +++ b/models/transformers/src/model/sublayer.rs @@ -0,0 +1,74 @@ +/* + Appellation: sublayer + Contrib: FL03 +*/ +#![cfg(feature = "rand")] +use concision::nn::DropoutLayer; +use concision::Forward; +use linear::{Biased, LayerNorm, ParamMode, Unbiased}; +use nd::prelude::*; +use nd::{DataOwned, RemoveAxis, ScalarOperand}; +use num::traits::{Float, FromPrimitive}; + +/// A residual connection followed by a [layer norm](LayerNorm) +/// [Transformer](crate::Transformer) +pub struct Sublayer +where + D: Dimension, +{ + pub(crate) dropout: DropoutLayer, + pub(crate) norm: LayerNorm, +} + +impl Sublayer +where + D: RemoveAxis, +{ + pub fn new(shape: Sh, dropout: f64) -> Self + where + A: Default, + K: ParamMode, + Sh: ShapeBuilder, + { + Self { + dropout: DropoutLayer::new(dropout), + norm: LayerNorm::new(shape), + } + } + + pub fn dropout(&self) -> &DropoutLayer { + &self.dropout + } + + pub fn norm(&self) -> &LayerNorm { + &self.norm + } +} + +impl Forward> for Sublayer +where + A: Float + FromPrimitive + ScalarOperand, + D: RemoveAxis, + S: DataOwned, +{ + type Output = Array; + + fn forward(&self, input: &ArrayBase) -> Self::Output { + let normal = self.norm().forward(input); + input + self.dropout().forward(&normal) + } +} + +impl Forward> for Sublayer +where + A: Float + FromPrimitive + ScalarOperand, + D: RemoveAxis, + S: DataOwned, +{ + type Output = Array; + + fn forward(&self, input: &ArrayBase) -> Self::Output { + let normal = self.norm().forward(input); + input + self.dropout().forward(&normal) + } +} diff --git a/models/transformers/src/ops/merge.rs b/models/transformers/src/ops/merge.rs index 747cae82..c7e66e3e 100644 --- a/models/transformers/src/ops/merge.rs +++ b/models/transformers/src/ops/merge.rs @@ -2,9 +2,15 @@ Appellation: merge Contrib: FL03 */ +use super::_merge_dim; use concision::NdResult; -use nd::prelude::*; -use nd::{Data, RemoveAxis}; +use nd::{Array, ArrayBase, Data, Dimension, RemoveAxis}; + +pub trait DimMerge { + type Output; + + fn merge(&self, tgt: usize) -> Self::Output; +} // #67: Optimize the Merge trait pub trait Merge { @@ -20,6 +26,19 @@ pub trait Merge { /* ************* Implementations ************* */ +impl DimMerge for D +where + D: RemoveAxis, + D::Smaller: Dimension, + D::Larger: Dimension, +{ + type Output = D::Smaller; + + fn merge(&self, tgt: usize) -> Self::Output { + _merge_dim(self, tgt) + } +} + impl Merge for ArrayBase where A: Clone, @@ -36,7 +55,6 @@ where } fn merge_along(&self, swap: usize) -> NdResult { - use ndarray::Order; - super::merger(self, swap, swap + 1, Order::RowMajor) + super::_merge(self, swap, swap + 1, super::ORDER) } } diff --git a/models/transformers/src/ops/mod.rs b/models/transformers/src/ops/mod.rs index 778e4abc..6612af22 100644 --- a/models/transformers/src/ops/mod.rs +++ b/models/transformers/src/ops/mod.rs @@ -2,21 +2,29 @@ Appellation: ops Contrib: FL03 */ -pub use self::{merge::*, split::*, utils::*}; +pub use self::prelude::*; -pub(crate) mod merge; -pub(crate) mod split; +mod merge; +mod split; + +pub(crate) mod prelude { + pub use super::merge::*; + pub use super::split::*; + pub(crate) use super::utils::*; +} + +pub(crate) const ORDER: nd::Order = nd::Order::RowMajor; pub(crate) mod utils { use concision::NdResult; use nd::prelude::*; use nd::{Data, Order, RemoveAxis}; - #[doc(hidden)] - pub fn merge( + pub(crate) fn _merge( arr: &ArrayBase, src: usize, tgt: usize, + order: Order, ) -> NdResult> where A: Clone, @@ -25,83 +33,68 @@ pub(crate) mod utils { D::Smaller: Dimension, ArrayBase: Clone, { - merger(arr, src, tgt, Order::RowMajor) + let shape = _merge_dim(&arr.raw_dim(), src); + let mut head = arr.clone(); + head.swap_axes(src, tgt); + head.to_shape((shape, order)).map(|x| x.to_owned()) } - pub(crate) fn merger( + pub(crate) fn _split( arr: &ArrayBase, - src: usize, - tgt: usize, + h: usize, order: Order, - ) -> NdResult> + ) -> NdResult> where A: Clone, - D: RemoveAxis, + D: Dimension, + E: RemoveAxis, S: Data, - D::Smaller: Dimension, ArrayBase: Clone, { - let shape = merge_dims(arr.raw_dim(), src); - let mut head = arr.clone(); + let src = if arr.ndim() >= 2 { arr.ndim() - 2 } else { 0 }; + let tgt = src + 1; + let shape: E = _split_dim(&arr.raw_dim(), h); + let mut head = arr.to_shape((shape, order))?.to_owned(); head.swap_axes(src, tgt); - head.to_shape((shape, order)).map(|x| x.to_owned()) + Ok(head) } - - #[doc(hidden)] - pub fn merge_dims(dim: D, src: usize) -> D::Smaller + /// Creates the new dimension after merging two axes. + pub(crate) fn _merge_dim(dim: &D, axis: usize) -> D::Smaller where D: RemoveAxis, D::Smaller: Dimension, { // create a new dimension with one less axis; initialized with zeros - let mut new_dim = ::Smaller::zeros(dim.ndim() - 1); + let mut dn = ::Smaller::zeros(dim.ndim() - 1); // create a mutable vector from the slice let mut shape = dim.slice().to_vec(); // multiply the last axis by the target - shape[new_dim.ndim()] *= shape[src]; + shape[dn.ndim()] *= shape[axis]; // remove the last dimension - shape.remove(src); - - new_dim.slice_mut().copy_from_slice(&shape); - new_dim - } - - #[doc(hidden)] - pub fn merge_batch(heads: &Array4) -> NdResult> - where - T: Clone, - { - let (batch, n, seq, query) = heads.dim(); - let mut tmp = heads.clone(); - // swap the head and sequence axes - tmp.swap_axes(1, 2); - // reshape the qkv matrix into a 2d array - tmp.into_shape((batch, seq, n * query)) - } + shape.remove(axis); - pub fn split_heads(param: &Array2, h: usize) -> NdResult> - where - T: Clone, - { - let dim = param.shape().last().unwrap() / h; - // reshape the qkv matrix into a 3d array - let mut res = param.clone().into_shape((param.shape()[0], h, dim))?; - // swap the sequence and head axes - res.swap_axes(0, 1); - Ok(res) + dn.slice_mut().copy_from_slice(&shape); + dn } - pub fn split_batch(param: &Array3, h: usize) -> NdResult> + pub(crate) fn _split_dim(dim: &D::Smaller, h: usize) -> D where - T: Clone, + D: RemoveAxis, + D::Smaller: Dimension, { - let dim = param.shape().last().unwrap() / h; - // reshape the qkv matrix into a 3d array - let mut res = param - .clone() - .into_shape((param.shape()[0], param.shape()[1], h, dim))?; - // swap the sequence and head axes - res.swap_axes(1, 2); - Ok(res) + let rank = dim.ndim() + 1; + // create a new dimension with one less axis; initialized with zeros + let mut new_dim = D::zeros(rank); + // create a mutable vector from the slice + let mut shape = dim.slice().to_vec(); + // get and remove the last axis + let bx = shape.pop().unwrap() / h; + // extend the shape with the new axes + shape.push(h); + shape.push(bx); + // shape.swap(rank - 2, rank - 3); + // copy the values into the new dimension + new_dim.slice_mut().copy_from_slice(&shape); + new_dim } } diff --git a/models/transformers/src/ops/split.rs b/models/transformers/src/ops/split.rs index 3a182710..d98d861d 100644 --- a/models/transformers/src/ops/split.rs +++ b/models/transformers/src/ops/split.rs @@ -2,49 +2,76 @@ Appellation: split Contrib: FL03 */ -use ndarray::prelude::{Array2, Array3, Array4}; -use ndarray::ShapeError; - -// pub fn split(param: &Array, heads: usize) -> Result, ShapeError> { -// let mut dim = param.dim() -// let query = param.shape().last().unwrap() / heads; -// // reshape the qkv matrix into a 3d array -// let mut res = param.clone().into_shape((param.shape()[0], heads, query))?; -// // swap the sequence and head axes -// res.swap_axes(0, 1); -// Ok(res) -// } +use ndarray::{Array, ArrayBase, Data, Dimension, RemoveAxis, ShapeError}; + +/// Split a dimension into two parts +pub trait DimSplit { + type Output; + + fn split(&self, h: usize) -> Self::Output; +} -pub trait Split { +pub trait SplitHead { type Output; fn split(&self, heads: usize) -> Result; } -impl Split for Array2 { - type Output = Array3; - - fn split(&self, heads: usize) -> Result { - let (seq, model) = self.dim(); - let query = model / heads; - // reshape the qkv matrix into a 3d array - let mut res = self.clone().into_shape((seq, heads, query))?; - // swap the sequence and head axes - res.swap_axes(0, 1); - Ok(res) +/* + ************* Implementations ************* +*/ + +impl DimSplit for D +where + D: Dimension, + E: RemoveAxis, +{ + type Output = E; + + fn split(&self, h: usize) -> Self::Output { + super::utils::_split_dim(self, h) } } -impl Split for Array3 { - type Output = Array4; - - fn split(&self, heads: usize) -> Result { - let (batch, seq, model) = self.dim(); - let query = model / heads; - // reshape the qkv matrix into a 3d array - let mut res = self.clone().into_shape((batch, seq, heads, query))?; - // swap the sequence and head axes - res.swap_axes(1, 2); - Ok(res) +impl SplitHead for ArrayBase +where + A: Clone, + D: Dimension, + E: RemoveAxis, + S: Data, + ArrayBase: Clone, +{ + type Output = Array; + + fn split(&self, h: usize) -> Result { + super::_split(self, h, super::ORDER) } } + +// impl Split for Array2 { +// type Output = Array3; + +// fn split(&self, heads: usize) -> Result { +// let (seq, model) = self.dim(); +// let query = model / heads; +// // reshape the qkv matrix into a 3d array +// let mut res = self.clone().into_shape((seq, heads, query))?; +// // swap the sequence and head axes +// res.swap_axes(0, 1); +// Ok(res) +// } +// } + +// impl Split for Array3 { +// type Output = Array4; + +// fn split(&self, heads: usize) -> Result { +// let (batch, seq, model) = self.dim(); +// let query = model / heads; +// // reshape the qkv matrix into a 3d array +// let mut res = self.clone().into_shape((batch, seq, heads, query))?; +// // swap the sequence and head axes +// res.swap_axes(1, 2); +// Ok(res) +// } +// } diff --git a/models/transformers/src/params/mod.rs b/models/transformers/src/params/mod.rs index 367f8b2a..ba79e10f 100644 --- a/models/transformers/src/params/mod.rs +++ b/models/transformers/src/params/mod.rs @@ -4,11 +4,12 @@ */ pub use self::{item::*, store::QkvBase}; -pub(crate) mod item; -pub(crate) mod store; +mod store; + +pub mod item; macro_rules! params_ty { - ($target:ident: [$($name:ident<$(&$lt:lifetime)?$repr:ident>),* $(,)?]) => { + ($target:ident {$($name:ident: $(&$lt:lifetime)? $repr:ident),* $(,)?}) => { $(params_ty!(@impl $target: $name<$(&$lt)? $repr>);)* }; (@impl $target:ident: $name:ident<$repr:ident>) => { @@ -20,16 +21,17 @@ macro_rules! params_ty { } params_ty!( - QkvBase: [ - Params, - ArcParams, - ParamsView<&'a ViewRepr>, - ] + QkvBase { + Qkv: OwnedRepr, + ArcQkv: OwnedArcRepr, + ViewQkv: &'a ViewRepr, + + } ); #[allow(unused_imports)] pub(crate) mod prelude { pub use super::item::{Entry, QKV}; pub use super::store::QkvBase; - pub use super::{ArcParams, Params}; + pub use super::{ArcQkv, Qkv, ViewQkv}; } diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/store.rs index 90c13693..f59ee6eb 100644 --- a/models/transformers/src/params/store.rs +++ b/models/transformers/src/params/store.rs @@ -2,10 +2,16 @@ Appellation: params Contrib: FL03 */ +use crate::attention::{Score, _attention}; +use concision::nn::DropoutLayer; use concision::{dimensional, getters}; +use nd::linalg::Dot; use nd::*; +use num::complex::ComplexFloat; use num::traits::{One, Zero}; +/// [QkvBase] is a container for the query, key, and value arrays used in the +/// attention mechanism of the transformer model. pub struct QkvBase, D = Ix2> where D: Dimension, @@ -72,12 +78,53 @@ where dimensional!(q()); - ndview!(into_owned::(self) where A: Clone, S: Data); - ndview!(to_owned::(&self) where A: Clone, S: Data); + qkv_view!(into_owned::(self) where A: Clone, S: Data); + qkv_view!(to_owned::(&self) where A: Clone, S: Data); - ndview!(into_shared::(self) where A: Clone, S: DataOwned); - ndview!(to_shared::(&self) where A: Clone, S: DataShared); + qkv_view!(into_shared::(self) where A: Clone, S: DataOwned); + qkv_view!(to_shared::(&self) where A: Clone, S: DataShared); - ndview!(view::<'a, ViewRepr>(&self) where S: Data); - ndview!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut); + qkv_view!(view::<'a, ViewRepr>(&self) where S: Data); + qkv_view!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut); +} + +#[cfg(not(feature = "rand"))] +impl QkvBase +where + D: Dimension, + S: RawData, + A: Clone, +{ + /// Computes the [Score] using scaled dot-product attention. + pub fn attention(&self, dropout: Option, mask: Option<&Array>) -> Score + where + A: ComplexFloat + ScalarOperand, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, + { + let (q, k, v) = self.qkv(); + _attention(q, k, v, mask, None) + } +} + +#[cfg(feature = "rand")] +impl QkvBase +where + D: Dimension, + S: RawData, + A: Clone, +{ + /// Computes the [Score] using scaled dot-product attention. + pub fn attention(&self, dropout: Option, mask: Option<&Array>) -> Score + where + A: ComplexFloat + ScalarOperand, + S: Data, + ArrayBase: for<'a> Dot, Output = Array>, + Array: Dot, Output = Array>, + { + let dropout = dropout.map(DropoutLayer::new); + let (q, k, v) = self.qkv(); + _attention(q, k, v, mask, dropout.as_ref()) + } } diff --git a/models/transformers/src/primitives.rs b/models/transformers/src/primitives.rs index 96db829b..3b30e7aa 100644 --- a/models/transformers/src/primitives.rs +++ b/models/transformers/src/primitives.rs @@ -5,6 +5,18 @@ pub use self::consts::*; pub mod consts { + /// The default dimension of the model; i.e. the number of inputs + pub const D_MODEL: usize = 512; + /// The default size of the network; i.e. the number of neurons in the network + pub const D_NETWORK: usize = 2048; + /// The default dimension of the key and query vectors + pub const DK: usize = D_MODEL / HEADS; + /// The default number of attention heads + pub const HEADS: usize = 8; /// The default number of layers used for the encoder / decoder. pub const N: usize = 6; } + +pub fn outputs_from_ratio(model: usize, network: usize) -> usize { + network / model +} diff --git a/models/transformers/tests/attention.rs b/models/transformers/tests/attention.rs index db1efe2a..6bc023af 100644 --- a/models/transformers/tests/attention.rs +++ b/models/transformers/tests/attention.rs @@ -5,39 +5,18 @@ extern crate concision_core as concision; extern crate concision_transformers as transformers; -use concision::{linarr, Matmul}; -use transformers::{AttentionHead, Params}; +use approx::AbsDiffEq; +use transformers::AttentionHead; use ndarray::prelude::*; #[test] -fn test_qkv() { - let shape = (2048, 10); - let params = Params::::new(shape); - assert_eq!(params.q(), &Array::default(shape)); -} - -#[test] -fn test_qkv_matmul() { - let shape = (2048, 10); - // generate some sample data - let data = linarr(shape).unwrap(); - // initialize the parameters - let params = Params::::ones(shape); - // calculate the expected result - let exp = Array2::::ones(shape).dot(&data.t()); - // calculate the result - let res = params.matmul(&data.t()); - // compare the results - assert_eq!(res.q(), &exp); - assert_eq!(res.k(), &exp); - assert_eq!(res.v(), &exp); -} - -#[test] -fn test_attention_head() { - let shape = (30, 3); +fn attention_head() { + let shape = (3, 3); let head = AttentionHead::::ones(shape); assert_eq!(head.q(), &Array::ones(shape)); + let exp = Array2::from_elem(shape, 1f64 / 3f64); + let score = head.attention(); + assert!(score.attention().abs_diff_eq(&exp, 1e-6)); } diff --git a/models/transformers/tests/ops.rs b/models/transformers/tests/ops.rs index c39b8efa..687b50db 100644 --- a/models/transformers/tests/ops.rs +++ b/models/transformers/tests/ops.rs @@ -4,22 +4,24 @@ */ extern crate concision_core as concision; extern crate concision_transformers as transformers; +extern crate ndarray as nd; use concision::linarr; -use ndarray::prelude::*; +use nd::prelude::*; use transformers::ops::*; +pub const HEADS: usize = 2; +pub const ORDER: nd::Order = nd::Order::RowMajor; + #[test] fn test_merge() { let shape = (3, 4, 5); let dout = (4, 15); let arr = linarr::(shape.clone()).unwrap(); let a = arr.clone().merge().unwrap(); - let b = merge(&arr, 0, 1).unwrap(); assert_eq!(a.dim(), dout); - assert_eq!(a.dim(), b.dim()); - assert_eq!(a, b); + assert_eq!(a, utils::merge3(&arr).unwrap()); } #[test] @@ -28,25 +30,100 @@ fn test_merge_batch() { let dout = (2, 4, 15); let arr = linarr::(shape).unwrap(); let a = arr.merge().unwrap(); - let b = merge(&arr, 1, 2).unwrap(); assert_eq!(a.dim(), dout); - assert_eq!(a, b); + assert_eq!(a, utils::merge4(&arr).unwrap()); +} + +#[test] +fn test_split() { + let heads = 2; + let shape = (4, 6); + let arr = linarr::(shape).unwrap(); + let a = arr.split(heads).unwrap(); + + assert_eq!(a.dim(), (heads, 4, 3)); + assert_eq!(a, utils::split_heads(&arr, heads).unwrap()); +} + +#[test] +fn test_split_batch() { + let heads = 2; + let shape = (3, 4, 6); + let arr = linarr::(shape).unwrap(); + let a = arr.split(heads).unwrap(); + + assert_eq!(a.dim(), (3, heads, 4, 3)); + assert_eq!(a, utils::split_batch(&arr, heads).unwrap()); } #[test] fn reshape_ops() { - let dim_input: [usize; 3] = [2, 4, 6]; // (batch, seq, model) - let dim_split = [2, 2, 4, 3]; // (batch, heads, seq, model) - let data = linarr::(dim_input).unwrap(); - - let a = split_batch(&data, 2).unwrap(); - let b = a.merge().unwrap(); // merge_batch(&a).unwrap(); - - assert_eq!(a.shape(), &dim_split); - assert_eq!(b.shape(), &dim_input); - assert_eq!(a, data.split(2).unwrap()); - for (i, &j) in b.indexed_iter() { - assert_eq!(j, data[i]); + let shape = (2, 4, 6); + let data = linarr::(shape).unwrap(); + + let a = data.split(HEADS).unwrap(); + assert_eq!(a.dim(), (2, HEADS, 4, 3)); + let b = a.merge().unwrap(); + assert_eq!(b.dim(), shape); + // verify that doing the ops consecutively is the identity + assert_eq!(b, data); +} + +#[allow(dead_code)] +pub(crate) mod utils { + use concision::NdResult; + use ndarray::*; + + pub fn merge3(heads: &Array3) -> NdResult> + where + T: Clone, + { + let (n, seq, query) = heads.dim(); + let shape = (seq, n * query); + let mut tmp = heads.clone(); + // swap the head and sequence axes + tmp.swap_axes(0, 1); + // reshape the qkv matrix into a 2d array + tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned()) + } + + pub fn merge4(heads: &Array4) -> NdResult> + where + T: Clone, + { + let (batch, n, seq, query) = heads.dim(); + let shape = (batch, seq, n * query); + let mut tmp = heads.clone(); + // swap the head and sequence axes + tmp.swap_axes(1, 2); + // reshape the qkv matrix into a 2d array + tmp.to_shape((shape, super::ORDER)).map(|x| x.to_owned()) + } + + pub fn split_heads(param: &Array2, h: usize) -> NdResult> + where + T: Clone, + { + let dim = param.shape().last().unwrap() / h; + // reshape the qkv matrix into a 3d array + let mut res = param.clone().into_shape((param.shape()[0], h, dim))?; + // swap the sequence and head axes + res.swap_axes(0, 1); + Ok(res) + } + + pub fn split_batch(param: &Array3, h: usize) -> NdResult> + where + T: Clone, + { + let dim = param.shape().last().unwrap() / h; + // reshape the qkv matrix into a 3d array + let mut res = param + .clone() + .into_shape((param.shape()[0], param.shape()[1], h, dim))?; + // swap the sequence and head axes + res.swap_axes(1, 2); + Ok(res) } } diff --git a/models/transformers/tests/params.rs b/models/transformers/tests/params.rs new file mode 100644 index 00000000..18656be8 --- /dev/null +++ b/models/transformers/tests/params.rs @@ -0,0 +1,35 @@ +/* + Appellation: params + Contrib: FL03 +*/ +extern crate concision_core as concision; +extern crate concision_transformers as transformers; + +use concision::{linarr, Matmul}; +use transformers::Qkv; + +use ndarray::prelude::*; + +#[test] +fn test_qkv() { + let shape = (2048, 10); + let params = Qkv::::new(shape); + assert_eq!(params.q(), &Array::default(shape)); +} + +#[test] +fn test_qkv_matmul() { + let shape = (2048, 10); + // generate some sample data + let data = linarr(shape).unwrap(); + // initialize the parameters + let params = Qkv::::ones(shape); + // calculate the expected result + let exp = Array2::::ones(shape).dot(&data.t()); + // calculate the result + let res = params.matmul(&data.t()); + // compare the results + assert_eq!(res.q(), &exp); + assert_eq!(res.k(), &exp); + assert_eq!(res.v(), &exp); +}