From eae1274b3db450fa64aad768b6e1dc4354e4c260 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 00:28:46 +0200 Subject: [PATCH 01/10] Add std.random.mixture: Discrete sampler --- doc/Makefile | 5 +- source/mir/random/mixture.d | 112 ++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 source/mir/random/mixture.d diff --git a/doc/Makefile b/doc/Makefile index 2997d55d..9eb3e562 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -26,8 +26,8 @@ ARTWORK_DIR=$(DOC_SOURCE_DIR)/artwork # xy/zz is in variable PACKAGE_xy_zz. This allows automation in iterating # packages and their modules. MIR_PACKAGES = mir $(addprefix mir/,\ - blas combinatorics ndslice \ - model/lda \ + blas combinatorics model/lda ndslice \ + random \ sparse sparse/blas) PACKAGE_mir = math sum @@ -35,6 +35,7 @@ PACKAGE_mir_blas = package dot gemm gemv PACKAGE_mir_combinatorics = package PACKAGE_mir_model_lda = hoffman PACKAGE_mir_ndslice = package iteration selection slice +PACKAGE_mir_random = mixture PACKAGE_mir_sparse = package PACKAGE_mir_sparse_blas = package axpy dot gemm gemv diff --git a/source/mir/random/mixture.d b/source/mir/random/mixture.d new file mode 100644 index 00000000..2edacdf2 --- /dev/null +++ b/source/mir/random/mixture.d @@ -0,0 +1,112 @@ +/** +Mixture methods that can be used to combine random variables. + +License: $(LINK2 http://boost.org/LICENSE_1_0.txt, Boost License 1.0). + +Authors: Ilya Yaroshenko, Sebastian Wilzbach +*/ + +module mir.random.mixture; + +import std.traits : isNumeric; + +/** +_Discrete distribution sampler. +Given an array of cumulative density points `cdPoints`, +a _discrete distribution is defined. +The cumulative density points can be calculated from probabilities or counts +using `std.algorithm.cumulativeFold`. Hence `cdPoints` is assumed to be sorted. + +Params: + cdPoints = cumulative density points + +Returns: + A sampler that can be called to sample from the distribution. +*/ +struct Discrete(T) + if (isNumeric!T) +{ + import std.range : SortedRange; + + private const(T)[] _cdPoints; + private SortedRange!(const(T)[], "a <= b") r; + + /// + this(const(T)[] cdPoints) + { + _cdPoints = cdPoints; + r = typeof(r)(_cdPoints); + } + + /// cumulative density points + const(T)[] cdPoints() const @property + { + return _cdPoints; + } + + /// samples a value from the discrete distribution + size_t opCall() const + { + import std.random : rndGen; + return opCall(rndGen); + } + + /// samples a value from the discrete distribution using a custom random generator + size_t opCall(RNG)(ref RNG gen) const + { + import std.random : uniform; + T v = uniform(0, _cdPoints[$-1], gen); + return (cast(SortedRange!(const(T)[], "a <= b")) r).lowerBound(v).length; + } +} + +/// ditto +Discrete!T discrete(T)(const(T)[] cdPoints) +{ + return Discrete!T(cdPoints); +} + +/// +unittest +{ + // 10%, 20%, 20%, 40%, 10% + auto probs = [0.1, 0.3, 0.5, 0.9, 1]; + auto ds = discrete(probs); + + // sample from the discrete distribution + uint[size_t] obs; + foreach (i; 0..10_000) + obs[ds()]++; +} + +unittest +{ + import std.random : rndGen; + rndGen.seed(42); + + // 10%, 20%, 20%, 40%, 10% + auto probs = [0.1, 0.3, 0.5, 0.9, 1]; + auto ds = discrete(probs); + + uint[size_t] obs; + foreach (i; 0..10_000) + obs[ds(rndGen)]++; + + assert(obs.keys.length == probs.length, "Invalid element sampled"); +} + +unittest +{ + import std.random : rndGen; + rndGen.seed(42); + + // 1, 2, 1 + auto cdPoints = [1, 3, 4]; + auto ds = discrete(cdPoints); + + uint[size_t] obs; + foreach (i; 0..10_000) + obs[ds(rndGen)]++; + + assert(obs.keys.length == cdPoints.length, "Invalid element sampled"); +} From f1ec4164a0d19b79129e67d8ae9ac75603e190cc Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 01:25:52 +0200 Subject: [PATCH 02/10] addressed comments - uint array instead of associative array - named Random variable - renamed probs -> cdPoints - reduced iteration size to 1000 --- source/mir/random/mixture.d | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/source/mir/random/mixture.d b/source/mir/random/mixture.d index 2edacdf2..2454d596 100644 --- a/source/mir/random/mixture.d +++ b/source/mir/random/mixture.d @@ -70,43 +70,39 @@ Discrete!T discrete(T)(const(T)[] cdPoints) unittest { // 10%, 20%, 20%, 40%, 10% - auto probs = [0.1, 0.3, 0.5, 0.9, 1]; - auto ds = discrete(probs); + auto cdPoints = [0.1, 0.3, 0.5, 0.9, 1]; + auto ds = discrete(cdPoints); // sample from the discrete distribution - uint[size_t] obs; - foreach (i; 0..10_000) + auto obs = new uint[cdPoints.length]; + foreach (i; 0..1000) obs[ds()]++; } unittest { - import std.random : rndGen; - rndGen.seed(42); + import std.random : Random; + auto rndGen = Random(42); // 10%, 20%, 20%, 40%, 10% - auto probs = [0.1, 0.3, 0.5, 0.9, 1]; - auto ds = discrete(probs); + auto cdPoints = [0.1, 0.3, 0.5, 0.9, 1]; + auto ds = discrete(cdPoints); - uint[size_t] obs; - foreach (i; 0..10_000) + auto obs = new uint[cdPoints.length]; + foreach (i; 0..1000) obs[ds(rndGen)]++; - - assert(obs.keys.length == probs.length, "Invalid element sampled"); } unittest { - import std.random : rndGen; - rndGen.seed(42); + import std.random : Random; + auto rndGen = Random(42); // 1, 2, 1 auto cdPoints = [1, 3, 4]; auto ds = discrete(cdPoints); - uint[size_t] obs; - foreach (i; 0..10_000) + auto obs = new uint[cdPoints.length]; + foreach (i; 0..1000) obs[ds(rndGen)]++; - - assert(obs.keys.length == cdPoints.length, "Invalid element sampled"); } From ed7fb70386ad6c5d3d22b65eea3ec38034bc7683 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 01:33:13 +0200 Subject: [PATCH 03/10] fix missing line in code coverage --- source/mir/random/mixture.d | 1 + 1 file changed, 1 insertion(+) diff --git a/source/mir/random/mixture.d b/source/mir/random/mixture.d index 2454d596..2abdcbd7 100644 --- a/source/mir/random/mixture.d +++ b/source/mir/random/mixture.d @@ -101,6 +101,7 @@ unittest // 1, 2, 1 auto cdPoints = [1, 3, 4]; auto ds = discrete(cdPoints); + assert(ds.cdPoints == cdPoints); auto obs = new uint[cdPoints.length]; foreach (i; 0..1000) From 75d7e4e005184477e045dcaf6a9c5774e3acd65f Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 11:16:50 +0200 Subject: [PATCH 04/10] rename to discrete.d --- source/mir/random/{mixture.d => discrete.d} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename source/mir/random/{mixture.d => discrete.d} (100%) diff --git a/source/mir/random/mixture.d b/source/mir/random/discrete.d similarity index 100% rename from source/mir/random/mixture.d rename to source/mir/random/discrete.d From 8f62f6b80577b6d6d109f45c41e50096443c1674 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 11:25:30 +0200 Subject: [PATCH 05/10] renaming to discrete - part 2 --- doc/Makefile | 2 +- source/mir/random/discrete.d | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/Makefile b/doc/Makefile index 9eb3e562..d435fe89 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -35,7 +35,7 @@ PACKAGE_mir_blas = package dot gemm gemv PACKAGE_mir_combinatorics = package PACKAGE_mir_model_lda = hoffman PACKAGE_mir_ndslice = package iteration selection slice -PACKAGE_mir_random = mixture +PACKAGE_mir_random = discrete PACKAGE_mir_sparse = package PACKAGE_mir_sparse_blas = package axpy dot gemm gemv diff --git a/source/mir/random/discrete.d b/source/mir/random/discrete.d index 2abdcbd7..b4fea229 100644 --- a/source/mir/random/discrete.d +++ b/source/mir/random/discrete.d @@ -1,12 +1,12 @@ /** -Mixture methods that can be used to combine random variables. +Discrete distributions. License: $(LINK2 http://boost.org/LICENSE_1_0.txt, Boost License 1.0). Authors: Ilya Yaroshenko, Sebastian Wilzbach */ -module mir.random.mixture; +module mir.random.discrete; import std.traits : isNumeric; From 04ed13b2fc0c84d4d59774a0b699f3295221c325 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 11:26:14 +0200 Subject: [PATCH 06/10] Move struct declaration below function --- source/mir/random/discrete.d | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/mir/random/discrete.d b/source/mir/random/discrete.d index b4fea229..2a84c3f2 100644 --- a/source/mir/random/discrete.d +++ b/source/mir/random/discrete.d @@ -23,6 +23,12 @@ Params: Returns: A sampler that can be called to sample from the distribution. */ +Discrete!T discrete(T)(const(T)[] cdPoints) +{ + return Discrete!T(cdPoints); +} + +/// ditto struct Discrete(T) if (isNumeric!T) { @@ -60,12 +66,6 @@ struct Discrete(T) } } -/// ditto -Discrete!T discrete(T)(const(T)[] cdPoints) -{ - return Discrete!T(cdPoints); -} - /// unittest { From 2c9d5113f4dbf9d41f5d323f377604f711791116 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 11:30:19 +0200 Subject: [PATCH 07/10] use explicit template for uniform --- source/mir/random/discrete.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/mir/random/discrete.d b/source/mir/random/discrete.d index 2a84c3f2..56862954 100644 --- a/source/mir/random/discrete.d +++ b/source/mir/random/discrete.d @@ -61,7 +61,7 @@ struct Discrete(T) size_t opCall(RNG)(ref RNG gen) const { import std.random : uniform; - T v = uniform(0, _cdPoints[$-1], gen); + T v = uniform!("[)", T, T)(0, _cdPoints[$-1], gen); return (cast(SortedRange!(const(T)[], "a <= b")) r).lowerBound(v).length; } } From 49d6506c03838a56f0599351f94610d5a3992212 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 12:05:02 +0200 Subject: [PATCH 08/10] more doc descriptions --- source/mir/random/discrete.d | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/source/mir/random/discrete.d b/source/mir/random/discrete.d index 56862954..7edd6ad5 100644 --- a/source/mir/random/discrete.d +++ b/source/mir/random/discrete.d @@ -11,7 +11,7 @@ module mir.random.discrete; import std.traits : isNumeric; /** -_Discrete distribution sampler. +Setup a _discrete distribution sampler. Given an array of cumulative density points `cdPoints`, a _discrete distribution is defined. The cumulative density points can be calculated from probabilities or counts @@ -28,7 +28,14 @@ Discrete!T discrete(T)(const(T)[] cdPoints) return Discrete!T(cdPoints); } -/// ditto +/** +_Discrete distribution sampler that draws random values from a _discrete +distribution given an array of the respective cumulative density points. +`cdPoints` is an array of the cummulative sum over the probabilities +or counts of a _discrete distribution without a starting zero. + +Complexity: O(log n) where n is the number of `cdPoints`. +*/ struct Discrete(T) if (isNumeric!T) { @@ -37,7 +44,14 @@ struct Discrete(T) private const(T)[] _cdPoints; private SortedRange!(const(T)[], "a <= b") r; - /// + /** + The cumulative density points `cdPoints` are assumed to be sorted and given + without a starting zero. They can be calculated with + `std.algorithm.cumulativeFold` from probabilities or counts. + + Params: + cdPoints = cumulative density points + */ this(const(T)[] cdPoints) { _cdPoints = cdPoints; From c2f893068ecfc7f1be72bf4f3441a117752539c7 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 12:21:18 +0200 Subject: [PATCH 09/10] use std.algorithm.iteration and move unittest to function --- source/mir/random/discrete.d | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/source/mir/random/discrete.d b/source/mir/random/discrete.d index 7edd6ad5..7b9509a2 100644 --- a/source/mir/random/discrete.d +++ b/source/mir/random/discrete.d @@ -15,7 +15,7 @@ Setup a _discrete distribution sampler. Given an array of cumulative density points `cdPoints`, a _discrete distribution is defined. The cumulative density points can be calculated from probabilities or counts -using `std.algorithm.cumulativeFold`. Hence `cdPoints` is assumed to be sorted. +using `std.algorithm.iteration.cumulativeFold`. Hence `cdPoints` is assumed to be sorted. Params: cdPoints = cumulative density points @@ -28,6 +28,19 @@ Discrete!T discrete(T)(const(T)[] cdPoints) return Discrete!T(cdPoints); } +/// +unittest +{ + // 10%, 20%, 20%, 40%, 10% + auto cdPoints = [0.1, 0.3, 0.5, 0.9, 1]; + auto ds = discrete(cdPoints); + + // sample from the discrete distribution + auto obs = new uint[cdPoints.length]; + foreach (i; 0..1000) + obs[ds()]++; +} + /** _Discrete distribution sampler that draws random values from a _discrete distribution given an array of the respective cumulative density points. @@ -47,7 +60,7 @@ struct Discrete(T) /** The cumulative density points `cdPoints` are assumed to be sorted and given without a starting zero. They can be calculated with - `std.algorithm.cumulativeFold` from probabilities or counts. + `std.algorithm.iteration.cumulativeFold` from probabilities or counts. Params: cdPoints = cumulative density points @@ -80,19 +93,6 @@ struct Discrete(T) } } -/// -unittest -{ - // 10%, 20%, 20%, 40%, 10% - auto cdPoints = [0.1, 0.3, 0.5, 0.9, 1]; - auto ds = discrete(cdPoints); - - // sample from the discrete distribution - auto obs = new uint[cdPoints.length]; - foreach (i; 0..1000) - obs[ds()]++; -} - unittest { import std.random : Random; From 7562bb4c0505358c90e3238ede19335c82e47954 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sun, 26 Jun 2016 12:22:29 +0200 Subject: [PATCH 10/10] add lref to Discrete struct --- source/mir/random/discrete.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/mir/random/discrete.d b/source/mir/random/discrete.d index 7b9509a2..940eea13 100644 --- a/source/mir/random/discrete.d +++ b/source/mir/random/discrete.d @@ -21,7 +21,7 @@ Params: cdPoints = cumulative density points Returns: - A sampler that can be called to sample from the distribution. + A $(LREF Discrete) sampler that can be called to sample from the distribution. */ Discrete!T discrete(T)(const(T)[] cdPoints) {