diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..21b51b0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.dub/ +dub.selections.json +__test__library__ +*.sublime-* +*.o +*.a \ No newline at end of file diff --git a/source/dstats/summary.d b/source/dstats/summary.d index 56ff424..8f2bb5c 100644 --- a/source/dstats/summary.d +++ b/source/dstats/summary.d @@ -1204,3 +1204,160 @@ unittest { assert(approxEqual(z[i], (arr[i] - m) / sd)); } } + +enum QuantileType { + Type1, + Type2, + Type3, + Type4, + Type5, + Type6, + Type7, + Type8, + Type9 +}; + +/** + Function to compute the quantile of a $(D SortedRange) given $(D p), which + is a number between 0 and 1. The range MUST be a $(D SortedRange) that is sorted + in ascending order. Otherwise this function will give incorrect results. If the + range is empty, this function will return $(D real.nan). Is + $(BIGOH r.length * p) for forward ranges and $(BIGOH 1) for random access ranges. + + Formula: + The quantile of a range x is found with quantile(p) = (1 - y) * x[j] + y * x[j + 1]. + Where y is a function of the constants j and g, depending on the type. + + Params: + r = a forward range with length + p = where to calculate the quantile, must be $(D p >= 0 && p <= 1) + + Returns: + the quantile of r given p +*/ +real quantile(QuantileType type = QuantileType.Type7, R)(R r, real p) + if (isInstanceOf!(SortedRange, R) && + is(ElementType!R : real) && + isRandomAccessRange!R && + hasLength!R && + !isInfinite!R) +in +{ + assert(p >= 0 && p <= 1); + assert(!r.empty, "The range cannot be empty"); +} +body +{ + import std.math : floor, approxEqual; + + if (p == 0) + { + return r[0]; + } + + if (p == 1) + { + return r[$]; + } + + // Q(p) = (1 - y) * x[j] + y * x[j + 1] + // y is a function of j = floor(np + m) + // and g = np + m - j + auto n = r.length; + real m; + real g; + size_t j; + real y; + + static if (type == QuantileType.Type1) + { + // y = 0 if g = 0, else y = 1 + m = 0; + j = cast(size_t) floor((n * p) + m); + g = (n * p) + m - j; + + if (g.approxEqual(0)) + { + y = 0; + } + else + { + y = 1; + } + } + else static if (type == QuantileType.Type2) + { + // y = 0.5 if g = 0, else y = 12 + m = 0; + j = cast(size_t) floor((n * p) + m); + g = (n * p) + m - j; + + if (g.approxEqual(0)) + { + y = 0.5; + } + else + { + y = 1; + } + } + else static if (type == QuantileType.Type3) + { + // y = 0 if g = 0 and j is even, else y = 1 + m = -1.0 / 2.0; + j = cast(size_t) floor((n * p) + m); + g = (n * p) + m - j; + + if (g.approxEqual(0) && j % 2 == 0) + { + y = 0; + } + else + { + y = 1; + } + } + + size_t index = j - 1 == size_t.max ? 0 : j - 1; + + return ((1 - y) * r[index]) + (y * r[j]); +} + +// Type 1 +unittest +{ + import std.range : assumeSorted; + + enum data = [10, 20, 30, 40, 50].assumeSorted; + + assert(data.quantile!(QuantileType.Type1)(0.2) == 10); + assert(data.quantile!(QuantileType.Type1)(0.25) == 20); + assert(data.quantile!(QuantileType.Type1)(0.5) == 30); + assert(data.quantile!(QuantileType.Type1)(0.75) == 40); +} + +// Type 2 +unittest +{ + import std.range : assumeSorted; + + enum data = [10, 20, 30, 40, 50].assumeSorted; + + assert(data.quantile!(QuantileType.Type2)(0.2) == 15); + assert(data.quantile!(QuantileType.Type2)(0.25) == 20); + assert(data.quantile!(QuantileType.Type2)(0.5) == 30); + assert(data.quantile!(QuantileType.Type2)(0.75) == 40); +} + +// Type 3 +unittest +{ + import std.range : assumeSorted; + + enum data = [10, 20, 30, 40, 50].assumeSorted; + + assert(data.quantile!(QuantileType.Type3)(0.2) == 10); + assert(data.quantile!(QuantileType.Type3)(0.25) == 10); + assert(data.quantile!(QuantileType.Type3)(0.5) == 20); + assert(data.quantile!(QuantileType.Type3)(0.75) == 40); +} \ No newline at end of file