From dc648c5b38f9c43d07a69a20855beb2616fa49d4 Mon Sep 17 00:00:00 2001 From: RazvanN7 Date: Fri, 21 Jul 2017 09:19:02 +0300 Subject: [PATCH] Add documentation and unittests regarding multisets --- std/algorithm/package.d | 3 +- std/algorithm/setops.d | 128 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 124 insertions(+), 7 deletions(-) diff --git a/std/algorithm/package.d b/std/algorithm/package.d index 656dbcfd85e..4c9a72f71c9 100644 --- a/std/algorithm/package.d +++ b/std/algorithm/package.d @@ -116,7 +116,8 @@ $(TR $(SUBREF setops, cartesianProduct) $(SUBREF setops, largestPartialIntersection) $(SUBREF setops, largestPartialIntersectionWeighted) - $(SUBREF setops, nWayUnion) + $(SUBREF setops, multiwayMerge) + $(SUBREF setops, multiwayUnion) $(SUBREF setops, setDifference) $(SUBREF setops, setIntersection) $(SUBREF setops, setSymmetricDifference) diff --git a/std/algorithm/setops.d b/std/algorithm/setops.d index c556cbf8526..f7a0080af2c 100644 --- a/std/algorithm/setops.d +++ b/std/algorithm/setops.d @@ -3,6 +3,14 @@ This is a submodule of $(MREF std, algorithm). It contains generic algorithms that implement set operations. +The functions $(LREF multiwayMerge), $(LREF multiwayUnion), $(LREF setDifference), +$(LREF setIntersection), $(LREF setSymmetricDifference) expect their input +ranges to be sorted. + +All algorithms are generalized to accept as input not only sets but also +$(HTTP en.wikipedia.org/wiki/Multiset, multisets). Each algorithm +documents behaviour in the presence of duplicated inputs. + $(SCRIPT inhibitQuickIndex = 1;) $(BOOKTABLE Cheat Sheet, $(TR $(TH Function Name) $(TH Description)) @@ -14,8 +22,9 @@ $(T2 largestPartialIntersectionWeighted, Copies out the values that occur most frequently (multiplied by per-value weights) in a range of ranges.) $(T2 multiwayMerge, - Computes the union of a set of sets implemented as a range of sorted - ranges.) + Merges a range of sorted ranges.) 
+$(T2 multiwayUnion, + Computes the union of a range of sorted ranges.) $(T2 setDifference, Lazily computes the set difference of two or more sorted ranges.) $(T2 setIntersection, @@ -568,6 +577,11 @@ array of the occurrences and then selecting its top items, and also requires less memory ($(D largestPartialIntersection) builds its result directly in $(D tgt) and requires no extra memory). +If at least one of the ranges is a multiset, then all occurrences +of a duplicate element are taken into account. The result is +equivalent to merging all ranges and picking the most frequent +$(D tgt.length) elements. + Warning: Because $(D largestPartialIntersection) does not allocate extra memory, it will leave $(D ror) modified. Namely, $(D largestPartialIntersection) assumes ownership of $(D ror) and @@ -616,6 +630,22 @@ void largestPartialIntersection largestPartialIntersection(a, c); assert(c[0] == tuple(1.0, 3u)); // 1.0 occurs in 3 inputs + + // multiset + double[][] x = + [ + [1, 1, 1, 1, 4, 7, 8], + [1, 7], + [1, 7, 8], + [4, 7], + [7] + ]; + auto y = new Tuple!(double, uint)[2]; + largestPartialIntersection(x.dup, y); + // 7.0 occurs 5 times + assert(y[0] == tuple(7.0, 5u)); + // 1.0 occurs 6 times + assert(y[1] == tuple(1.0, 6u)); } import std.algorithm.sorting : SortOutput; // FIXME @@ -625,6 +655,11 @@ import std.algorithm.sorting : SortOutput; // FIXME Similar to $(D largestPartialIntersection), but associates a weight with each distinct element in the intersection. +If at least one of the ranges is a multiset, then all occurrences +of a duplicate element are taken into account. The result +is equivalent to merging all input ranges and picking the +$(D tgt.length) elements with the highest weighted frequency. + Params: less = The predicate the ranges are sorted by. 
ror = A range of $(REF_ALTTEXT forward ranges, isForwardRange, std,range,primitives) @@ -672,6 +707,20 @@ void largestPartialIntersectionWeighted assert(b[0] == tuple(4.0, 2u)); // 4.0 occurs 2 times -> 4.6 (2 * 2.3) // 7.0 occurs 3 times -> 4.4 (3 * 1.1) + + // multiset + double[][] x = + [ + [ 1, 1, 1, 4, 7, 8 ], + [ 1, 7 ], + [ 1, 7, 8], + [ 4 ], + [ 7 ], + ]; + auto y = new Tuple!(double, uint)[1]; + largestPartialIntersectionWeighted(x, y, weights); + assert(y[0] == tuple(1.0, 5u)); + // 1.0 occurs 5 times -> 1.2 * 5 = 6 } @system unittest @@ -746,7 +795,7 @@ void largestPartialIntersectionWeighted // MultiwayMerge /** -Computes the union of multiple sets. The input sets are passed as a +Merges multiple sets. The input sets are passed as a range of ranges and each is assumed to be sorted by $(D less). Computation is done lazily, one union element at a time. The complexity of one $(D popFront) operation is $(BIGOH @@ -759,6 +808,10 @@ MultiwayMerge) is $(BIGOH n * ror.length * log(ror.length)), i.e., $(D log(ror.length)) times worse than just spanning all ranges in turn. The output comes sorted (unstably) by $(D less). +The length of the resulting range is the sum of all lengths of +the ranges passed as input. This means that all elements (duplicates +included) are transferred to the resulting range. + For backward compatibility, `multiwayMerge` is available under the name `nWayUnion` and `MultiwayMerge` under the name of `NWayUnion` . 
Future code should use `multiwayMerge` and `MultiwayMerge` as `nWayUnion` @@ -859,6 +912,18 @@ MultiwayMerge!(less, RangeOfRanges) multiwayMerge 1, 1, 1, 4, 4, 7, 7, 7, 7, 8, 8 ]; assert(equal(multiwayMerge(a), witness)); + + double[][] b = + [ + // range with duplicates + [ 1, 1, 4, 7, 8 ], + [ 7 ], + [ 1, 7, 8], + [ 4 ], + [ 7 ], + ]; + // duplicates are propagated to the resulting range + assert(equal(multiwayMerge(b), witness)); } alias nWayUnion = multiwayMerge; @@ -870,14 +935,16 @@ as a range of ranges and each is assumed to be sorted by $(D less). Computation is done lazily, one union element at a time. `multiwayUnion(ror)` is functionally equivalent to `multiwayMerge(ror).uniq`. +The output of `multiwayUnion` has no duplicates even when its inputs contain duplicates. + Params: less = Predicate the given ranges are sorted by. ror = A range of ranges sorted by `less` to compute the intersection for. Returns: - A range of the intersection of the ranges in `ror`. + A range of the union of the ranges in `ror`. -See also: $(LREF NWayUnion) +See also: $(LREF multiwayMerge) */ auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror) { @@ -890,6 +957,7 @@ auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror) { import std.algorithm.comparison : equal; + // sets double[][] a = [ [ 1, 4, 7, 8 ], @@ -901,6 +969,17 @@ auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror) auto witness = [1, 4, 7, 8]; assert(equal(multiwayUnion(a), witness)); + + // multisets + double[][] b = + [ + [ 1, 1, 1, 4, 7, 8 ], + [ 1, 7 ], + [ 1, 7, 7, 8], + [ 4 ], + [ 7 ], + ]; + assert(equal(multiwayUnion(b), witness)); } /** @@ -908,6 +987,11 @@ Lazily computes the difference of $(D r1) and $(D r2). The two ranges are assumed to be sorted by $(D less). The element types of the two ranges must have a common type. 
+ +In the case of multisets, considering that element `a` appears `x` +times in $(D r1) and `y` times in $(D r2), the number of occurrences +of `a` in the resulting range is going to be `x-y` if `x > y` or 0 otherwise. + Params: less = Predicate the given ranges are sorted by. r1 = The first range. @@ -997,10 +1081,18 @@ SetDifference!(less, R1, R2) setDifference(alias less = "a < b", R1, R2) import std.algorithm.comparison : equal; import std.range.primitives : isForwardRange; + // sets int[] a = [ 1, 2, 4, 5, 7, 9 ]; int[] b = [ 0, 1, 2, 4, 7, 8 ]; - assert(equal(setDifference(a, b), [5, 9][])); + assert(equal(setDifference(a, b), [5, 9])); static assert(isForwardRange!(typeof(setDifference(a, b)))); + + // multisets + int[] x = [1, 1, 1, 2, 3]; + int[] y = [1, 1, 2, 4, 5]; + auto r = setDifference(x, y); + assert(equal(r, [1, 3])); + assert(setDifference(r, x).empty); } @safe unittest // Issue 10460 @@ -1019,6 +1111,10 @@ Lazily computes the intersection of two or more input ranges $(D ranges). The ranges are assumed to be sorted by $(D less). The element types of the ranges must have a common type. +In the case of multisets, the range with the minimum number of +occurrences of a given element propagates the number of +occurrences of this element to the resulting range. + Params: less = Predicate the given ranges are sorted by. ranges = The ranges to compute the intersection for. @@ -1132,12 +1228,19 @@ if (Rs.length >= 2 && allSatisfy!(isInputRange, Rs) && { import std.algorithm.comparison : equal; + // sets int[] a = [ 1, 2, 4, 5, 7, 9 ]; int[] b = [ 0, 1, 2, 4, 7, 8 ]; int[] c = [ 0, 1, 4, 5, 7, 8 ]; assert(equal(setIntersection(a, a), a)); assert(equal(setIntersection(a, b), [1, 2, 4, 7])); assert(equal(setIntersection(a, b, c), [1, 4, 7])); + + // multisets + int[] d = [ 1, 1, 2, 2, 7, 7 ]; + int[] e = [ 1, 1, 1, 7]; + assert(equal(setIntersection(a, d), [1, 2, 7])); + assert(equal(setIntersection(d, e), [1, 1, 7])); } @safe unittest @@ -1177,6 +1280,12 @@ r2). 
The two ranges are assumed to be sorted by $(D less), and the output is also sorted by $(D less). The element types of the two ranges must have a common type. +If both ranges are sets (without duplicated elements), the resulting +range is going to be a set. If at least one of the ranges is a multiset, +the number of occurrences of an element `x` in the resulting range is `abs(a-b)` +where `a` is the number of occurrences of `x` in $(D r1), `b` is the number of +occurrences of `x` in $(D r2), and `abs` is the absolute value. + If both arguments are ranges of L-values of the same type then $(D SetSymmetricDifference) will also be a range of L-values of that type. @@ -1288,10 +1397,17 @@ setSymmetricDifference(alias less = "a < b", R1, R2) import std.algorithm.comparison : equal; import std.range.primitives : isForwardRange; + // sets int[] a = [ 1, 2, 4, 5, 7, 9 ]; int[] b = [ 0, 1, 2, 4, 7, 8 ]; assert(equal(setSymmetricDifference(a, b), [0, 5, 8, 9][])); static assert(isForwardRange!(typeof(setSymmetricDifference(a, b)))); + + // multisets + int[] c = [1, 1, 1, 1, 2, 2, 2, 4, 5, 6]; + int[] d = [1, 1, 2, 2, 2, 2, 4, 7, 9]; + assert(equal(setSymmetricDifference(c, d), setSymmetricDifference(d, c))); + assert(equal(setSymmetricDifference(c, d), [1, 1, 2, 5, 6, 7, 9])); } @safe unittest // Issue 10460