From 2fd83132c5b74a60ae07018f5ce161dc06fe6fc4 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Mon, 3 Jun 2024 13:40:21 -0700 Subject: [PATCH] no insertions of duplicates --- .../filters/quotientfilter/Filter.java | 6 +- .../quotientfilter/QuotientFilter.java | 41 +++---- .../filters/quotientfilter/DeletionTests.java | 96 ++++++++------- .../quotientfilter/QuotientFilterTest.java | 114 ++++++++++-------- 4 files changed, 127 insertions(+), 130 deletions(-) diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java index 8f0796738..bfb1ad25d 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java @@ -34,7 +34,7 @@ public abstract class Filter { //abstract boolean rejuvenate(long key); //abstract boolean expand(); //protected abstract boolean _delete(long large_hash); - abstract protected boolean _insert(long large_hash, boolean insert_only_if_no_match); + abstract protected boolean _insert(long large_hash); abstract protected boolean _search(long large_hash); @@ -53,11 +53,11 @@ public abstract class Filter { // return _delete(HashFunctions.xxhash(input_buffer)); // } // - public boolean insert(long input, boolean insert_only_if_no_match) { + public boolean insert(long input) { //System.out.println("The ABC input is " + input); long hash = get_hash(input); //System.out.println("The ABC hash is " + hash); - return _insert(hash, insert_only_if_no_match); + return _insert(hash); } // // public boolean insert(String input, boolean insert_only_if_no_match) { diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index 40406dc4e..e0a442fc5 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -400,30 +400,19 @@ boolean insert_new_run(long canonical_slot, long long_fp) { return insert_fingerprint_and_push_all_else(long_fp, start_of_this_new_run, false); } - boolean insert(long long_fp, long index, boolean insert_only_if_no_match) { - //System.out.println("Inserting Fingerprint " + long_fp); - //System.out.println("Inserting @ index " + index); - //System.out.println("BoolMatch? " + insert_only_if_no_match); - //System.out.println("**********"); - //System.out.println("Num items: " + num_entries); - //System.out.println("Max items: " + max_entries_before_expansion); - - if (index >= get_num_slots() || num_entries == get_num_slots()) { - return false; - } - boolean does_run_exist = is_occupied(index); - if (!does_run_exist) { - return insert_new_run(index, long_fp); - } - - long run_start_index = find_run_start(index); - if (insert_only_if_no_match) { - long found_index = find_first_fingerprint_in_run(run_start_index, long_fp); - if (found_index > -1) { - return false; - } - } - return insert_fingerprint_and_push_all_else(long_fp, run_start_index, true); + boolean insert(long long_fp, long index) { + if (index >= get_num_slots() || num_entries == get_num_slots()) { + return false; + } + if (!is_occupied(index)) { + return insert_new_run(index, long_fp); + } + long run_start_index = find_run_start(index); + final long found_index = find_first_fingerprint_in_run(run_start_index, long_fp); + if (found_index > -1) { + return false; + } + return insert_fingerprint_and_push_all_else(long_fp, run_start_index, true); } // insert a fingerprint as the last fingerprint of the run and push all other entries in the cluster to the right. @@ -601,7 +590,7 @@ long gen_fingerprint(long large_hash) { Hence, the `large_hash` argument is already a hash key that has been generated by the hashing library (eg xxhash). */ - protected boolean _insert(long large_hash, boolean insert_only_if_no_match) { + protected boolean _insert(long large_hash) { //System.out.println("Inserting long hash " + large_hash); if (is_full) { return false; @@ -615,7 +604,7 @@ protected boolean _insert(long large_hash, boolean insert_only_if_no_match) { System.out.println(slot_index + " " + fingerprint ); System.out.println(); */ - boolean success = insert(fingerprint, slot_index, false); + boolean success = insert(fingerprint, slot_index); /*if (!success) { System.out.println("insertion failure"); System.out.println(input + "\t" + slot_index + "\t" + get_fingerprint_str(fingerprint, fingerprintLength)); diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java index 1ed94b8a9..6e1beb9f2 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java @@ -45,13 +45,13 @@ static public void BasicDeletions() { long fp3 = 1 << 2; long fp4 = 31; - qf.insert(fp4, 1, false); - qf.insert(fp1, 1, false); - qf.insert(fp1, 1, false); - qf.insert(fp2, 2, false); - qf.insert(fp1, 1, false); - qf.insert(fp1, 1, false); - qf.insert(fp3, 4, false); + qf.insert(fp4, 1); + qf.insert(fp1, 1); + qf.insert(fp1, 1); + qf.insert(fp2, 2); + qf.insert(fp1, 1); + qf.insert(fp1, 1); + qf.insert(fp3, 4); qf.delete(31, 1); @@ -75,39 +75,36 @@ static public void BasicDeletions() { * The expected outcome is that after deletion, the remaining keys should be in their canonical slots. */ @Test - static public void DeletionsWithSameFingerprint() { + static public void Deletions() { int bits_per_entry = 8; int num_entries_power = 3; int num_entries = (int)Math.pow(2, num_entries_power); QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - - // All keys have the same fingerprint but are mapped into (mostly) different slots - qf.insert(0, 1, false); - qf.insert(0, 1, false); - qf.insert(0, 2, false); - qf.insert(0, 2, false); - qf.insert(0, 3, false); - qf.insert(0, 3, false); - qf.insert(0, 3, false); - qf.insert(0, 6, false); - qf.insert(0, 6, false); // these are ignored - qf.insert(0, 6, false); - qf.insert(0, 7, false); - - qf.delete(0, 2); - qf.delete(0, 3); + qf.insert(1, 1); + qf.insert(2, 1); + qf.insert(3, 2); + qf.insert(4, 2); + qf.insert(5, 3); + qf.insert(6, 3); + qf.insert(7, 3); + qf.insert(8, 6); + qf.insert(9, 6); // these are ignored + qf.insert(10, 6); + qf.insert(11, 7); + + qf.delete(3, 2); + qf.delete(5, 3); BitSet result = new BitSet(num_entries * bits_per_entry); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, true, false, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, false, false, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, false, true, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, true, false, false, 0); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, true, false, true, 4); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, false, false, true, 6); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, false, true, true, 7); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, true, false, false, 8); result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, false, 0); -// result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 8, false, true, true, 0); -// result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 9, false, false, true, 0); assertTrue(QuotientFilterTest.check_equality(qf, result, true)); } @@ -123,33 +120,34 @@ static public void DeletionsWithSameFingerprint() { * * The expected outcome is that after deletion, the remaining keys should be in their canonical slots. */ - static public void DeletionsWithOverflow() { + static public void DeletionsWithWrap() { int bits_per_entry = 8; int num_entries_power = 3; int num_entries = (int)Math.pow(2, num_entries_power); QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - qf.insert(0, 1, false); - qf.insert(0, 1, false); - qf.insert(0, 2, false); - qf.insert(0, 2, false); - qf.insert(0, 3, false); - qf.insert(0, 4, false); - qf.insert(0, 4, false); - qf.insert(0, 5, false); + qf.insert(1, 1); + qf.insert(2, 1); + qf.insert(3, 2); + qf.insert(4, 2); + qf.insert(5, 3); + qf.insert(6, 4); + qf.insert(7, 4); + qf.insert(8, 5); //qf.pretty_print(); - qf.delete(0, 3); + qf.delete(5, 3); //qf.pretty_print(); BitSet result = new BitSet(num_entries * bits_per_entry); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, false, false, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, true, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, true, false, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, false, true, true, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, true, 0); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, false, false, true, 3); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, true, true, 4); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, true, false, true, 6); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, false, true, true, 7); + result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, true, 8); assertTrue(QuotientFilterTest.check_equality(qf, result, true)); } } diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java index 64f92ddc8..4f6486eae 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java @@ -56,12 +56,12 @@ public void WikiInsertionTest() { final int E = 5; final int F = 6; - qf.insert(B, 1, false); - qf.insert(E, 4, false); - qf.insert(F, 7, false); - qf.insert(C, 1, false); - qf.insert(D, 2, false); - qf.insert(A, 1, false); + qf.insert(B, 1); + qf.insert(E, 4); + qf.insert(F, 7); + qf.insert(C, 1); + qf.insert(D, 2); + qf.insert(A, 1); assertEquals(qf.get_num_entries(), 6); assertEquals(getState(qf, 0), 0); @@ -99,26 +99,35 @@ public void PaperInsertionTest() { int num_entries = (int)Math.pow(2, num_entries_power); QuotientFilter qf = new QuotientFilter(4, 8); - // (key, slot): {(a, 1), (b,1), (c ,3), (d, 3), (e, 3), (f, 4), (g, 6), (h, 6)} - qf.insert(0, 1, false); - qf.insert(0, 1, false); - qf.insert(0, 3, false); - qf.insert(0, 3, false); - qf.insert(0, 3, false); - qf.insert(0, 4, false); - qf.insert(0, 6, false); - qf.insert(0, 6, false); + final int A = 1; + final int B = 2; + final int C = 3; + final int D = 4; + final int E = 5; + final int F = 6; + final int G = 7; + final int H = 8; + + // (key, slot): {(a, 1), (b, 1), (c, 3), (d, 3), (e, 3), (f, 4), (g, 6), (h, 6)} + qf.insert(A, 1); + qf.insert(B, 1); + qf.insert(C, 3); + qf.insert(D, 3); + qf.insert(E, 3); + qf.insert(F, 4); + qf.insert(G, 6); + qf.insert(H, 6); BitSet result = new BitSet(num_entries * bits_per_entry); result = set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0); - result = set_slot_in_test(result, bits_per_entry, 1, true, false, false, 0); - result = set_slot_in_test(result, bits_per_entry, 2, false, true, true, 0); - result = set_slot_in_test(result, bits_per_entry, 3, true, false, false, 0); - result = set_slot_in_test(result, bits_per_entry, 4, true, true, true, 0); - result = set_slot_in_test(result, bits_per_entry, 5, false, true, true, 0); - result = set_slot_in_test(result, bits_per_entry, 6, true, false, true, 0); - result = set_slot_in_test(result, bits_per_entry, 7, false, false, true, 0); - result = set_slot_in_test(result, bits_per_entry, 8, false, true, true, 0); + result = set_slot_in_test(result, bits_per_entry, 1, true, false, false, A); + result = set_slot_in_test(result, bits_per_entry, 2, false, true, true, B); + result = set_slot_in_test(result, bits_per_entry, 3, true, false, false, C); + result = set_slot_in_test(result, bits_per_entry, 4, true, true, true, D); + result = set_slot_in_test(result, bits_per_entry, 5, false, true, true, E); + result = set_slot_in_test(result, bits_per_entry, 6, true, false, true, F); + result = set_slot_in_test(result, bits_per_entry, 7, false, false, true, G); + result = set_slot_in_test(result, bits_per_entry, 8, false, true, true, H); assertTrue(check_equality(qf, result, false)); } @@ -139,23 +148,24 @@ public void FalseNegativeTest() { */ @Test public void OverflowTest() { - int bits_per_entry = 8; - int num_entries_power = 3; - int num_entries = (int)Math.pow(2, num_entries_power); - int fingerprint_size = bits_per_entry - 3; - QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - - long fp2 = 1 << fingerprint_size - 1; - qf.insert(fp2, num_entries - 1, false); - assertEquals(qf.get_fingerprint(7), fp2); - assertEquals(getState(qf, 7), 0b100); - qf.insert(fp2, num_entries - 1, false); + final int bits_per_entry = 8; + final int num_entries_power = 3; + final int num_entries = (int)Math.pow(2, num_entries_power); + final int fingerprint_size = bits_per_entry - 3; + final QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); + + final long fp1 = 1; + final long fp2 = 1 << fingerprint_size - 1; + qf.insert(fp1, num_entries - 1); + assertEquals(qf.get_fingerprint(num_entries - 1), fp1); + assertEquals(getState(qf, num_entries - 1), 0b100); + qf.insert(fp2, num_entries - 1); assertEquals(qf.get_fingerprint(0), fp2); assertEquals(getState(qf, 0), 0b011); qf.delete(fp2, num_entries - 1); assertEquals(qf.get_fingerprint(0), 0); assertEquals(getState(qf, 0), 0b000); - boolean found = qf.search(fp2, num_entries - 1); + final boolean found = qf.search(fp1, num_entries - 1); assertTrue(found); } @@ -174,16 +184,16 @@ public void testQuotientFilterInsertionAndIteration() { //int fingerprint_size = bits_per_entry - 3; QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - qf.insert(0x1F, 2, false); - qf.insert(0x1F, 3, false); - qf.insert(0x1F, 3, false); - qf.insert(0x1F, 4, false); - qf.insert(0x1F, 15, false); // last slot in the filter - qf.insert(0x1F, 16, false); // outside the bounds + qf.insert(0x1F, 2); + qf.insert(0x1F, 3); + qf.insert(0x1F, 3); + qf.insert(0x1F, 4); + qf.insert(0x1F, 15); // last slot in the filter + qf.insert(0x1F, 16); // outside the bounds qf.pretty_print() ; Iterator it = new Iterator(qf); - int[] arr = new int[] {2, 3, 3, 4, 15}; + int[] arr = new int[] {2, 3, 4, 15}; int arr_index = 0; while (it.next()) {assertEquals(it.bucket_index, arr[arr_index++]);} } @@ -195,16 +205,16 @@ public void testQuotientFilterIterator() { int num_entries_power = 4; QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - qf.insert(0, 1, false); - qf.insert(0, 4, false); - qf.insert(0, 7, false); - qf.insert(0, 1, false); - qf.insert(0, 2, false); - qf.insert(0, 1, false); - qf.insert(0, 15, false); + qf.insert(0, 1); + qf.insert(0, 4); + qf.insert(0, 7); + qf.insert(0, 1); + qf.insert(0, 2); + qf.insert(0, 1); + qf.insert(0, 15); Iterator it = new Iterator(qf); - int[] arr = new int[] {1, 1, 1, 2, 4, 7, 15}; + int[] arr = new int[] {1, 2, 4, 7, 15}; int arr_index = 0; while (it.next()) {assertEquals(arr[arr_index++], it.bucket_index);} } @@ -270,7 +280,7 @@ static public boolean test_no_false_negatives(QuotientFilter filter, int num_ent for (int i = 0; i < num_entries; i++) { int rand_num = rand.nextInt(); - boolean success = filter.insert(rand_num, false); + boolean success = filter.insert(rand_num); if (success) { added.add(rand_num); } @@ -279,7 +289,7 @@ static public boolean test_no_false_negatives(QuotientFilter filter, int num_ent } } - for (Integer i : added) { + for (Integer i: added) { boolean found = filter.search((long)i); if (!found) { return false;