From 87abf9cf55022c0c4d59e5e1b8984c9546170573 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Tue, 28 May 2024 14:19:25 -0700 Subject: [PATCH 1/2] no need to find the first empty slot --- .../filters/quotientfilter/QuotientFilter.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index 93a6761c1..35c6f005f 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -379,14 +379,6 @@ long swap_fingerprints(long index, long new_fingerprint) { return existing; } - // finds the first empty slot after the given slot index - long find_first_empty_slot(long index) { - while (!is_slot_empty(index)) { - index = (index + 1) & getMask(); - } - return index; - } - // return the first slot to the right where the current run starting at the index parameter ends long find_new_run_location(long index) { if (!is_slot_empty(index)) { @@ -399,14 +391,13 @@ long find_new_run_location(long index) { } boolean insert_new_run(long canonical_slot, long long_fp) { - long first_empty_slot = find_first_empty_slot(canonical_slot); // finds the first empty slot to the right of the canonical slot that is empty long preexisting_run_start_index = find_run_start(canonical_slot); // scans the cluster leftwards and then to the right until reaching our run's would be location long start_of_this_new_run = find_new_run_location(preexisting_run_start_index); // If there is already a run at the would-be location, find its end and insert the new run after it boolean slot_initially_empty = is_slot_empty(start_of_this_new_run); // modify some metadata flags to mark the new run set_occupied(canonical_slot, true); - if (first_empty_slot != canonical_slot) { + if (start_of_this_new_run != 
canonical_slot) { set_shifted(start_of_this_new_run, true); } set_continuation(start_of_this_new_run, false); From 62776da548c15819454d990b21c4043452f7c73e Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Tue, 28 May 2024 19:26:18 -0700 Subject: [PATCH 2/2] use the same method to shift --- .../quotientfilter/QuotientFilter.java | 34 ++++--------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index 35c6f005f..db1da48ad 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -408,26 +408,7 @@ boolean insert_new_run(long canonical_slot, long long_fp) { num_entries++; return true; } - - // push all entries one slot to the right - // if we inserted this run in the middle of a cluster - long current_index = start_of_this_new_run; - boolean is_this_slot_empty; - boolean temp_continuation = false; - do { - is_this_slot_empty = is_slot_empty(current_index); - long_fp = swap_fingerprints(current_index, long_fp); - - if (current_index != start_of_this_new_run) { - set_shifted(current_index, true); - boolean current_continuation = is_continuation(current_index); - set_continuation(current_index, temp_continuation); - temp_continuation = current_continuation; - } - current_index = (current_index + 1) & getMask(); - } while (!is_this_slot_empty); - num_entries++; - return true; + return insert_fingerprint_and_push_all_else(long_fp, start_of_this_new_run, false); } boolean insert(long long_fp, long index, boolean insert_only_if_no_match) { @@ -453,14 +434,13 @@ boolean insert(long long_fp, long index, boolean insert_only_if_no_match) { return false; } } - return insert_fingerprint_and_push_all_else(long_fp, run_start_index); + return 
insert_fingerprint_and_push_all_else(long_fp, run_start_index, true); } - // insert a fingerprint as the first fingerprint of the new run and push all other entries in the cluster to the right. - boolean insert_fingerprint_and_push_all_else(long long_fp, long run_start_index) { + // insert a fingerprint and push all following entries in the cluster to the right: if is_same_run is true it becomes the last fingerprint of the run starting at run_start_index, otherwise it starts a new run at run_start_index. + boolean insert_fingerprint_and_push_all_else(long long_fp, long run_start_index, boolean is_same_run) { long current_index = run_start_index; boolean is_this_slot_empty; - boolean finished_first_run = false; boolean temp_continuation = false; do { @@ -468,12 +448,12 @@ boolean insert_fingerprint_and_push_all_else(long long_fp, long run_start_index) if (current_index != run_start_index) { set_shifted(current_index, true); } - if (current_index != run_start_index && !finished_first_run && !is_continuation(current_index)) { - finished_first_run = true; + if (current_index != run_start_index && is_same_run && !is_continuation(current_index)) { + is_same_run = false; set_continuation(current_index, true); long_fp = swap_fingerprints(current_index, long_fp); } - else if (finished_first_run) { + else if (!is_same_run) { boolean current_continuation = is_continuation(current_index); set_continuation(current_index, temp_continuation); temp_continuation = current_continuation;