diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index 93a6761c1..db1da48ad 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -379,14 +379,6 @@ long swap_fingerprints(long index, long new_fingerprint) { return existing; } - // finds the first empty slot after the given slot index - long find_first_empty_slot(long index) { - while (!is_slot_empty(index)) { - index = (index + 1) & getMask(); - } - return index; - } - // return the first slot to the right where the current run starting at the index parameter ends long find_new_run_location(long index) { if (!is_slot_empty(index)) { @@ -399,14 +391,13 @@ long find_new_run_location(long index) { } boolean insert_new_run(long canonical_slot, long long_fp) { - long first_empty_slot = find_first_empty_slot(canonical_slot); // finds the first empty slot to the right of the canonical slot that is empty long preexisting_run_start_index = find_run_start(canonical_slot); // scans the cluster leftwards and then to the right until reaching our run's would be location long start_of_this_new_run = find_new_run_location(preexisting_run_start_index); // If there is already a run at the would-be location, find its end and insert the new run after it boolean slot_initially_empty = is_slot_empty(start_of_this_new_run); // modify some metadata flags to mark the new run set_occupied(canonical_slot, true); - if (first_empty_slot != canonical_slot) { + if (start_of_this_new_run != canonical_slot) { set_shifted(start_of_this_new_run, true); } set_continuation(start_of_this_new_run, false); @@ -417,26 +408,7 @@ boolean insert_new_run(long canonical_slot, long long_fp) { num_entries++; return true; } - - // push all entries one slot to the right - // if we inserted this run in the middle of a cluster - long current_index = start_of_this_new_run; - boolean is_this_slot_empty; - boolean temp_continuation = false; - do { - is_this_slot_empty = is_slot_empty(current_index); - long_fp = swap_fingerprints(current_index, long_fp); - - if (current_index != start_of_this_new_run) { - set_shifted(current_index, true); - boolean current_continuation = is_continuation(current_index); - set_continuation(current_index, temp_continuation); - temp_continuation = current_continuation; - } - current_index = (current_index + 1) & getMask(); - } while (!is_this_slot_empty); - num_entries++; - return true; + return insert_fingerprint_and_push_all_else(long_fp, start_of_this_new_run, false); } boolean insert(long long_fp, long index, boolean insert_only_if_no_match) { @@ -462,14 +434,13 @@ boolean insert(long long_fp, long index, boolean insert_only_if_no_match) { return false; } } - return insert_fingerprint_and_push_all_else(long_fp, run_start_index); + return insert_fingerprint_and_push_all_else(long_fp, run_start_index, true); } - // insert a fingerprint as the first fingerprint of the new run and push all other entries in the cluster to the right. - boolean insert_fingerprint_and_push_all_else(long long_fp, long run_start_index) { + // insert a fingerprint as the last fingerprint of the run and push all other entries in the cluster to the right. + boolean insert_fingerprint_and_push_all_else(long long_fp, long run_start_index, boolean is_same_run) { long current_index = run_start_index; boolean is_this_slot_empty; - boolean finished_first_run = false; boolean temp_continuation = false; do { @@ -477,12 +448,12 @@ boolean insert_fingerprint_and_push_all_else(long long_fp, long run_start_index) if (current_index != run_start_index) { set_shifted(current_index, true); } - if (current_index != run_start_index && !finished_first_run && !is_continuation(current_index)) { - finished_first_run = true; + if (current_index != run_start_index && is_same_run && !is_continuation(current_index)) { + is_same_run = false; set_continuation(current_index, true); long_fp = swap_fingerprints(current_index, long_fp); } - else if (finished_first_run) { + else if (!is_same_run) { boolean current_continuation = is_continuation(current_index); set_continuation(current_index, temp_continuation); temp_continuation = current_continuation;