From a9d6d681f18d876f51a657db1e7663a4540aaf87 Mon Sep 17 00:00:00 2001 From: B Vadlamani Date: Thu, 23 Apr 2026 23:21:10 -0700 Subject: [PATCH 1/2] add_existence_benchmarks_hashjoin --- benchmarks/src/hj.rs | 192 +++++++++ datafusion/physical-plan/Cargo.toml | 5 + .../benches/hash_join_semi_anti.rs | 370 ++++++++++++++++++ 3 files changed, 567 insertions(+) create mode 100644 datafusion/physical-plan/benches/hash_join_semi_anti.rs diff --git a/benchmarks/src/hj.rs b/benchmarks/src/hj.rs index 301fe0d599cd6..dbf9f9b92ffa2 100644 --- a/benchmarks/src/hj.rs +++ b/benchmarks/src/hj.rs @@ -303,6 +303,198 @@ const HASH_QUERIES: &[HashJoinQuery] = &[ build_size: "100K_(20%_dups)", probe_size: "60M", }, + // RightSemi Join benchmarks with Int32 keys + // Q16: RightSemi, 100% Density, 100% Hit rate + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(l_suppkey AS INT) as k FROM lineitem + ) l + WHERE EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 1.0, + prob_hit: 1.0, + build_size: "100K", + probe_size: "60M_RightSemi", + }, + // Q17: RightSemi, 100% Density, 10% Hit rate + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(CASE WHEN l_suppkey % 10 = 0 THEN l_suppkey ELSE l_suppkey + 1000000 END AS INT) as k + FROM lineitem + ) l + WHERE EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 1.0, + prob_hit: 0.1, + build_size: "100K", + probe_size: "60M_RightSemi", + }, + // Q18: RightSemi, 50% Density, 100% Hit rate + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(l_suppkey * 2 AS INT) as k FROM lineitem + ) l + WHERE EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.5, + prob_hit: 1.0, + build_size: "100K", + probe_size: "60M_RightSemi", + }, + // Q19: RightSemi, 50% Density, 10% Hit rate + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(CASE + WHEN l_suppkey % 10 = 0 THEN l_suppkey * 2 + WHEN l_suppkey % 10 < 9 THEN l_suppkey * 2 + 1 + ELSE l_suppkey * 2 + 1000000 + END AS INT) as k + FROM lineitem + ) l + WHERE EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.5, + prob_hit: 0.1, + build_size: "100K", + probe_size: "60M_RightSemi", + }, + // Q20: RightSemi, 10% Density, 100% Hit rate + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(l_suppkey * 10 AS INT) as k FROM lineitem + ) l + WHERE EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.1, + prob_hit: 1.0, + build_size: "100K", + probe_size: "60M_RightSemi", + }, + // Q21: RightSemi, 10% Density, 10% Hit rate + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(CASE + WHEN l_suppkey % 10 = 0 THEN l_suppkey * 10 + WHEN l_suppkey % 10 < 9 THEN l_suppkey * 10 + 1 + ELSE l_suppkey * 10 + 1000000 + END AS INT) as k + FROM lineitem + ) l + WHERE EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.1, + prob_hit: 0.1, + build_size: "100K", + probe_size: "60M_RightSemi", + }, + // RightAnti Join benchmarks with Int32 keys + // Q22: RightAnti, 100% Density, 100% Hit rate (no output) + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(l_suppkey AS INT) as k FROM lineitem + ) l + WHERE NOT EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 1.0, + prob_hit: 1.0, + build_size: "100K", + probe_size: "60M_RightAnti", + }, + // Q23: RightAnti, 100% Density, 10% Hit rate (90% output) + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(CASE WHEN l_suppkey % 10 = 0 THEN l_suppkey ELSE l_suppkey + 1000000 END AS INT) as k + FROM lineitem + ) l + WHERE NOT EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 1.0, + prob_hit: 0.1, + build_size: "100K", + probe_size: "60M_RightAnti", + }, + // Q24: RightAnti, 50% Density, 100% Hit rate (no output) + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(l_suppkey * 2 AS INT) as k FROM lineitem + ) l + WHERE NOT EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.5, + prob_hit: 1.0, + build_size: "100K", + probe_size: "60M_RightAnti", + }, + // Q25: RightAnti, 50% Density, 10% Hit rate (90% output) + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(CASE + WHEN l_suppkey % 10 = 0 THEN l_suppkey * 2 + WHEN l_suppkey % 10 < 9 THEN l_suppkey * 2 + 1 + ELSE l_suppkey * 2 + 1000000 + END AS INT) as k + FROM lineitem + ) l + WHERE NOT EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.5, + prob_hit: 0.1, + build_size: "100K", + probe_size: "60M_RightAnti", + }, + // Q26: RightAnti, 10% Density, 100% Hit rate (no output) + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(l_suppkey * 10 AS INT) as k FROM lineitem + ) l + WHERE NOT EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.1, + prob_hit: 1.0, + build_size: "100K", + probe_size: "60M_RightAnti", + }, + // Q27: RightAnti, 10% Density, 10% Hit rate (90% output) + HashJoinQuery { + sql: r###"SELECT l.k + FROM ( + SELECT CAST(CASE + WHEN l_suppkey % 10 = 0 THEN l_suppkey * 10 + WHEN l_suppkey % 10 < 9 THEN l_suppkey * 10 + 1 + ELSE l_suppkey * 10 + 1000000 + END AS INT) as k + FROM lineitem + ) l + WHERE NOT EXISTS ( + SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k + )"###, + density: 0.1, + prob_hit: 0.1, + build_size: "100K", + probe_size: "60M_RightAnti", + }, ]; impl RunOpt { diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 374fc275a06e0..7c3914ecb6b96 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -108,3 +108,8 @@ required-features = ["test_utils"] harness = false name = "aggregate_vectorized" required-features = ["test_utils"] + +[[bench]] +harness = false +name = "hash_join_semi_anti" +required-features = ["test_utils"] diff --git a/datafusion/physical-plan/benches/hash_join_semi_anti.rs b/datafusion/physical-plan/benches/hash_join_semi_anti.rs new file mode 100644 index 0000000000000..9b5efe2d13d22 --- /dev/null +++ b/datafusion/physical-plan/benches/hash_join_semi_anti.rs @@ -0,0 +1,370 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Criterion benchmarks for Hash Join with RightSemi/RightAnti +//! +//! These benchmarks measure the hash join kernel for semi/anti joins +//! with Int32 keys, which can use roaring bitmap optimization. + +use std::sync::Arc; + +use arrow::array::{Int32Array, RecordBatch, StringArray}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use datafusion_common::{JoinType, NullEquality}; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::expressions::col; +use datafusion_physical_plan::collect; +use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode, utils::JoinOn}; +use datafusion_physical_plan::test::TestMemoryExec; +use tokio::runtime::Runtime; + +/// Build RecordBatches with Int32 keys (for roaring optimization). +/// +/// Schema: (key: Int32, data: Int32, payload: Utf8) +/// +/// `key_mod` controls distinct key count: key = row_index % key_mod. +/// `key_offset` shifts keys to control hit rate. +fn build_batches( + num_rows: usize, + key_mod: usize, + key_offset: i32, + schema: &SchemaRef, +) -> Vec { + let keys: Vec = (0..num_rows) + .map(|i| ((i % key_mod) as i32) + key_offset) + .collect(); + let data: Vec = (0..num_rows).map(|i| i as i32).collect(); + let payload: Vec = data.iter().map(|d| format!("val_{d}")).collect(); + + let batch = RecordBatch::try_new( + Arc::clone(schema), + vec![ + Arc::new(Int32Array::from(keys)), + Arc::new(Int32Array::from(data)), + Arc::new(StringArray::from(payload)), + ], + ) + .unwrap(); + + let batch_size = 8192; + let mut batches = Vec::new(); + let mut offset = 0; + while offset < batch.num_rows() { + let len = (batch.num_rows() - offset).min(batch_size); + batches.push(batch.slice(offset, len)); + offset += len; + } + batches +} + +fn make_exec( + batches: &[RecordBatch], + schema: &SchemaRef, +) -> Arc { + TestMemoryExec::try_new_exec(&[batches.to_vec()], Arc::clone(schema), None).unwrap() +} + +fn schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("key", DataType::Int32, false), + Field::new("data", DataType::Int32, false), + Field::new("payload", DataType::Utf8, false), + ])) +} + +fn do_hash_join( + left: Arc, + right: Arc, + join_type: JoinType, + rt: &Runtime, +) -> usize { + let on: JoinOn = vec![( + col("key", &left.schema()).unwrap(), + col("key", &right.schema()).unwrap(), + )]; + let join = HashJoinExec::try_new( + left, + right, + on, + None, + &join_type, + None, + PartitionMode::CollectLeft, + NullEquality::NullEqualsNothing, + false, + ) + .unwrap(); + + let task_ctx = Arc::new(TaskContext::default()); + rt.block_on(async { + let batches = collect(Arc::new(join), task_ctx).await.unwrap(); + batches.iter().map(|b| b.num_rows()).sum() + }) +} + +/// Build batches with sparse keys (key = row_index % key_mod * multiplier + key_offset). +/// The `multiplier` controls density: 1 = 100%, 2 = 50%, 10 = 10%. +fn build_batches_sparse( + num_rows: usize, + key_mod: usize, + key_offset: i32, + multiplier: i32, + schema: &SchemaRef, +) -> Vec { + let keys: Vec = (0..num_rows) + .map(|i| ((i % key_mod) as i32) * multiplier + key_offset) + .collect(); + let data: Vec = (0..num_rows).map(|i| i as i32).collect(); + let payload: Vec = data.iter().map(|d| format!("val_{d}")).collect(); + + let batch = RecordBatch::try_new( + Arc::clone(schema), + vec![ + Arc::new(Int32Array::from(keys)), + Arc::new(Int32Array::from(data)), + Arc::new(StringArray::from(payload)), + ], + ) + .unwrap(); + + let batch_size = 8192; + let mut batches = Vec::new(); + let mut offset = 0; + while offset < batch.num_rows() { + let len = (batch.num_rows() - offset).min(batch_size); + batches.push(batch.slice(offset, len)); + offset += len; + } + batches +} + +fn bench_hash_join_semi_anti(c: &mut Criterion) { + let rt = Runtime::new().unwrap(); + let s = schema(); + + let mut group = c.benchmark_group("hash_join_semi_anti"); + + // Build side: 100K rows, Probe side: 1M rows + let build_rows = 100_000; + let probe_rows = 1_000_000; + + // ========================================================================= + // RightSemi Join benchmarks + // ========================================================================= + + // RightSemi - 100% Density, 100% hit rate + { + let left_batches = build_batches(build_rows, build_rows, 0, &s); + let right_batches = build_batches(probe_rows, build_rows, 0, &s); + group.bench_function( + BenchmarkId::new("right_semi_d100_h100", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }, + ); + } + + // RightSemi - 100% Density, 10% hit rate + { + let left_batches = build_batches(build_rows, build_rows, 0, &s); + let right_batches = build_batches(probe_rows, build_rows * 10, 0, &s); + group.bench_function( + BenchmarkId::new("right_semi_d100_h10", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }, + ); + } + + // RightSemi - 50% Density, 100% hit rate + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 2, &s); + group.bench_function( + BenchmarkId::new("right_semi_d50_h100", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }, + ); + } + + // RightSemi - 50% Density, 10% hit rate + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 2, &s); + group.bench_function( + BenchmarkId::new("right_semi_d50_h10", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }, + ); + } + + // RightSemi - 10% Density, 100% hit rate + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 10, &s); + group.bench_function( + BenchmarkId::new("right_semi_d10_h100", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }, + ); + } + + // RightSemi - 10% Density, 10% hit rate + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 10, &s); + group.bench_function( + BenchmarkId::new("right_semi_d10_h10", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }, + ); + } + + // ========================================================================= + // RightAnti Join benchmarks + // ========================================================================= + + // RightAnti - 100% Density, 100% hit rate (no output) + { + let left_batches = build_batches(build_rows, build_rows, 0, &s); + let right_batches = build_batches(probe_rows, build_rows, 0, &s); + group.bench_function( + BenchmarkId::new("right_anti_d100_h100", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }, + ); + } + + // RightAnti - 100% Density, 10% hit rate (90% output) + { + let left_batches = build_batches(build_rows, build_rows, 0, &s); + let right_batches = build_batches(probe_rows, build_rows * 10, 0, &s); + group.bench_function( + BenchmarkId::new("right_anti_d100_h10", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }, + ); + } + + // RightAnti - 50% Density, 100% hit rate (no output) + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 2, &s); + group.bench_function( + BenchmarkId::new("right_anti_d50_h100", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }, + ); + } + + // RightAnti - 50% Density, 10% hit rate (90% output) + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 2, &s); + group.bench_function( + BenchmarkId::new("right_anti_d50_h10", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }, + ); + } + + // RightAnti - 10% Density, 100% hit rate (no output) + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 10, &s); + group.bench_function( + BenchmarkId::new("right_anti_d10_h100", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }, + ); + } + + // RightAnti - 10% Density, 10% hit rate (90% output) + { + let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); + let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 10, &s); + group.bench_function( + BenchmarkId::new("right_anti_d10_h10", probe_rows), + |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }, + ); + } + + group.finish(); +} + +criterion_group!(benches, bench_hash_join_semi_anti); +criterion_main!(benches); From 6eb40b7ea04504ccd22e4b129916128f384e3b5c Mon Sep 17 00:00:00 2001 From: B Vadlamani Date: Fri, 24 Apr 2026 00:02:53 -0700 Subject: [PATCH 2/2] add_existence_benchmarks_hashjoin --- .../benches/hash_join_semi_anti.rs | 204 ++++++++---------- 1 file changed, 84 insertions(+), 120 deletions(-) diff --git a/datafusion/physical-plan/benches/hash_join_semi_anti.rs b/datafusion/physical-plan/benches/hash_join_semi_anti.rs index 9b5efe2d13d22..e56ba73c40bf7 100644 --- a/datafusion/physical-plan/benches/hash_join_semi_anti.rs +++ b/datafusion/physical-plan/benches/hash_join_semi_anti.rs @@ -171,96 +171,78 @@ fn bench_hash_join_semi_anti(c: &mut Criterion) { { let left_batches = build_batches(build_rows, build_rows, 0, &s); let right_batches = build_batches(probe_rows, build_rows, 0, &s); - group.bench_function( - BenchmarkId::new("right_semi_d100_h100", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightSemi, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_semi_d100_h100", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }); } // RightSemi - 100% Density, 10% hit rate { let left_batches = build_batches(build_rows, build_rows, 0, &s); let right_batches = build_batches(probe_rows, build_rows * 10, 0, &s); - group.bench_function( - BenchmarkId::new("right_semi_d100_h10", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightSemi, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_semi_d100_h10", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }); } // RightSemi - 50% Density, 100% hit rate { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 2, &s); - group.bench_function( - BenchmarkId::new("right_semi_d50_h100", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightSemi, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_semi_d50_h100", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }); } // RightSemi - 50% Density, 10% hit rate { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 2, &s); - group.bench_function( - BenchmarkId::new("right_semi_d50_h10", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightSemi, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_semi_d50_h10", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }); } // RightSemi - 10% Density, 100% hit rate { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 10, &s); - group.bench_function( - BenchmarkId::new("right_semi_d10_h100", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightSemi, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_semi_d10_h100", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }); } // RightSemi - 10% Density, 10% hit rate { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 10, &s); - group.bench_function( - BenchmarkId::new("right_semi_d10_h10", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightSemi, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_semi_d10_h10", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightSemi, &rt) + }) + }); } // ========================================================================= @@ -271,96 +253,78 @@ fn bench_hash_join_semi_anti(c: &mut Criterion) { { let left_batches = build_batches(build_rows, build_rows, 0, &s); let right_batches = build_batches(probe_rows, build_rows, 0, &s); - group.bench_function( - BenchmarkId::new("right_anti_d100_h100", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightAnti, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_anti_d100_h100", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }); } // RightAnti - 100% Density, 10% hit rate (90% output) { let left_batches = build_batches(build_rows, build_rows, 0, &s); let right_batches = build_batches(probe_rows, build_rows * 10, 0, &s); - group.bench_function( - BenchmarkId::new("right_anti_d100_h10", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightAnti, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_anti_d100_h10", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }); } // RightAnti - 50% Density, 100% hit rate (no output) { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 2, &s); - group.bench_function( - BenchmarkId::new("right_anti_d50_h100", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightAnti, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_anti_d50_h100", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }); } // RightAnti - 50% Density, 10% hit rate (90% output) { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 2, &s); let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 2, &s); - group.bench_function( - BenchmarkId::new("right_anti_d50_h10", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightAnti, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_anti_d50_h10", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }); } // RightAnti - 10% Density, 100% hit rate (no output) { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); let right_batches = build_batches_sparse(probe_rows, build_rows, 0, 10, &s); - group.bench_function( - BenchmarkId::new("right_anti_d10_h100", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightAnti, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_anti_d10_h100", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }); } // RightAnti - 10% Density, 10% hit rate (90% output) { let left_batches = build_batches_sparse(build_rows, build_rows, 0, 10, &s); let right_batches = build_batches_sparse(probe_rows, build_rows * 10, 0, 10, &s); - group.bench_function( - BenchmarkId::new("right_anti_d10_h10", probe_rows), - |b| { - b.iter(|| { - let left = make_exec(&left_batches, &s); - let right = make_exec(&right_batches, &s); - do_hash_join(left, right, JoinType::RightAnti, &rt) - }) - }, - ); + group.bench_function(BenchmarkId::new("right_anti_d10_h10", probe_rows), |b| { + b.iter(|| { + let left = make_exec(&left_batches, &s); + let right = make_exec(&right_batches, &s); + do_hash_join(left, right, JoinType::RightAnti, &rt) + }) + }); } group.finish();