Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ prost = "0.14.1"
rand = "0.9"
recursive = "0.1.1"
regex = "1.12"
roaring = "0.11.3"
rstest = "0.26.1"
serde_json = "1"
sha2 = "^0.10.9"
Expand Down
192 changes: 192 additions & 0 deletions benchmarks/src/hj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,198 @@ const HASH_QUERIES: &[HashJoinQuery] = &[
build_size: "100K_(20%_dups)",
probe_size: "60M",
},
// RightSemi Join benchmarks with Int32 keys
// Q16: RightSemi, 100% Density, 100% Hit rate
// The outer query reads lineitem keys and the EXISTS subquery reads supplier
// keys; both are the unmodified suppkey cast to INT, so no probe key is
// deliberately shifted to miss.
// NOTE(review): the build_size/probe_size labels suggest supplier is the
// intended build side and lineitem the probe side; the actual join type and
// side assignment are chosen by the planner — confirm with EXPLAIN.
// The probe_size label also carries the join type ("_RightSemi").
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(l_suppkey AS INT) as k FROM lineitem
) l
WHERE EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 1.0,
prob_hit: 1.0,
build_size: "100K",
probe_size: "60M_RightSemi",
},
// Q17: RightSemi, 100% Density, 10% Hit rate
// Probe keys keep l_suppkey only when l_suppkey % 10 = 0; every other row is
// shifted by +1000000 so that it is intended to miss the supplier keys.
// NOTE(review): the misses rely on all s_suppkey values being below the
// shifted range — presumably true for the "100K" build size; confirm.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(CASE WHEN l_suppkey % 10 = 0 THEN l_suppkey ELSE l_suppkey + 1000000 END AS INT) as k
FROM lineitem
) l
WHERE EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 1.0,
prob_hit: 0.1,
build_size: "100K",
probe_size: "60M_RightSemi",
},
// Q18: RightSemi, 50% Density, 100% Hit rate
// Both sides multiply the suppkey by 2, so the supplier keys are spaced two
// apart (every other integer in their range) and the lineitem keys go through
// the same transform; no probe key is deliberately shifted to miss.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(l_suppkey * 2 AS INT) as k FROM lineitem
) l
WHERE EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.5,
prob_hit: 1.0,
build_size: "100K",
probe_size: "60M_RightSemi",
},
// Q19: RightSemi, 50% Density, 10% Hit rate
// Supplier keys are s_suppkey * 2 (all even). The probe CASE has three arms:
//   - l_suppkey % 10 = 0: l_suppkey * 2, the same transform as the supplier
//     side (the intended hits);
//   - remainder 1..8 (for positive keys): l_suppkey * 2 + 1, an odd value that
//     cannot equal an even supplier key;
//   - otherwise: l_suppkey * 2 + 1000000, an even value shifted upward.
// NOTE(review): the last arm only misses if every s_suppkey * 2 stays below
// the shifted values — presumably true for the "100K" build size; confirm.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(CASE
WHEN l_suppkey % 10 = 0 THEN l_suppkey * 2
WHEN l_suppkey % 10 < 9 THEN l_suppkey * 2 + 1
ELSE l_suppkey * 2 + 1000000
END AS INT) as k
FROM lineitem
) l
WHERE EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.5,
prob_hit: 0.1,
build_size: "100K",
probe_size: "60M_RightSemi",
},
// Q20: RightSemi, 10% Density, 100% Hit rate
// Both sides multiply the suppkey by 10, so the supplier keys are spaced ten
// apart and the lineitem keys go through the same transform; no probe key is
// deliberately shifted to miss.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(l_suppkey * 10 AS INT) as k FROM lineitem
) l
WHERE EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.1,
prob_hit: 1.0,
build_size: "100K",
probe_size: "60M_RightSemi",
},
// Q21: RightSemi, 10% Density, 10% Hit rate
// Supplier keys are s_suppkey * 10 (all multiples of 10). The probe CASE has
// three arms:
//   - l_suppkey % 10 = 0: l_suppkey * 10, the same transform as the supplier
//     side (the intended hits);
//   - remainder 1..8 (for positive keys): l_suppkey * 10 + 1, which is not a
//     multiple of 10 and so cannot equal a supplier key;
//   - otherwise: l_suppkey * 10 + 1000000, a multiple of 10 shifted upward.
// NOTE(review): the last arm only misses if every s_suppkey * 10 stays below
// the shifted values. Assuming s_suppkey tops out at 100000 (per the "100K"
// label), the largest build key is 1000000 and the smallest shifted key is
// 1000090 (l_suppkey = 9) — a tight margin that breaks at larger scale; confirm.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(CASE
WHEN l_suppkey % 10 = 0 THEN l_suppkey * 10
WHEN l_suppkey % 10 < 9 THEN l_suppkey * 10 + 1
ELSE l_suppkey * 10 + 1000000
END AS INT) as k
FROM lineitem
) l
WHERE EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.1,
prob_hit: 0.1,
build_size: "100K",
probe_size: "60M_RightSemi",
},
// RightAnti Join benchmarks with Int32 keys
// Q22: RightAnti, 100% Density, 100% Hit rate (no output)
// NOT EXISTS keeps only the lineitem rows that have no matching supplier key,
// so the fraction of rows returned is the complement of the hit rate. Both
// sides use the unmodified suppkey cast to INT, so no probe key is
// deliberately shifted to miss.
// NOTE(review): the build_size/probe_size labels suggest supplier is the
// intended build side and lineitem the probe side; the actual join type and
// side assignment are chosen by the planner — confirm with EXPLAIN.
// The probe_size label also carries the join type ("_RightAnti").
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(l_suppkey AS INT) as k FROM lineitem
) l
WHERE NOT EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 1.0,
prob_hit: 1.0,
build_size: "100K",
probe_size: "60M_RightAnti",
},
// Q23: RightAnti, 100% Density, 10% Hit rate (90% output)
// Probe keys keep l_suppkey only when l_suppkey % 10 = 0; every other row is
// shifted by +1000000 so that it is intended to miss, and NOT EXISTS returns
// exactly those non-matching rows.
// NOTE(review): the misses rely on all s_suppkey values being below the
// shifted range — presumably true for the "100K" build size; confirm.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(CASE WHEN l_suppkey % 10 = 0 THEN l_suppkey ELSE l_suppkey + 1000000 END AS INT) as k
FROM lineitem
) l
WHERE NOT EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 1.0,
prob_hit: 0.1,
build_size: "100K",
probe_size: "60M_RightAnti",
},
// Q24: RightAnti, 50% Density, 100% Hit rate (no output)
// Both sides multiply the suppkey by 2, so the supplier keys are spaced two
// apart and the lineitem keys go through the same transform; no probe key is
// deliberately shifted to miss, leaving NOT EXISTS nothing to return.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(l_suppkey * 2 AS INT) as k FROM lineitem
) l
WHERE NOT EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.5,
prob_hit: 1.0,
build_size: "100K",
probe_size: "60M_RightAnti",
},
// Q25: RightAnti, 50% Density, 10% Hit rate (90% output)
// Supplier keys are s_suppkey * 2 (all even). The probe CASE has three arms:
//   - l_suppkey % 10 = 0: l_suppkey * 2, the same transform as the supplier
//     side (the intended hits, which NOT EXISTS filters out);
//   - remainder 1..8 (for positive keys): l_suppkey * 2 + 1, an odd value that
//     cannot equal an even supplier key;
//   - otherwise: l_suppkey * 2 + 1000000, an even value shifted upward.
// NOTE(review): the last arm only misses if every s_suppkey * 2 stays below
// the shifted values — presumably true for the "100K" build size; confirm.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(CASE
WHEN l_suppkey % 10 = 0 THEN l_suppkey * 2
WHEN l_suppkey % 10 < 9 THEN l_suppkey * 2 + 1
ELSE l_suppkey * 2 + 1000000
END AS INT) as k
FROM lineitem
) l
WHERE NOT EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.5,
prob_hit: 0.1,
build_size: "100K",
probe_size: "60M_RightAnti",
},
// Q26: RightAnti, 10% Density, 100% Hit rate (no output)
// Both sides multiply the suppkey by 10, so the supplier keys are spaced ten
// apart and the lineitem keys go through the same transform; no probe key is
// deliberately shifted to miss, leaving NOT EXISTS nothing to return.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(l_suppkey * 10 AS INT) as k FROM lineitem
) l
WHERE NOT EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.1,
prob_hit: 1.0,
build_size: "100K",
probe_size: "60M_RightAnti",
},
// Q27: RightAnti, 10% Density, 10% Hit rate (90% output)
// Supplier keys are s_suppkey * 10 (all multiples of 10). The probe CASE has
// three arms:
//   - l_suppkey % 10 = 0: l_suppkey * 10, the same transform as the supplier
//     side (the intended hits, which NOT EXISTS filters out);
//   - remainder 1..8 (for positive keys): l_suppkey * 10 + 1, which is not a
//     multiple of 10 and so cannot equal a supplier key;
//   - otherwise: l_suppkey * 10 + 1000000, a multiple of 10 shifted upward.
// NOTE(review): the last arm only misses if every s_suppkey * 10 stays below
// the shifted values. Assuming s_suppkey tops out at 100000 (per the "100K"
// label), the largest build key is 1000000 and the smallest shifted key is
// 1000090 (l_suppkey = 9) — a tight margin that breaks at larger scale; confirm.
HashJoinQuery {
sql: r###"SELECT l.k
FROM (
SELECT CAST(CASE
WHEN l_suppkey % 10 = 0 THEN l_suppkey * 10
WHEN l_suppkey % 10 < 9 THEN l_suppkey * 10 + 1
ELSE l_suppkey * 10 + 1000000
END AS INT) as k
FROM lineitem
) l
WHERE NOT EXISTS (
SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
)"###,
density: 0.1,
prob_hit: 0.1,
build_size: "100K",
probe_size: "60M_RightAnti",
},
];

impl RunOpt {
Expand Down
1 change: 1 addition & 0 deletions datafusion/physical-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ log = { workspace = true }
num-traits = { workspace = true }
parking_lot = { workspace = true }
pin-project-lite = "^0.2.7"
roaring = { workspace = true }
tokio = { workspace = true }

[dev-dependencies]
Expand Down
Loading
Loading