Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 83 additions & 83 deletions benchmarks/src/smj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,27 +60,27 @@ pub struct RunOpt {
/// - Key cardinality (rows per key)
/// - Filter selectivity (if applicable)
const SMJ_QUERIES: &[&str] = &[
// Q1: INNER 100K x 100K | 1:1
// Q1: INNER 1M x 1M | 1:1
r#"
WITH t1_sorted AS (
SELECT value as key FROM range(100000) ORDER BY value
SELECT value as key FROM range(1000000) ORDER BY value
),
t2_sorted AS (
SELECT value as key FROM range(100000) ORDER BY value
SELECT value as key FROM range(1000000) ORDER BY value
)
SELECT t1_sorted.key as k1, t2_sorted.key as k2
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
"#,
// Q2: INNER 100K x 1M | 1:10
// Q2: INNER 1M x 10M | 1:10
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
Expand All @@ -101,16 +101,16 @@ const SMJ_QUERIES: &[&str] = &[
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
"#,
// Q4: INNER 100K x 1M | 1:10 | 1%
// Q4: INNER 1M x 10M | 1:10 | 1%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
Expand All @@ -133,63 +133,63 @@ const SMJ_QUERIES: &[&str] = &[
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
WHERE t1_sorted.data <> t2_sorted.data AND t2_sorted.data % 10 = 0
"#,
// Q6: LEFT 100K x 1M | 1:10
// Q6: LEFT 1M x 10M | 1:10
r#"
WITH t1_sorted AS (
SELECT value % 10500 as key, value as data
FROM range(100000)
SELECT value % 105000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
FROM t1_sorted LEFT JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
"#,
// Q7: LEFT 100K x 1M | 1:10 | 50%
// Q7: LEFT 1M x 10M | 1:10 | 50%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
FROM t1_sorted LEFT JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
WHERE t2_sorted.data IS NULL OR t2_sorted.data % 2 = 0
"#,
// Q8: FULL 100K x 100K | 1:10
// Q8: FULL 1M x 1M | 1:10
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 12500 as key, value as data
FROM range(100000)
SELECT value % 125000 as key, value as data
FROM range(1000000)
ORDER BY key, data
)
SELECT t1_sorted.key as k1, t1_sorted.data as d1,
t2_sorted.key as k2, t2_sorted.data as d2
FROM t1_sorted FULL JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
"#,
// Q9: FULL 100K x 1M | 1:10 | 10%
// Q9: FULL 1M x 10M | 1:10 | 10%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key as k1, t1_sorted.data as d1,
Expand All @@ -199,16 +199,16 @@ const SMJ_QUERIES: &[&str] = &[
OR t1_sorted.data <> t2_sorted.data)
AND (t1_sorted.data IS NULL OR t1_sorted.data % 10 = 0)
"#,
// Q10: LEFT SEMI 100K x 1M | 1:10
// Q10: LEFT SEMI 1M x 10M | 1:10
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key
FROM range(1000000)
SELECT value % 100000 as key
FROM range(10000000)
ORDER BY key
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -218,16 +218,16 @@ const SMJ_QUERIES: &[&str] = &[
WHERE t2_sorted.key = t1_sorted.key
)
"#,
// Q11: LEFT SEMI 100K x 1M | 1:10 | 1%
// Q11: LEFT SEMI 1M x 10M | 1:10 | 1%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -239,16 +239,16 @@ const SMJ_QUERIES: &[&str] = &[
AND t2_sorted.data % 100 = 0
)
"#,
// Q12: LEFT SEMI 100K x 1M | 1:10 | 50%
// Q12: LEFT SEMI 1M x 10M | 1:10 | 50%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -260,16 +260,16 @@ const SMJ_QUERIES: &[&str] = &[
AND t2_sorted.data % 2 = 0
)
"#,
// Q13: LEFT SEMI 100K x 1M | 1:10 | 90%
// Q13: LEFT SEMI 1M x 10M | 1:10 | 90%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(1000000)
SELECT value % 100000 as key, value as data
FROM range(10000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -281,16 +281,16 @@ const SMJ_QUERIES: &[&str] = &[
AND t2_sorted.data % 10 <> 0
)
"#,
// Q14: LEFT ANTI 100K x 1M | 1:10
// Q14: LEFT ANTI 1M x 10M | 1:10
r#"
WITH t1_sorted AS (
SELECT value % 10500 as key, value as data
FROM range(100000)
SELECT value % 105000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key
FROM range(1000000)
SELECT value % 100000 as key
FROM range(10000000)
ORDER BY key
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -300,16 +300,16 @@ const SMJ_QUERIES: &[&str] = &[
WHERE t2_sorted.key = t1_sorted.key
)
"#,
// Q15: LEFT ANTI 100K x 1M | 1:10 | partial match
// Q15: LEFT ANTI 1M x 10M | 1:10 | partial match
r#"
WITH t1_sorted AS (
SELECT value % 12000 as key, value as data
FROM range(100000)
SELECT value % 120000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key
FROM range(1000000)
SELECT value % 100000 as key
FROM range(10000000)
ORDER BY key
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -319,16 +319,16 @@ const SMJ_QUERIES: &[&str] = &[
WHERE t2_sorted.key = t1_sorted.key
)
"#,
// Q16: LEFT ANTI 100K x 100K | 1:1 | stress
// Q16: LEFT ANTI 1M x 1M | 1:1 | stress
r#"
WITH t1_sorted AS (
SELECT value % 11000 as key, value as data
FROM range(100000)
SELECT value % 110000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key
FROM range(100000)
SELECT value % 100000 as key
FROM range(1000000)
ORDER BY key
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -338,32 +338,32 @@ const SMJ_QUERIES: &[&str] = &[
WHERE t2_sorted.key = t1_sorted.key
)
"#,
// Q17: INNER 100K x 5M | 1:50 | 5%
// Q17: INNER 1M x 50M | 1:50 | 5%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(5000000)
SELECT value % 100000 as key, value as data
FROM range(50000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
WHERE t2_sorted.data <> t1_sorted.data AND t2_sorted.data % 20 = 0
"#,
// Q18: LEFT SEMI 100K x 5M | 1:50 | 2%
// Q18: LEFT SEMI 1M x 50M | 1:50 | 2%
r#"
WITH t1_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(100000)
SELECT value % 100000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key, value as data
FROM range(5000000)
SELECT value % 100000 as key, value as data
FROM range(50000000)
ORDER BY key, data
)
SELECT t1_sorted.key, t1_sorted.data
Expand All @@ -375,16 +375,16 @@ const SMJ_QUERIES: &[&str] = &[
AND t2_sorted.data % 50 = 0
)
"#,
// Q19: LEFT ANTI 100K x 5M | 1:50 | partial match
// Q19: LEFT ANTI 1M x 50M | 1:50 | partial match
r#"
WITH t1_sorted AS (
SELECT value % 15000 as key, value as data
FROM range(100000)
SELECT value % 150000 as key, value as data
FROM range(1000000)
ORDER BY key, data
),
t2_sorted AS (
SELECT value % 10000 as key
FROM range(5000000)
SELECT value % 100000 as key
FROM range(50000000)
ORDER BY key
)
SELECT t1_sorted.key, t1_sorted.data
Expand Down
Loading