From 46f713345c36bc71c3a53baedca412e9c52807a6 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sun, 11 Oct 2020 19:07:04 +0200 Subject: [PATCH] Fixed benchmarks --- .../datafusion/benches/aggregate_query_sql.rs | 14 ++++---- rust/datafusion/benches/math_query_sql.rs | 36 +++++++++++-------- .../benches/sort_limit_query_sql.rs | 17 +++++---- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/rust/datafusion/benches/aggregate_query_sql.rs b/rust/datafusion/benches/aggregate_query_sql.rs index 547bf9e5d3c..bbb692d329e 100644 --- a/rust/datafusion/benches/aggregate_query_sql.rs +++ b/rust/datafusion/benches/aggregate_query_sql.rs @@ -22,6 +22,7 @@ use criterion::Criterion; use rand::seq::SliceRandom; use rand::Rng; use std::sync::{Arc, Mutex}; +use tokio::runtime::Runtime; extern crate arrow; extern crate datafusion; @@ -38,13 +39,12 @@ use datafusion::datasource::MemTable; use datafusion::error::Result; use datafusion::execution::context::ExecutionContext; -async fn query(ctx: Arc>, sql: &str) { +fn query(ctx: Arc>, sql: &str) { + let mut rt = Runtime::new().unwrap(); + // execute the query let df = ctx.lock().unwrap().sql(&sql).unwrap(); - let results = df.collect().await.unwrap(); - - // display the relation - for _batch in results {} + rt.block_on(df.collect()).unwrap(); } fn create_data(size: usize, null_density: f64) -> Vec> { @@ -116,8 +116,8 @@ fn create_context( } fn criterion_benchmark(c: &mut Criterion) { - let partitions_len = 4; - let array_len = 32768; // 2^15 + let partitions_len = 8; + let array_len = 32768 * 2; // 2^16 let batch_size = 2048; // 2^11 let ctx = create_context(partitions_len, array_len, batch_size).unwrap(); diff --git a/rust/datafusion/benches/math_query_sql.rs b/rust/datafusion/benches/math_query_sql.rs index b7e08106ff6..65f613b6cdd 100644 --- a/rust/datafusion/benches/math_query_sql.rs +++ b/rust/datafusion/benches/math_query_sql.rs @@ -21,6 +21,8 @@ use criterion::Criterion; use std::sync::{Arc, Mutex}; +use tokio::runtime::Runtime; + extern crate arrow; extern crate datafusion; @@ -34,13 +36,12 @@ use datafusion::error::Result; use datafusion::datasource::MemTable; use datafusion::execution::context::ExecutionContext; -async fn query(ctx: Arc>, sql: &str) { +fn query(ctx: Arc>, sql: &str) { + let mut rt = Runtime::new().unwrap(); + // execute the query let df = ctx.lock().unwrap().sql(&sql).unwrap(); - let results = df.collect().await.unwrap(); - - // display the relation - for _batch in results {} + rt.block_on(df.collect()).unwrap(); } fn create_context( @@ -77,24 +78,31 @@ fn create_context( } fn criterion_benchmark(c: &mut Criterion) { + let array_len = 1048576; // 2^20 + let batch_size = 512; // 2^9 + let ctx = create_context(array_len, batch_size).unwrap(); + c.bench_function("sqrt_20_9", |b| { + b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t")) + }); + + let array_len = 1048576; // 2^20 + let batch_size = 4096; // 2^12 + let ctx = create_context(array_len, batch_size).unwrap(); c.bench_function("sqrt_20_12", |b| { - let array_len = 1048576; // 2^20 - let batch_size = 4096; // 2^12 - let ctx = create_context(array_len, batch_size).unwrap(); b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t")) }); + let array_len = 4194304; // 2^22 + let batch_size = 4096; // 2^12 + let ctx = create_context(array_len, batch_size).unwrap(); c.bench_function("sqrt_22_12", |b| { - let array_len = 4194304; // 2^22 - let batch_size = 4096; // 2^12 - let ctx = create_context(array_len, batch_size).unwrap(); b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t")) }); + let array_len = 4194304; // 2^22 + let batch_size = 16384; // 2^14 + let ctx = create_context(array_len, batch_size).unwrap(); c.bench_function("sqrt_22_14", |b| { - let array_len = 4194304; // 2^22 - let batch_size = 16384; // 2^14 - let ctx = create_context(array_len, batch_size).unwrap(); b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t")) }); } diff --git a/rust/datafusion/benches/sort_limit_query_sql.rs b/rust/datafusion/benches/sort_limit_query_sql.rs index 1b2f1621c67..02440046b99 100644 --- a/rust/datafusion/benches/sort_limit_query_sql.rs +++ b/rust/datafusion/benches/sort_limit_query_sql.rs @@ -32,13 +32,12 @@ use datafusion::execution::context::ExecutionContext; use tokio::runtime::Runtime; -async fn run_query(ctx: Arc>, sql: &str) { +fn query(ctx: Arc>, sql: &str) { + let mut rt = Runtime::new().unwrap(); + // execute the query let df = ctx.lock().unwrap().sql(&sql).unwrap(); - let results = df.collect().await.unwrap(); - - // display the relation - for _batch in results {} + rt.block_on(df.collect()).unwrap(); } fn create_context() -> Arc> { @@ -90,7 +89,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("sort_and_limit_by_int", |b| { let ctx = create_context(); b.iter(|| { - run_query( + query( ctx.clone(), "SELECT c1, c13, c6, c10 \ FROM aggregate_test_100 \ @@ -103,7 +102,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("sort_and_limit_by_float", |b| { let ctx = create_context(); b.iter(|| { - run_query( + query( ctx.clone(), "SELECT c1, c13, c12 \ FROM aggregate_test_100 \ @@ -116,7 +115,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("sort_and_limit_lex_by_int", |b| { let ctx = create_context(); b.iter(|| { - run_query( + query( ctx.clone(), "SELECT c1, c13, c6, c10 \ FROM aggregate_test_100 \ @@ -129,7 +128,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("sort_and_limit_lex_by_string", |b| { let ctx = create_context(); b.iter(|| { - run_query( + query( ctx.clone(), "SELECT c1, c13, c6, c10 \ FROM aggregate_test_100 \