Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions rust/datafusion/benches/aggregate_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use criterion::Criterion;
use rand::seq::SliceRandom;
use rand::Rng;
use std::sync::{Arc, Mutex};
use tokio::runtime::Runtime;

extern crate arrow;
extern crate datafusion;
Expand All @@ -38,13 +39,12 @@ use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::execution::context::ExecutionContext;

async fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
let mut rt = Runtime::new().unwrap();

// execute the query
let df = ctx.lock().unwrap().sql(&sql).unwrap();
let results = df.collect().await.unwrap();

// display the relation
for _batch in results {}
rt.block_on(df.collect()).unwrap();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 this is the major change, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the `block_on` call is what picks up the task. We also end up benchmarking the creation of the runtime itself, but that cost was very small compared to the task itself, so I left it there.

}

fn create_data(size: usize, null_density: f64) -> Vec<Option<f64>> {
Expand Down Expand Up @@ -116,8 +116,8 @@ fn create_context(
}

fn criterion_benchmark(c: &mut Criterion) {
let partitions_len = 4;
let array_len = 32768; // 2^15
let partitions_len = 8;
let array_len = 32768 * 2; // 2^16
let batch_size = 2048; // 2^11
let ctx = create_context(partitions_len, array_len, batch_size).unwrap();

Expand Down
36 changes: 22 additions & 14 deletions rust/datafusion/benches/math_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ use criterion::Criterion;

use std::sync::{Arc, Mutex};

use tokio::runtime::Runtime;

extern crate arrow;
extern crate datafusion;

Expand All @@ -34,13 +36,12 @@ use datafusion::error::Result;
use datafusion::datasource::MemTable;
use datafusion::execution::context::ExecutionContext;

async fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
let mut rt = Runtime::new().unwrap();

// execute the query
let df = ctx.lock().unwrap().sql(&sql).unwrap();
let results = df.collect().await.unwrap();

// display the relation
for _batch in results {}
rt.block_on(df.collect()).unwrap();
}

fn create_context(
Expand Down Expand Up @@ -77,24 +78,31 @@ fn create_context(
}

fn criterion_benchmark(c: &mut Criterion) {
let array_len = 1048576; // 2^20
let batch_size = 512; // 2^9
let ctx = create_context(array_len, batch_size).unwrap();
c.bench_function("sqrt_20_9", |b| {
b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
});

let array_len = 1048576; // 2^20
let batch_size = 4096; // 2^12
let ctx = create_context(array_len, batch_size).unwrap();
c.bench_function("sqrt_20_12", |b| {
let array_len = 1048576; // 2^20
let batch_size = 4096; // 2^12
let ctx = create_context(array_len, batch_size).unwrap();
b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
});

let array_len = 4194304; // 2^22
let batch_size = 4096; // 2^12
let ctx = create_context(array_len, batch_size).unwrap();
c.bench_function("sqrt_22_12", |b| {
let array_len = 4194304; // 2^22
let batch_size = 4096; // 2^12
let ctx = create_context(array_len, batch_size).unwrap();
b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
});

let array_len = 4194304; // 2^22
let batch_size = 16384; // 2^14
let ctx = create_context(array_len, batch_size).unwrap();
c.bench_function("sqrt_22_14", |b| {
let array_len = 4194304; // 2^22
let batch_size = 16384; // 2^14
let ctx = create_context(array_len, batch_size).unwrap();
b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
});
}
Expand Down
17 changes: 8 additions & 9 deletions rust/datafusion/benches/sort_limit_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,12 @@ use datafusion::execution::context::ExecutionContext;

use tokio::runtime::Runtime;

async fn run_query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
let mut rt = Runtime::new().unwrap();

// execute the query
let df = ctx.lock().unwrap().sql(&sql).unwrap();
let results = df.collect().await.unwrap();

// display the relation
for _batch in results {}
rt.block_on(df.collect()).unwrap();
}

fn create_context() -> Arc<Mutex<ExecutionContext>> {
Expand Down Expand Up @@ -90,7 +89,7 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("sort_and_limit_by_int", |b| {
let ctx = create_context();
b.iter(|| {
run_query(
query(
ctx.clone(),
"SELECT c1, c13, c6, c10 \
FROM aggregate_test_100 \
Expand All @@ -103,7 +102,7 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("sort_and_limit_by_float", |b| {
let ctx = create_context();
b.iter(|| {
run_query(
query(
ctx.clone(),
"SELECT c1, c13, c12 \
FROM aggregate_test_100 \
Expand All @@ -116,7 +115,7 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("sort_and_limit_lex_by_int", |b| {
let ctx = create_context();
b.iter(|| {
run_query(
query(
ctx.clone(),
"SELECT c1, c13, c6, c10 \
FROM aggregate_test_100 \
Expand All @@ -129,7 +128,7 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("sort_and_limit_lex_by_string", |b| {
let ctx = create_context();
b.iter(|| {
run_query(
query(
ctx.clone(),
"SELECT c1, c13, c6, c10 \
FROM aggregate_test_100 \
Expand Down