From 9d358b6c51796127b579ec79dc9b0233c9cb92a6 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 29 Mar 2022 21:38:17 +0200 Subject: [PATCH 1/4] Add code to create expression based on schema information --- datafusion/jit/Cargo.toml | 1 + datafusion/jit/src/ast.rs | 32 +++++++++++++++++++++++++++----- datafusion/jit/src/lib.rs | 31 ++++++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index 54c010812eb5b..b8759b52025cb 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -42,5 +42,6 @@ cranelift-module = "0.82.0" cranelift-native = "0.82.0" datafusion-common = { path = "../common", version = "7.0.0", features = ["jit"] } datafusion-expr = { path = "../expr", version = "7.0.0" } +arrow = {version = "11"} parking_lot = "0.12" diff --git a/datafusion/jit/src/ast.rs b/datafusion/jit/src/ast.rs index 8b9139a32e402..2392da6482d21 100644 --- a/datafusion/jit/src/ast.rs +++ b/datafusion/jit/src/ast.rs @@ -16,7 +16,7 @@ // under the License. use cranelift::codegen::ir; -use datafusion_common::{DataFusionError, ScalarValue}; +use datafusion_common::{DFSchemaRef, DataFusionError, ScalarValue}; use std::fmt::{Display, Formatter}; #[derive(Clone, Debug)] @@ -138,11 +138,13 @@ pub enum Literal { Typed(TypedLit), } -impl TryFrom for Expr { +impl TryFrom<(datafusion_expr::Expr, DFSchemaRef)> for Expr { type Error = DataFusionError; // Try to JIT compile the Expr for faster evaluation - fn try_from(value: datafusion_expr::Expr) -> Result { + fn try_from( + (value, schema): (datafusion_expr::Expr, DFSchemaRef), + ) -> Result { match &value { datafusion_expr::Expr::BinaryExpr { left, op, right } => { let op = match op { @@ -164,10 +166,30 @@ impl TryFrom for Expr { } }; Ok(Expr::Binary(op( - Box::new((*left.clone()).try_into()?), - Box::new((*right.clone()).try_into()?), + Box::new(((*left.clone(), schema.clone())).try_into()?), + Box::new(((*right.clone(), schema)).try_into()?), ))) } + datafusion_expr::Expr::Column(col) => { + let field = schema.field_from_column(col)?; + let ty = field.data_type(); + + let jit_type = match ty { + arrow::datatypes::DataType::Int64 => I64, + arrow::datatypes::DataType::Float32 => F32, + arrow::datatypes::DataType::Float64 => F64, + arrow::datatypes::DataType::Boolean => BOOL, + + _ => { + return Err(DataFusionError::NotImplemented(format!( + "Compiling Expression with type {} not yet supported in JIT mode", + ty + ))) + } + }; + + Ok(Expr::Identifier(field.qualified_name(), jit_type)) + } datafusion_expr::Expr::Literal(s) => { let lit = match s { ScalarValue::Boolean(Some(b)) => TypedLit::Bool(*b), diff --git a/datafusion/jit/src/lib.rs b/datafusion/jit/src/lib.rs index c1db48b45bae4..dff27da317e49 100644 --- a/datafusion/jit/src/lib.rs +++ b/datafusion/jit/src/lib.rs @@ -23,11 +23,15 @@ pub mod jit; #[cfg(test)] mod tests { + use std::collections::HashMap; + use std::sync::Arc; + use crate::api::{Assembler, GeneratedFunction}; use crate::ast::{BinaryExpr, Expr, Literal, TypedLit, I64}; use crate::jit::JIT; - use datafusion_common::Result; - use datafusion_expr::lit; + use arrow::datatypes::DataType; + use datafusion_common::{DFField, DFSchema, Result}; + use datafusion_expr::{col, lit}; #[test] fn iterative_fib() -> Result<()> { @@ -89,7 +93,8 @@ mod tests { #[test] fn from_datafusion_expression() -> Result<()> { let df_expr = lit(1.0f32) + lit(2.0f32); - let jit_expr: crate::ast::Expr = df_expr.try_into()?; + let schema = Arc::new(DFSchema::empty()); + let jit_expr: crate::ast::Expr = (df_expr, schema).try_into()?; assert_eq!( jit_expr, @@ -102,6 +107,26 @@ mod tests { Ok(()) } + #[test] + fn from_datafusion_expression_schema() -> Result<()> { + let df_expr = col("a") + lit(1i64); + let schema = Arc::new(DFSchema::new_with_metadata( + vec![DFField::new(Some("table1"), "a", DataType::Int64, false)], + HashMap::new(), + )?); + let jit_expr: crate::ast::Expr = (df_expr, schema).try_into()?; + + assert_eq!( + jit_expr, + Expr::Binary(BinaryExpr::Add( + Box::new(Expr::Identifier("table1.a".to_string(), I64)), + Box::new(Expr::Literal(Literal::Typed(TypedLit::Int(1)))) + )), + ); + + Ok(()) + } + unsafe fn run_code( jit: &mut JIT, code: GeneratedFunction, From 8d4f21285b5e3ba0480929f6c65a3d36fb62fd45 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 29 Mar 2022 21:42:22 +0200 Subject: [PATCH 2/4] Add spaces --- datafusion/jit/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index b8759b52025cb..265b30ae8760e 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -42,6 +42,6 @@ cranelift-module = "0.82.0" cranelift-native = "0.82.0" datafusion-common = { path = "../common", version = "7.0.0", features = ["jit"] } datafusion-expr = { path = "../expr", version = "7.0.0" } -arrow = {version = "11"} +arrow = { version = "11" } parking_lot = "0.12" From f55d4edc75587016a6dfb0f26d16922518be2078 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 29 Mar 2022 22:06:38 +0200 Subject: [PATCH 3/4] Move to top --- datafusion/jit/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index 265b30ae8760e..e539e2b1f30bd 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -36,12 +36,12 @@ path = "src/lib.rs" jit = [] [dependencies] +arrow = { version = "11" } cranelift = "0.82.0" cranelift-jit = "0.82.0" cranelift-module = "0.82.0" cranelift-native = "0.82.0" datafusion-common = { path = "../common", version = "7.0.0", features = ["jit"] } datafusion-expr = { path = "../expr", version = "7.0.0" } -arrow = { version = "11" } parking_lot = "0.12" From 2af1436e18a67bc69d163b490e83d41cf5d8fc1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Tue, 29 Mar 2022 23:17:24 +0200 Subject: [PATCH 4/4] Clippy --- datafusion/jit/src/ast.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/jit/src/ast.rs b/datafusion/jit/src/ast.rs index 2392da6482d21..fd10a909e7833 100644 --- a/datafusion/jit/src/ast.rs +++ b/datafusion/jit/src/ast.rs @@ -166,8 +166,8 @@ impl TryFrom<(datafusion_expr::Expr, DFSchemaRef)> for Expr { } }; Ok(Expr::Binary(op( - Box::new(((*left.clone(), schema.clone())).try_into()?), - Box::new(((*right.clone(), schema)).try_into()?), + Box::new((*left.clone(), schema.clone()).try_into()?), + Box::new((*right.clone(), schema).try_into()?), ))) } datafusion_expr::Expr::Column(col) => {