-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-11426: [Rust][DataFusion] EXTRACT support #9359
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ae0d3a3
b91632d
304008b
3eb552b
8284f1c
dbbb647
a471f57
5de73f6
7ce35ab
1e96898
bda3cab
0cad90f
0254e45
ac62940
dc67999
ae2489f
b4efb64
4c0dac6
6311917
9b97b47
a52e074
31f8d28
af8792f
e538726
ca771de
a23e4c9
aa149e7
85bb7b0
40e184b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,6 +71,8 @@ pub enum Signature { | |
| Exact(Vec<DataType>), | ||
| /// fixed number of arguments of arbitrary types | ||
| Any(usize), | ||
| /// One of a list of signatures | ||
| OneOf(Vec<Signature>), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FYI @seddonm1: I am not sure how this affects your string functions / other Postgres function plans.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I missed this, but all good. This is actually better :D |
||
| } | ||
|
|
||
| /// Scalar function | ||
|
|
@@ -138,6 +140,8 @@ pub enum BuiltinScalarFunction { | |
| NullIf, | ||
| /// Date truncate | ||
| DateTrunc, | ||
| /// Date part | ||
| DatePart, | ||
| /// MD5 | ||
| MD5, | ||
| /// SHA224 | ||
|
|
@@ -192,6 +196,7 @@ impl FromStr for BuiltinScalarFunction { | |
| "upper" => BuiltinScalarFunction::Upper, | ||
| "to_timestamp" => BuiltinScalarFunction::ToTimestamp, | ||
| "date_trunc" => BuiltinScalarFunction::DateTrunc, | ||
| "date_part" => BuiltinScalarFunction::DatePart, | ||
| "array" => BuiltinScalarFunction::Array, | ||
| "nullif" => BuiltinScalarFunction::NullIf, | ||
| "md5" => BuiltinScalarFunction::MD5, | ||
|
|
@@ -294,6 +299,7 @@ pub fn return_type( | |
| BuiltinScalarFunction::DateTrunc => { | ||
| Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) | ||
| } | ||
| BuiltinScalarFunction::DatePart => Ok(DataType::Int32), | ||
| BuiltinScalarFunction::Array => Ok(DataType::FixedSizeList( | ||
| Box::new(Field::new("item", arg_types[0].clone(), true)), | ||
| arg_types.len() as i32, | ||
|
|
@@ -463,6 +469,7 @@ pub fn create_physical_expr( | |
| _ => unreachable!(), | ||
| }, | ||
| }, | ||
| BuiltinScalarFunction::DatePart => datetime_expressions::date_part, | ||
| }); | ||
| // coerce | ||
| let args = coerce(args, input_schema, &signature(fun))?; | ||
|
|
@@ -507,6 +514,26 @@ fn signature(fun: &BuiltinScalarFunction) -> Signature { | |
| DataType::Utf8, | ||
| DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
| ]), | ||
| BuiltinScalarFunction::DatePart => Signature::OneOf(vec![ | ||
| Signature::Exact(vec![DataType::Utf8, DataType::Date32]), | ||
| Signature::Exact(vec![DataType::Utf8, DataType::Date64]), | ||
| Signature::Exact(vec![ | ||
| DataType::Utf8, | ||
| DataType::Timestamp(TimeUnit::Second, None), | ||
| ]), | ||
| Signature::Exact(vec![ | ||
| DataType::Utf8, | ||
| DataType::Timestamp(TimeUnit::Microsecond, None), | ||
| ]), | ||
| Signature::Exact(vec![ | ||
| DataType::Utf8, | ||
| DataType::Timestamp(TimeUnit::Millisecond, None), | ||
| ]), | ||
| Signature::Exact(vec![ | ||
| DataType::Utf8, | ||
| DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
| ]), | ||
| ]), | ||
| BuiltinScalarFunction::Array => { | ||
| Signature::Variadic(array_expressions::SUPPORTED_ARRAY_TYPES.to_vec()) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1717,7 +1717,7 @@ fn make_timestamp_nano_table() -> Result<Arc<MemTable>> { | |
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn to_timstamp() -> Result<()> { | ||
| async fn to_timestamp() -> Result<()> { | ||
| let mut ctx = ExecutionContext::new(); | ||
| ctx.register_table("ts_data", make_timestamp_nano_table()?); | ||
|
|
||
|
|
@@ -2134,6 +2134,24 @@ async fn crypto_expressions() -> Result<()> { | |
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn extract_date_part() -> Result<()> { | ||
| let mut ctx = ExecutionContext::new(); | ||
| let sql = "SELECT | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
| date_part('hour', CAST('2020-01-01' AS DATE)) AS hr1, | ||
| EXTRACT(HOUR FROM CAST('2020-01-01' AS DATE)) AS hr2, | ||
| EXTRACT(HOUR FROM to_timestamp('2020-09-08T12:00:00+00:00')) AS hr3, | ||
| date_part('YEAR', CAST('2000-01-01' AS DATE)) AS year1, | ||
| EXTRACT(year FROM to_timestamp('2020-09-08T12:00:00+00:00')) AS year2 | ||
| "; | ||
|
|
||
| let actual = execute(&mut ctx, sql).await; | ||
|
|
||
| let expected = vec![vec!["0", "0", "12", "2000", "2020"]]; | ||
| assert_eq!(expected, actual); | ||
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn in_list_array() -> Result<()> { | ||
| let mut ctx = ExecutionContext::new(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume the longer-term plan will be to handle the `Scalar` case more efficiently. This (converting to an array) is fine for now, I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, indeed. For now we can use this approach to avoid reimplementing hours/years etc, with a bit of overhead.
Maybe longer term it would be nice to have something like `Datum` in Arrow, in order to both gain some performance and avoid reimplementing things for the scalar case.