-
Notifications
You must be signed in to change notification settings - Fork 1.9k
feat: add arrow_cast function to support arbitrary arrow types
#5166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
52e5c58
a58f00a
78965db
3df081f
d3b17e8
7fa282b
739d1c0
a4f2753
b3defe4
84901bf
1e6b648
ff5d72b
6c5c55e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,31 +52,242 @@ SELECT arrow_typeof(1.0::float) | |
| Float32 | ||
|
|
||
| # arrow_typeof_decimal | ||
| # query T | ||
| # SELECT arrow_typeof(1::Decimal) | ||
| # ---- | ||
| # Decimal128(38, 10) | ||
|
|
||
| # # arrow_typeof_timestamp | ||
| # query T | ||
| # SELECT arrow_typeof(now()::timestamp) | ||
| # ---- | ||
| # Timestamp(Nanosecond, None) | ||
|
|
||
| # # arrow_typeof_timestamp_utc | ||
| # query T | ||
| # SELECT arrow_typeof(now()) | ||
| # ---- | ||
| # Timestamp(Nanosecond, Some(\"+00:00\")) | ||
|
|
||
| # # arrow_typeof_timestamp_date32( | ||
| # query T | ||
| # SELECT arrow_typeof(now()::date) | ||
| # ---- | ||
| # Date32 | ||
|
|
||
| # # arrow_typeof_utf8 | ||
| # query T | ||
| # SELECT arrow_typeof('1') | ||
| # ---- | ||
| # Utf8 | ||
| query T | ||
| SELECT arrow_typeof(1::Decimal) | ||
| ---- | ||
| Decimal128(38, 10) | ||
|
|
||
| # arrow_typeof_timestamp | ||
| query T | ||
| SELECT arrow_typeof(now()::timestamp) | ||
| ---- | ||
| Timestamp(Nanosecond, None) | ||
|
|
||
| # arrow_typeof_timestamp_utc | ||
| query T | ||
| SELECT arrow_typeof(now()) | ||
| ---- | ||
| Timestamp(Nanosecond, Some("+00:00")) | ||
|
|
||
| # arrow_typeof_timestamp_date32 | ||
| query T | ||
| SELECT arrow_typeof(now()::date) | ||
| ---- | ||
| Date32 | ||
|
|
||
| # arrow_typeof_utf8 | ||
| query T | ||
| SELECT arrow_typeof('1') | ||
| ---- | ||
| Utf8 | ||
|
|
||
|
|
||
| #### arrow_cast (in some ways opposite of arrow_typeof) | ||
|
|
||
| # Basic tests | ||
|
|
||
| query I | ||
| SELECT arrow_cast('1', 'Int16') | ||
| ---- | ||
| 1 | ||
|
|
||
| # Basic error test | ||
| query error Error during planning: arrow_cast needs 2 arguments, 1 provided | ||
| SELECT arrow_cast('1') | ||
|
|
||
| query error Error during planning: arrow_cast requires its second argument to be a constant string, got Int64\(43\) | ||
| SELECT arrow_cast('1', 43) | ||
|
|
||
| query error Error unrecognized word: unknown | ||
| SELECT arrow_cast('1', 'unknown') | ||
|
|
||
| # Round Trip tests: | ||
| query TTTTTTTTTTTTTTTTTTT | ||
| SELECT | ||
| arrow_typeof(arrow_cast(1, 'Int8')) as col_i8, | ||
| arrow_typeof(arrow_cast(1, 'Int16')) as col_i16, | ||
| arrow_typeof(arrow_cast(1, 'Int32')) as col_i32, | ||
| arrow_typeof(arrow_cast(1, 'Int64')) as col_i64, | ||
| arrow_typeof(arrow_cast(1, 'UInt8')) as col_u8, | ||
| arrow_typeof(arrow_cast(1, 'UInt16')) as col_u16, | ||
| arrow_typeof(arrow_cast(1, 'UInt32')) as col_u32, | ||
| arrow_typeof(arrow_cast(1, 'UInt64')) as col_u64, | ||
| -- can't seem to cast to Float16 for some reason | ||
| -- arrow_typeof(arrow_cast(1, 'Float16')) as col_f16, | ||
| arrow_typeof(arrow_cast(1, 'Float32')) as col_f32, | ||
| arrow_typeof(arrow_cast(1, 'Float64')) as col_f64, | ||
| arrow_typeof(arrow_cast('foo', 'Utf8')) as col_utf8, | ||
| arrow_typeof(arrow_cast('foo', 'LargeUtf8')) as col_large_utf8, | ||
| arrow_typeof(arrow_cast('foo', 'Binary')) as col_binary, | ||
| arrow_typeof(arrow_cast('foo', 'LargeBinary')) as col_large_binary, | ||
| arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Second, None)')) as col_ts_s, | ||
| arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Millisecond, None)')) as col_ts_ms, | ||
| arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Microsecond, None)')) as col_ts_us, | ||
| arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Nanosecond, None)')) as col_ts_ns, | ||
| arrow_typeof(arrow_cast('foo', 'Dictionary(Int32, Utf8)')) as col_dict | ||
| ---- | ||
| Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 Utf8 LargeUtf8 Binary LargeBinary Timestamp(Second, None) Timestamp(Millisecond, None) Timestamp(Microsecond, None) Timestamp(Nanosecond, None) Dictionary(Int32, Utf8) | ||
|
|
||
|
|
||
|
|
||
|
|
||
| ## Basic Types: Create a table | ||
|
|
||
| statement ok | ||
| create table foo as select | ||
| arrow_cast(1, 'Int8') as col_i8, | ||
| arrow_cast(1, 'Int16') as col_i16, | ||
| arrow_cast(1, 'Int32') as col_i32, | ||
| arrow_cast(1, 'Int64') as col_i64, | ||
| arrow_cast(1, 'UInt8') as col_u8, | ||
| arrow_cast(1, 'UInt16') as col_u16, | ||
| arrow_cast(1, 'UInt32') as col_u32, | ||
| arrow_cast(1, 'UInt64') as col_u64, | ||
| -- can't seem to cast to Float16 for some reason | ||
| -- arrow_cast(1.0, 'Float16') as col_f16, | ||
| arrow_cast(1.0, 'Float32') as col_f32, | ||
| arrow_cast(1.0, 'Float64') as col_f64 | ||
| ; | ||
|
|
||
| ## Ensure each column in the table has the expected type | ||
|
|
||
| query TTTTTTTTTT | ||
| SELECT | ||
| arrow_typeof(col_i8), | ||
| arrow_typeof(col_i16), | ||
| arrow_typeof(col_i32), | ||
| arrow_typeof(col_i64), | ||
| arrow_typeof(col_u8), | ||
| arrow_typeof(col_u16), | ||
| arrow_typeof(col_u32), | ||
| arrow_typeof(col_u64), | ||
| -- arrow_typeof(col_f16), | ||
| arrow_typeof(col_f32), | ||
| arrow_typeof(col_f64) | ||
| FROM foo; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: you can remove
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When I did that I got the error: The point of this test was that the values inserted into the |
||
| ---- | ||
| Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 | ||
|
|
||
|
|
||
| statement ok | ||
| drop table foo | ||
|
|
||
| ## Decimals: Create a table | ||
|
|
||
| statement ok | ||
| create table foo as select | ||
| arrow_cast(100, 'Decimal128(3,2)') as col_d128 | ||
| -- Can't make a Decimal256: | ||
| -- This feature is not implemented: Can't create a scalar from array of type "Decimal256(3, 2)" | ||
| --arrow_cast(100, 'Decimal256(3,2)') as col_d256 | ||
| ; | ||
|
|
||
|
|
||
| ## Ensure each column in the table has the expected type | ||
|
|
||
| query T | ||
| SELECT | ||
| arrow_typeof(col_d128) | ||
| -- arrow_typeof(col_d256), | ||
| FROM foo; | ||
| ---- | ||
| Decimal128(3, 2) | ||
|
|
||
|
|
||
| statement ok | ||
| drop table foo | ||
|
|
||
| ## Strings, Binary: Create a table | ||
|
|
||
| statement ok | ||
| create table foo as select | ||
| arrow_cast('foo', 'Utf8') as col_utf8, | ||
| arrow_cast('foo', 'LargeUtf8') as col_large_utf8, | ||
| arrow_cast('foo', 'Binary') as col_binary, | ||
| arrow_cast('foo', 'LargeBinary') as col_large_binary | ||
| ; | ||
|
|
||
| ## Ensure each column in the table has the expected type | ||
|
|
||
| query TTTT | ||
| SELECT | ||
| arrow_typeof(col_utf8), | ||
| arrow_typeof(col_large_utf8), | ||
| arrow_typeof(col_binary), | ||
| arrow_typeof(col_large_binary) | ||
| FROM foo; | ||
| ---- | ||
| Utf8 LargeUtf8 Binary LargeBinary | ||
|
|
||
|
|
||
| statement ok | ||
| drop table foo | ||
|
|
||
|
|
||
| ## Timestamps: Create a table | ||
|
|
||
| statement ok | ||
| create table foo as select | ||
| arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Second, None)') as col_ts_s, | ||
| arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Millisecond, None)') as col_ts_ms, | ||
| arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Microsecond, None)') as col_ts_us, | ||
| arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Nanosecond, None)') as col_ts_ns | ||
| ; | ||
|
|
||
| ## Ensure each column in the table has the expected type | ||
|
|
||
| query TTTT | ||
| SELECT | ||
| arrow_typeof(col_ts_s), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would be also great to have tests
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added in a4f2753 |
||
| arrow_typeof(col_ts_ms), | ||
| arrow_typeof(col_ts_us), | ||
| arrow_typeof(col_ts_ns) | ||
| FROM foo; | ||
| ---- | ||
| Timestamp(Second, None) Timestamp(Millisecond, None) Timestamp(Microsecond, None) Timestamp(Nanosecond, None) | ||
|
|
||
|
|
||
| statement ok | ||
| drop table foo | ||
|
|
||
| ## Dictionaries | ||
|
|
||
| statement ok | ||
| create table foo as select | ||
| arrow_cast('foo', 'Dictionary(Int32, Utf8)') as col_dict_int32_utf8, | ||
| arrow_cast('foo', 'Dictionary(Int8, LargeUtf8)') as col_dict_int8_largeutf8 | ||
| ; | ||
|
|
||
| ## Ensure each column in the table has the expected type | ||
|
|
||
| query TT | ||
| SELECT | ||
| arrow_typeof(col_dict_int32_utf8), | ||
| arrow_typeof(col_dict_int8_largeutf8) | ||
| FROM foo; | ||
| ---- | ||
| Dictionary(Int32, Utf8) Dictionary(Int8, LargeUtf8) | ||
|
|
||
|
|
||
| statement ok | ||
| drop table foo | ||
|
|
||
|
|
||
| ## Intervals: | ||
|
|
||
| # Casting an interval literal to Interval(MonthDayNano) is expected to fail with a conversion error | ||
| query error Cannot automatically convert Interval\(DayTime\) to Interval\(MonthDayNano\) | ||
| select arrow_cast(interval '30 minutes', 'Interval(MonthDayNano)'); | ||
|
|
||
| query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Interval\(MonthDayNano\) | ||
| select arrow_cast('30 minutes', 'Interval(MonthDayNano)'); | ||
|
|
||
|
|
||
| ## Duration | ||
|
|
||
| # Casting an interval literal to Duration(Second) is expected to fail with a conversion error | ||
| query error Cannot automatically convert Interval\(DayTime\) to Duration\(Second\) | ||
| select arrow_cast(interval '30 minutes', 'Duration(Second)'); | ||
|
|
||
| query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Duration\(Second\) | ||
| select arrow_cast('30 minutes', 'Duration(Second)'); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just uncommented these tests -- I am not sure why they were commented out 🤷