Skip to content
267 changes: 239 additions & 28 deletions datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,242 @@ SELECT arrow_typeof(1.0::float)
Float32

# arrow_typeof_decimal
# query T
# SELECT arrow_typeof(1::Decimal)
# ----
# Decimal128(38, 10)

# # arrow_typeof_timestamp
# query T
# SELECT arrow_typeof(now()::timestamp)
# ----
# Timestamp(Nanosecond, None)

# # arrow_typeof_timestamp_utc
# query T
# SELECT arrow_typeof(now())
# ----
# Timestamp(Nanosecond, Some(\"+00:00\"))

# # arrow_typeof_timestamp_date32(
# query T
# SELECT arrow_typeof(now()::date)
# ----
# Date32

# # arrow_typeof_utf8
# query T
# SELECT arrow_typeof('1')
# ----
# Utf8
query T
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just uncommented these tests -- I am not sure why they were commented out in the first place 🤷

SELECT arrow_typeof(1::Decimal)
----
Decimal128(38, 10)

# arrow_typeof_timestamp: casting now() to timestamp reports a nanosecond timestamp with no time zone
query T
SELECT arrow_typeof(now()::timestamp)
----
Timestamp(Nanosecond, None)

# arrow_typeof_timestamp_utc: now() without a cast reports a "+00:00" time zone
query T
SELECT arrow_typeof(now())
----
Timestamp(Nanosecond, Some("+00:00"))

# arrow_typeof_date32: casting now() to date reports Date32
query T
SELECT arrow_typeof(now()::date)
----
Date32

# arrow_typeof_utf8: a string literal reports Utf8
query T
SELECT arrow_typeof('1')
----
Utf8


#### arrow_cast (in some ways the opposite of arrow_typeof):
#### the second argument names the Arrow type to cast the first argument to

# Basic test: the string '1' cast to Int16 is returned as the integer 1

query I
SELECT arrow_cast('1', 'Int16')
----
1

# Basic error tests

# Calling arrow_cast with a single argument fails during planning
query error Error during planning: arrow_cast needs 2 arguments, 1 provided
SELECT arrow_cast('1')

# The second argument must be a constant string; an integer literal fails during planning
query error Error during planning: arrow_cast requires its second argument to be a constant string, got Int64\(43\)
SELECT arrow_cast('1', 43)

# A type string that cannot be recognized produces an "unrecognized word" error
query error Error unrecognized word: unknown
SELECT arrow_cast('1', 'unknown')

# Round trip tests: arrow_typeof(arrow_cast(<value>, '<type>')) is expected to report '<type>'
query TTTTTTTTTTTTTTTTTTT
SELECT
  arrow_typeof(arrow_cast(1, 'Int8')) AS col_i8,
  arrow_typeof(arrow_cast(1, 'Int16')) AS col_i16,
  arrow_typeof(arrow_cast(1, 'Int32')) AS col_i32,
  arrow_typeof(arrow_cast(1, 'Int64')) AS col_i64,
  arrow_typeof(arrow_cast(1, 'UInt8')) AS col_u8,
  arrow_typeof(arrow_cast(1, 'UInt16')) AS col_u16,
  arrow_typeof(arrow_cast(1, 'UInt32')) AS col_u32,
  arrow_typeof(arrow_cast(1, 'UInt64')) AS col_u64,
  -- Float16 is left out: casting to it did not work when this test was written
  -- arrow_typeof(arrow_cast(1, 'Float16')) AS col_f16,
  arrow_typeof(arrow_cast(1, 'Float32')) AS col_f32,
  arrow_typeof(arrow_cast(1, 'Float64')) AS col_f64,
  arrow_typeof(arrow_cast('foo', 'Utf8')) AS col_utf8,
  arrow_typeof(arrow_cast('foo', 'LargeUtf8')) AS col_large_utf8,
  arrow_typeof(arrow_cast('foo', 'Binary')) AS col_binary,
  arrow_typeof(arrow_cast('foo', 'LargeBinary')) AS col_large_binary,
  arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Second, None)')) AS col_ts_s,
  arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Millisecond, None)')) AS col_ts_ms,
  arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Microsecond, None)')) AS col_ts_us,
  arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Nanosecond, None)')) AS col_ts_ns,
  arrow_typeof(arrow_cast('foo', 'Dictionary(Int32, Utf8)')) AS col_dict
----
Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 Utf8 LargeUtf8 Binary LargeBinary Timestamp(Second, None) Timestamp(Millisecond, None) Timestamp(Microsecond, None) Timestamp(Nanosecond, None) Dictionary(Int32, Utf8)




## Basic Types: create table foo with one column per integer / float Arrow type

statement ok
CREATE TABLE foo AS SELECT
  arrow_cast(1, 'Int8') AS col_i8,
  arrow_cast(1, 'Int16') AS col_i16,
  arrow_cast(1, 'Int32') AS col_i32,
  arrow_cast(1, 'Int64') AS col_i64,
  arrow_cast(1, 'UInt8') AS col_u8,
  arrow_cast(1, 'UInt16') AS col_u16,
  arrow_cast(1, 'UInt32') AS col_u32,
  arrow_cast(1, 'UInt64') AS col_u64,
  -- Float16 is left out: casting to it did not work when this test was written
  -- arrow_cast(1.0, 'Float16') AS col_f16,
  arrow_cast(1.0, 'Float32') AS col_f32,
  arrow_cast(1.0, 'Float64') AS col_f64
;

## Ensure each column in the table has the expected type (the query reads from foo so that the types of the stored columns are checked)

query TTTTTTTTTT
SELECT
arrow_typeof(col_i8),
arrow_typeof(col_i16),
arrow_typeof(col_i32),
arrow_typeof(col_i64),
arrow_typeof(col_u8),
arrow_typeof(col_u16),
arrow_typeof(col_u32),
arrow_typeof(col_u64),
-- arrow_typeof(col_f16),
arrow_typeof(col_f32),
arrow_typeof(col_f64)
FROM foo;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: you can remove `FROM foo`

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I did that I got the error:

  arrow_typeof(col_f64);"
Error: query failed: DataFusion error: Schema error: No field named 'col_i8'.
[SQL] SELECT
  arrow_typeof(col_i8),
  arrow_typeof(col_i16),
  arrow_typeof(col_i32),
  arrow_typeof(col_i64),
  arrow_typeof(col_u8),
  arrow_typeof(col_u16),
  arrow_typeof(col_u32),
  arrow_typeof(col_u64),
  arrow_typeof(col_f32),
  arrow_typeof(col_f64);

The point of this test is to verify that the values inserted into the foo table do indeed have the proper types. I will add some comments to make this clearer.

----
Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64


# Remove foo so that the name can be reused by the table created below
statement ok
DROP TABLE foo

## Decimals: create table foo with a Decimal128 column

statement ok
CREATE TABLE foo AS SELECT
  arrow_cast(100, 'Decimal128(3,2)') AS col_d128
  -- A Decimal256 column can't be created; the attempt fails with:
  -- This feature is not implemented: Can't create a scalar from array of type "Decimal256(3, 2)"
  -- arrow_cast(100, 'Decimal256(3,2)') AS col_d256
;


## Check the Arrow type reported for the decimal column stored in foo

query T
SELECT
  arrow_typeof(col_d128)
  -- arrow_typeof(col_d256) is disabled together with the col_d256 column
FROM foo;
----
Decimal128(3, 2)


statement ok
DROP TABLE foo

## Strings, Binary: create table foo with one column per string / binary Arrow type

statement ok
CREATE TABLE foo AS SELECT
  arrow_cast('foo', 'Utf8') AS col_utf8,
  arrow_cast('foo', 'LargeUtf8') AS col_large_utf8,
  arrow_cast('foo', 'Binary') AS col_binary,
  arrow_cast('foo', 'LargeBinary') AS col_large_binary
;

## Check the Arrow type reported for each string / binary column stored in foo

query TTTT
SELECT
  arrow_typeof(col_utf8),
  arrow_typeof(col_large_utf8),
  arrow_typeof(col_binary),
  arrow_typeof(col_large_binary)
FROM foo;
----
Utf8 LargeUtf8 Binary LargeBinary


statement ok
DROP TABLE foo


## Timestamps: create table foo with one column per timestamp unit (no time zone)

statement ok
CREATE TABLE foo AS SELECT
  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Second, None)') AS col_ts_s,
  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Millisecond, None)') AS col_ts_ms,
  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Microsecond, None)') AS col_ts_us,
  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Nanosecond, None)') AS col_ts_ns
;

## Ensure each column in the table has the expected type

query TTTT
SELECT
arrow_typeof(col_ts_s),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would also be great to have tests of the form `select arrow_typeof(arrow_cast(...))`

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in a4f2753

arrow_typeof(col_ts_ms),
arrow_typeof(col_ts_us),
arrow_typeof(col_ts_ns)
FROM foo;
----
Timestamp(Second, None) Timestamp(Millisecond, None) Timestamp(Microsecond, None) Timestamp(Nanosecond, None)


# Remove foo so that the name can be reused by the table created below
statement ok
DROP TABLE foo

## Dictionaries: create table foo with two dictionary-encoded string columns

statement ok
CREATE TABLE foo AS SELECT
  arrow_cast('foo', 'Dictionary(Int32, Utf8)') AS col_dict_int32_utf8,
  arrow_cast('foo', 'Dictionary(Int8, LargeUtf8)') AS col_dict_int8_largeutf8
;

## Check the Arrow type reported for each dictionary column stored in foo

query TT
SELECT
  arrow_typeof(col_dict_int32_utf8),
  arrow_typeof(col_dict_int8_largeutf8)
FROM foo;
----
Dictionary(Int32, Utf8) Dictionary(Int8, LargeUtf8)


statement ok
DROP TABLE foo


## Intervals:

# Casting an interval literal (reported as Interval(DayTime) in the error) to
# Interval(MonthDayNano) currently fails with a conversion error
query error Cannot automatically convert Interval\(DayTime\) to Interval\(MonthDayNano\)
select arrow_cast(interval '30 minutes', 'Interval(MonthDayNano)');

# Casting a string to Interval(MonthDayNano) fails during planning
query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Interval\(MonthDayNano\)
select arrow_cast('30 minutes', 'Interval(MonthDayNano)');


## Duration

# Casting an interval literal (reported as Interval(DayTime) in the error) to
# Duration(Second) currently fails with a conversion error
query error Cannot automatically convert Interval\(DayTime\) to Duration\(Second\)
select arrow_cast(interval '30 minutes', 'Duration(Second)');

# Casting a string to Duration(Second) fails during planning
query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Duration\(Second\)
select arrow_cast('30 minutes', 'Duration(Second)');
5 changes: 4 additions & 1 deletion datafusion/proto/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2120,8 +2120,11 @@ mod roundtrip_tests {
DataType::Float16,
DataType::Float32,
DataType::Float64,
// Add more timestamp tests
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
DataType::Date32,
DataType::Date64,
DataType::Time32(TimeUnit::Second),
Expand Down
Loading